Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions cpp/src/join/filtered_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cuco/static_set_ref.cuh>
#include <cuda/std/iterator>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sequence.h>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -218,6 +219,7 @@ filtered_join::filtered_join(cudf::table_view const& build,
rmm::mr::polymorphic_allocator<char>{},
stream.value()}
{
if (_build.num_rows() == 0) return;
_bucket_storage.initialize(empty_sentinel_key, stream);
}

Expand All @@ -228,6 +230,7 @@ distinct_filtered_join::distinct_filtered_join(cudf::table_view const& build,
: filtered_join(build, compare_nulls, load_factor, stream)
{
cudf::scoped_range range{"distinct_filtered_join::distinct_filtered_join"};
if (_build.num_rows() == 0) return;
// Any mismatch in nullate between probe and build row operators results in UB. Ideally, nullate
// should be determined by the logical OR of probe nulls and build nulls. However, since we do not
// know if the probe has nulls a priori, we set nullate::DYNAMIC{true} (in the case of primitive
Expand Down Expand Up @@ -334,12 +337,28 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> distinct_filtered_join::se
// Left semi join of `probe` against `_build`.
//
// Returns a zero-length index vector when either `_build` or `probe` has no rows; otherwise
// forwards to `semi_anti_join` with `join_kind::LEFT_SEMI_JOIN`. `stream` and `mr` are passed
// through unchanged to whichever path produces the result.
std::unique_ptr<rmm::device_uvector<cudf::size_type>> distinct_filtered_join::semi_join(
  cudf::table_view const& probe, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
{
  // Only run the actual join when both sides contain rows.
  if (_build.num_rows() != 0 && probe.num_rows() != 0) {
    return semi_anti_join(probe, join_kind::LEFT_SEMI_JOIN, stream, mr);
  }

  // Empty build or empty probe: no indices to report.
  using index_vector = rmm::device_uvector<cudf::size_type>;
  return std::make_unique<index_vector>(0, stream, mr);
}

// Left anti join of `probe` against `_build`.
//
// - `probe` has no rows: returns a zero-length index vector.
// - `_build` has no rows: returns a vector of `probe.num_rows()` indices filled by
//   `thrust::sequence`, i.e. every probe row index.
// - otherwise: forwards to `semi_anti_join` with `join_kind::LEFT_ANTI_JOIN`.
//
// `stream` and `mr` are passed through unchanged to whichever path produces the result.
std::unique_ptr<rmm::device_uvector<cudf::size_type>> distinct_filtered_join::anti_join(
  cudf::table_view const& probe, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
{
  using index_vector = rmm::device_uvector<cudf::size_type>;

  auto const probe_row_count = probe.num_rows();

  // Nothing to probe with, so there is nothing to return.
  if (probe_row_count == 0) { return std::make_unique<index_vector>(0, stream, mr); }

  // Regular case: both tables contain rows.
  if (_build.num_rows() != 0) {
    return semi_anti_join(probe, join_kind::LEFT_ANTI_JOIN, stream, mr);
  }

  // Empty build table: emit one index per probe row.
  auto every_probe_row = std::make_unique<index_vector>(probe_row_count, stream, mr);
  thrust::sequence(
    rmm::exec_policy_nosync(stream), every_probe_row->begin(), every_probe_row->end());
  return every_probe_row;
}

Expand Down
58 changes: 58 additions & 0 deletions cpp/tests/join/semi_anti_join_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,61 @@ TEST_F(JoinTest, AntiJoinWithStructsAndNullsOnOneSide)
auto expected = cudf::gather(left, expected_indices_col);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result);
}

// Exercises left_anti_join when the build side, the probe side, or both have no rows.
TEST_F(JoinTest, AntiJoinEmptyTables)
{
  cudf::table build_without_rows{};
  cudf::table probe_without_rows{};
  column_wrapper<int32_t> three_values{0, 1, 2};
  auto populated_table = cudf::table_view{{three_values}};

  // Case 1: neither side has rows -> result matches an empty gather of the empty probe table.
  {
    column_wrapper<cudf::size_type> no_indices{};
    auto joined = left_anti_join(probe_without_rows, build_without_rows, {}, {});
    auto wanted = cudf::gather(probe_without_rows, no_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }

  // Case 2: only the build side is empty -> every probe row (indices 0, 1, 2) is expected.
  {
    column_wrapper<cudf::size_type> all_indices{0, 1, 2};
    auto joined = left_anti_join(populated_table, build_without_rows, {0}, {});
    auto wanted = cudf::gather(populated_table, all_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }

  // Case 3: only the probe side is empty -> result matches an empty gather of the empty probe
  // table.
  {
    column_wrapper<cudf::size_type> no_indices{};
    auto joined = left_anti_join(probe_without_rows, populated_table, {}, {0});
    auto wanted = cudf::gather(probe_without_rows, no_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }
}

// Exercises left_semi_join when the build side, the probe side, or both have no rows.
// Every case expects the result to match an empty gather of the empty probe table.
TEST_F(JoinTest, SemiJoinEmptyTables)
{
  cudf::table build_without_rows{};
  cudf::table probe_without_rows{};
  column_wrapper<int32_t> three_values{0, 1, 2};
  auto populated_table = cudf::table_view{{three_values}};

  // Case 1: neither side has rows.
  {
    column_wrapper<cudf::size_type> no_indices{};
    auto joined = left_semi_join(probe_without_rows, build_without_rows, {}, {});
    auto wanted = cudf::gather(probe_without_rows, no_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }

  // Case 2: only the build side is empty.
  // NOTE(review): the expected table is gathered from the empty probe table even though the
  // probe input here is the populated table -- confirm this is intended; the corresponding
  // anti-join case gathers its expectation from the populated table.
  {
    column_wrapper<cudf::size_type> no_indices{};
    auto joined = left_semi_join(populated_table, build_without_rows, {0}, {});
    auto wanted = cudf::gather(probe_without_rows, no_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }

  // Case 3: only the probe side is empty.
  {
    column_wrapper<cudf::size_type> no_indices{};
    auto joined = left_semi_join(probe_without_rows, populated_table, {}, {0});
    auto wanted = cudf::gather(probe_without_rows, no_indices);
    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*wanted, *joined);
  }
}