diff --git a/cpp/src/join/filtered_join.cu b/cpp/src/join/filtered_join.cu index 6dbb796ff2f..7a6cb2fbdfc 100644 --- a/cpp/src/join/filtered_join.cu +++ b/cpp/src/join/filtered_join.cu @@ -28,6 +28,7 @@ #include #include #include +#include namespace cudf { namespace detail { @@ -218,6 +219,7 @@ filtered_join::filtered_join(cudf::table_view const& build, rmm::mr::polymorphic_allocator{}, stream.value()} { + if (_build.num_rows() == 0) return; _bucket_storage.initialize(empty_sentinel_key, stream); } @@ -228,6 +230,7 @@ distinct_filtered_join::distinct_filtered_join(cudf::table_view const& build, : filtered_join(build, compare_nulls, load_factor, stream) { cudf::scoped_range range{"distinct_filtered_join::distinct_filtered_join"}; + if (_build.num_rows() == 0) return; // Any mismatch in nullate between probe and build row operators results in UB. Ideally, nullate // should be determined by the logical OR of probe nulls and build nulls. However, since we do not // know if the probe has nulls apriori, we set nullate::DYNAMIC{true} (in the case of primitive @@ -334,12 +337,28 @@ std::unique_ptr> distinct_filtered_join::se std::unique_ptr> distinct_filtered_join::semi_join( cudf::table_view const& probe, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { + // Early return for empty build or probe table + if (_build.num_rows() == 0 || probe.num_rows() == 0) { + return std::make_unique>(0, stream, mr); + } + return semi_anti_join(probe, join_kind::LEFT_SEMI_JOIN, stream, mr); } std::unique_ptr> distinct_filtered_join::anti_join( cudf::table_view const& probe, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { + // Early return for empty probe table + if (probe.num_rows() == 0) { + return std::make_unique>(0, stream, mr); + } + if (_build.num_rows() == 0) { + auto result = + std::make_unique>(probe.num_rows(), stream, mr); + thrust::sequence(rmm::exec_policy_nosync(stream), result->begin(), result->end()); + return result; + } + return semi_anti_join(probe, join_kind::LEFT_ANTI_JOIN, stream, mr); } diff --git a/cpp/tests/join/semi_anti_join_tests.cpp b/cpp/tests/join/semi_anti_join_tests.cpp index da4c81c7d00..34ce8bb77a2 100644 --- a/cpp/tests/join/semi_anti_join_tests.cpp +++ b/cpp/tests/join/semi_anti_join_tests.cpp @@ -322,3 +322,61 @@ TEST_F(JoinTest, AntiJoinWithStructsAndNullsOnOneSide) auto expected = cudf::gather(left, expected_indices_col); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); } + +TEST_F(JoinTest, AntiJoinEmptyTables) +{ + cudf::table empty_build_table{}; + cudf::table empty_probe_table{}; + column_wrapper col{0, 1, 2}; + auto nonempty_table = cudf::table_view{{col}}; + // Empty build and probe tables + { + auto result = left_anti_join(empty_probe_table, empty_build_table, {}, {}); + auto expected_indices_col = column_wrapper{}; + auto expected = cudf::gather(empty_probe_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } + // Empty build table + { + auto result = left_anti_join(nonempty_table, empty_build_table, {0}, {}); + auto expected_indices_col = column_wrapper{0, 1, 2}; + auto expected = cudf::gather(nonempty_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } + // Empty probe table + { + auto result = left_anti_join(empty_probe_table, nonempty_table, {}, {0}); + auto expected_indices_col = column_wrapper{}; + auto expected = cudf::gather(empty_probe_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } +} + +TEST_F(JoinTest, SemiJoinEmptyTables) +{ + cudf::table empty_build_table{}; + cudf::table empty_probe_table{}; + column_wrapper col{0, 1, 2}; + auto nonempty_table = cudf::table_view{{col}}; + // Empty build and probe tables + { + auto result = left_semi_join(empty_probe_table, empty_build_table, {}, {}); + auto expected_indices_col = column_wrapper{}; + auto expected = cudf::gather(empty_probe_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } + // Empty build table + { + auto result = left_semi_join(nonempty_table, empty_build_table, {0}, {}); + auto expected_indices_col = column_wrapper{}; + auto expected = cudf::gather(empty_probe_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } + // Empty probe table + { + auto result = left_semi_join(empty_probe_table, nonempty_table, {}, {0}); + auto expected_indices_col = column_wrapper{}; + auto expected = cudf::gather(empty_probe_table, expected_indices_col); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected, *result); + } +}