From 8c5f2ee0979a1109c7b72128dcadfa5e7fc89f78 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 19 Jan 2023 17:21:38 -0700 Subject: [PATCH 1/2] ENH: remainder() to API standard * draft the `remainder()` ufunc and turn its matching array API standad test on in the CI; many of the other changes reflect those in recent binary ufunc PRs * note that the implementation of the remainder is actually rather non-trivial here; for reference, the reviewer may want to check the following, and perhaps also the NumPy inner loop implementation: - https://stackoverflow.com/a/1907585/2942522 - https://data-apis.org/array-api/latest/API_specification/generated/array_api.remainder.html --- .github/workflows/array_api.yml | 4 +- .github/workflows/main_ci.yml | 2 +- pykokkos/__init__.py | 1 + pykokkos/lib/ufunc_workunits.py | 820 ++++++++++++++++++++++++++++++++ pykokkos/lib/ufuncs.py | 96 ++++ 5 files changed, 920 insertions(+), 3 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 2d656c21..082e1c12 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -30,7 +30,7 @@ jobs: cd /tmp git clone https://github.com/kokkos/pykokkos-base.git cd pykokkos-base - python setup.py install -- -DENABLE_LAYOUTS=ON -DENABLE_MEMORY_TRAITS=OFF + python setup.py install -- -DENABLE_LAYOUTS=ON -DENABLE_MEMORY_TRAITS=OFF -DENABLE_VIEW_RANKS=5 - name: Install pykokkos run: | python -m pip install . @@ -49,4 +49,4 @@ jobs: # for hypothesis-driven test case generation pytest $GITHUB_WORKSPACE/pre_compile_tools/pre_compile_ufuncs.py -s # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_operators_and_elementwise_functions.py::test_log10 array_api_tests/test_operators_and_elementwise_functions.py::test_sqrt array_api_tests/test_operators_and_elementwise_functions.py::test_isfinite array_api_tests/test_operators_and_elementwise_functions.py::test_log2 array_api_tests/test_operators_and_elementwise_functions.py::test_log1p array_api_tests/test_operators_and_elementwise_functions.py::test_isinf array_api_tests/test_operators_and_elementwise_functions.py::test_log array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_square array_api_tests/test_operators_and_elementwise_functions.py::test_cos array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like array_api_tests/test_data_type_functions.py::test_result_type array_api_tests/test_operators_and_elementwise_functions.py::test_log10 array_api_tests/test_operators_and_elementwise_functions.py::test_sqrt array_api_tests/test_operators_and_elementwise_functions.py::test_isfinite array_api_tests/test_operators_and_elementwise_functions.py::test_log2 array_api_tests/test_operators_and_elementwise_functions.py::test_log1p array_api_tests/test_operators_and_elementwise_functions.py::test_isinf array_api_tests/test_operators_and_elementwise_functions.py::test_log array_api_tests/test_array_object.py::test_scalar_casting array_api_tests/test_operators_and_elementwise_functions.py::test_sign array_api_tests/test_operators_and_elementwise_functions.py::test_square array_api_tests/test_operators_and_elementwise_functions.py::test_cos array_api_tests/test_operators_and_elementwise_functions.py::test_round array_api_tests/test_operators_and_elementwise_functions.py::test_trunc array_api_tests/test_operators_and_elementwise_functions.py::test_ceil array_api_tests/test_operators_and_elementwise_functions.py::test_floor "array_api_tests/test_operators_and_elementwise_functions.py::test_remainder[remainder(x1, x2)]" diff --git a/.github/workflows/main_ci.yml b/.github/workflows/main_ci.yml index 000af412..35326b0c 100644 --- a/.github/workflows/main_ci.yml +++ b/.github/workflows/main_ci.yml @@ -30,7 +30,7 @@ jobs: cd /tmp git clone https://github.com/kokkos/pykokkos-base.git cd pykokkos-base - python setup.py install -- -DENABLE_LAYOUTS=ON -DENABLE_MEMORY_TRAITS=OFF + python setup.py install -- -DENABLE_LAYOUTS=ON -DENABLE_MEMORY_TRAITS=OFF -DENABLE_VIEW_RANKS=5 - name: Install pykokkos run: | python -m pip install . diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 3b83a079..3ed1f9f5 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -35,6 +35,7 @@ true_divide, logaddexp2, floor_divide, + remainder, sin, cos, tan, diff --git a/pykokkos/lib/ufunc_workunits.py b/pykokkos/lib/ufunc_workunits.py index 06f12530..08401441 100644 --- a/pykokkos/lib/ufunc_workunits.py +++ b/pykokkos/lib/ufunc_workunits.py @@ -1,6 +1,825 @@ import pykokkos as pk +@pk.workunit +def remainder_impl_5d_int8(tid: int, + view1: pk.View5D[pk.int8], + view2: pk.View5D[pk.int8], + out: pk.View5D[pk.int8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_float(tid: int, + view1: pk.View5D[pk.float], + view2: pk.View5D[pk.float], + out: pk.View5D[pk.float]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = fmod(fmod(view1[tid][i][j][k][l], view2[tid][i][j][k][l]) + view2[tid][i][j][k][l], view2[tid][i][j][k][l]) + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_double(tid: int, + view1: pk.View5D[pk.double], + view2: pk.View5D[pk.double], + out: pk.View5D[pk.double]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + out[tid][i][j][k][l] = fmod(fmod(view1[tid][i][j][k][l], view2[tid][i][j][k][l]) + view2[tid][i][j][k][l], view2[tid][i][j][k][l]) + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_int16(tid: int, + view1: pk.View5D[pk.int16], + view2: pk.View5D[pk.int16], + out: pk.View5D[pk.int16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_int32(tid: int, + view1: pk.View5D[pk.int32], + view2: pk.View5D[pk.int32], + out: pk.View5D[pk.int32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_int64(tid: int, + view1: pk.View5D[pk.int64], + view2: pk.View5D[pk.int64], + out: pk.View5D[pk.int64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_uint8(tid: int, + view1: pk.View5D[pk.uint8], + view2: pk.View5D[pk.uint8], + out: pk.View5D[pk.uint8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_uint16(tid: int, + view1: pk.View5D[pk.uint16], + view2: pk.View5D[pk.uint16], + out: pk.View5D[pk.uint16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_uint32(tid: int, + view1: pk.View5D[pk.uint32], + view2: pk.View5D[pk.uint32], + out: pk.View5D[pk.uint32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + +@pk.workunit +def remainder_impl_5d_uint64(tid: int, + view1: pk.View5D[pk.uint64], + view2: pk.View5D[pk.uint64], + out: pk.View5D[pk.uint64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + for l in range(view1.extent(4)): + if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + out[tid][i][j][k][l] = 0 + else: + out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] + if out[tid][i][j][k][l] < 0 and view2[tid][i][j][k][l] > 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + elif out[tid][i][j][k][l] > 0 and view2[tid][i][j][k][l] < 0: + out[tid][i][j][k][l] = -1 * out[tid][i][j][k][l] + + + +@pk.workunit +def remainder_impl_4d_uint8(tid: int, + view1: pk.View4D[pk.uint8], + view2: pk.View4D[pk.uint8], + out: pk.View4D[pk.uint8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_float(tid: int, + view1: pk.View4D[pk.float], + view2: pk.View4D[pk.float], + out: pk.View4D[pk.float]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = fmod(fmod(view1[tid][i][j][k], view2[tid][i][j][k]) + view2[tid][i][j][k], view2[tid][i][j][k]) + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_double(tid: int, + view1: pk.View4D[pk.double], + view2: pk.View4D[pk.double], + out: pk.View4D[pk.double]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + out[tid][i][j][k] = fmod(fmod(view1[tid][i][j][k], view2[tid][i][j][k]) + view2[tid][i][j][k], view2[tid][i][j][k]) + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_uint16(tid: int, + view1: pk.View4D[pk.uint16], + view2: pk.View4D[pk.uint16], + out: pk.View4D[pk.uint16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_uint32(tid: int, + view1: pk.View4D[pk.uint32], + view2: pk.View4D[pk.uint32], + out: pk.View4D[pk.uint32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_uint64(tid: int, + view1: pk.View4D[pk.uint64], + view2: pk.View4D[pk.uint64], + out: pk.View4D[pk.uint64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_3d_uint8(tid: int, + view1: pk.View3D[pk.uint8], + view2: pk.View3D[pk.uint8], + out: pk.View3D[pk.uint8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_uint16(tid: int, + view1: pk.View3D[pk.uint16], + view2: pk.View3D[pk.uint16], + out: pk.View3D[pk.uint16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_uint32(tid: int, + view1: pk.View3D[pk.uint32], + view2: pk.View3D[pk.uint32], + out: pk.View3D[pk.uint32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_uint64(tid: int, + view1: pk.View3D[pk.uint64], + view2: pk.View3D[pk.uint64], + out: pk.View3D[pk.uint64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_float(tid: int, + view1: pk.View3D[pk.float], + view2: pk.View3D[pk.float], + out: pk.View3D[pk.float]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = fmod(fmod(view1[tid][i][j], view2[tid][i][j]) + view2[tid][i][j], view2[tid][i][j]) + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_double(tid: int, + view1: pk.View3D[pk.double], + view2: pk.View3D[pk.double], + out: pk.View3D[pk.double]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + out[tid][i][j] = fmod(fmod(view1[tid][i][j], view2[tid][i][j]) + view2[tid][i][j], view2[tid][i][j]) + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_2d_uint8(tid: int, + view1: pk.View2D[pk.uint8], + view2: pk.View2D[pk.uint8], + out: pk.View2D[pk.uint8]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_uint16(tid: int, + view1: pk.View2D[pk.uint16], + view2: pk.View2D[pk.uint16], + out: pk.View2D[pk.uint16]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_uint32(tid: int, + view1: pk.View2D[pk.uint32], + view2: pk.View2D[pk.uint32], + out: pk.View2D[pk.uint32]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_uint64(tid: int, + view1: pk.View2D[pk.uint64], + view2: pk.View2D[pk.uint64], + out: pk.View2D[pk.uint64]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_float(tid: int, + view1: pk.View2D[pk.float], + view2: pk.View2D[pk.float], + out: pk.View2D[pk.float]): + for i in range(view1.extent(1)): + out[tid][i] = fmod(fmod(view1[tid][i], view2[tid][i]) + view2[tid][i], view2[tid][i]) + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_double(tid: int, + view1: pk.View2D[pk.double], + view2: pk.View2D[pk.double], + out: pk.View2D[pk.double]): + for i in range(view1.extent(1)): + out[tid][i] = fmod(fmod(view1[tid][i], view2[tid][i]) + view2[tid][i], view2[tid][i]) + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_1d_uint8(tid: int, + view1: pk.View1D[pk.uint8], + view2: pk.View1D[pk.uint8], + out: pk.View1D[pk.uint8]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = view1[tid] % view2[tid] + + +@pk.workunit +def remainder_impl_1d_float(tid: int, + view1: pk.View1D[pk.float], + view2: pk.View1D[pk.float], + out: pk.View1D[pk.float]): + out[tid] = fmod(fmod(view1[tid], view2[tid]) + view2[tid], view2[tid]) + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_double(tid: int, + view1: pk.View1D[pk.double], + view2: pk.View1D[pk.double], + out: pk.View1D[pk.double]): + out[tid] = fmod(fmod(view1[tid], view2[tid]) + view2[tid], view2[tid]) + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_int8(tid: int, + view1: pk.View1D[pk.int8], + view2: pk.View1D[pk.int8], + out: pk.View1D[pk.int8]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = ((view1[tid] % view2[tid]) + view2[tid]) % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + + +@pk.workunit +def remainder_impl_1d_int16(tid: int, + view1: pk.View1D[pk.int16], + view2: pk.View1D[pk.int16], + out: pk.View1D[pk.int16]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = ((view1[tid] % view2[tid]) + view2[tid]) % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_int32(tid: int, + view1: pk.View1D[pk.int32], + view2: pk.View1D[pk.int32], + out: pk.View1D[pk.int32]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = ((view1[tid] % view2[tid]) + view2[tid]) % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_int64(tid: int, + view1: pk.View1D[pk.int64], + view2: pk.View1D[pk.int64], + out: pk.View1D[pk.int64]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = ((view1[tid] % view2[tid]) + view2[tid]) % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + + +@pk.workunit +def remainder_impl_1d_uint16(tid: int, + view1: pk.View1D[pk.uint16], + view2: pk.View1D[pk.uint16], + out: pk.View1D[pk.uint16]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = view1[tid] % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_uint32(tid: int, + view1: pk.View1D[pk.uint32], + view2: pk.View1D[pk.uint32], + out: pk.View1D[pk.uint32]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = view1[tid] % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_1d_uint64(tid: int, + view1: pk.View1D[pk.uint64], + view2: pk.View1D[pk.uint64], + out: pk.View1D[pk.uint64]): + if view1[tid] == view2[tid] or view2[tid] == 0: + out[tid] = 0 + else: + out[tid] = view1[tid] % view2[tid] + if out[tid] < 0 and view2[tid] > 0: + out[tid] = -1 * out[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] + + +@pk.workunit +def remainder_impl_2d_int8(tid: int, + view1: pk.View2D[pk.int8], + view2: pk.View2D[pk.int8], + out: pk.View2D[pk.int8]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_int16(tid: int, + view1: pk.View2D[pk.int16], + view2: pk.View2D[pk.int16], + out: pk.View2D[pk.int16]): + for i in range(view1.extent(1)): + if view1[tid][i] == view2[tid][i] or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_int32(tid: int, + view1: pk.View2D[pk.int32], + view2: pk.View2D[pk.int32], + out: pk.View2D[pk.int32]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_2d_int64(tid: int, + view1: pk.View2D[pk.int64], + view2: pk.View2D[pk.int64], + out: pk.View2D[pk.int64]): + for i in range(view1.extent(1)): + if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: + out[tid][i] = 0 + else: + out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = -1 * out[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] + + +@pk.workunit +def remainder_impl_3d_int8(tid: int, + view1: pk.View3D[pk.int8], + view2: pk.View3D[pk.int8], + out: pk.View3D[pk.int8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_int16(tid: int, + view1: pk.View3D[pk.int16], + view2: pk.View3D[pk.int16], + out: pk.View3D[pk.int16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_int32(tid: int, + view1: pk.View3D[pk.int32], + view2: pk.View3D[pk.int32], + out: pk.View3D[pk.int32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_3d_int64(tid: int, + view1: pk.View3D[pk.int64], + view2: pk.View3D[pk.int64], + out: pk.View3D[pk.int64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + out[tid][i][j] = 0 + else: + out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] + if out[tid][i][j] < 0 and view2[tid][i][j] > 0: + out[tid][i][j] = -1 * out[tid][i][j] + elif out[tid][i][j] > 0 and view2[tid][i][j] < 0: + out[tid][i][j] = -1 * out[tid][i][j] + + +@pk.workunit +def remainder_impl_4d_int8(tid: int, + view1: pk.View4D[pk.int8], + view2: pk.View4D[pk.int8], + out: pk.View4D[pk.int8]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_int16(tid: int, + view1: pk.View4D[pk.int16], + view2: pk.View4D[pk.int16], + out: pk.View4D[pk.int16]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_int32(tid: int, + view1: pk.View4D[pk.int32], + view2: pk.View4D[pk.int32], + out: pk.View4D[pk.int32]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + +@pk.workunit +def remainder_impl_4d_int64(tid: int, + view1: pk.View4D[pk.int64], + view2: pk.View4D[pk.int64], + out: pk.View4D[pk.int64]): + for i in range(view1.extent(1)): + for j in range(view1.extent(2)): + for k in range(view1.extent(3)): + if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + out[tid][i][j][k] = 0 + else: + out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] + if out[tid][i][j][k] < 0 and view2[tid][i][j][k] > 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + elif out[tid][i][j][k] > 0 and view2[tid][i][j][k] < 0: + out[tid][i][j][k] = -1 * out[tid][i][j][k] + + @pk.workunit def floor_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.double]): out[tid] = floor(view[tid]) @@ -137,6 +956,7 @@ def round_impl_3d_float(tid: int, view: pk.View3D[pk.float], out: pk.View3D[pk.f for j in range(view.extent(2)): out[tid][i][j] = round(view[tid][i][j]) + @pk.workunit def isfinite_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8]): out[tid] = isfinite(view[tid]) diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 13be86e2..889740d5 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -9,6 +9,59 @@ kernel_dict = dict(getmembers(ufunc_workunits, isfunction)) +def _broadcast_views(view1, view2): + # support broadcasting by using the same + # shape matching rules as NumPy + # TODO: determine if this can be done with + # more memory efficiency? + if view1.shape != view2.shape: + new_shape = np.broadcast_shapes(view1.shape, view2.shape) + view1_new = pk.View([*new_shape], dtype=view1.dtype) + view1_new[:] = view1 + view1 = view1_new + view2_new = pk.View([*new_shape], dtype=view2.dtype) + view2_new[:] = view2 + view2 = view2_new + return view1, view2 + + +def _typematch_views(view1, view2): + # very crude casting implementation + # for binary ufuncs + dtype1 = view1.dtype + dtype2 = view2.dtype + dtype_extractor = re.compile(r".*(?:data_types|DataType)\.(\w+)") + res1 = dtype_extractor.match(str(dtype1)) + res2 = dtype_extractor.match(str(dtype2)) + effective_dtype = dtype1 + if res1 is not None and res2 is not None: + res1_dtype_str = res1.group(1) + res2_dtype_str = res2.group(1) + if res1_dtype_str == "double": + res1_dtype_str = "float64" + elif res1_dtype_str == "float": + res1_dtype_str = "float32" + if res2_dtype_str == "double": + res2_dtype_str = "float64" + elif res2_dtype_str == "float": + res2_dtype_str = "float32" + if (("int" in res1_dtype_str and "int" in res2_dtype_str) or + ("float" in res1_dtype_str and "float" in res2_dtype_str)): + dtype_1_width = int(res1_dtype_str.split("t")[1]) + dtype_2_width = int(res2_dtype_str.split("t")[1]) + if dtype_1_width >= dtype_2_width: + effective_dtype = dtype1 + view2_new = pk.View([*view2.shape], dtype=effective_dtype) + view2_new[:] = view2 + view2 = view2_new + else: + effective_dtype = dtype2 + view1_new = pk.View([*view1.shape], dtype=effective_dtype) + view1_new[:] = view1 + view1 = view1_new + return view1, view2, effective_dtype + + def _supported_types_check(dtype_str, supported_type_strings): options = "" for type_str in supported_type_strings: @@ -2679,3 +2732,46 @@ def floor(view): out=out, view=view) return out + + +def remainder(view1, view2): + """ + Returns the remainder of division for each element ``x1_i`` of the input view + ``view1`` and the respective element ``x2_i`` of the input view ``view2``. + + Parameters + ---------- + view1 : pykokkos view + Dividend input array. Should have a real-valued data type. + view2 : pykokkos view + Divisor input array. Must be broadcasting-compatible with ``view1``. Should have a real-valued data type. + + Returns + ------- + out : pykokkos view + A view containing the element-wise results. Each element-wise result must have the same sign + as the respective element ``x2_i``. The returned view must have a data type determined + by Type Promotion Rules. + """ + view1, view2 = _broadcast_views(view1, view2) + view1, view2, effective_dtype = _typematch_views(view1, view2) + ndims_1 = len(view1.shape) + ndims_2 = len(view2.shape) + if ndims_1 > 5 or ndims_2 > 5: + raise NotImplementedError("remainder() ufunc only supports up to 5D views") + if view1.size == 0: + return pk.View([*view1.shape], dtype=effective_dtype) + out = pk.View([*view1.shape], dtype=effective_dtype) + if view1.shape == (): + tid = 1 + else: + tid = view1.shape[0] + _ufunc_kernel_dispatcher(tid=tid, + dtype=effective_dtype, + ndims=ndims_1, + op="remainder", + sub_dispatcher=pk.parallel_for, + out=out, + view1=view1, + view2=view2) + return out From b89bc6f509746c9225f1a44a0f72ba8f75cec0f9 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 21 Jan 2023 16:46:56 -0700 Subject: [PATCH 2/2] MAINT, BUG: PR 162 revisions * fix up some of the `remainder()` workunits that were failing array API standard test * still needs more work though--for floating type cases I need to conform to the special cases noted here more closely: https://data-apis.org/array-api/latest/API_specification/generated/array_api.remainder.html [skip actions] --- pykokkos/lib/ufunc_workunits.py | 66 +++++++++++++++++++++------------ pykokkos/lib/ufuncs.py | 5 +++ 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/pykokkos/lib/ufunc_workunits.py b/pykokkos/lib/ufunc_workunits.py index 08401441..262ae1ee 100644 --- a/pykokkos/lib/ufunc_workunits.py +++ b/pykokkos/lib/ufunc_workunits.py @@ -156,7 +156,9 @@ def remainder_impl_5d_uint32(tid: int, for j in range(view1.extent(2)): for k in range(view1.extent(3)): for l in range(view1.extent(4)): - if (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: + if (view2[tid][i][j][k][l] > view1[tid][i][j][k][l]): + out[tid][i][j][k][l] = view1[tid][i][j][k][l] + elif (view1[tid][i][j][k][l] == view2[tid][i][j][k][l]) or view2[tid][i][j][k][l] == 0: out[tid][i][j][k][l] = 0 else: out[tid][i][j][k][l] = ((view1[tid][i][j][k][l] % view2[tid][i][j][k][l]) + view2[tid][i][j][k][l]) % view2[tid][i][j][k][l] @@ -260,7 +262,9 @@ def remainder_impl_4d_uint32(tid: int, for i in range(view1.extent(1)): for j in range(view1.extent(2)): for k in range(view1.extent(3)): - if (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: + if (view2[tid][i][j][k] > view1[tid][i][j][k]): + out[tid][i][j][k] = view1[tid][i][j][k] + elif (view1[tid][i][j][k] == view2[tid][i][j][k]) or view2[tid][i][j][k] == 0: out[tid][i][j][k] = 0 else: out[tid][i][j][k] = ((view1[tid][i][j][k] % view2[tid][i][j][k]) + view2[tid][i][j][k]) % view2[tid][i][j][k] @@ -329,7 +333,9 @@ def remainder_impl_3d_uint32(tid: int, out: pk.View3D[pk.uint32]): for i in range(view1.extent(1)): for j in range(view1.extent(2)): - if (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: + if (view2[tid][i][j] > view1[tid][i][j]): + out[tid][i][j] = view1[tid][i][j] + elif (view1[tid][i][j] == view2[tid][i][j]) or view2[tid][i][j] == 0: out[tid][i][j] = 0 else: out[tid][i][j] = ((view1[tid][i][j] % view2[tid][i][j]) + view2[tid][i][j]) % view2[tid][i][j] @@ -424,6 +430,8 @@ def remainder_impl_2d_uint32(tid: int, for i in range(view1.extent(1)): if (view1[tid][i] == view2[tid][i]) or view2[tid][i] == 0: out[tid][i] = 0 + elif view2[tid][i] > view1[tid][i]: + out[tid][i] = view1[tid][i] else: out[tid][i] = ((view1[tid][i] % view2[tid][i]) + view2[tid][i]) % view2[tid][i] if out[tid][i] < 0 and view2[tid][i] > 0: @@ -454,11 +462,14 @@ def remainder_impl_2d_float(tid: int, view2: pk.View2D[pk.float], out: pk.View2D[pk.float]): for i in range(view1.extent(1)): - out[tid][i] = fmod(fmod(view1[tid][i], view2[tid][i]) + view2[tid][i], view2[tid][i]) - if out[tid][i] < 0 and view2[tid][i] > 0: - out[tid][i] = -1 * out[tid][i] - elif out[tid][i] > 0 and view2[tid][i] < 0: - out[tid][i] = -1 * out[tid][i] + if view1[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = view1[tid][i] + view2[tid][i] + else: + out[tid][i] = fmod(view1[tid][i], view2[tid][i]) + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = out[tid][i] + view2[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] @pk.workunit @@ -467,11 +478,14 @@ def remainder_impl_2d_double(tid: int, view2: pk.View2D[pk.double], out: pk.View2D[pk.double]): for i in range(view1.extent(1)): - out[tid][i] = fmod(fmod(view1[tid][i], view2[tid][i]) + view2[tid][i], view2[tid][i]) - if out[tid][i] < 0 and view2[tid][i] > 0: - out[tid][i] = -1 * out[tid][i] - elif out[tid][i] > 0 and view2[tid][i] < 0: - out[tid][i] = -1 * out[tid][i] + if view1[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = view1[tid][i] + view2[tid][i] + else: + out[tid][i] = fmod(view1[tid][i], view2[tid][i]) + if out[tid][i] < 0 and view2[tid][i] > 0: + out[tid][i] = out[tid][i] + view2[tid][i] + elif out[tid][i] > 0 and view2[tid][i] < 0: + out[tid][i] = -1 * out[tid][i] @pk.workunit @@ -490,11 +504,14 @@ def remainder_impl_1d_float(tid: int, view1: pk.View1D[pk.float], view2: pk.View1D[pk.float], out: pk.View1D[pk.float]): - out[tid] = fmod(fmod(view1[tid], view2[tid]) + view2[tid], view2[tid]) - if out[tid] < 0 and view2[tid] > 0: - out[tid] = -1 * out[tid] - elif out[tid] > 0 and view2[tid] < 0: - out[tid] = -1 * out[tid] + if view1[tid] > 0 and view2[tid] < 0: + out[tid] = view1[tid] + view2[tid] + else: + out[tid] = fmod(view1[tid], view2[tid]) + if out[tid] < 0 and view2[tid] > 0: + out[tid] = out[tid] + view2[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] @pk.workunit @@ -502,11 +519,14 @@ def remainder_impl_1d_double(tid: int, view1: pk.View1D[pk.double], view2: pk.View1D[pk.double], out: pk.View1D[pk.double]): - out[tid] = fmod(fmod(view1[tid], view2[tid]) + view2[tid], view2[tid]) - if out[tid] < 0 and view2[tid] > 0: - out[tid] = -1 * out[tid] - elif out[tid] > 0 and view2[tid] < 0: - out[tid] = -1 * out[tid] + if view1[tid] > 0 and view2[tid] < 0: + out[tid] = view1[tid] + view2[tid] + else: + out[tid] = fmod(view1[tid], view2[tid]) + if out[tid] < 0 and view2[tid] > 0: + out[tid] = out[tid] + view2[tid] + elif out[tid] > 0 and view2[tid] < 0: + out[tid] = -1 * out[tid] @pk.workunit diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 889740d5..19d1dc57 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -90,6 +90,7 @@ def _ufunc_kernel_dispatcher(tid, dtype_str = "double" function_name_str = f"{op}_impl_{ndims}d_{dtype_str}" desired_workunit = kernel_dict[function_name_str] + print("desired_workunit:", desired_workunit) # call the kernel ret = sub_dispatcher(tid, desired_workunit, **kwargs) return ret @@ -2753,10 +2754,14 @@ def remainder(view1, view2): as the respective element ``x2_i``. The returned view must have a data type determined by Type Promotion Rules. """ + print("remainder received view1, view2:", view1, view2) + print("remainder received view1.dtype, view2.dtype:", view1.dtype, view2.dtype) + print("remainder received view1.shape, view2.shape:", view1.shape, view2.shape) view1, view2 = _broadcast_views(view1, view2) view1, view2, effective_dtype = _typematch_views(view1, view2) ndims_1 = len(view1.shape) ndims_2 = len(view2.shape) + print("ndims_1, ndims_2:", ndims_1, ndims_2) if ndims_1 > 5 or ndims_2 > 5: raise NotImplementedError("remainder() ufunc only supports up to 5D views") if view1.size == 0: