diff --git a/examples/BabelStream/functor/babel_stream.py b/examples/BabelStream/functor/babel_stream.py index 45e493d7..65443b02 100644 --- a/examples/BabelStream/functor/babel_stream.py +++ b/examples/BabelStream/functor/babel_stream.py @@ -3,6 +3,7 @@ import argparse from functools import reduce import sys +import numpy as np @pk.functor @@ -10,9 +11,9 @@ class KokkosStream: def __init__( self, ARRAY_SIZE: int, initA: float, initB: float, initC: float, scalar: float ): - self.a: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) - self.b: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) - self.c: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) + self.a = np.zeros(ARRAY_SIZE, dtype=np.float64) + self.b = np.zeros(ARRAY_SIZE, dtype=np.float64) + self.c = np.zeros(ARRAY_SIZE, dtype=np.float64) self.initA: float = initA self.initB: float = initB diff --git a/examples/BabelStream/workload/babel_stream.py b/examples/BabelStream/workload/babel_stream.py index 66e591ae..b60184c7 100644 --- a/examples/BabelStream/workload/babel_stream.py +++ b/examples/BabelStream/workload/babel_stream.py @@ -1,4 +1,5 @@ import pykokkos as pk +import numpy as np import argparse from functools import reduce @@ -18,9 +19,9 @@ def __init__( ): self.array_size: int = ARRAY_SIZE - self.a: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) - self.b: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) - self.c: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double) + self.a = np.zeros([ARRAY_SIZE], dtype=np.float64) + self.b = np.zeros([ARRAY_SIZE], dtype=np.float64) + self.c = np.zeros([ARRAY_SIZE], dtype=np.float64) self.initA: pk.double = initA self.initB: pk.double = initB @@ -30,7 +31,7 @@ def __init__( self.sum: pk.double = 0 self.runtime: float = 0 - self.runtimes: pk.View2D[pk.double] = pk.View([5, num_times], pk.double) + self.runtimes = np.zeros([5, num_times], dtype=np.float64) @pk.main def run(self): diff --git a/examples/ExaMiniMD/standalone/src/force_types/force_lj_cell.py b/examples/ExaMiniMD/standalone/src/force_types/force_lj_cell.py index 4e8fecf8..53ce6db7 100644 --- a/examples/ExaMiniMD/standalone/src/force_types/force_lj_cell.py +++ b/examples/ExaMiniMD/standalone/src/force_types/force_lj_cell.py @@ -1,4 +1,5 @@ from typing import List +import numpy as np import pykokkos as pk @@ -37,24 +38,32 @@ def init_scalar(self, x_: int, y_: int, z_: int) -> t_scalar3: @pk.workload class ForceLJCell(Force): - class t_fparams(pk.View2D): - def __init__(self, x: int = 0, y: int = 0, data_type: pk.DataType = pk.double): - super().__init__(x, y, data_type) + @staticmethod + def t_fparams(x: int = 0, y: int = 0, data_type: pk.DataType = pk.double): + import numpy as np + + if data_type == pk.double: + np_dtype = np.float64 + elif data_type == pk.int32: + np_dtype = np.int32 + else: + np_dtype = np.float64 + return np.zeros([x, y], dtype=np_dtype) def __init__(self, args: List[str], system: System, half_neigh: bool): super().__init__(args, system, half_neigh) - self.lj1: pk.View2D[pk.double] = self.t_fparams(system.ntypes, system.ntypes) - self.lj2: pk.View2D[pk.double] = self.t_fparams(system.ntypes, system.ntypes) - self.cutsq: pk.View2D[pk.double] = self.t_fparams(system.ntypes, system.ntypes) + self.lj1 = self.t_fparams(system.ntypes, system.ntypes) + self.lj2 = self.t_fparams(system.ntypes, system.ntypes) + self.cutsq = self.t_fparams(system.ntypes, system.ntypes) - self.bin_offsets: pk.View3D[pk.int32] = pk.View([0, 0, 0], pk.int32) - self.bin_count: pk.View3D[pk.int32] = pk.View([0, 0, 0], pk.int32) - self.permute_vector: pk.View1D[pk.int32] = pk.View([0], pk.int32) + self.bin_offsets = np.zeros([0, 0, 0], dtype=np.int32) + self.bin_count = np.zeros([0, 0, 0], dtype=np.int32) + self.permute_vector = np.zeros([0], dtype=np.int32) - self.x: pk.View2D[pk.double] = pk.View([0, 0], pk.double) - self.type: pk.View1D[pk.int32] = pk.View([0], pk.int32) - self.f: pk.View2D[pk.double] = pk.View([0, 0], pk.double) + self.x = np.zeros([0, 0], dtype=np.float64) + self.type = np.zeros([0], dtype=np.int32) + self.f = np.zeros([0, 0], dtype=np.float64) self.N_local: int = 0 self.nbinx: int = 0 diff --git a/examples/ExaMiniMD/standalone/src/force_types/force_lj_neigh.py b/examples/ExaMiniMD/standalone/src/force_types/force_lj_neigh.py index b2d3b30f..fca84c8b 100644 --- a/examples/ExaMiniMD/standalone/src/force_types/force_lj_neigh.py +++ b/examples/ExaMiniMD/standalone/src/force_types/force_lj_neigh.py @@ -13,14 +13,14 @@ @pk.workunit def fullneigh_for( i: int, - rnd_lj1: pk.View2D[float], - rnd_lj2: pk.View2D[float], - rnd_cutsq: pk.View2D[float], - num_neighs_view: pk.View1D[int], - neighs_view: pk.View2D[int], - x: pk.View2D[float], - f: pk.View2D[float], - type: pk.View1D[int], + rnd_lj1, + rnd_lj2, + rnd_cutsq, + num_neighs_view, + neighs_view, + x, + f, + type, ) -> None: x_i: float = x[i][0] y_i: float = x[i][1] @@ -62,14 +62,14 @@ def fullneigh_for( @pk.workunit def halfneigh_for( i: int, - rnd_lj1: pk.View2D[float], - rnd_lj2: pk.View2D[float], - rnd_cutsq: pk.View2D[float], - num_neighs_view: pk.View1D[int], - neighs_view: pk.View2D[int], - x: pk.View2D[float], - f: pk.View2D[float], - type: pk.View1D[int], + rnd_lj1, + rnd_lj2, + rnd_cutsq, + num_neighs_view, + neighs_view, + x, + f, + type, use_stackparams: bool, ) -> None: x_i: float = x[i][0] @@ -135,14 +135,14 @@ def halfneigh_for( def fullneigh_reduce( i: int, PE: pk.Acc[float], - rnd_lj1: pk.View2D[float], - rnd_lj2: pk.View2D[float], - rnd_cutsq: pk.View2D[float], - num_neighs_view: pk.View1D[int], - neighs_view: pk.View2D[int], - x: pk.View2D[float], - f: pk.View2D[float], - type: pk.View1D[int], + rnd_lj1, + rnd_lj2, + rnd_cutsq, + num_neighs_view, + neighs_view, + x, + f, + type, use_stackparams: bool, ) -> None: x_i: float = x[i][0] @@ -195,15 +195,15 @@ def fullneigh_reduce( def halfneigh_reduce( i: int, PE: pk.Acc[float], - rnd_lj1: pk.View2D[float], - rnd_lj2: pk.View2D[float], - rnd_cutsq: pk.View2D[float], + rnd_lj1, + rnd_lj2, + rnd_cutsq, N_local: int, - num_neighs_view: pk.View1D[int], - neighs_view: pk.View2D[int], - x: pk.View2D[float], - f: pk.View2D[float], - type: pk.View1D[int], + num_neighs_view, + neighs_view, + x, + f, + type, use_stackparams: bool, ) -> None: x_i: float = x[i][0] @@ -259,17 +259,29 @@ def halfneigh_reduce( class ForceLJNeigh(Force): - class t_fparams(pk.View): - def __init__( - self, x: int = 0, y: int = 0, data_type: pk.DataTypeClass = pk.double - ): - super().__init__([x, y], data_type) - - class t_fparams_rnd(pk.View): - def __init__( - self, x: int = 0, y: int = 0, data_type: pk.DataTypeClass = pk.double - ): - super().__init__([x, y], data_type) + @staticmethod + def t_fparams(x: int = 0, y: int = 0, data_type: pk.DataTypeClass = pk.double): + import numpy as np + + if data_type == pk.double: + np_dtype = np.float64 + elif data_type == pk.int32: + np_dtype = np.int32 + else: + np_dtype = np.float64 + return np.zeros([x, y], dtype=np_dtype) + + @staticmethod + def t_fparams_rnd(x: int = 0, y: int = 0, data_type: pk.DataTypeClass = pk.double): + import numpy as np + + if data_type == pk.double: + np_dtype = np.float64 + elif data_type == pk.int32: + np_dtype = np.int32 + else: + np_dtype = np.float64 + return np.zeros([x, y], dtype=np_dtype) def __init__(self, args: List[str], system: System, half_neigh: bool): super().__init__(args, system, half_neigh) @@ -280,18 +292,18 @@ def __init__(self, args: List[str], system: System, half_neigh: bool): self.use_stackparams: bool = False # self.use_stackparams: bool = self.ntypes <= MAX_TYPES_STACKPARAMS - self.lj1: pk.View2D[pk.double] = self.t_fparams() - self.lj2: pk.View2D[pk.double] = self.t_fparams() - self.cutsq: pk.View2D[pk.double] = self.t_fparams() + self.lj1 = self.t_fparams() + self.lj2 = self.t_fparams() + self.cutsq = self.t_fparams() if not self.use_stackparams: self.lj1 = self.t_fparams(self.ntypes, self.ntypes) self.lj2 = self.t_fparams(self.ntypes, self.ntypes) self.cutsq = self.t_fparams(self.ntypes, self.ntypes) - self.rnd_lj1: pk.View2D[pk.double] = self.t_fparams() - self.rnd_lj2: pk.View2D[pk.double] = self.t_fparams() - self.rnd_cutsq: pk.View2D[pk.double] = self.t_fparams() + self.rnd_lj1 = self.t_fparams() + self.rnd_lj2 = self.t_fparams() + self.rnd_cutsq = self.t_fparams() self.step: int = 0 diff --git a/examples/ParRes/standalone/nstream.py b/examples/ParRes/standalone/nstream.py index bfb2eb3c..ad7a7b9c 100644 --- a/examples/ParRes/standalone/nstream.py +++ b/examples/ParRes/standalone/nstream.py @@ -1,4 +1,5 @@ import pykokkos as pk +import numpy as np import argparse import sys @@ -44,9 +45,9 @@ def run() -> None: print("Vector length = ", length) print("Offset = ", offset) - A: pk.View1D = pk.View([length], pk.double) - B: pk.View1D = pk.View([length], pk.double) - C: pk.View1D = pk.View([length], pk.double) + A = np.zeros([length], dtype=np.float64) + B = np.zeros([length], dtype=np.float64) + C = np.zeros([length], dtype=np.float64) p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, length) diff --git a/examples/kokkos-benchmarks/functor/bytes_and_flops.py b/examples/kokkos-benchmarks/functor/bytes_and_flops.py index e4386caa..a1fc1090 100644 --- a/examples/kokkos-benchmarks/functor/bytes_and_flops.py +++ b/examples/kokkos-benchmarks/functor/bytes_and_flops.py @@ -3,23 +3,45 @@ from typing import Tuple import pykokkos as pk +import numpy as np + +try: + import cupy as cp + + cupy_available = True +except ImportError: + cupy_available = False + + +def get_array_module(space: pk.ExecutionSpace): + """Return numpy or cupy module based on execution space""" + if cupy_available and space in (pk.ExecutionSpace.Cuda, pk.ExecutionSpace.HIP): + return cp + return np @pk.functor # use double type and unroll=8 class Benchmark_double_8: - def __init__(self, N: int, K: int, R: int, D: int, F: int, T: int, S: int): + def __init__( + self, + N: int, + K: int, + R: int, + D: int, + F: int, + T: int, + S: int, + space: pk.ExecutionSpace, + ): self.K: int = K self.R: int = R self.F: int = F - self.A: pk.View3D[pk.double] = pk.View([N, K, D], pk.double) - self.B: pk.View3D[pk.double] = pk.View([N, K, D], pk.double) - self.C: pk.View3D[pk.double] = pk.View([N, K, D], pk.double) - - self.A.fill(1.5) - self.B.fill(2.5) - self.C.fill(3.5) + xp = get_array_module(space) + self.A = xp.full((N, K, D), 1.5, dtype=np.float64) + self.B = xp.full((N, K, D), 2.5, dtype=np.float64) + self.C = xp.full((N, K, D), 3.5, dtype=np.float64) @pk.workunit def benchmark(self, team: pk.TeamMember): @@ -111,7 +133,7 @@ def run() -> None: pk.set_default_space(space) r = pk.TeamPolicy(N, T) - w = Benchmark_double_8(N, K, R, args.D, F, T, S) + w = Benchmark_double_8(N, K, R, args.D, F, T, S, space) timer = pk.Timer() pk.parallel_for(r, w.benchmark) diff --git a/examples/kokkos-benchmarks/functor/gather.py b/examples/kokkos-benchmarks/functor/gather.py index cafa31b3..bc208e27 100644 --- a/examples/kokkos-benchmarks/functor/gather.py +++ b/examples/kokkos-benchmarks/functor/gather.py @@ -3,39 +3,56 @@ from typing import Tuple import pykokkos as pk +import numpy as np + +try: + import cupy as cp + + cupy_available = True +except ImportError: + cupy_available = False + + +def get_array_module(space: pk.ExecutionSpace): + """Return numpy or cupy module based on execution space""" + if cupy_available and space in (pk.ExecutionSpace.Cuda, pk.ExecutionSpace.HIP): + return cp + return np @pk.functor # use double type and unroll=8 class Benchmark_double_8: - def __init__(self, N: int, K: int, D: int, R: int, F: int): + def __init__( + self, N: int, K: int, D: int, R: int, F: int, space: pk.ExecutionSpace + ): self.K: int = K self.F: int = F - self.connectivity: pk.View2D[int] = pk.View([N, K], int) - self.A: pk.View1D[pk.double] = pk.View([N], pk.double) - self.B: pk.View1D[pk.double] = pk.View([N], pk.double) - self.C: pk.View1D[pk.double] = pk.View([N], pk.double) - # self.A: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess) - # self.B: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess) - # self.C: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess) - - self.A.fill(1.5) - self.B.fill(2.0) + xp = get_array_module(space) + self.connectivity = xp.zeros((N, K), dtype=np.int32) + self.A = xp.full(N, 1.5, dtype=np.float64) + self.B = xp.full(N, 2.0, dtype=np.float64) + self.C = xp.zeros(N, dtype=np.float64) # TODO use kokkos to init in parallel random.seed(12313) + connectivity_np = np.zeros((N, K), dtype=np.int32) for i in range(N): for jj in range(K): - self.connectivity[i][jj] = (random.randrange(D) + i - D / 2 + N) % N + connectivity_np[i][jj] = (random.randrange(D) + i - D / 2 + N) % N + if xp is cp: + self.connectivity = cp.asarray(connectivity_np) + else: + self.connectivity = connectivity_np @pk.workunit def benchmark(self, i: int): c: pk.double = 0.0 for jj in range(self.K): j: int = self.connectivity[i][jj] - a1: pk.double = A[j] - b: pk.double = B[j] + a1: pk.double = self.A[j] + b: pk.double = self.B[j] a2: pk.double = a1 * 1.3 a3: pk.double = a2 * 1.1 a4: pk.double = a3 * 1.1 @@ -108,7 +125,7 @@ def run() -> None: scalar_size = 8 policy = pk.RangePolicy(0, N) - w = Benchmark_double_8(N, K, D, R, F) + w = Benchmark_double_8(N, K, D, R, F, space) timer = pk.Timer() for r in range(R): diff --git a/examples/kokkos-benchmarks/functor/gups.py b/examples/kokkos-benchmarks/functor/gups.py index befc80fd..3e216b31 100644 --- a/examples/kokkos-benchmarks/functor/gups.py +++ b/examples/kokkos-benchmarks/functor/gups.py @@ -3,13 +3,36 @@ from typing import Tuple import pykokkos as pk +import numpy as np + +try: + import cupy as cp + + cupy_available = True +except ImportError: + cupy_available = False + + +def get_array_module(space: pk.ExecutionSpace): + """Return numpy or cupy module based on execution space""" + if cupy_available and space in (pk.ExecutionSpace.Cuda, pk.ExecutionSpace.HIP): + return cp + return np @pk.functor class Benchmark: - def __init__(self, indices: int, data: int, repeats: int, use_atomics: bool): - self.indices: pk.View1D[pk.int64] = pk.View([indices], pk.int64) - self.data: pk.View1D[pk.int64] = pk.View([data], pk.int64) + def __init__( + self, + indices: int, + data: int, + repeats: int, + use_atomics: bool, + space: pk.ExecutionSpace, + ): + xp = get_array_module(space) + self.indices = xp.zeros(indices, dtype=np.int64) + self.data = xp.zeros(data, dtype=np.int64) self.datum: pk.int64 = -1 @pk.workunit @@ -56,7 +79,7 @@ def run() -> None: pk.set_default_space(space) - w = Benchmark(indices, data, repeats, use_atomics) + w = Benchmark(indices, data, repeats, use_atomics, space) range_indices = pk.RangePolicy(0, indices) range_data = pk.RangePolicy(0, data) diff --git a/examples/kokkos-tutorials/standalone/01.py b/examples/kokkos-tutorials/standalone/01.py index ba74ee0f..b25c7dcf 100644 --- a/examples/kokkos-tutorials/standalone/01.py +++ b/examples/kokkos-tutorials/standalone/01.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -32,9 +33,9 @@ def run() -> None: nrepeat: int = 1 print(f"Total size S = {N * M} N = {N} M = {M}") - y = pk.View([N], pk.double) - x = pk.View([M], pk.double) - A = pk.View([N * M], pk.double) + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N * M], dtype=np.float64) p = pk.RangePolicy(0, N) pk.parallel_for(p, y_init, y_view=y) diff --git a/examples/kokkos-tutorials/standalone/02.py b/examples/kokkos-tutorials/standalone/02.py index 7c29c8e2..1fbbd905 100644 --- a/examples/kokkos-tutorials/standalone/02.py +++ b/examples/kokkos-tutorials/standalone/02.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -22,9 +23,9 @@ def run() -> None: nrepeat: int = 100 print(f"Total size S = {N * M} N = {N} M = {M}") - y: pk.View1D = pk.View([N], pk.double) - x: pk.View1D = pk.View([M], pk.double) - A: pk.View2D = pk.View([N, M], pk.double) + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N, M], dtype=np.float64) if fill: y.fill(1) diff --git a/examples/kokkos-tutorials/standalone/03.py b/examples/kokkos-tutorials/standalone/03.py index 7c29c8e2..1fbbd905 100644 --- a/examples/kokkos-tutorials/standalone/03.py +++ b/examples/kokkos-tutorials/standalone/03.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -22,9 +23,9 @@ def run() -> None: nrepeat: int = 100 print(f"Total size S = {N * M} N = {N} M = {M}") - y: pk.View1D = pk.View([N], pk.double) - x: pk.View1D = pk.View([M], pk.double) - A: pk.View2D = pk.View([N, M], pk.double) + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N, M], dtype=np.float64) if fill: y.fill(1) diff --git a/examples/kokkos-tutorials/standalone/04.py b/examples/kokkos-tutorials/standalone/04.py index 65709d7f..d2d7459e 100644 --- a/examples/kokkos-tutorials/standalone/04.py +++ b/examples/kokkos-tutorials/standalone/04.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -35,9 +36,9 @@ def run() -> None: pk.set_default_space(pk.ExecutionSpace.Cuda) - y: pk.View1D = pk.View([N], pk.double) - x: pk.View1D = pk.View([M], pk.double) - A: pk.View2D = pk.View([N, M], pk.double) + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N, M], dtype=np.float64) p = pk.RangePolicy(0, N) pk.parallel_for(p, y_init, y_view=y) diff --git a/examples/kokkos-tutorials/standalone/subview.py b/examples/kokkos-tutorials/standalone/subview.py index a430d97e..31389cc6 100644 --- a/examples/kokkos-tutorials/standalone/subview.py +++ b/examples/kokkos-tutorials/standalone/subview.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -31,9 +32,9 @@ def run() -> None: pk.set_default_space(space) print(f"Total size S = {N * M} N = {N} M = {M}") - y: pk.View1D[pk.double] = pk.View([N], pk.double) - x: pk.View1D[pk.double] = pk.View([M], pk.double) - A: pk.View2D[pk.double] = pk.View([N, M], pk.double) + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N, M], dtype=np.float64) if fill: y.fill(1) diff --git a/examples/kokkos-tutorials/standalone/team_policy.py b/examples/kokkos-tutorials/standalone/team_policy.py index 6873e550..f209eb69 100644 --- a/examples/kokkos-tutorials/standalone/team_policy.py +++ b/examples/kokkos-tutorials/standalone/team_policy.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -28,9 +29,10 @@ def run() -> None: nrepeat: int = 100 print(f"Total size S = {N * M} N = {N} M = {M}") - y: pk.View1D = pk.View([N], pk.double, layout=pk.Layout.LayoutRight) - x: pk.View1D = pk.View([M], pk.double, layout=pk.Layout.LayoutRight) - A: pk.View2D = pk.View([N, M], pk.double, layout=pk.Layout.LayoutRight) + # Note: layout specified via ViewTypeInfo decorator if needed + y = np.zeros([N], dtype=np.float64) + x = np.zeros([M], dtype=np.float64) + A = np.zeros([N, M], dtype=np.float64) if fill: y.fill(1) diff --git a/examples/kokkos-tutorials/standalone/team_vector_loop.py b/examples/kokkos-tutorials/standalone/team_vector_loop.py index 26f431f3..3ef55b64 100644 --- a/examples/kokkos-tutorials/standalone/team_vector_loop.py +++ b/examples/kokkos-tutorials/standalone/team_vector_loop.py @@ -1,4 +1,5 @@ from typing import Tuple +import numpy as np import pykokkos as pk @@ -39,9 +40,10 @@ def run() -> None: nrepeat: int = 1000 print(f"Total size S = {N * M} N = {N} M = {M} E = {E}") - y: pk.View2D = pk.View([E, N], pk.double, layout=pk.Layout.LayoutRight) - x: pk.View2D = pk.View([E, M], pk.double, layout=pk.Layout.LayoutRight) - A: pk.View3D = pk.View([E, N, M], pk.double, layout=pk.Layout.LayoutRight) + # Note: layout specified via ViewTypeInfo decorator if needed + y = np.zeros([E, N], dtype=np.float64) + x = np.zeros([E, M], dtype=np.float64) + A = np.zeros([E, N, M], dtype=np.float64) if fill: y.fill(1) diff --git a/examples/kokkos/inclusive_scan_team.py b/examples/kokkos/inclusive_scan_team.py index 62fd3506..609e098c 100644 --- a/examples/kokkos/inclusive_scan_team.py +++ b/examples/kokkos/inclusive_scan_team.py @@ -1,14 +1,15 @@ +import numpy as np import pykokkos as pk @pk.workunit -def init_data(i: int, view: pk.View1D[int]): +def init_data(i: int, view): view[i] = i + 1 # Test inclusive_scan with scratch memory @pk.workunit -def team_scan(team_member: pk.TeamMember, view: pk.View1D[int]): +def team_scan(team_member: pk.TeamMember, view): team_size: int = team_member.team_size() offset: int = team_member.league_rank() * team_size localIdx: int = team_member.team_rank() @@ -33,7 +34,7 @@ def main(): team_size = 32 num_teams = (N + team_size - 1) // team_size - view: pk.View1D[int] = pk.View([N], int) + view = np.zeros([N], dtype=np.int32) p_init = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) pk.parallel_for(p_init, init_data, view=view) diff --git a/examples/kokkos/scan_standalone.py b/examples/kokkos/scan_standalone.py index a135893b..c19ddf08 100644 --- a/examples/kokkos/scan_standalone.py +++ b/examples/kokkos/scan_standalone.py @@ -1,3 +1,4 @@ +import numpy as np import pykokkos as pk @@ -16,7 +17,7 @@ def scan(i, acc, last_pass, view): def run() -> None: N = 10 - A: pk.View1D[pk.int32] = pk.View([N], pk.int32) + A = np.zeros([N], dtype=np.int32) p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, N) pk.parallel_for(p, init, view=A) diff --git a/examples/pykokkos/from_array.py b/examples/pykokkos/from_array.py index ff350d8f..ad5d53fb 100644 --- a/examples/pykokkos/from_array.py +++ b/examples/pykokkos/from_array.py @@ -4,13 +4,13 @@ @pk.workunit(np_arr=pk.ViewTypeInfo(space=pk.HostSpace)) -def addition_np(i: int, np_arr: pk.View2D[int]): +def addition_np(i: int, np_arr): np_arr[i][0] += 1 * i np_arr[i][1] += 2 * i @pk.workunit(cp_arr=pk.ViewTypeInfo(space=pk.CudaSpace, layout=pk.LayoutRight)) -def addition_cp(i: int, cp_arr: pk.View2D[int]): +def addition_cp(i: int, cp_arr): cp_arr[i][0] += 1 * i cp_arr[i][1] += 2 * i diff --git a/examples/pykokkos/multi_gpu.py b/examples/pykokkos/multi_gpu.py index e4645b43..4f7ddc01 100644 --- a/examples/pykokkos/multi_gpu.py +++ b/examples/pykokkos/multi_gpu.py @@ -18,7 +18,7 @@ @pk.workunit(cp_arr=pk.ViewTypeInfo(space=pk.CudaSpace)) -def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]): +def reduction_cp(i: int, acc: pk.Acc[int], cp_arr): acc += cp_arr[i] @@ -40,19 +40,19 @@ def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]): print(f"Sum: {result_0 + result_1}") pk.set_device_id(0) -view_0 = pk.View((size,), dtype=int) +view_0 = np.zeros((size,), dtype=np.int32) pk.set_device_id(1) -view_1 = pk.View((size,), dtype=int) +view_1 = np.zeros((size,), dtype=np.int32) @pk.workunit -def init_view(i: int, view: pk.View1D[int]): +def init_view(i: int, view): view[i] = i @pk.workunit -def reduce_view(i: int, acc: pk.Acc[int], view: pk.View1D[int]): +def reduce_view(i: int, acc: pk.Acc[int], view): acc += view[i] diff --git a/examples/pykokkos/team_thread_mdrange.py b/examples/pykokkos/team_thread_mdrange.py index 401316ea..a0b9ee92 100644 --- a/examples/pykokkos/team_thread_mdrange.py +++ b/examples/pykokkos/team_thread_mdrange.py @@ -1,3 +1,4 @@ +import numpy as np import pykokkos as pk @@ -17,9 +18,9 @@ def run(): N1 = 4 N2 = 4 - A = pk.View((N0, N1, N2)) - B = pk.View((N0, N1)) - C = pk.View((N2,)) + A = np.zeros((N0, N1, N2), dtype=np.float64) + B = np.zeros((N0, N1), dtype=np.float64) + C = np.zeros((N2,), dtype=np.float64) B.fill(1) C.fill(1) diff --git a/pykokkos/core/runtime.py b/pykokkos/core/runtime.py index b53c15b0..36d48f30 100644 --- a/pykokkos/core/runtime.py +++ b/pykokkos/core/runtime.py @@ -437,6 +437,7 @@ def get_arguments( if is_functor: functor: object = entity.__self__ entity_members = functor.__dict__ + self._convert_functor_arrays(entity_members) else: is_fused: bool = isinstance(entity, list) if is_fused: @@ -634,6 +635,31 @@ def get_fields(self, members: Dict[str, type]) -> Dict[str, Any]: return fields + def _convert_functor_arrays(self, members: Dict[str, Any]) -> None: + """ + Convert numpy/cupy arrays in functor members to Views (similar to convert_arrays for kwargs) + + :param members: the functor's __dict__ that will be modified in-place + """ + import numpy as np + from pykokkos.interface.views import ViewType, array + + cp_available: bool + try: + import cupy as cp + + cp_available = True + except ImportError: + cp_available = False + + for k, v in members.items(): + if isinstance(v, ViewType) or isinstance(v, np.generic): + continue + elif isinstance(v, np.ndarray): + members[k] = array(v) + elif cp_available and isinstance(v, cp.ndarray): + members[k] = array(v) + def get_views(self, members: Dict[str, type]) -> Dict[str, Any]: """ Gets all the views from the workload object diff --git a/pykokkos/core/translators/members.py b/pykokkos/core/translators/members.py index dd6b08ac..7af9f872 100644 --- a/pykokkos/core/translators/members.py +++ b/pykokkos/core/translators/members.py @@ -27,6 +27,7 @@ def __init__(self): cppast.DeclRefExpr, List[cppast.DeclRefExpr] ] = {} self.real_dtype_views: Set[cppast.DeclRefExpr] = {} + self.numpy_cupy_views: Set[cppast.DeclRefExpr] = {} self.pk_workunits: Dict[cppast.DeclRefExpr, ast.FunctionDef] = {} self.pk_functions: Dict[cppast.DeclRefExpr, ast.FunctionDef] = {} diff --git a/pykokkos/core/visitors/constructor_visitor.py b/pykokkos/core/visitors/constructor_visitor.py index 71ce3da1..95fa396b 100644 --- a/pykokkos/core/visitors/constructor_visitor.py +++ b/pykokkos/core/visitors/constructor_visitor.py @@ -58,6 +58,10 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> List[Tuple]: ann_assign: Tuple = self.visit(statement) if len(ann_assign) != 0: members.append(ann_assign) + elif self.member_type == "views" and isinstance(statement, ast.Assign): + assign: Tuple = self.visit(statement) + if len(assign) != 0: + members.append(assign) return members @@ -267,5 +271,123 @@ def is_pk_dtype(self, node: ast.Attribute) -> bool: return qualifier == self.pk_import + def visit_Assign(self, node: ast.Assign) -> Tuple[cppast.DeclRefExpr, cppast.Type]: + """ + Visit an assignment statement to detect numpy/cupy arrays + + :param node: the assignment AST node + :returns: a tuple of (name, view_type) if it's a numpy/cupy array, else empty tuple + """ + if self.member_type != "views": + return () + + # make sure that this is assignment to self attributes + if len(node.targets) != 1: + return () + target = node.targets[0] + if ( + not isinstance(target, ast.Attribute) + or not isinstance(target.value, ast.Name) + or target.value.id != "self" + ): + return () + + # make sure that value is a numpy/cupy array function call + declref: cppast.DeclRefExpr = self.visit(target) + value = node.value + if not isinstance(value, ast.Call): + return () + func = value.func + if not isinstance(func, ast.Attribute) or not isinstance(func.value, ast.Name): + return () + array_module = func.value.id + if array_module not in ("np", "numpy", "cp", "cupy"): + return () + + # infer view type from the array call + decltype = self._infer_view_type_from_array_call(value, array_module) + if decltype is None: + return () + + return (declref, decltype) + + def _infer_view_type_from_array_call( + self, call: ast.Call, array_module: str + ) -> Optional[cppast.ClassType]: + """ + Infer the View type from a numpy/cupy array creation call + + :param call: the AST call node + :param array_module: the module name (np, numpy, cp, cupy) + :returns: the inferred view type or None + """ + # shape from first positional argument + if len(call.args) == 0: + return None + + shape_arg = call.args[0] + + # determine the rank + rank = 1 + if isinstance(shape_arg, (ast.List, ast.Tuple)): + rank = len(shape_arg.elts) + elif isinstance(shape_arg, ast.Constant): + rank = 1 + + # handle dtype + dtype = None + for kw in call.keywords: + if kw.arg == "dtype": + dtype = kw.value + break + if ( + dtype is None + and len(call.args) > 1 + and isinstance(call.args[1], ast.Attribute) + ): + dtype = call.args[1] + + # translate dtype using existing get_type function + if dtype is None: + dtype_type = cppast.PrimitiveType(cppast.BuiltinType.DOUBLE) + elif ( + isinstance(dtype, ast.Attribute) + and hasattr(dtype, "value") + and isinstance(dtype.value, ast.Name) + ): + module_name = dtype.value.id + dtype_attr = dtype.attr + if module_name in ("np", "numpy", "cp", "cupy"): + # edge case: numpy/cupy float64 -> double, float32 -> float + dtype_name = ( + "double" + if dtype_attr == "float64" + else ("float" if dtype_attr == "float32" else dtype_attr) + ) + if dtype_name in visitors_util.view_dtypes: + dtype_value = visitors_util.view_dtypes[dtype_name] + if isinstance(dtype_value, cppast.BuiltinType): + dtype_type = cppast.PrimitiveType(dtype_value) + else: + dtype_type = cppast.PrimitiveType(cppast.BuiltinType.DOUBLE) + else: + dtype_type = cppast.PrimitiveType(cppast.BuiltinType.DOUBLE) + else: + dtype_type = visitors_util.get_type(dtype, self.pk_import) + if dtype_type is None or not isinstance( + dtype_type, cppast.PrimitiveType + ): + dtype_type = cppast.PrimitiveType(cppast.BuiltinType.DOUBLE) + else: + dtype_type = visitors_util.get_type(dtype, self.pk_import) + if dtype_type is None or not isinstance(dtype_type, cppast.PrimitiveType): + dtype_type = cppast.PrimitiveType(cppast.BuiltinType.DOUBLE) + + view_type_name = f"View{rank}D" + view_type = cppast.ClassType(view_type_name) + view_type.add_template_param(dtype_type) + + return view_type + def error(self, node: ast.AST, message: str): visitors_util.error(self.src, self.debug, node, message)