Skip to content

Commit 65147ed

Browse files
authored
Nvidia remastered (#464)
* nvidia: update hello world following changes in #456 * update Nvidia backend to use the new LLVM infra * update Nvidia multiplication
1 parent 0b24651 commit 65147ed

File tree

11 files changed

+437
-524
lines changed

11 files changed

+437
-524
lines changed

constantine/math_compiler/codegen_amdgpu.nim

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -67,45 +67,6 @@ proc hipDeviceInit*(deviceID = 0'i32): HipDevice =
6767

6868
return hipDevice
6969

70-
# ############################################################
71-
#
72-
# LLVM IR for AMD GPUs
73-
#
74-
# ############################################################
75-
#
76-
# Note:
77-
# __device__ functions for field and elliptic curve arithmetic
78-
# might be compiled by default with scalar codegen
79-
#
80-
# We will need to either:
81-
# - Derive explicitly a vectorized version of the warp/wave size (32)
82-
# - Derive implicitly a vectorized version, probably with __forceinline__
83-
84-
proc wrapInCallableHipKernel*(module: ModuleRef, fn: FnDef) =
85-
## Create a public wrapper of a Hip device function
86-
##
87-
## A function named `addmod` can be found by appending _public
88-
## check hipModuleGetFunction(fnPointer, cuModule, "addmod_public")
89-
90-
let pubName = fn.fnImpl.getName() & "_public"
91-
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)
92-
93-
let ctx = module.getContext()
94-
let builder = ctx.createBuilder()
95-
defer: builder.dispose()
96-
97-
let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
98-
builder.positionAtEnd(blck)
99-
100-
var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
101-
for i, arg in mpairs(args):
102-
arg = pubFn.getParam(i.uint32)
103-
discard builder.call2(fn.fnTy, fn.fnImpl, args)
104-
105-
# A public kernel must return void
106-
builder.retVoid()
107-
pubFn.setCallingConvention(AMDGPU_KERNEL)
108-
10970
# ############################################################
11071
#
11172
# Code generation

constantine/math_compiler/codegen_nvidia.nim

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -109,37 +109,6 @@ proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =
109109

110110
return cuDevice
111111

112-
# ############################################################
113-
#
114-
# LLVM IR for Nvidia GPUs
115-
#
116-
# ############################################################
117-
118-
proc wrapInCallableCudaKernel*(module: ModuleRef, fn: FnDef) =
119-
## Create a public wrapper of a cuda device function
120-
##
121-
## A function named `addmod` can be found by appending _public
122-
## check cuModuleGetFunction(fnPointer, cuModule, "addmod_public")
123-
124-
let pubName = fn.fnImpl.getName() & "_public"
125-
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)
126-
127-
let ctx = module.getContext()
128-
let builder = ctx.createBuilder()
129-
defer: builder.dispose()
130-
131-
let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
132-
builder.positionAtEnd(blck)
133-
134-
var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
135-
for i, arg in mpairs(args):
136-
arg = pubFn.getParam(i.uint32)
137-
discard builder.call2(fn.fnTy, fn.fnImpl, args)
138-
139-
# A public kernel must return void
140-
builder.retVoid()
141-
module.tagCudaKernel((fn.fnTy, pubFn))
142-
143112
# ############################################################
144113
#
145114
# Code generation
@@ -194,9 +163,7 @@ proc codegenNvidiaPTX*(asy: Assembler_LLVM, sm: tuple[major, minor: int32]): str
194163
#
195164
# ############################################################
196165

197-
proc getCudaKernel*(cuMod: CUmodule, cm: CurveMetadata, opcode: Opcode): CUfunction =
198-
# Public kernels are appended _public
199-
let fnName = cm.genSymbol(opcode) & "_public"
166+
proc getCudaKernel*(cuMod: CUmodule, fnName: string): CUfunction =
200167
check cuModuleGetFunction(result, cuMod, fnName)
201168

202169
proc exec*[T](jitFn: CUfunction, r: var T, a, b: T) =
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Constantine
2+
# Copyright (c) 2018-2019 Status Research & Development GmbH
3+
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
4+
# Licensed and distributed under either of
5+
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
6+
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
7+
# at your option. This file may not be copied, modified, or distributed except according to those terms.
8+
9+
import
10+
constantine/platforms/llvm/llvm,
11+
./ir,
12+
./impl_fields_sat {.all.},
13+
./impl_fields_nvidia {.all.}
14+
15+
proc modadd*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
16+
case asy.backend
17+
of {bkX86_64_Linux, bkAmdGpu}:
18+
asy.modadd_sat(fd, r, a, b, M)
19+
of bkNvidiaPTX:
20+
asy.modadd_nvidia(fd, r, a, b, M)
21+
22+
proc modsub*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
23+
case asy.backend
24+
of bkNvidiaPTX:
25+
asy.modsub_nvidia(fd, r, a, b, M)
26+
else:
27+
doAssert false, "Unimplemented"
28+
29+
proc mtymul*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
30+
case asy.backend
31+
of bkNvidiaPTX:
32+
asy.mtymul_nvidia(fd, r, a, b, M)
33+
else:
34+
doAssert false, "Unimplemented"

constantine/math_compiler/impl_fields_globals.nim

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,7 @@ proc getM0ninv*(asy: Assembler_LLVM, fd: FieldDescriptor): ValueRef =
157157
fd.wordTy
158158
)
159159

160-
161-
return m0ninv
160+
return asy.load2(fd.wordTy, m0ninv, "m0ninv")
162161

163162
when isMainModule:
164163
let asy = Assembler_LLVM.new("test_module", bkX86_64_Linux)

0 commit comments

Comments
 (0)