@@ -12,6 +12,7 @@ module Runtime
1212using .. CUDAnative
1313using LLVM
1414using LLVM. Interop
15+ using CUDAdrv
1516
1617
1718# # representation of a runtime method instance
@@ -127,18 +128,43 @@ function T_prjlvalue()
127128 LLVM. PointerType (eltype (T_pjlvalue), Tracked)
128129end
129130
# Allocate `sz` bytes through the allocator of the context the code executes in.
#
# The generated body is a small LLVM thunk: it calls a declared-only function named
# `julia.managed_malloc` (taking a size, returning an `i8*`) and converts the returned
# pointer to the LLVM representation of `Ptr{UInt8}`. The declaration is only a
# placeholder: it gets rewritten as a call to the proper allocator, probably `malloc`.
#
# Returns a `Ptr{UInt8}` to the allocated storage.
@generated function managed_malloc(sz::Csize_t)
    ctx = JuliaContext()
    T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))
    T_size = convert(LLVMType, Csize_t)
    T_ptr = convert(LLVMType, Ptr{UInt8})

    # create function
    llvm_f, _ = create_function(T_ptr, [T_size])
    mod = LLVM.parent(llvm_f)

    # Declare the placeholder. Its name must be exactly `julia.managed_malloc`
    # (no surrounding whitespace) so that it can be recognized and replaced.
    intr = LLVM.Function(mod, "julia.managed_malloc", LLVM.FunctionType(T_pint8, [T_size]))

    # generate IR
    Builder(ctx) do builder
        entry = BasicBlock(llvm_f, "entry", ctx)
        position!(builder, entry)
        ptr = call!(builder, intr, [parameters(llvm_f)[1]])
        # The thunk returns `T_ptr`, so the raw `i8*` is converted before returning.
        jlptr = ptrtoint!(builder, ptr, T_ptr)
        ret!(builder, jlptr)
    end

    call_function(llvm_f, Ptr{UInt8}, Tuple{Csize_t}, :((sz,)))
end
156+
# Runtime entry point for object allocation: allocate `sz` bytes with
# `managed_malloc` and return the storage as an object reference.
#
# If the allocator returns a null pointer, an error message is printed on the
# device and an `OutOfMemoryError` is thrown.
function gc_pool_alloc(sz::Csize_t)
    ptr = managed_malloc(sz)
    if ptr == C_NULL
        @cuprintf("ERROR: Out of dynamic GPU memory (trying to allocate %i bytes)\n", sz)
        throw(OutOfMemoryError())
    end
    # The function is registered below with return type `Any`, so the freshly
    # allocated storage has to be handed back instead of being dropped.
    return unsafe_pointer_to_objref(ptr)
end

compile(gc_pool_alloc, Any, (Csize_t,), T_prjlvalue)
140167
141-
142168# # boxing and unboxing
143169
144170const tag_type = UInt
@@ -226,5 +252,85 @@ for (T, t) in [Int8 => :int8, Int16 => :int16, Int32 => :int32, Int64 =>
226252 end
227253end
228254
255+ # # Bump allocator.
256+
# Gets a pointer to a global with a particular name. If the global
# does not exist yet, then it is declared in the global memory address
# space.
#
# Arguments:
# - `Val{global_name}`: the name of the global, lifted to the type domain so the
#   generator can read it.
# - `Type{T}`: the element type of the global.
#
# Returns a `Ptr{T}` holding the address of the global.
@generated function get_global_pointer(
        ::Val{global_name}, ::Type{T})::Ptr{T} where {global_name, T}
    ctx = JuliaContext()
    T_global = convert(LLVMType, T)
    T_result = convert(LLVMType, Ptr{T})

    # Create a thunk that computes a pointer to the global.
    llvm_f, _ = create_function(T_result)
    mod = LLVM.parent(llvm_f)

    # Reuse the global if the module already defines it; otherwise define it.
    global_set = LLVM.globals(mod)
    global_name_string = String(global_name)
    global_var = if haskey(global_set, global_name_string)
        global_set[global_name_string]
    else
        # Define the global in the global address space, i.e., address space one.
        # It is zero-initialized and gets linkonce_odr linkage.
        new_global = GlobalVariable(mod, T_global, global_name_string, 1)
        linkage!(new_global, LLVM.API.LLVMLinkOnceODRLinkage)
        initializer!(new_global, LLVM.null(T_global))
        new_global
    end

    # Generate IR that computes the global's address.
    Builder(ctx) do builder
        entry = BasicBlock(llvm_f, "entry", ctx)
        position!(builder, entry)

        # Cast the global variable's address to the result type.
        result = ptrtoint!(builder, global_var, T_result)
        ret!(builder, result)
    end

    # Call the function.
    call_function(llvm_f, Ptr{T})
end
294+
# Expands to an expression that yields a `DevicePtr` to the global called `name`
# whose element type is `type`.
#
# `name` is turned into a `Symbol` at expansion time and passed on as a `Val`;
# `type` is evaluated in the caller's scope. The element type of the resulting
# `DevicePtr` is taken from `type` itself: the macro has no type variable `T` in
# scope, so referring to one in the expansion would be an undefined name.
macro cuda_global_ptr(name, type)
    global_tag = Val(Symbol(name))
    return :(convert(
        DevicePtr{$(esc(type))},
        get_global_pointer(
            $global_tag,
            $(esc(type)))))
end
302+
# Allocates `bytesize` bytes of storage by bumping the global bump
# allocator pointer.
#
# The globals `bump_alloc_ptr` and `bump_alloc_end` hold the current allocation
# cursor and the end of the buffer (see `bump_alloc_init!`). Returns a pointer to
# the start of the chunk, or `C_NULL` if the chunk does not fit in the buffer.
function bump_alloc(bytesize::Csize_t)::Ptr{UInt8}
    ptr = @cuda_global_ptr("bump_alloc_ptr", Csize_t)
    # Relies on `atomic_add!` returning the value stored before the addition,
    # which is then the start address of this chunk.
    chunk_address = atomic_add!(ptr, bytesize)
    end_ptr = unsafe_load(@cuda_global_ptr("bump_alloc_end", Csize_t))
    # The whole chunk must lie inside the buffer, not just its first byte. The
    # check is phrased as a subtraction so that unsigned arithmetic cannot wrap.
    if chunk_address <= end_ptr && bytesize <= end_ptr - chunk_address
        return convert(Ptr{UInt8}, chunk_address)
    else
        return C_NULL
    end
end

compile(bump_alloc, Ptr{UInt8}, (Csize_t,))

# Sets the global called `name` in the module of `kernel` to `value`.
#
# A `CUDAdrv.CuError` with the `ERROR_NOT_FOUND` code is ignored; any other
# exception is rethrown.
function maybe_set_global(kernel, name, value::T) where T
    try
        global_handle = CuGlobal{T}(kernel.mod, name)
        set(global_handle, value)
    catch exception
        # The global may not have been declared (presumably because the kernel
        # does not use it). In that case, we should do nothing.
        if !isa(exception, CUDAdrv.CuError) || exception.code != CUDAdrv.ERROR_NOT_FOUND.code
            rethrow()
        end
    end
end

# Initializes the bump allocator state of `kernel`: the allocation cursor is set
# to `buffer_start` and the end marker to `buffer_start + buffer_size`.
function bump_alloc_init!(kernel, buffer_start, buffer_size)
    maybe_set_global(kernel, "bump_alloc_ptr", buffer_start)
    maybe_set_global(kernel, "bump_alloc_end", buffer_start + buffer_size)
end
229335
230336end
0 commit comments