@@ -12,6 +12,7 @@ module Runtime
1212using .. CUDAnative
1313using LLVM
1414using LLVM. Interop
15+ using CUDAdrv
1516
1617
1718# # representation of a runtime method instance
@@ -127,8 +128,34 @@ function T_prjlvalue()
127128 LLVM. PointerType (eltype (T_pjlvalue), Tracked)
128129end
129130
# Placeholder allocation routine: takes a byte count and yields a `Ptr{UInt8}`.
#
# The IR generated here is a thin wrapper that forwards `sz` to an externally
# declared function (only a declaration is emitted in this module) and turns
# the `i8*` it returns into the LLVM representation of `Ptr{UInt8}` with a
# `ptrtoint`. The call to that declared function is meant to be rewritten
# later into a call to the actual allocator -- probably 'malloc' -- that is
# appropriate for the context the code executes in.
@generated function managed_malloc(sz::Csize_t)
    ctx = JuliaContext()

    # LLVM types: the size argument, the raw pointer produced by the declared
    # function, and the LLVM form of the Julia-side `Ptr{UInt8}` result.
    size_type = convert(LLVMType, Csize_t)
    raw_ptr_type = LLVM.PointerType(LLVM.Int8Type(ctx))
    result_type = convert(LLVMType, Ptr{UInt8})

    # Wrapper function that will be called from Julia.
    wrapper, _ = create_function(result_type, [size_type])
    parent_module = LLVM.parent(wrapper)

    # Declaration of the function that stands in for the allocator.
    intrinsic_type = LLVM.FunctionType(raw_ptr_type, [size_type])
    intrinsic = LLVM.Function(parent_module, " julia.managed_malloc", intrinsic_type)

    # Wrapper body: call the stand-in, convert the pointer, return it.
    Builder(ctx) do builder
        position!(builder, BasicBlock(wrapper, " entry", ctx))
        size_arg = parameters(wrapper)[1]
        raw_ptr = call!(builder, intrinsic, [size_arg])
        ret!(builder, ptrtoint!(builder, raw_ptr, result_type))
    end

    return call_function(wrapper, Ptr{UInt8}, Tuple{Csize_t}, :((sz,)))
end
156+
130157function gc_pool_alloc (sz:: Csize_t )
131- ptr = malloc (sz)
158+ ptr = managed_malloc (sz)
132159 if ptr == C_NULL
133160 @cuprintf (" ERROR: Out of dynamic GPU memory (trying to allocate %i bytes)\n " , sz)
134161 throw (OutOfMemoryError ())
138165
139166compile (gc_pool_alloc, Any, (Csize_t,), T_prjlvalue)
140167
141-
142168# # boxing and unboxing
143169
144170const tag_type = UInt
@@ -226,5 +252,79 @@ for (T, t) in [Int8 => :int8, Int16 => :int16, Int32 => :int32, Int64 =>
226252 end
227253end
228254
255+ # # Bump allocator.
256+
# Produces a `CUDAnative.DevicePtr{T}` that points at the module-level global
# variable called `global_name`, which holds a value of type `T`.
#
# If the module the thunk is generated into does not contain a global with
# that name yet, one is created in address space one (the global memory
# address space), given linkonce_odr linkage and a null initializer.
@generated function get_global_pointer(::Val{global_name}, ::Type{T})::CUDAnative.DevicePtr{T} where {global_name, T}
    ctx = JuliaContext()
    value_type = convert(LLVMType, T)
    address_type = convert(LLVMType, Ptr{T})

    # Thunk that takes no arguments and returns the global's address.
    thunk, _ = create_function(address_type)
    parent_module = LLVM.parent(thunk)

    # Reuse the global when it is already present; otherwise define it.
    name = String(global_name)
    module_globals = LLVM.globals(parent_module)
    target = if haskey(module_globals, name)
        module_globals[name]
    else
        # The trailing `1` selects address space one, i.e. global memory.
        fresh = GlobalVariable(parent_module, value_type, name, 1)
        linkage!(fresh, LLVM.API.LLVMLinkOnceODRLinkage)
        initializer!(fresh, LLVM.null(value_type))
        fresh
    end

    # Thunk body: convert the global's address to the result type and return it.
    Builder(ctx) do builder
        position!(builder, BasicBlock(thunk, " entry", ctx))
        ret!(builder, ptrtoint!(builder, target, address_type))
    end

    # Call the thunk and wrap the resulting address in a device pointer.
    address_expr = call_function(thunk, Ptr{T})
    return quote
        CUDAnative.DevicePtr{T, CUDAnative.AS.Generic}(convert(Csize_t, $address_expr))
    end
end
296+
# Allocates `bytesize` bytes of storage by bumping the global bump
# allocator pointer.
#
# The allocator state lives in two `Csize_t` globals: `bump_alloc_ptr` (the
# next free address) and `bump_alloc_end` (the end of the buffer).
#
# Returns a pointer to the start of the chunk, or `C_NULL` (converted to
# `Ptr{UInt8}` by the return annotation) when the chunk does not fit entirely
# before `bump_alloc_end`.
#
# Notes:
#   * The bump pointer is advanced unconditionally, so a failed request still
#     consumes address range; nothing here rolls it back.
#   * `bytesize` is added as-is; no alignment padding is applied.
function bump_alloc(bytesize::Csize_t)::Ptr{UInt8}
    ptr = get_global_pointer(Val(:bump_alloc_ptr), Csize_t)
    # NOTE(review): this relies on `atomic_add!` returning the value held
    # *before* the addition, i.e. the start of the chunk -- confirm.
    chunk_address = CUDAnative.atomic_add!(ptr, bytesize)
    end_ptr = unsafe_load(get_global_pointer(Val(:bump_alloc_end), Csize_t))

    # The whole chunk [chunk_address, chunk_address + bytesize) has to lie
    # inside the buffer, not merely its first byte. The remaining space is
    # computed by subtraction (safe because chunk_address < end_ptr was checked
    # first) so that the unsigned sum `chunk_address + bytesize` cannot wrap.
    if chunk_address < end_ptr && bytesize <= end_ptr - chunk_address
        return convert(Ptr{UInt8}, chunk_address)
    else
        return C_NULL
    end
end

compile(bump_alloc, Ptr{UInt8}, (Csize_t,))
311+
# Assigns `value` to the global variable called `name` in `kernel.mod`, if the
# module has such a global.
#
# A `CUDAdrv.CuError` whose code equals `CUDAdrv.ERROR_NOT_FOUND.code` is
# treated as "the global was never declared" (for instance because it is
# unused) and is silently ignored. Every other exception is rethrown.
function maybe_set_global(kernel, name, value::T) where T
    try
        set(CuGlobal{T}(kernel.mod, name), value)
    catch err
        # Only a "not found" driver error is expected here; anything else is a
        # genuine failure and must propagate to the caller.
        is_missing = isa(err, CUDAdrv.CuError) && err.code == CUDAdrv.ERROR_NOT_FOUND.code
        if !is_missing
            rethrow()
        end
    end
end
324+
# Initializes the bump allocator state of `kernel` so that allocations are
# served from the buffer that starts at `buffer_start` and spans `buffer_size`
# bytes: `bump_alloc_ptr` is set to the start of the buffer and
# `bump_alloc_end` to `buffer_start + buffer_size`.
#
# The names must match the globals that `bump_alloc` reads (`:bump_alloc_ptr`
# and `:bump_alloc_end`) exactly. `maybe_set_global` ignores a global that
# cannot be found, so a misspelled name would silently leave the allocator
# uninitialized.
function bump_alloc_init!(kernel, buffer_start, buffer_size)
    maybe_set_global(kernel, "bump_alloc_ptr", buffer_start)
    maybe_set_global(kernel, "bump_alloc_end", buffer_start + buffer_size)
end
229329
230330end
0 commit comments