Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/Test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ concurrency:

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - PoCL ${{ matrix.pocl }}
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }}
runs-on: ${{ matrix.os }}
timeout-minutes: 180
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
Expand All @@ -26,7 +26,9 @@ jobs:
os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-13, macOS-15, windows-2025]
arch: [x64, arm64]
pocl: [jll, local]
memory_backend: [usm, svm, buffer]
exclude:
# unsupported combinations
- os: ubuntu-24.04
arch: arm64
- os: windows-2025
Expand Down Expand Up @@ -125,11 +127,13 @@ jobs:
run(```$(cmake()) --build $builddir --parallel $(Sys.CPU_THREADS) --target install```)
end'

echo '[pocl_jll]' > test/LocalPreferences.toml
echo '[pocl_jll]' >> test/LocalPreferences.toml
echo 'libpocl_path="${{ github.workspace }}/target/lib/libpocl.so"' >> test/LocalPreferences.toml

- name: Setup OpenCL.jl
run: |
echo '[OpenCL]' >> test/LocalPreferences.toml
echo 'default_memory_backend="${{ matrix.memory_backend }}"' >> test/LocalPreferences.toml
julia --project -e '
using Pkg
Pkg.develop(path="lib/intrinsics")'
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
Manifest.toml
LocalPreferences.toml
7 changes: 7 additions & 0 deletions LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[OpenCL]
# Which memory back-end to use for unspecified CLArray allocations. This can be:
# - "buffer": plain buffers (using pointers if `cl_ext_buffer_device_address` is available)
# - "usm": Unified Shared Memory (requiring `cl_intel_unified_shared_memory`)
# - "svm": Shared Virtual Memory (requiring coarse-grained SVM support)
# If unspecified, a default back-end is selected based on the platform and device capabilities.
#default_memory_backend="..."
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Expand All @@ -26,6 +27,7 @@ KernelAbstractions = "0.9.2"
LLVM = "9.1"
LinearAlgebra = "1"
OpenCL_jll = "=2024.10.24"
Preferences = "1"
Printf = "1"
Random = "1"
Reexport = "1"
Expand Down
5 changes: 3 additions & 2 deletions lib/cl/CL.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module cl

import ..OpenCL
using Printf
using Preferences

include("pointer.jl")
include("api.jl")
Expand All @@ -18,8 +20,7 @@ include("device.jl")
include("context.jl")
include("cmdqueue.jl")
include("event.jl")
include("memory/memory.jl")
include("buffer.jl")
include("memory.jl")
include("program.jl")
include("kernel.jl")

Expand Down
7 changes: 3 additions & 4 deletions lib/cl/device.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,7 @@ function exec_capabilities(d::Device)
)
end

# Report whether `d` lists the `cl_intel_unified_shared_memory` extension
# among its `extensions`.
function usm_supported(d::Device)
    if "cl_intel_unified_shared_memory" in d.extensions
        return true
    end
    return false
end
"""
    usm_supported(d::Device)

Return whether `"cl_intel_unified_shared_memory"` is present in `d.extensions`.
"""
function usm_supported(d::Device)
    return in("cl_intel_unified_shared_memory", d.extensions)
end

function usm_capabilities(d::Device)
usm_supported(d) || throw(ArgumentError("Unified Shared Memory not supported on this device"))
Expand Down Expand Up @@ -256,6 +253,8 @@ function svm_capabilities(d::Device)
)
end

"""
    bda_supported(d::Device)

Return whether `"cl_ext_buffer_device_address"` is present in `d.extensions`.
"""
function bda_supported(d::Device)
    return in("cl_ext_buffer_device_address", d.extensions)
end

function cl_device_type(dtype::Symbol)
if dtype == :all
cl_dtype = CL_DEVICE_TYPE_ALL
Expand Down
10 changes: 9 additions & 1 deletion lib/cl/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::CLPtr{T}) where {T}
end

# raw memory
function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
function set_arg!(k::Kernel, idx::Integer, arg::AbstractPointerMemory)
# XXX: this assumes that the receiving argument is pointer-typed, which is not the case
# with Julia's `Ptr` ABI. Instead, one should reinterpret the pointer as a
# `Core.LLVMPtr`, which _is_ pointer-valued. We retain this handling for `Ptr` for
Expand All @@ -79,6 +79,8 @@ function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
clSetKernelArgSVMPointer(k, idx - 1, pointer(arg))
elseif arg isa UnifiedMemory
clSetKernelArgMemPointerINTEL(k, idx - 1, pointer(arg))
elseif arg isa Buffer
clSetKernelArgDevicePointerEXT(k, idx - 1, pointer(arg))
else
error("Unknown memory type")
end
Expand Down Expand Up @@ -191,6 +193,7 @@ function call(
if !isempty(indirect_memory)
svm_pointers = CLPtr{Cvoid}[]
usm_pointers = CLPtr{Cvoid}[]
bda_pointers = CLPtr{Cvoid}[]
device_access = host_access = shared_access = false
for memory in indirect_memory
ptr = pointer(memory)
Expand All @@ -200,6 +203,8 @@ function call(

if memory isa SharedVirtualMemory
push!(svm_pointers, ptr)
elseif memory isa Buffer
push!(bda_pointers, ptr)
elseif memory isa UnifiedDeviceMemory
device_access = true
push!(usm_pointers, ptr)
Expand Down Expand Up @@ -229,6 +234,9 @@ function call(
if !isempty(svm_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(svm_pointers), svm_pointers)
end
if !isempty(bda_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, sizeof(bda_pointers), bda_pointers)
end
if !isempty(usm_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers)
end
Expand Down
Loading