Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions benchmarks/matrix_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,66 @@ function benchmark_sparse_dense_add!(
return nothing
end

"""
benchmark_sparse_sparse_add!(SUITE, array_constructor, array_type_name; N=10000, T=Float64)

Benchmark sparse + sparse matrix addition for CSC, CSR, and COO formats.

# Arguments
- `SUITE`: The BenchmarkGroup to add benchmarks to
- `array_constructor`: Function to construct arrays (e.g., `Array`, `JLArray`)
- `array_type_name`: String name for the array type (for display)

# Keyword Arguments
- `N`: Size of the matrix (default: 10000)
- `T`: Element type (default: Float64)
"""
function benchmark_sparse_sparse_add!(
SUITE,
array_constructor,
array_type_name;
N = 10000,
T = Float64,
)
# Create two sparse matrices with 1% density
sm_a_csc_std = sprand(T, N, N, 0.01)
sm_b_csc_std = sprand(T, N, N, 0.01)

# Convert to different formats
sm_a_csc = DeviceSparseMatrixCSC(sm_a_csc_std)
sm_b_csc = DeviceSparseMatrixCSC(sm_b_csc_std)
sm_a_csr = DeviceSparseMatrixCSR(sm_a_csc_std)
sm_b_csr = DeviceSparseMatrixCSR(sm_b_csc_std)
sm_a_coo = DeviceSparseMatrixCOO(sm_a_csc_std)
sm_b_coo = DeviceSparseMatrixCOO(sm_b_csc_std)

# Adapt to device
dsm_a_csc = adapt(array_constructor, sm_a_csc)
dsm_b_csc = adapt(array_constructor, sm_b_csc)
dsm_a_csr = adapt(array_constructor, sm_a_csr)
dsm_b_csr = adapt(array_constructor, sm_b_csr)
dsm_a_coo = adapt(array_constructor, sm_a_coo)
dsm_b_coo = adapt(array_constructor, sm_b_coo)

# Level 3: Format (CSC, CSR, COO - will be plotted together)
SUITE["Sparse + Sparse Addition"][array_type_name]["CSC"] = @benchmarkable begin
$dsm_a_csc + $dsm_b_csc
_synchronize_backend($dsm_a_csc)
end

SUITE["Sparse + Sparse Addition"][array_type_name]["CSR"] = @benchmarkable begin
$dsm_a_csr + $dsm_b_csr
_synchronize_backend($dsm_a_csr)
end

SUITE["Sparse + Sparse Addition"][array_type_name]["COO"] = @benchmarkable begin
$dsm_a_coo + $dsm_b_coo
_synchronize_backend($dsm_a_coo)
end

return nothing
end

"""
benchmark_kron!(SUITE, array_constructor, array_type_name; N=100, T=Float64)

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/runbenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ benchmark_matrix_vector_mul!(SUITE, Array, "Array")
benchmark_matrix_matrix_mul!(SUITE, Array, "Array")
benchmark_three_arg_dot!(SUITE, Array, "Array")
benchmark_sparse_dense_add!(SUITE, Array, "Array")
benchmark_sparse_sparse_add!(SUITE, Array, "Array")
benchmark_kron!(SUITE, Array, "Array")
benchmark_conversions!(SUITE, Array, "Array")

Expand All @@ -37,6 +38,7 @@ benchmark_matrix_vector_mul!(SUITE, JLArray, "JLArray")
benchmark_matrix_matrix_mul!(SUITE, JLArray, "JLArray")
benchmark_three_arg_dot!(SUITE, JLArray, "JLArray")
benchmark_sparse_dense_add!(SUITE, JLArray, "JLArray")
benchmark_sparse_sparse_add!(SUITE, JLArray, "JLArray")
benchmark_kron!(SUITE, JLArray, "JLArray")
benchmark_conversions!(SUITE, JLArray, "JLArray")

Expand Down
104 changes: 104 additions & 0 deletions src/matrix_coo/matrix_coo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,110 @@ function _add_sparse_to_dense!(C::DenseMatrix, A::DeviceSparseMatrixCOO)
return C
end

"""
+(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)

Add two sparse matrices in COO format. Both matrices must have the same dimensions
and be on the same backend (device).

The result is a COO matrix with entries from both A and B properly merged,
with duplicate entries (same row and column) combined by summing their values.

# Examples
```jldoctest
julia> using DeviceSparseArrays, SparseArrays

julia> A = DeviceSparseMatrixCOO(sparse([1, 2], [1, 2], [1.0, 2.0], 2, 2));

julia> B = DeviceSparseMatrixCOO(sparse([1, 2], [2, 1], [3.0, 4.0], 2, 2));

julia> C = A + B;

julia> collect(C)
2×2 Matrix{Float64}:
1.0 3.0
4.0 2.0
```
"""
function Base.:+(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)
size(A) == size(B) || throw(
DimensionMismatch(
"dimensions must match: A has dims $(size(A)), B has dims $(size(B))",
),
)

backend_A = get_backend(A)
backend_B = get_backend(B)
backend_A == backend_B ||
throw(ArgumentError("Both matrices must have the same backend"))

m, n = size(A)
Ti = eltype(getrowind(A))
Tv = promote_type(eltype(nonzeros(A)), eltype(nonzeros(B)))

# Concatenate the coordinate arrays
nnz_A = nnz(A)
nnz_B = nnz(B)
nnz_concat = nnz_A + nnz_B

# Allocate concatenated arrays
rowind_concat = similar(getrowind(A), nnz_concat)
colind_concat = similar(getcolind(A), nnz_concat)
nzval_concat = similar(nonzeros(A), Tv, nnz_concat)

# Copy entries from A and B
rowind_concat[1:nnz_A] .= getrowind(A)
colind_concat[1:nnz_A] .= getcolind(A)
nzval_concat[1:nnz_A] .= nonzeros(A)
rowind_concat[(nnz_A+1):end] .= getrowind(B)
colind_concat[(nnz_A+1):end] .= getcolind(B)
nzval_concat[(nnz_A+1):end] .= nonzeros(B)

# Sort by (row, col) using keys similar to COO->CSC conversion
backend = backend_A
keys = similar(rowind_concat, Ti, nnz_concat)
kernel_make_keys! = kernel_make_csc_keys!(backend)
kernel_make_keys!(keys, rowind_concat, colind_concat, m; ndrange = (nnz_concat,))

# Sort using AcceleratedKernels
perm = _sortperm_AK(keys)

# Apply permutation to get sorted arrays
rowind_sorted = rowind_concat[perm]
colind_sorted = colind_concat[perm]
nzval_sorted = nzval_concat[perm]

# Mark unique entries (first occurrence of each (row, col) pair)
keep_mask = similar(rowind_sorted, Bool, nnz_concat)
kernel_mark! = kernel_mark_unique_coo!(backend)
kernel_mark!(keep_mask, rowind_sorted, colind_sorted, nnz_concat; ndrange = (nnz_concat,))

# Compute write indices using cumsum
write_indices = _cumsum_AK(keep_mask)
nnz_final = allowed_getindex(write_indices, nnz_concat)

# Allocate final arrays
rowind_C = similar(getrowind(A), nnz_final)
colind_C = similar(getcolind(A), nnz_final)
nzval_C = similar(nonzeros(A), Tv, nnz_final)

# Compact: merge duplicates by summing
kernel_compact! = kernel_compact_coo!(backend)
kernel_compact!(
rowind_C,
colind_C,
nzval_C,
rowind_sorted,
colind_sorted,
nzval_sorted,
write_indices,
nnz_concat;
ndrange = (nnz_concat,),
)

return DeviceSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
end

"""
kron(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)

Expand Down
52 changes: 52 additions & 0 deletions src/matrix_coo/matrix_coo_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,55 @@ end
nzval_C[idx] = val_A * val_B
end
end

# Kernel for marking duplicate entries in sorted COO format
# Returns a mask where mask[i] = true if entry i should be kept (first occurrence or sum)
@kernel inbounds=true function kernel_mark_unique_coo!(
keep_mask,
@Const(rowind),
@Const(colind),
@Const(nnz_total),
)
i = @index(Global)

if i == 1
# Always keep the first entry
keep_mask[i] = true
elseif i <= nnz_total
# Keep if different from previous entry
keep_mask[i] = (rowind[i] != rowind[i-1] || colind[i] != colind[i-1])
end
end

# Kernel for compacting COO by summing duplicate entries
@kernel inbounds=true function kernel_compact_coo!(
rowind_out,
colind_out,
nzval_out,
@Const(rowind_in),
@Const(colind_in),
@Const(nzval_in),
@Const(write_indices),
@Const(nnz_in),
)
i = @index(Global)

if i <= nnz_in
out_idx = write_indices[i]

# If this is a new entry (or first of duplicates), write it
if i == 1 || (rowind_in[i] != rowind_in[i-1] || colind_in[i] != colind_in[i-1])
rowind_out[out_idx] = rowind_in[i]
colind_out[out_idx] = colind_in[i]

# Sum all duplicates
val_sum = nzval_in[i]
j = i + 1
while j <= nnz_in && rowind_in[j] == rowind_in[i] && colind_in[j] == colind_in[i]
val_sum += nzval_in[j]
j += 1
end
nzval_out[out_idx] = val_sum
end
end
end
86 changes: 86 additions & 0 deletions src/matrix_csc/matrix_csc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,92 @@ function _add_sparse_to_dense!(C::DenseMatrix, A::DeviceSparseMatrixCSC)
return C
end

"""
+(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)

Add two sparse matrices in CSC format. Both matrices must have the same dimensions
and be on the same backend (device).

# Examples
```jldoctest
julia> using DeviceSparseArrays, SparseArrays

julia> A = DeviceSparseMatrixCSC(sparse([1, 2], [1, 2], [1.0, 2.0], 2, 2));

julia> B = DeviceSparseMatrixCSC(sparse([1, 2], [2, 1], [3.0, 4.0], 2, 2));

julia> C = A + B;

julia> collect(C)
2×2 Matrix{Float64}:
1.0 3.0
4.0 2.0
```
"""
function Base.:+(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)
size(A) == size(B) || throw(
DimensionMismatch(
"dimensions must match: A has dims $(size(A)), B has dims $(size(B))",
),
)

backend_A = get_backend(A)
backend_B = get_backend(B)
backend_A == backend_B ||
throw(ArgumentError("Both matrices must have the same backend"))

m, n = size(A)
Ti = eltype(getcolptr(A))
Tv = promote_type(eltype(nonzeros(A)), eltype(nonzeros(B)))

# Count non-zeros per column
nnz_per_col = similar(getcolptr(A), n)
fill!(nnz_per_col, zero(Ti))

backend = backend_A
kernel_count! = kernel_count_nnz_per_col_csc!(backend)
kernel_count!(
nnz_per_col,
getcolptr(A),
getrowval(A),
getcolptr(B),
getrowval(B);
ndrange = (n,),
)

# Build colptr for result matrix using cumsum
# colptr_C[i+1] = 1 + sum(nnz_per_col[1:i])
cumsum_nnz = _cumsum_AK(nnz_per_col)
colptr_C = similar(getcolptr(A), n + 1)
# Set colptr_C[2:end] to cumsum + 1
colptr_C[2:end] .= cumsum_nnz
colptr_C[2:end] .+= one(Ti)
# Set colptr_C[1] to 1 using broadcasting
colptr_C[1:1] .= one(Ti)

# Allocate result arrays
nnz_total = allowed_getindex(colptr_C, n + 1) - one(Ti)
rowval_C = similar(getrowval(A), nnz_total)
nzval_C = similar(nonzeros(A), Tv, nnz_total)

# Merge the two matrices
kernel_merge! = kernel_merge_csc!(backend)
kernel_merge!(
rowval_C,
nzval_C,
colptr_C,
getcolptr(A),
getrowval(A),
nonzeros(A),
getcolptr(B),
getrowval(B),
nonzeros(B);
ndrange = (n,),
)

return DeviceSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
end

"""
kron(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)

Expand Down
Loading
Loading