Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions benchmarks/matrix_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,66 @@ function benchmark_sparse_dense_add!(
return nothing
end

"""
benchmark_sparse_sparse_add!(SUITE, array_constructor, array_type_name; N=10000, T=Float64)

Benchmark sparse + sparse matrix addition for CSC, CSR, and COO formats.

# Arguments
- `SUITE`: The BenchmarkGroup to add benchmarks to
- `array_constructor`: Function to construct arrays (e.g., `Array`, `JLArray`)
- `array_type_name`: String name for the array type (for display)

# Keyword Arguments
- `N`: Size of the matrix (default: 10000)
- `T`: Element type (default: Float64)
"""
function benchmark_sparse_sparse_add!(
SUITE,
array_constructor,
array_type_name;
N = 10000,
T = Float64,
)
# Create two sparse matrices with 1% density
sm_a_csc_std = sprand(T, N, N, 0.01)
sm_b_csc_std = sprand(T, N, N, 0.01)

# Convert to different formats
sm_a_csc = DeviceSparseMatrixCSC(sm_a_csc_std)
sm_b_csc = DeviceSparseMatrixCSC(sm_b_csc_std)
sm_a_csr = DeviceSparseMatrixCSR(sm_a_csc_std)
sm_b_csr = DeviceSparseMatrixCSR(sm_b_csc_std)
sm_a_coo = DeviceSparseMatrixCOO(sm_a_csc_std)
sm_b_coo = DeviceSparseMatrixCOO(sm_b_csc_std)

# Adapt to device
dsm_a_csc = adapt(array_constructor, sm_a_csc)
dsm_b_csc = adapt(array_constructor, sm_b_csc)
dsm_a_csr = adapt(array_constructor, sm_a_csr)
dsm_b_csr = adapt(array_constructor, sm_b_csr)
dsm_a_coo = adapt(array_constructor, sm_a_coo)
dsm_b_coo = adapt(array_constructor, sm_b_coo)

# Level 3: Format (CSC, CSR, COO - will be plotted together)
SUITE["Sparse + Sparse Addition"][array_type_name]["CSC"] = @benchmarkable begin
$dsm_a_csc + $dsm_b_csc
_synchronize_backend($dsm_a_csc)
end

SUITE["Sparse + Sparse Addition"][array_type_name]["CSR"] = @benchmarkable begin
$dsm_a_csr + $dsm_b_csr
_synchronize_backend($dsm_a_csr)
end

SUITE["Sparse + Sparse Addition"][array_type_name]["COO"] = @benchmarkable begin
$dsm_a_coo + $dsm_b_coo
_synchronize_backend($dsm_a_coo)
end

return nothing
end

"""
benchmark_kron!(SUITE, array_constructor, array_type_name; N=100, T=Float64)

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/runbenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ benchmark_matrix_vector_mul!(SUITE, Array, "Array")
benchmark_matrix_matrix_mul!(SUITE, Array, "Array")
benchmark_three_arg_dot!(SUITE, Array, "Array")
benchmark_sparse_dense_add!(SUITE, Array, "Array")
benchmark_sparse_sparse_add!(SUITE, Array, "Array")
benchmark_kron!(SUITE, Array, "Array")
benchmark_conversions!(SUITE, Array, "Array")

Expand All @@ -37,6 +38,7 @@ benchmark_matrix_vector_mul!(SUITE, JLArray, "JLArray")
benchmark_matrix_matrix_mul!(SUITE, JLArray, "JLArray")
benchmark_three_arg_dot!(SUITE, JLArray, "JLArray")
benchmark_sparse_dense_add!(SUITE, JLArray, "JLArray")
benchmark_sparse_sparse_add!(SUITE, JLArray, "JLArray")
benchmark_kron!(SUITE, JLArray, "JLArray")
benchmark_conversions!(SUITE, JLArray, "JLArray")

Expand Down
63 changes: 63 additions & 0 deletions src/matrix_coo/matrix_coo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,69 @@ function _add_sparse_to_dense!(C::DenseMatrix, A::DeviceSparseMatrixCOO)
return C
end

"""
+(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)

Add two sparse matrices in COO format. Both matrices must have the same dimensions
and be on the same backend (device).

The result is a COO matrix with entries from both A and B concatenated. Note that
duplicate entries (same row and column) are not combined, which is valid for COO format.

# Examples
```jldoctest
julia> using DeviceSparseArrays, SparseArrays

julia> A = DeviceSparseMatrixCOO(sparse([1, 2], [1, 2], [1.0, 2.0], 2, 2));

julia> B = DeviceSparseMatrixCOO(sparse([1, 2], [2, 1], [3.0, 4.0], 2, 2));

julia> C = A + B;

julia> collect(C)
2×2 Matrix{Float64}:
1.0 3.0
4.0 2.0
```
"""
function Base.:+(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)
size(A) == size(B) || throw(
DimensionMismatch(
"dimensions must match: A has dims $(size(A)), B has dims $(size(B))",
),
)

backend_A = get_backend(A)
backend_B = get_backend(B)
backend_A == backend_B ||
throw(ArgumentError("Both matrices must have the same backend"))

m, n = size(A)
Tv = promote_type(eltype(nonzeros(A)), eltype(nonzeros(B)))

# Concatenate the coordinate arrays
nnz_A = nnz(A)
nnz_B = nnz(B)
nnz_total = nnz_A + nnz_B

# Allocate result arrays
rowind_C = similar(getrowind(A), nnz_total)
colind_C = similar(getcolind(A), nnz_total)
nzval_C = similar(nonzeros(A), Tv, nnz_total)

# Copy entries from A
rowind_C[1:nnz_A] .= getrowind(A)
colind_C[1:nnz_A] .= getcolind(A)
nzval_C[1:nnz_A] .= nonzeros(A)

# Copy entries from B
rowind_C[(nnz_A+1):end] .= getrowind(B)
colind_C[(nnz_A+1):end] .= getcolind(B)
nzval_C[(nnz_A+1):end] .= nonzeros(B)

return DeviceSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
Comment on lines 373 to 432
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot Here you are just concatenating the nonzero values, but they can also be repeated and one should merge them instead. Could you take into account also that?

Check that you merge also the other formats.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit 778e354. COO addition now properly merges duplicate entries by:

  1. Concatenating and sorting by (row, col)
  2. Marking unique entries
  3. Compacting with duplicate merging using kernel_compact_coo!

All three formats (CSC, CSR, COO) now correctly merge duplicates. Verified with tests.

end

"""
kron(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)

Expand Down
86 changes: 86 additions & 0 deletions src/matrix_csc/matrix_csc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,92 @@ function _add_sparse_to_dense!(C::DenseMatrix, A::DeviceSparseMatrixCSC)
return C
end

"""
+(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)

Add two sparse matrices in CSC format. Both matrices must have the same dimensions
and be on the same backend (device).

# Examples
```jldoctest
julia> using DeviceSparseArrays, SparseArrays

julia> A = DeviceSparseMatrixCSC(sparse([1, 2], [1, 2], [1.0, 2.0], 2, 2));

julia> B = DeviceSparseMatrixCSC(sparse([1, 2], [2, 1], [3.0, 4.0], 2, 2));

julia> C = A + B;

julia> collect(C)
2×2 Matrix{Float64}:
1.0 3.0
4.0 2.0
```
"""
function Base.:+(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)
size(A) == size(B) || throw(
DimensionMismatch(
"dimensions must match: A has dims $(size(A)), B has dims $(size(B))",
),
)

backend_A = get_backend(A)
backend_B = get_backend(B)
backend_A == backend_B ||
throw(ArgumentError("Both matrices must have the same backend"))

m, n = size(A)
Ti = eltype(getcolptr(A))
Tv = promote_type(eltype(nonzeros(A)), eltype(nonzeros(B)))

# Count non-zeros per column
nnz_per_col = similar(getcolptr(A), n)
fill!(nnz_per_col, zero(Ti))

backend = backend_A
kernel_count! = kernel_count_nnz_per_col_csc!(backend)
kernel_count!(
nnz_per_col,
getcolptr(A),
getrowval(A),
getcolptr(B),
getrowval(B);
ndrange = (n,),
)

# Build colptr for result matrix using cumsum
# colptr_C[i+1] = 1 + sum(nnz_per_col[1:i])
cumsum_nnz = _cumsum_AK(nnz_per_col)
colptr_C = similar(getcolptr(A), n + 1)
# Set colptr_C[2:end] to cumsum + 1
colptr_C[2:end] .= cumsum_nnz
colptr_C[2:end] .+= one(Ti)
# Set colptr_C[1] to 1 using broadcasting
colptr_C[1:1] .= one(Ti)

# Allocate result arrays
nnz_total = allowed_getindex(colptr_C, n + 1) - one(Ti)
rowval_C = similar(getrowval(A), nnz_total)
nzval_C = similar(nonzeros(A), Tv, nnz_total)

# Merge the two matrices
kernel_merge! = kernel_merge_csc!(backend)
kernel_merge!(
rowval_C,
nzval_C,
colptr_C,
getcolptr(A),
getrowval(A),
nonzeros(A),
getcolptr(B),
getrowval(B),
nonzeros(B);
ndrange = (n,),
)

return DeviceSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
end

"""
kron(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)

Expand Down
99 changes: 99 additions & 0 deletions src/matrix_csc/matrix_csc_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,102 @@ end
C[rowval[j], col] += nzval[j]
end
end

# Kernel for counting non-zeros per column when adding two CSC matrices
@kernel inbounds=true function kernel_count_nnz_per_col_csc!(
nnz_per_col,
@Const(colptr_A),
@Const(rowval_A),
@Const(colptr_B),
@Const(rowval_B),
)
col = @index(Global)

i_A = colptr_A[col]
i_B = colptr_B[col]
end_A = colptr_A[col+1]
end_B = colptr_B[col+1]

count = 0
while i_A < end_A && i_B < end_B
row_A = rowval_A[i_A]
row_B = rowval_B[i_B]
if row_A < row_B
count += 1
i_A += 1
elseif row_A > row_B
count += 1
i_B += 1
else # row_A == row_B
count += 1
i_A += 1
i_B += 1
end
end

# Add remaining entries from A
count += (end_A - i_A)
# Add remaining entries from B
count += (end_B - i_B)

nnz_per_col[col] = count
end

# Kernel for merging two CSC matrices (addition)
@kernel inbounds=true function kernel_merge_csc!(
rowval_C,
nzval_C,
@Const(colptr_C),
@Const(colptr_A),
@Const(rowval_A),
@Const(nzval_A),
@Const(colptr_B),
@Const(rowval_B),
@Const(nzval_B),
)
col = @index(Global)

i_A = colptr_A[col]
i_B = colptr_B[col]
i_C = colptr_C[col]
end_A = colptr_A[col+1]
end_B = colptr_B[col+1]

while i_A < end_A && i_B < end_B
row_A = rowval_A[i_A]
row_B = rowval_B[i_B]
if row_A < row_B
rowval_C[i_C] = row_A
nzval_C[i_C] = nzval_A[i_A]
i_A += 1
i_C += 1
elseif row_A > row_B
rowval_C[i_C] = row_B
nzval_C[i_C] = nzval_B[i_B]
i_B += 1
i_C += 1
else # row_A == row_B
rowval_C[i_C] = row_A
nzval_C[i_C] = nzval_A[i_A] + nzval_B[i_B]
i_A += 1
i_B += 1
i_C += 1
end
end

# Copy remaining entries from A
while i_A < end_A
rowval_C[i_C] = rowval_A[i_A]
nzval_C[i_C] = nzval_A[i_A]
i_A += 1
i_C += 1
end

# Copy remaining entries from B
while i_B < end_B
rowval_C[i_C] = rowval_B[i_B]
nzval_C[i_C] = nzval_B[i_B]
i_B += 1
i_C += 1
end
end
Loading