Skip to content

DArray: Tile QR, UndefInitializer, and parallel trapezoidal wrappers #529

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 42 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
76727ed
DArray: Trapezoidal and Triangular wrappers
fda-tome Jun 5, 2024
8d2908a
DArray: UndefInitializer
fda-tome Jun 5, 2024
b41b2b6
DArray: slicing bug fix
fda-tome Jun 5, 2024
400286c
DArray: Tile QR Implementation
fda-tome Jun 6, 2024
99c7ba6
Rebasing commit, solving conflicts
fda-tome Jun 6, 2024
185e611
DArray: UndefInitializer
fda-tome Jun 5, 2024
afed356
DArray: slicing bug fix
fda-tome Jun 5, 2024
d05fcf4
DArray: Tile QR Implementation
fda-tome Jun 6, 2024
2022892
DArray: disabling faulty views, fixing undefinit Dims arguments
fda-tome Jun 6, 2024
b7d4f6c
Merge branch 'master' of ssh://github.com/JuliaParallel/Dagger.jl int…
fda-tome Jun 6, 2024
e101a61
Fetch and rebase commit
fda-tome Jun 6, 2024
36f6785
DArray: project.toml changes
fda-tome Jun 6, 2024
686b37a
DArray: coreblas changes and inclusion of libblastrampoline
fda-tome Jun 10, 2024
6995c56
Merge branch 'master' of ssh://github.com/JuliaParallel/Dagger.jl int…
fda-tome Jun 11, 2024
e20fab5
DArray: adding aliasing support
fda-tome Jun 11, 2024
54cfefa
DArray: adding aliasing support to CAQR
fda-tome Jun 11, 2024
1fa2ea4
Allow for workers dying in the middle of cleanup
JamesWrigley Apr 8, 2024
4d123ac
Allow for dead workers in safepoint()
JamesWrigley Apr 9, 2024
684d80c
Merge pull request #532 from JuliaParallel/dead-workers
jpsamaroo Jun 17, 2024
dbfe428
parser: Fix expression escaping
jpsamaroo Jun 18, 2024
8d29bd8
tests: Instantiate before loading packages
jpsamaroo Jun 18, 2024
515e731
parser: Support do-blocks
jpsamaroo Jun 18, 2024
705266a
parser: Support direct anonymous function calls
jpsamaroo Jun 18, 2024
86f2f5a
parser: Support getindex
jpsamaroo Jun 18, 2024
b2fd2ab
Merge pull request #533 from JuliaParallel/jps/parser-fixes
jpsamaroo Jun 19, 2024
b68ad4a
aliasing: Add optimized will_alias for views
Rabab53 Jun 20, 2024
ee6b0ce
Merge pull request #534 from JuliaParallel/jps/views-aliasing-opt
jpsamaroo Jun 21, 2024
9443ffb
DArray: Make allocations dispatchable
jpsamaroo May 30, 2024
c822206
DArray: Small matmul bugfix
jpsamaroo May 30, 2024
3ee4091
Merge pull request #535 from JuliaParallel/jps/datadeps-gpu
jpsamaroo Jun 22, 2024
c53fd0d
Rebasing commit, solving conflicts
fda-tome Jun 6, 2024
5d4a893
DArray: UndefInitializer
fda-tome Jun 5, 2024
5341414
DArray: slicing bug fix
fda-tome Jun 5, 2024
fb45ed4
DArray: Tile QR Implementation
fda-tome Jun 6, 2024
48de8fc
DArray: disabling faulty views, fixing undefinit Dims arguments
fda-tome Jun 6, 2024
10c3428
DArray: Trapezoidal and Triangular wrappers
fda-tome Jun 5, 2024
407fe83
DArray: project.toml changes
fda-tome Jun 6, 2024
9c52ef8
DArray: coreblas changes and inclusion of libblastrampoline
fda-tome Jun 10, 2024
016fae3
DArray: adding aliasing support
fda-tome Jun 11, 2024
be5c810
DArray: adding aliasing support to CAQR
fda-tome Jun 11, 2024
5c3d7d8
DArray: rebasing to add aliasing support
fda-tome Jun 22, 2024
2b74fbe
DArray: adequating undefinit to the new AllocateArray
fda-tome Jun 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ uuid = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
version = "0.18.11"

[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
Expand All @@ -23,6 +24,19 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TaskLocalValues = "ed4db957-447d-4319-bfb6-7fa9ae7ecf34"
TimespanLogging = "a526e669-04d3-4846-9525-c66122c55f63"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
libcoreblas_jll = "339d4f0c-89b5-5ae2-b52c-218a0e582e15"

[weakdeps]
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"

[extensions]
GraphVizExt = "GraphViz"
GraphVizSimpleExt = "Colors"
PlotsExt = ["DataFrames", "Plots"]

[compat]
Colors = "0.12"
Expand All @@ -43,19 +57,8 @@ TaskLocalValues = "0.1"
TimespanLogging = "0.1"
julia = "1.8"

[extensions]
GraphVizExt = "GraphViz"
GraphVizSimpleExt = "Colors"
PlotsExt = ["DataFrames", "Plots"]

[extras]
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"

[weakdeps]
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
9 changes: 7 additions & 2 deletions src/Dagger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ import SparseArrays: sprand, SparseMatrixCSC
import MemPool
import MemPool: DRef, FileRef, poolget, poolset

import Base: collect, reduce
import Base: collect, reduce, require_one_based_indexing
import Distributed
import Distributed: Future, RemoteChannel, myid, workers, nworkers, procs, remotecall, remotecall_wait, remotecall_fetch

import LinearAlgebra
import LinearAlgebra: Adjoint, BLAS, Diagonal, Bidiagonal, Tridiagonal, LAPACK, LowerTriangular, PosDefException, Transpose, UpperTriangular, UnitLowerTriangular, UnitUpperTriangular, diagind, ishermitian, issymmetric
import LinearAlgebra: Adjoint, BLAS, Diagonal, Bidiagonal, Tridiagonal, LAPACK, LowerTriangular, PosDefException, Transpose, UpperTriangular, UnitLowerTriangular, UnitUpperTriangular, diagind, ishermitian, issymmetric, chkstride1

import UUIDs: UUID, uuid4

Expand Down Expand Up @@ -77,9 +77,14 @@ include("array/setindex.jl")
include("array/matrix.jl")
include("array/sparse_partition.jl")
include("array/sort.jl")

# Linear algebra
include("array/linalg.jl")
include("array/mul.jl")
include("array/trapezoidal.jl")
include("array/triangular.jl")
include("array/cholesky.jl")
include("array/qr.jl")

# Visualization
include("visualization.jl")
Expand Down
45 changes: 37 additions & 8 deletions src/array/alloc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ export partition

mutable struct AllocateArray{T,N} <: ArrayOp{T,N}
eltype::Type{T}
f::Function
f
want_index::Bool
domain::ArrayDomain{N}
domainchunks
partitioning::AbstractBlocks
Expand All @@ -23,17 +24,45 @@ function partition(p::AbstractBlocks, dom::ArrayDomain)
map(_cumlength, map(length, indexes(dom)), p.blocksize))
end

# Run an allocation function `f` on the worker executing this thunk, first
# giving the current processor a chance to substitute a specialized allocator
# via `allocate_array_func` (processor backends may override it).
# This variant forwards the chunk index `idx` to the allocator.
function allocate_array(f, T, idx, sz)
    new_f = allocate_array_func(thunk_processor(), f)
    return new_f(idx, T, sz)
end
# Index-free variant: the allocator only receives the element type and size.
function allocate_array(f, T, sz)
    new_f = allocate_array_func(thunk_processor(), f)
    return new_f(T, sz)
end
# Default fallback: use the caller-provided allocation function unchanged.
allocate_array_func(::Processor, f) = f
function stage(ctx, a::AllocateArray)
alloc(idx, sz) = a.f(idx, a.eltype, sz)
thunks = [Dagger.@spawn alloc(i, size(x)) for (i, x) in enumerate(a.domainchunks)]
if a.want_index
thunks = [Dagger.@spawn allocate_array(a.f, a.eltype, i, size(x)) for (i, x) in enumerate(a.domainchunks)]
else
thunks = [Dagger.@spawn allocate_array(a.f, a.eltype, size(x)) for (i, x) in enumerate(a.domainchunks)]
end
return DArray(a.eltype, a.domain, a.domainchunks, thunks, a.partitioning)
end

const BlocksOrAuto = Union{Blocks{N} where N, AutoBlocks}

# Construct an uninitialized DArray{T} of size `dims`, partitioned into
# chunks according to the block layout `p`.
function DArray{T}(::UndefInitializer, p::Blocks, dims::Dims) where {T}
    d = ArrayDomain(map(x->1:x, dims))
    part = partition(p, d)
    # Per-chunk allocator run on each worker. NOTE(review): the closure
    # parameter `T` shadows the type parameter `T`; it is called with the
    # stored eltype, so both hold the same type — consider renaming.
    f = function (T, sz)
        Array{T, length(sz)}(undef, sz...)
    end
    # `false`: the allocator does not want the chunk index argument.
    a = AllocateArray(T, f, false, d, part, p)
    return _to_darray(a)
end

# Convenience constructors: default the eltype to Float64, collect varargs
# dims into a Dims tuple, and resolve AutoBlocks via auto_blocks(dims).
DArray(::UndefInitializer, p::BlocksOrAuto, dims::Integer...) = DArray{Float64}(undef, p, dims)
DArray(::UndefInitializer, p::BlocksOrAuto, dims::Dims) = DArray{Float64}(undef, p, dims)
DArray{T}(::UndefInitializer, p::BlocksOrAuto, dims::Integer...) where {T} = DArray{T}(undef, p, dims)
DArray{T}(::UndefInitializer, p::BlocksOrAuto, dims::Dims) where {T} = DArray{T}(undef, p, dims)
DArray{T}(::UndefInitializer, p::AutoBlocks, dims::Dims) where {T} = DArray{T}(undef, auto_blocks(dims), dims)

function Base.rand(p::Blocks, eltype::Type, dims::Dims)
d = ArrayDomain(map(x->1:x, dims))
a = AllocateArray(eltype, (_, x...) -> rand(x...), d, partition(p, d), p)
a = AllocateArray(eltype, rand, false, d, partition(p, d), p)
return _to_darray(a)
end
Base.rand(p::BlocksOrAuto, T::Type, dims::Integer...) = rand(p, T, dims)
Expand All @@ -45,7 +74,7 @@ Base.rand(::AutoBlocks, eltype::Type, dims::Dims) =

function Base.randn(p::Blocks, eltype::Type, dims::Dims)
d = ArrayDomain(map(x->1:x, dims))
a = AllocateArray(eltype, (_, x...) -> randn(x...), d, partition(p, d), p)
a = AllocateArray(eltype, randn, false, d, partition(p, d), p)
return _to_darray(a)
end
Base.randn(p::BlocksOrAuto, T::Type, dims::Integer...) = randn(p, T, dims)
Expand All @@ -57,7 +86,7 @@ Base.randn(::AutoBlocks, eltype::Type, dims::Dims) =

function sprand(p::Blocks, eltype::Type, dims::Dims, sparsity::AbstractFloat)
d = ArrayDomain(map(x->1:x, dims))
a = AllocateArray(eltype, (_, T, _dims) -> sprand(T, _dims..., sparsity), d, partition(p, d), p)
a = AllocateArray(eltype, (T, _dims) -> sprand(T, _dims..., sparsity), false, d, partition(p, d), p)
return _to_darray(a)
end
sprand(p::BlocksOrAuto, T::Type, dims_and_sparsity::Real...) =
Expand All @@ -73,7 +102,7 @@ sprand(::AutoBlocks, eltype::Type, dims::Dims, sparsity::AbstractFloat) =

function Base.ones(p::Blocks, eltype::Type, dims::Dims)
d = ArrayDomain(map(x->1:x, dims))
a = AllocateArray(eltype, (_, x...) -> ones(x...), d, partition(p, d), p)
a = AllocateArray(eltype, ones, false, d, partition(p, d), p)
return _to_darray(a)
end
Base.ones(p::BlocksOrAuto, T::Type, dims::Integer...) = ones(p, T, dims)
Expand All @@ -85,7 +114,7 @@ Base.ones(::AutoBlocks, eltype::Type, dims::Dims) =

function Base.zeros(p::Blocks, eltype::Type, dims::Dims)
d = ArrayDomain(map(x->1:x, dims))
a = AllocateArray(eltype, (_, x...) -> zeros(x...), d, partition(p, d), p)
a = AllocateArray(eltype, zeros, false, d, partition(p, d), p)
return _to_darray(a)
end
Base.zeros(p::BlocksOrAuto, T::Type, dims::Integer...) = zeros(p, T, dims)
Expand Down
21 changes: 21 additions & 0 deletions src/array/coreblas/coreblas_gemm.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using libblastrampoline_jll
using LinearAlgebra
using libcoreblas_jll

# Generate a typed coreblas_gemm! method for each supported element type by
# binding the matching coreblas symbol (d/s/c/z variant) at load time.
for (gemm, T) in
    ((:coreblas_dgemm, Float64),
     (:coreblas_sgemm, Float32),
     (:coreblas_cgemm, ComplexF32),
     (:coreblas_zgemm, ComplexF64))
    @eval begin
        # Compute C = alpha*op(A)*op(B) + beta*C in place via the coreblas
        # gemm kernel. `transa`/`transb` are the coreblas numeric transpose
        # codes, passed straight through to the C routine.
        # NOTE(review): the dimensions and leading dimensions are taken as
        # m,k = size(A) and k,n = size(B), which assumes contiguous,
        # column-major, non-transposed tiles — confirm callers only pass the
        # no-transpose code.
        function coreblas_gemm!(transa::Int64, transb::Int64,
                                alpha::$T, A::AbstractMatrix{$T}, B::AbstractMatrix{$T}, beta::$T, C::AbstractMatrix{$T})
            m, k = size(A)
            k, n = size(B)
            # Bug fix: use `$(QuoteNode(gemm))` (a Symbol literal) like the
            # other coreblas wrappers — bare `$gemm` splices an identifier,
            # which is not a valid constant (name, lib) ccall target — and
            # use the portable `:libcoreblas` library name instead of the
            # platform-specific "libcoreblas.so".
            ccall(($(QuoteNode(gemm)), :libcoreblas), Cvoid,
                  (Int64, Int64, Int64, Int64, Int64, $T, Ptr{$T}, Int64, Ptr{$T}, Int64,
                   $T, Ptr{$T}, Int64),
                  transa, transb, m, n, k, alpha, A, m, B, k, beta, C, m)
            # Return the mutated output matrix, following the convention of
            # in-place linear algebra routines.
            return C
        end
    end
end
32 changes: 32 additions & 0 deletions src/array/coreblas/coreblas_geqrt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Generate a typed coreblas_geqrt! method for each supported element type by
# binding the matching coreblas symbol (d/s/c/z variant) at load time.
for (geqrt, T) in
    ((:coreblas_dgeqrt, Float64),
     (:coreblas_sgeqrt, Float32),
     (:coreblas_cgeqrt, ComplexF32),
     (:coreblas_zgeqrt, ComplexF64))
    @eval begin
        # Tile QR factorization of the m×n tile `A`, in place, via the
        # coreblas geqrt kernel. `Tau` is an ib×nb workspace/output for the
        # block reflector factors, where `ib` is the inner block size.
        function coreblas_geqrt!(A::AbstractMatrix{$T},
                Tau::AbstractMatrix{$T})
            # The C kernel assumes 1-based, unit-stride-in-dim-1 storage.
            require_one_based_indexing(A, Tau)
            chkstride1(A)
            m, n = size(A)
            ib, nb = size(Tau)
            # Leading dimensions from the column stride (supports views with
            # contiguous columns).
            lda = max(1, stride(A,2))
            ldt = max(1, stride(Tau,2))
            # Scratch buffers required by the C routine.
            work = Vector{$T}(undef, (ib)*n)
            ttau = Vector{$T}(undef, n)

            # Returns a nonzero coreblas error code on failure.
            err = ccall(($(QuoteNode(geqrt)), :libcoreblas), Int64,
                   (Int64, Int64, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64,
                    Ptr{$T}, Ptr{$T}),
                    m, n, ib,
                    A, lda,
                    Tau, ldt,
                    ttau, work)
            if err != 0
                throw(ArgumentError("coreblas_geqrt failed. Error number: $err"))
            end
        end
    end
end

45 changes: 45 additions & 0 deletions src/array/coreblas/coreblas_ormqr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Generate a typed coreblas_ormqr! method for each supported element type:
# ormqr for the real variants, unmqr for the complex variants.
for (geormqr, T) in
    ((:coreblas_dormqr, Float64),
     (:coreblas_sormqr, Float32),
     (:coreblas_zunmqr, ComplexF64),
     (:coreblas_cunmqr, ComplexF32))
    @eval begin
        # Apply the orthogonal/unitary factor Q (implicitly stored in
        # `A`/`Tau` by a previous geqrt) to the m×n matrix `C` in place,
        # from the left ('L') or right ('R'), optionally transposed.
        # NOTE(review): unlike coreblas_geqrt!, there is no
        # require_one_based_indexing/chkstride1 validation here — confirm
        # callers always pass 1-based, column-contiguous tiles.
        function coreblas_ormqr!(side::Char, trans::Char, A::AbstractMatrix{$T},
                Tau::AbstractMatrix{$T}, C::AbstractMatrix{$T})

            m, n = size(C)
            ib, nb = size(Tau)
            k = nb
            # coreblas enum codes: 111 = NoTrans, 112 = Trans,
            # 113 = ConjTrans (complex types use ConjTrans).
            if $T <: Complex
                transnum = trans == 'N' ? 111 : 113
            else
                transnum = trans == 'N' ? 111 : 112
            end
            # coreblas enum codes: 141 = Left, 142 = Right.
            sidenum = side == 'L' ? 141 : 142

            # Leading dimensions from the column strides.
            lda = max(1, stride(A,2))
            ldt = max(1, stride(Tau,2))
            ldc = max(1, stride(C,2))
            ldwork = side == 'L' ? n : m
            # NOTE(review): work is sized ib*nb while ldwork is n (or m) —
            # verify this matches the kernel's documented workspace
            # requirement (LDWORK-based sizing) to rule out an under-sized
            # buffer.
            work = Vector{$T}(undef, ib*nb)


            # Returns a nonzero coreblas error code on failure.
            err = ccall(($(QuoteNode(geormqr)), :libcoreblas), Int64,
                   (Int64, Int64, Int64, Int64,
                    Int64, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64),
                    sidenum, transnum,
                    m, n,
                    k, ib,
                    A, lda,
                    Tau, ldt,
                    C, ldc,
                    work, ldwork)
            if err != 0
                throw(ArgumentError("coreblas_ormqr failed. Error number: $err"))
            end
        end
    end
end

49 changes: 49 additions & 0 deletions src/array/coreblas/coreblas_tsmqr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Generate a typed coreblas_tsmqr! method for each supported element type by
# binding the matching coreblas symbol (d/s/c/z variant) at load time.
for (getsmqr, T) in
    ((:coreblas_dtsmqr, Float64),
     (:coreblas_ctsmqr, ComplexF32),
     (:coreblas_ztsmqr, ComplexF64),
     (:coreblas_stsmqr, Float32))
    @eval begin
        # Apply the Q factor produced by a triangle-on-top-of-square
        # factorization (tsqrt: reflectors in `V`, block factors in `Tau`)
        # to the stacked pair of tiles `A1`/`A2` in place, from the left
        # ('L') or right ('R'), optionally transposed.
        # NOTE(review): no stride/1-based-indexing validation here, unlike
        # coreblas_geqrt! — confirm callers guarantee contiguous tiles.
        function coreblas_tsmqr!(side::Char, trans::Char, A1::AbstractMatrix{$T},
                A2::AbstractMatrix{$T}, V::AbstractMatrix{$T}, Tau::AbstractMatrix{$T})
            m1, n1 = size(A1)
            m2, n2 = size(A2)
            ib, nb = size(Tau)
            k = nb

            # coreblas enum codes: 111 = NoTrans, 112 = Trans,
            # 113 = ConjTrans (complex types use ConjTrans).
            if $T <: Complex
                transnum = trans == 'N' ? 111 : 113
            else
                transnum = trans == 'N' ? 111 : 112
            end

            # coreblas enum codes: 141 = Left, 142 = Right.
            sidenum = side == 'L' ? 141 : 142

            # Leading dimensions from the column strides.
            lda1 = max(1, stride(A1,2))
            lda2 = max(1, stride(A2,2))
            ldv = max(1, stride(V,2))
            ldt = max(1, stride(Tau,2))
            ldwork = side == 'L' ? ib : m1
            work = Vector{$T}(undef, ib*nb)


            # Returns a nonzero coreblas error code on failure.
            err = ccall(($(QuoteNode(getsmqr)), :libcoreblas), Int64,
                   (Int64, Int64, Int64, Int64,
                    Int64, Int64, Int64, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64, Ptr{$T}, Int64),
                    sidenum, transnum,
                    m1, n1,
                    m2, n2,
                    k, ib,
                    A1, lda1,
                    A2, lda2,
                    V, ldv,
                    Tau, ldt,
                    work, ldwork)
            if err != 0
                throw(ArgumentError("coreblas_tsmqr failed. Error number: $err"))
            end
        end
    end
end
35 changes: 35 additions & 0 deletions src/array/coreblas/coreblas_tsqrt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

# Generate a typed coreblas_tsqrt! method for each supported element type by
# binding the matching coreblas symbol (d/s/c/z variant) at load time.
for (getsqrt,T) in
    ((:coreblas_dtsqrt, Float64),
     (:coreblas_stsqrt, Float32),
     (:coreblas_ctsqrt, ComplexF32),
     (:coreblas_ztsqrt, ComplexF64))
    @eval begin
        # QR factorization of a triangular tile `A1` stacked on top of a
        # rectangular tile `A2`, in place, via the coreblas tsqrt kernel.
        # `Tau` is an ib×nb workspace/output for the block reflector
        # factors, where `ib` is the inner block size.
        # NOTE(review): no stride/1-based-indexing validation here, unlike
        # coreblas_geqrt! — confirm callers guarantee contiguous tiles.
        function coreblas_tsqrt!(A1::AbstractMatrix{$T}, A2::AbstractMatrix{$T},
                Tau::AbstractMatrix{$T})
            # Row count comes from A2, column count from A1.
            m = size(A2)[1]
            n = size(A1)[2]
            ib, nb = size(Tau)
            # Leading dimensions from the column strides.
            lda1 = max(1, stride(A1,2))
            lda2 = max(1, stride(A2,2))
            ldt = max(1, stride(Tau,2))
            # Scratch buffers required by the C routine.
            work = Vector{$T}(undef, (ib)*n)
            ttau = Vector{$T}(undef, n)

            # Returns a nonzero coreblas error code on failure.
            err = ccall(($(QuoteNode(getsqrt)), :libcoreblas), Int64,
                   (Int64, Int64, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Int64,
                    Ptr{$T}, Int64, Ptr{$T}, Ptr{$T}),
                    m, n, ib,
                    A1, lda1,
                    A2, lda2,
                    Tau, ldt,
                    ttau, work)
            if err != 0
                throw(ArgumentError("coreblas_tsqrt failed. Error number: $err"))
            end
        end
    end
end


Loading
Loading