diff --git a/Project.toml b/Project.toml
index 088a62ea..e885c99c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,11 +4,11 @@
 authors = ["Frank Hellmann , Michael Lindner
diff --git a/src/gbufs.jl b/src/gbufs.jl
new file mode 100644
--- /dev/null
+++ b/src/gbufs.jl
+abstract type GBufProvider end
+
+####
+#### Eager version
+####
+
+struct EagerGBufProvider{MT,C} <: GBufProvider
+    "mapping e_idx -> [v_src_idx_in_fullflat; v_dst_idx_in_fullflat]"
+    map::MT # input_map[:, e_idx] = [v_src_idx, v_dst_idx]
+    diffcache::C
+end
+
+function EagerGBufProvider(im::IndexManager, batches)
+    map = zeros(Int, ne(im.g) * im.vdepth, 2)
+    for i in 1:ne(im.g)
+        map[im.e_gbufr[i], 1] .= im.e_src[i]
+        map[im.e_gbufr[i], 2] .= im.e_dst[i]
+    end
+
+    N = ForwardDiff.pickchunksize(max(im.lastidx_dynamic, im.lastidx_p))
+    EagerGBufProvider(map, DiffCache(Float64.(map), N))
+end
+
+function get_gbuf(bufp::EagerGBufProvider, u)
+    gbuf = get_tmp(bufp.diffcache, u)
+    NNlib.gather!(gbuf, u, bufp.map)
+    gbuf
+end
+
+Base.@propagate_inbounds function get_src_dst(gbuf::AbstractArray, batch, i)
+    bufr = @views gbuf_range(batch, i)
+    src = @views gbuf[bufr, 1]
+    dst = @views gbuf[bufr, 2]
+    src, dst
+end
+
+####
+#### Lazy version
+####
+
+struct LazyGBufProvider{SM,DM} <: GBufProvider
+    e_src::SM
+    e_dst::DM
+end
+struct LazyGBuf{LBP,UT}
+    lbp::LBP
+    u::UT
+end
+
+LazyGBufProvider(im::IndexManager, _) = LazyGBufProvider(copy(im.e_src), copy(im.e_dst))
+
+function get_gbuf(bufp::LazyGBufProvider, u)
+    LazyGBuf(bufp, u)
+end
+
+Base.@propagate_inbounds function get_src_dst(gbuf::LazyGBuf, batch, i)
+    eidx = batch.indices[i]
+    lbp = gbuf.lbp
+    src = view(gbuf.u, lbp.e_src[eidx])
+    dst = view(gbuf.u, lbp.e_dst[eidx])
+    src, dst
+end
diff --git a/src/network_structure.jl b/src/network_structure.jl
index bb1e9118..2112444b 100644
--- a/src/network_structure.jl
+++ b/src/network_structure.jl
@@ -47,9 +47,9 @@ usebuffer(::Type{<:ExecutionStyle{buffered}}) where {buffered} = buffered
 # check cuda compatibility
 iscudacompatible(x) = iscudacompatible(typeof(x))
 iscudacompatible(::Type{<:ExecutionStyle}) = false
-iscudacompatible(::Type{<:KAExecution{true}}) = true
+iscudacompatible(::Type{<:KAExecution}) = true
 
-struct Network{EX<:ExecutionStyle,G,NL,VTup,MM}
+struct Network{EX<:ExecutionStyle,G,NL,VTup,MM,CT,GBT}
     "vertex batches of same function"
     vertexbatches::VTup
     "network layer"
@@ -57,9 +57,11 @@ struct Network{EX<:ExecutionStyle,G,NL,VTup,MM}
     "index manager"
     im::IndexManager{G}
-    "lazy cache pool"
-    cachepool::LazyBufferCache{typeof(identity),typeof(identity)}
+    "preallocated caches for state and aggregation buffers"
+    caches::@NamedTuple{state::CT,aggregation::CT}
     "mass matrix"
     mass_matrix::MM
+    "gather buffer provider (lazy or eager)"
+    gbufprovider::GBT
 end
 executionstyle(::Network{ex}) where {ex} = ex()
 nvbatches(::Network) = length(vertexbatches)
@@ -83,7 +85,18 @@ Graphs.nv(nw::Network) = nv(nw.im.g)
 Graphs.ne(nw::Network) = ne(nw.im.g)
 Base.broadcastable(nw::Network) = Ref(nw)
 
-struct NetworkLayer{GT,ETup,AF,MT}
+function get_state_cache(nw::Network, T)
+    if eltype(T) <: AbstractFloat && eltype(nw.caches.state.du) != eltype(T)
+        throw(ArgumentError("Network caches are initialized with $(eltype(nw.caches.state.du)) \
+                             but are used for $(eltype(T)) data!"))
+    end
+    get_tmp(nw.caches.state, T)
+end
+get_aggregation_cache(nw::Network, T) = get_tmp(nw.caches.aggregation, T)
+
+iscudacompatible(nw::Network) = iscudacompatible(executionstyle(nw)) && iscudacompatible(nw.layer.aggregator)
+
+struct NetworkLayer{GT,ETup,AF}
     "graph/topology of layer"
     g::GT
     "edge batches with same function"
@@ -94,13 +107,11 @@ struct NetworkLayer{GT,ETup,AF,MT}
     edepth::Int # potential becomes range for multilayer
     "vertex dimensions visible to edges"
     vdepth::Int # potential becomes range for multilayer
-    "mapping e_idx -> [v_src_idx_in_fullflat; v_dst_idx_in_fullflat]"
-    gather_map::MT # input_map[:, e_idx] = [v_src_idx, v_dst_idx]
 end
 
 abstract type ComponentBatch{F} end
 
-struct VertexBatch{T<:VertexFunction,F,IV<:AbstractVector{<:Int}} <: ComponentBatch{T}
+struct VertexBatch{T<:VertexFunction,F,IV<:AbstractVector{<:Integer}} <: ComponentBatch{T}
     "vertex indices contained in batch"
     indices::IV
     "vertex function"
@@ -113,7 +124,7 @@ struct VertexBatch{T<:VertexFunction,F,IV<:AbstractVector{<:Int}} <: ComponentBa
     aggbufstride::BatchStride
 end
 
-struct EdgeBatch{T<:EdgeFunction,F,IV<:AbstractVector{<:Int}} <: ComponentBatch{T}
+struct EdgeBatch{T<:EdgeFunction,F,IV<:AbstractVector{<:Integer}} <: ComponentBatch{T}
     "edge indices (as in edge iterator) contained in batch"
     indices::IV
     "edge function"
diff --git a/test/AD_test.jl b/test/AD_test.jl
index 84364077..7768769f 100644
--- a/test/AD_test.jl
+++ b/test/AD_test.jl
@@ -35,7 +35,7 @@ fp = function(p)
     nw(dx, x0, p, 0.0)
     dx
 end
-# jacobian(fp, AutoEnzyme(), pflat(p0)
+# jacobian(fp, AutoEnzyme(), pflat(p0))
 # jacobian(fp, AutoZygote(), pflat(p0))
 # jacobian(fp, AutoFiniteDifferences(), pflat(p0))
 # jacobian(fp, AutoForwardDiff(), pflat(p0))
@@ -44,7 +44,8 @@ end
 scenarios = [Scenario{:jacobian, :in}(fx, x0; res1=jacobian(fx, AutoFiniteDiff(), x0))
              , Scenario{:jacobian, :in}(fp, pflat(p0); res1=jacobian(fp, AutoFiniteDiff(), pflat(p0)))]
 
-backends = [AutoForwardDiff(), AutoReverseDiff()]
+# backends = [AutoForwardDiff(), AutoReverseDiff()]
+backends = [AutoForwardDiff()]
 test_differentiation(
     backends, # the backends you want to compare
     scenarios, # the scenarios you defined,
diff --git a/test/GPU_test.jl b/test/GPU_test.jl
index 9bb485bc..7a53b23b 100644
--- a/test/GPU_test.jl
+++ b/test/GPU_test.jl
@@ -6,6 +6,7 @@
 using StableRNGs
 using KernelAbstractions
 using Graphs
 using Random
+using Test
 
 (isinteractive() && @__MODULE__()==Main ? includet : include)("ComponentLibrary.jl")
 
 rng = StableRNG(1)
@@ -17,34 +18,67 @@
 ef = [Lib.diffusion_odeedge(),
       Lib.diffusion_edge_fid(),
       Lib.diffusion_odeedge(),
       Lib.diffusion_edge_fid()]
-nw = Network(g, vf, ef; execution=KAExecution{true}(), aggregator=KAAggregator(+))
+nw = Network(g, vf, ef)
+@test_throws ArgumentError adapt(CuArray{Float32}, nw) # wrong execution
+nw = Network(g, vf, ef; execution=KAExecution{true}(), aggregator=KAAggregator(+))
 x0 = rand(rng, dim(nw))
 p = rand(rng, pdim(nw))
 dx = zeros(length(x0))
 nw(dx, x0, p, NaN)
 
 to = CUDABackend()
-nw_d = adapt(to, nw)
-@test nw_d.vertexbatches[1].indices isa CuArray
-@test nw_d.layer.edgebatches[1].indices isa CuArray
-@test nw_d.layer.gather_map isa CuArray
-@test nw_d.layer.aggregator.m.map isa CuArray
-@test nw_d.layer.aggregator.m.symmap isa CuArray
-x0_d = adapt(to, x0)
-p_d = adapt(to, p)
-dx_d = adapt(to, zeros(length(x0)))
-nw_d(dx_d, x0_d, p_d, NaN)
-@test Vector(dx_d) ≈ dx
+@test_throws ArgumentError adapt(to, nw)
+to = :foo
+@test_throws ArgumentError adapt(to, nw)
+to = CuArray([1,2,3])
+@test_throws ArgumentError adapt(to, nw)
+to = cu(rand(3))
+@test_throws ArgumentError adapt(to, nw)
+to = CuArray
+@test_throws ArgumentError adapt(to, nw)
+
+to1 = CuArray{Float32}
+to2 = CuArray{Float64}
+nw1 = adapt(to1, nw)
+nw2 = adapt(to2, nw)
+
+for nw in (nw1, nw2)
+    @test nw.vertexbatches[1].indices isa CuArray{Int}
+    @test nw.layer.edgebatches[1].indices isa CuArray{Int}
+    @test nw.gbufprovider.map isa CuArray{Int}
+    @test nw.layer.aggregator.m.map isa CuArray{Int}
+    @test nw.layer.aggregator.m.symmap isa CuArray{Int}
+end
+
+@test nw1.caches.state.du isa CuArray{Float32}
+@test nw1.caches.aggregation.du isa CuArray{Float32}
+@test nw2.caches.state.du isa CuArray{Float64}
+@test nw2.caches.aggregation.du isa CuArray{Float64}
+
+x0_d1 = adapt(to1, x0)
+p_d1 = adapt(to1, p)
+dx_d1 = adapt(to1, zeros(length(x0)))
+nw1(dx_d1, x0_d1, p_d1, NaN)
+@test Vector(dx_d1) ≈ dx
+@test_throws ArgumentError nw2(dx_d1, x0_d1, p_d1, NaN) # wrong type for cache
+
+x0_d2 = adapt(to2, x0)
+p_d2 = adapt(to2, p)
+dx_d2 = adapt(to2, zeros(length(x0)))
+nw2(dx_d2, x0_d2, p_d2, NaN)
+@test Vector(dx_d2) ≈ dx
+@test_throws ArgumentError nw1(dx_d2, x0_d2, p_d2, NaN) # wrong type for cache
+
 
 # try SparseAggregator
 nw2 = Network(g, vf, ef; execution=KAExecution{true}(), aggregator=SparseAggregator(+))
-nw2_d = adapt(CuArray, nw2)
+nw2_d = adapt(CuArray{Float32}, nw2)
 @test nw2_d.layer.aggregator.m isa CuSparseMatrixCSC
 
-fill!(dx_d, 0)
-nw2_d(dx_d, x0_d, p_d, NaN)
-@test Vector(dx_d) ≈ dx
+fill!(dx_d1, 0)
+nw2_d(dx_d1, x0_d1, p_d1, NaN)
+@test Vector(dx_d1) ≈ dx
 
 # mini benchmark
diff --git a/test/construction_test.jl b/test/construction_test.jl
index d534fb7f..881c670f 100644
--- a/test/construction_test.jl
+++ b/test/construction_test.jl
@@ -124,14 +124,14 @@ end
     @test nd.mass_matrix == I && nd.mass_matrix isa UniformScaling
 end
 
-@testset "gbuf map construction" begin
+@testset "eager gbuf map construction" begin
    using NetworkDynamics: gbuf_range
    e1 = StaticEdge(x->x^1, 1, 0, AntiSymmetric())
    e2 = StaticEdge(x->x^1, 1, 0, AntiSymmetric())
    v = ODEVertex(x->x^1, 1, 0)
    g = path_graph(4)
    nd = Network(g, v, [e1,e2,e1])
-   map = nd.layer.gather_map
+   map = nd.gbufprovider.map
    for batch in nd.layer.edgebatches
        for batch_subi in 1:length(batch)
            eidx = batch.indices[batch_subi]
diff --git a/test/testutils.jl b/test/testutils.jl
index 28fc5c16..fa91e2de 100644
--- a/test/testutils.jl
+++ b/test/testutils.jl
@@ -64,7 +64,7 @@ function test_execution_styles(prob)
     end
 
     if CUDA.functional()
-        to = CuArray
+        to = CuArray{Float64}
         u_d = adapt(to, u)
         p_d = adapt(to, p)
 
@@ -78,7 +78,7 @@ function test_execution_styles(prob)
        _nw_d(_du_d, u_d, p_d, t)
        issame = isapprox(Vector(_du_d), du; atol=1e-10)
        if !issame
-           println("CUDA execution lead to different results: extrema(Δ) = $(extrema(_du - du))")
+           println("CUDA execution led to different results: extrema(Δ) = $(extrema(Vector(_du_d) - du))")
        end
        @test issame
     end
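
For context: the diff defines `get_gbuf`/`get_src_dst` for both providers, but their call site (the buffered core loop) is outside this excerpt. Below is a minimal sketch of the intended call pattern, assuming only the names introduced above; the loop itself and the name `sketch_edge_iteration` are illustrative, not the actual implementation.

```julia
# Sketch only: how a buffered execution pass might consume a GBufProvider.
# `get_gbuf` and `get_src_dst` are the API from this diff; the rest is
# illustrative. Eager: get_gbuf gathers all vertex data into a dense buffer
# via NNlib.gather! (CuArray-compatible). Lazy: it returns a LazyGBuf
# wrapper and get_src_dst takes views into u on demand.
function sketch_edge_iteration(nw, u)
    gbuf = get_gbuf(nw.gbufprovider, u)
    for batch in nw.layer.edgebatches
        for i in 1:length(batch)
            # src/dst are the vertex states seen by edge i of this batch
            src, dst = get_src_dst(gbuf, batch, i)
            # ... evaluate the edge function of `batch` on (src, dst) ...
        end
    end
end
```

This split is also what the GPU tests exercise: `adapt` now requires a concrete element type (`CuArray{Float32}` or `CuArray{Float64}`) so that the eager provider's `DiffCache` and the state/aggregation caches are allocated with a fixed eltype, and a network adapted to one eltype rejects inputs of the other at call time.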