diff --git a/Project.toml b/Project.toml
index 0375150..0cf0764 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,10 +6,30 @@ version = "0.1.0"
 [deps]
 ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
-DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[compat]
+ArrayLayouts = "1"
+ForwardDiff = "0.10.36"
+Lux = "0.5.62"
+Plots = "1.40.5"
+
+[extras]
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Lux", "ForwardDiff", "Test"]
diff --git a/examples/convlayer.jl b/examples/convlayer.jl
new file mode 100644
index 0000000..390cc75
--- /dev/null
+++ b/examples/convlayer.jl
@@ -0,0 +1,69 @@
+using LinearAlgebra, MLDatasets, Plots, DualArrays, Random, FillArrays
+
+#GOAL: Implement and differentiate a convolutional neural network layer
+function convlayer(img, ker, xstride = 1, ystride = 1)
+    n, m = size(ker)
+    t = eltype(ker)
+
+    n2, m2 = size(img)
+    n3, m3 = div(n2 - n, xstride) + 1, div(m2 - m, ystride) + 1
+    fmap = zeros(promote_type(eltype(img), t), n3, m3)
+    #Apply kernel to each (strided) window of the image
+    for i = 1:n3, j = 1:m3
+        i0, j0 = (i - 1) * xstride + 1, (j - 1) * ystride + 1
+        ft = img[i0:i0+n-1, j0:j0+m-1] .* ker
+        fmap[i, j] = sum(ft)
+    end
+    fmap
+end
+
+function softmax(x)
+    s = sum(exp.(x))
+    exp.(x) / s
+end
+
+function dense_layer(W, b, x, f::Function = identity)
+    ret = W*x
+    println("Multiplication complete")
+    ret += b
+    println("Addition complete")
+    f(ret)
+end
+
+function cross_entropy(x, y)
+    -sum(y .* log.(x))
+end
+
+function model_loss(x, y, w)
+    ker = reshape(w[1:9], 3, 3)
+    weights = reshape(w[10:6769], 10, 676)
+    biases = w[6770:6779]
+    println("Reshape complete")
+    l1 = vec(DualMatrix(convlayer(x, ker)))
+    println("Conv layer complete")
+    l2 = dense_layer(weights, biases, l1, softmax)
+    println("Dense layer complete")
+    target = OneElement(1, y+1, 10)
+    loss = cross_entropy(l2, target)
+    println("Loss complete")
+    loss.value, loss.partials
+end
+
+function train_model()
+    p = rand(6779)
+    epochs = 1000
+    lr = 0.02
+    dataset = MNIST(:train)
+
+    for i = 1:epochs
+        img, label = dataset[i]
+        d = DualVector(p, I(6779))
+
+        loss, grads = model_loss(img, label, d)
+        println(loss)
+        p = p - lr * grads
+    end
+end
+
+train_model()
+
+
diff --git a/examples/heatequation.jl b/examples/heatequation.jl
index e4a61b3..09b4542 100644
--- a/examples/heatequation.jl
+++ b/examples/heatequation.jl
@@ -9,7 +9,7 @@
 # Compare performance and results.
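A quick sanity check of the convlayer routine added above — a minimal sketch assuming convlayer from examples/convlayer.jl is in scope (the 3×3 image and 2×2 kernel are made-up toy inputs). With an all-ones kernel and unit strides, each feature-map entry is just the sum of the window it covers:

using Test

img = [1 2 3; 4 5 6; 7 8 9]
ker = ones(Int, 2, 2)
# Window sums: [1+2+4+5  2+3+5+6; 4+5+7+8  5+6+8+9]
@test convlayer(img, ker) == [12 16; 24 28]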
 ##
 
-using LinearAlgebra, BandedMatrices, DifferentialEquations, Plots,
+using LinearAlgebra, BandedMatrices, OrdinaryDiffEq, Plots,
     ForwardDiff
 
 U = 500
diff --git a/examples/neuralnet.jl b/examples/neuralnet.jl
new file mode 100644
index 0000000..ff4e403
--- /dev/null
+++ b/examples/neuralnet.jl
@@ -0,0 +1,45 @@
+import Lux: relu
+using DualArrays, LinearAlgebra, Plots, FillArrays, Test
+
+#GOAL: Learn exp() using a simple one-layer relu neural network.
+
+N = 20
+d = 0.1
+
+#Domain over which to learn exp
+data = collect(d:d:N*d)
+
+function model(a, b, x)
+    return relu.(a .* x + b)
+end
+
+function model_loss(w)
+    sum((model(w[1:N], w[(N+1):end], data) - exp.(data)) .^ 2)
+end
+
+function gradient_descent_sparse(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Eye{Float64}(2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+function gradient_descent_dense(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Matrix(I, 2*N, 2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+@time densesol = gradient_descent_dense(500)
+@time sparsesol = gradient_descent_sparse(500)
+@test densesol == sparsesol
+@test sparsesol ≈ exp.(data) rtol = 1e-2
+
+
diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl
index 81d727b..1d753b6 100644
--- a/examples/newtonpendulum.jl
+++ b/examples/newtonpendulum.jl
@@ -6,24 +6,36 @@
 # via discretisation and Newton's method.
 ##
 
-using LinearAlgebra, ForwardDiff, Plots, DualArrays
+
+using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, Test
 
 #Boundary Conditions
 a = 0.1
 b = 0.0
 
 #Time step, Time period and number of x for discretisation.
-ts = 0.001
+ts = 0.1
+
 Tmax = 5.0
 N = Int(Tmax/ts) - 1
 
 #LHS of ode
 function f(x)
     n = length(x)
-    D = Tridiagonal([ones(Float64, n) / ts ; 0.0], [1.0; -2ones(Float64, n) / ts; 1.0], [0.0; ones(Float64, n) / ts])
+    D = Tridiagonal([ones(n) / ts ; 0.0], [1.0; -2ones(n) / ts; 1.0], [0.0; ones(n) / ts])
     (D * [a; x; b])[2:end-1] + sin.(x)
 end
+
+function f(u, a, b, Tmax)
+    h = Tmax/(length(u)-1)
+    [u[1] - a;
+     (u[1:end-2] - 2u[2:end-1] + u[3:end])/h^2 + sin.(u[2:end-1]);
+     u[end] - b]
+end
+
+
+
 #Newton's method using ForwardDiff.jl
 function newton_method_forwarddiff(f, x0, n)
     x = x0
@@ -38,18 +50,33 @@ function newton_method_dualvector(f, x0, n)
     x = x0
     l = length(x0)
     for i = 1:n
-        ∇f = f(DualVector(x, Matrix(I, l, l))).jacobian
+        ∇f = f(DualVector(x, Eye(l))).jacobian
+        x = x - ∇f \ f(x)
+    end
+    x
+end
+
+function newton_method_dualvector2(f, x0, n)
+    x = x0
+    l = length(x0)
+    for i = 1:n
+        ∇f = f(DualVector(x, Eye(l)), a, b, Tmax).jacobian
-        x = x - ∇f \ f(x)
+        x = x - ∇f \ f(x, a, b, Tmax)
     end
     x
 end
 
 #Initial guess
-x0 = zeros(Float64, N)
+x0 = zeros(N)
 
 #Solve and plot both solution and LHS ('deviation' from system)
-@time sol = newton_method_forwarddiff(f, x0, 100)
-@time sol = newton_method_dualvector(f, x0, 100)
+@time sol1 = newton_method_forwarddiff(f, x0, 100);
+@time sol2 = newton_method_dualvector(f, x0, 100);
+@test sol1 ≈ sol2
+
+#Initial guess including the boundary values, for the second formulation
+x0 = zeros(N); x0[1] = a; x0[end] = b;
+@time sol = newton_method_dualvector2(f, x0, 100);
 
 plot(0:ts:Tmax, [a; sol; b])
 plot!(0:ts:Tmax, [0; f(sol); 0])
diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl
new file mode 100644
index 0000000..3801337
--- /dev/null
+++ b/src/BlockMatrixTensor.jl
@@ -0,0 +1,76 @@
+#4-Tensor with data stored in a BlockMatrix
+#Enables sparsity for DualMatrix jacobian.
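Before the implementation, a sketch of the storage layout the header comment describes, using only BlockArrays (the name J and the block sizes are made up for illustration). The jacobian of an n × m matrix of duals is an n × m × s × t tensor of partials; holding it as an n × m grid of s × t blocks means structurally zero blocks can later be stored sparsely:

using BlockArrays

# A 2×2 matrix of dual entries, each with a 1×3 row of partials:
# block (i, j) holds the partials of entry (i, j).
J = BlockMatrix(zeros(2, 6), [1, 1], [3, 3])
view(J, Block(1, 1)) .= [1.0 0.0 0.0]   # entry (1,1) depends only on ε₁
view(J, Block(2, 2)) .= [0.0 1.0 0.0]   # entry (2,2) depends only on ε₂
blocks(J)[1, 1]                         # recover the 1×3 partials block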
+using BlockArrays, SparseArrays
+
+struct BlockMatrixTensor{T} <: AbstractArray{T, 4}
+    data::BlockMatrix{T}
+end
+
+#Construct BlockMatrixTensor from a 4-tensor. Useful for testing purposes.
+function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T}
+    n, m, s, t = size(x)
+    data = BlockMatrix(zeros(T, n * s, m * t), fill(s, n), fill(t, m))
+    for i = 1:n, j = 1:m
+        view(data, Block(i, j)) .= x[i, j, :, :]
+    end
+    BlockMatrixTensor(data)
+end
+
+# Useful for creating a BlockMatrixTensor from blocks()
+BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} = BlockMatrixTensor(mortar(x))
+
+size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...)
+
+#Indexing entire blocks
+getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = blocks(x.data)[a, b][c, d]
+getindex(x::BlockMatrixTensor, a::Int, b::Int, ::Colon, ::Colon) = blocks(x.data)[a, b]
+getindex(x::BlockMatrixTensor, a::Int, b, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], 1, :))
+getindex(x::BlockMatrixTensor, a, b::Int, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], :, 1))
+getindex(x::BlockMatrixTensor, a, b, ::Colon, ::Colon) = BlockMatrixTensor(blocks(x.data)[a,b])
+
+
+# For populating a BlockMatrixTensor block by block
+function setindex!(A::BlockMatrixTensor, value, a::Int, b::Int, ::Colon, ::Colon)
+    blocks(A.data)[a, b] = value
+end
+
+function show(io::IO, m::MIME"text/plain", x::BlockMatrixTensor)
+    print(io, "BlockMatrixTensor containing: \n")
+    show(io, m, x.data)
+end
+show(io::IO, x::BlockMatrixTensor) = show(io, x.data)
+
+for op in (:*, :/)
+    @eval $op(x::BlockMatrixTensor, y::Number) = BlockMatrixTensor($op(x.data, y))
+    @eval $op(x::Number, y::BlockMatrixTensor) = BlockMatrixTensor($op(x, y.data))
+end
+
+#Block-wise broadcast
+broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractMatrix) = BlockMatrixTensor(f.(blocks(x.data), y))
+broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractVector) = BlockMatrixTensor(f.(x, reshape(y, :, 1)))
+#Swap the arguments back so that non-commutative f still broadcasts in the right order
+broadcasted(f::Function, x::AbstractVecOrMat, y::BlockMatrixTensor) = broadcasted((a, b) -> f(b, a), y, x)
+
+function sum(x::BlockMatrixTensor; dims = Colon())
+    # Blockwise sum
+    if dims == 1:2
+        sum(blocks(x.data))
+    elseif dims == 1 || dims == 2
+        BlockMatrixTensor(sum(blocks(x.data); dims))
+    else
+        # For now, treat all other cases as if summing the 4-Tensor
+        sum(Array(x); dims = dims)
+    end
+end
+
+function reshape(x::BlockMatrixTensor, dims::Vararg{Union{Colon, Int}, 4})
+    #Reshape block-wise
+    #TODO: Implement non-blockwise reshape
+    if dims[3] isa Colon && dims[4] isa Colon
+        BlockMatrixTensor(reshape(blocks(x.data), dims[1], dims[2]))
+    else
+        throw(ArgumentError("only block-wise reshape (the last two dimensions must be colons) is supported for now"))
+    end
+end
+
+#'Flatten': converts BlockMatrixTensor to Matrix by removing block structure
+# Mimics the reshape achieved by this for general 4-Tensors
+flatten(x::BlockMatrixTensor) = hcat((vcat((x[i, j, :, :] for i = 1:size(x, 1))...) for j = 1:size(x, 2))...)
+flatten(x::AbstractArray{T, 4}) where {T} = reshape(x, size(x, 1) * size(x, 3), size(x, 2) * size(x, 4))
diff --git a/src/DualArray.jl b/src/DualArray.jl
new file mode 100644
index 0000000..32faf3e
--- /dev/null
+++ b/src/DualArray.jl
@@ -0,0 +1,278 @@
+# TODO: support non-banded jacobians
+sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...)
+
+# Should Partials necessarily be a vector?
+# For example, if we index a DualMatrix, should partials retain the shape
+# of the perturbations, thereby giving a matrix of partials?
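As a primer for the scalar type defined next, a minimal sketch of the dual-number algebra it implements (this assumes the Dual constructor and the * method from further down this file):

x = Dual(3.0, [1.0, 0.0])   # seed: ∂x/∂x = 1, ∂x/∂y = 0
y = Dual(2.0, [0.0, 1.0])   # seed: ∂y/∂x = 0, ∂y/∂y = 1
x * y                       # Dual(6.0, [2.0, 3.0]): product rule x⋅∂y + y⋅∂x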
+struct Dual{T, Partials <: AbstractArray{T}} <: Real
+    value::T
+    partials::Partials
+end
+
+zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1))
+promote_rule(::Type{Dual{T}},::Type{S}) where {T,S} = Dual{promote_type(T,S)}
+
+==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials
+for op in (:+, :-)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials)
+end
+for op in (:*, :/)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials, k))
+end
+
+-(x::Dual) = Dual(-1 * x.value, -1 * x.partials)
+
+*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials)
+/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2
+
+sparse_getindex(a...) = layout_getindex(a...)
+sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
+sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))
+sparse_getindex(s::AbstractSparseArray, idx...) = s[idx...]
+
+sparse_getindex(b::BandedMatrix, k::Integer, ::Colon) = sparsevec(
+        rowrange(b, k), b[k, rowrange(b, k)], size(b, 2)
+    )
+sparse_getindex(b::BandedMatrix, ::Colon, k::Integer) = sparsevec(
+        colrange(b, k), b[colrange(b, k), k], size(b, 1)
+    )
+
+sparse_transpose(s::SparseVector) = sparse(fill(1, length(s.nzind)), s.nzind, s.nzval, 1, length(s))
+sparse_transpose(o::OneElement{T,1}) where {T} = reshape(o, 1, :)
+sparse_transpose(x::AbstractVector) = x'
+
+#Helper function to check valid dimensions of DualArray: the value must match
+#the first N dimensions of the jacobian
+function _checkdims(val, jac)
+    s1, s2 = size(val), size(jac)
+    if length(s1) > length(s2)
+        return false
+    end
+    for (i, x) in enumerate(s1)
+        if(x != s2[i])
+            return false
+        end
+    end
+    return true
+end
+
+"""
+represents an array of duals given by
+
+    values + jacobian * 𝛜,
+
+where 𝛜 is an array with dual parts corresponding to each entry of values.
+"""
+struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N}
+    value::AbstractArray{T, N}
+    jacobian::J
+    function DualArray(value::AbstractArray{T,N},jacobian::AbstractArray{T, M}) where {T, N, M}
+        if(!_checkdims(value,jacobian))
+            s1, s2 = size(value), size(jacobian)
+            throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian. \n\nGot $s1, $s2"))
+        end
+        new{T,N,2*N,typeof(jacobian)}(value,jacobian)
+    end
+end
+
+function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M}
+    t = promote_type(T, S)
+    DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t, M}, jacobian))
+end
+
+#Aliases and Constructors
+const DualVector{T, M} = DualArray{T, 1, 2, M} where {T, M <: AbstractMatrix{T}}
+const DualMatrix{T, M} = DualArray{T, 2, 4, M} where {T, M <: AbstractArray{T, 4}}
+
+DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j)
+DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j)
+DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j)
+
+#Construct a DualMatrix from a matrix of Duals
+function DualMatrix(x::AbstractMatrix)
+    val = [y.value for y in x]
+    jac_blocks = [y.partials for y in x]
+    DualMatrix(val, BlockMatrixTensor(jac_blocks))
+end
+
+function getindex(x::DualVector, y::Int)
+    Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
+end
+function getindex(x::DualVector, y::UnitRange)
+    newval = x.value[y]
+    newjac = sparse_getindex(x.jacobian,y,:)
+    DualVector(newval, newjac)
+end
+getindex(x::DualVector, ::Colon) = x
+
+function getindex(x::DualArray, y::Int...)
+    idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x))
+    Dual(x.value[y...], x.jacobian[idx...])
+end
+
+for op in (:size, :axes, :ndims)
+    @eval $op(x::DualArray) = $op(x.value)
+end
+
++(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
+-(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian)
++(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian)
+-(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian)
++(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian)
+-(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian)
+*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials')
+/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian / y)
+/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2
+
+##
+# Note about 4-tensor by matrix multiplication:
+# We can consider matrix-vector multiplication as follows:
+#
+#   (Ax)[i] = A[i,:] . x
+#
+# This can be generalised to higher dimensions (B a 4-tensor, X a matrix):
+#
+#   (BX)[i,j] = B[i,j,:,:] . X
+#
+# This is useful: if we consider a 4-tensor of dimension n × m × s × t as a
+# linear map from s × t matrices to n × m matrices, this multiplication
+# applies the linear map to a matrix in the same way that matrix-vector
+# multiplication applies a linear transformation to a vector. It also gives
+# us a way to 'expand' the dual part of a DualMatrix.
+##
+
+# Assumes that the total number of epsilons is equal, i.e. the arrays of
+# perturbations are reshapes of each other.
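A standalone numerical check of the contraction described in the note above, using only Base and Test rather than the package's code path (B and X are random stand-ins):

using Test

B = rand(2, 3, 4, 5)
X = rand(4, 5)
# Entry-wise contraction: (BX)[i,j] = B[i,j,:,:] . X
BX = [sum(B[i, j, :, :] .* X) for i = 1:2, j = 1:3]
# The same map in flattened form: B reshaped (column-major) into a 6×20 matrix acting on vec(X).
@test BX ≈ reshape(reshape(B, 2 * 3, :) * vec(X), 2, 3)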
+function *(x::DualMatrix, y::DualVector) + val = x.value * y.value + jac = x.value * y.jacobian + flatten(sum(x.jacobian .* y.value'; dims = 2)) + DualVector(val, jac) +end + +function *(x::DualMatrix, y::AbstractVector) + val = x.value * y + t = promote_type(eltype(x.value), eltype(y)) + jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4)) + n, m = size(jac, 1), size(x.value, 2) + for i = 1:n + new_row = zeros(size(jac, 2)) + for j = 1:m + new_row += y[j] * vec(x.jacobian[i, j, :, :]) + end + jac[i,:] = new_row + end + DualVector(val, jac) +end + +function broadcasted(f::Function,d::DualVector) + jvals = zeros(eltype(d.value), length(d.value)) + for (i, x) = enumerate(d.value) + _, df = frule((ZeroTangent(), 1),f, x) + jvals[i] = df + end + DualVector(f.(d.value), Diagonal(jvals)*d.jacobian) +end + +function broadcasted(::typeof(*),x::DualVector,y::DualVector) + newval = x.value .* y.value + newjac = x.value .* y.jacobian + y.value .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::AbstractArray,y::DualArray) + newval = x .* y.value + newjac = x .* y.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::DualArray,y::AbstractArray) + newval = x.value .* y + newjac = y .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(^),x::DualVector,n::Int) + newval = x.value .^ n + newjac = n * x.value .^ (n - 1) .* x.jacobian + DualVector(newval,newjac) +end + +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n) + +### +# sum (higher-dimensional case assuming that shape of perturbations is preserved) +### +function sum(x::DualVector) + Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) +end + +function sum(x::DualArray) + Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))) +end + +_jacobian(d::Dual) = permutedims(d.partials) +_jacobian(d::DualVector) = d.jacobian +_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N) + +_value(d::DualVector) = d.value +_value(x::Number) = x + +function vcat(x::Union{Dual, DualVector}...) + if length(x) == 1 + return x[1] + end + value = vcat((d.value for d in x)...) + jacobian = vcat((_jacobian(d) for d in x)...) + DualVector(value,jacobian) +end + +function vcat(a::Dual ,x::DualVector, b::Dual) + val = vcat(a.value, x.value, b.value) + jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b)) + DualVector(val, jac) +end + +function vcat(a::Real ,x::DualVector, b::Real) + cols = size(x.jacobian,2) + val = vcat(a, x.value, b) + jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols)) + DualVector(val, jac) +end + +### +# +# enable converting between DualVector and DualMatrix. +# the 4D jacobian can be thought of as a generalisation of the 2D jacobian: +# +# n × m 2D jacobian: J[i,j] = ∂fᵢ / ∂xⱼ +# n × m × s × t 4D jacobian: J[i, j, k, l] = ∂f[i, j] / δx[k, l] +# +### + +# reshape does not preserve shape of perturbations (since jacobian can have +# any number of columns) +_tomatrix(v::AbstractVector) = reshape(v, :, 1) + +function reshape(x::DualVector,dims::Vararg{Int, 2}) + val = reshape(x.value, dims...) + blocked_jac = BlockMatrix(x.jacobian, fill(1, length(x)), [size(x.jacobian, 2)]) + jac = reshape(BlockMatrixTensor(blocked_jac), dims..., :, :) + DualMatrix(val, jac) +end + +_blockvec(x::AbstractArray{T,4}) where {T} = reshape(x, size(x, 1) * size(x, 2), size(x, 3) * size(x, 4)) +_blockvec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) 
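The reshape above and the vec defined next are designed to be mutual inverses on both values and jacobians; a small round trip mirroring the test suite (assumes the DualArrays package is loaded):

using DualArrays, LinearAlgebra

x = DualVector([1.0, 2.0, 3.0, 4.0], Matrix(1.0I, 4, 4))
X = reshape(x, 2, 2)   # DualMatrix whose jacobian is a BlockMatrixTensor
vec(X) == x            # true: the round trip preserves value and jacobian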
+
+function vec(x::DualArray)
+    val = vec(x.value)
+    jac = flatten(reshape(x.jacobian, length(val), 1, :, :))
+    DualVector(val, jac)
+end
+
+show(io::IO,::MIME"text/plain", x::DualArray) = (print(io,x.value); print(io," + "); print(io,x.jacobian); print(io,"𝛜"))
+show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian); print(io,"𝛜"))
+show(io::IO,::MIME"text/plain", x::Dual) = (print(io,x.value); print(io," + "); print(io,x.partials); print(io,"𝛜"))
\ No newline at end of file
diff --git a/src/DualArrays.jl b/src/DualArrays.jl
index e4074d5..bedc76f 100644
--- a/src/DualArrays.jl
+++ b/src/DualArrays.jl
@@ -1,103 +1,16 @@
 module DualArrays
-export DualVector
-import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum,
-    vcat, convert, *
-using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays
-
-struct Dual{T, Partials <: AbstractVector{T}} <: Real
-    value::T
-    partials::Partials
-end
-
-==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials
-
-sparse_getindex(a...) = layout_getindex(a...)
-sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
-sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))
-
-"""
-reprents a vector of duals given by
-
-    values + jacobian * [ε_1,…,ε_n].
-
-For now the entries just return the values.
-"""
-
-struct DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}}
-    value::Vector{T}
-    jacobian::M
-    function DualVector(value::Vector{T},jacobian::M) where {T, M <: AbstractMatrix{T}}
-        if(size(jacobian)[1] != length(value))
-            x,y = length(value),size(jacobian)[1]
-            throw(ArgumentError("vector length must match number of rows in jacobian.\n
-            vector length: $x \n
-            no. of jacobian rows: $y"))
-        end
-        new{T,M}(value,jacobian)
-    end
-end
-
-function DualVector(value::AbstractVector, jacobian::AbstractMatrix)
-    T = promote_type(eltype(value), eltype(jacobian))
-    DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian))
-end
-
-
-
-function getindex(x::DualVector, y::Int)
-    Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
-end
-
-function getindex(x::DualVector, y::UnitRange)
-    newval = x.value[y]
-    newjac = sparse_getindex(x.jacobian,y,:)
-    DualVector(newval, newjac)
-end
-size(x::DualVector) = length(x.value)
-axes(x::DualVector) = axes(x.value)
-+(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
-*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
-
-broadcasted(::typeof(sin),x::DualVector) = DualVector(sin.(x.value),Diagonal(cos.(x.value))*x.jacobian)
-
-function broadcasted(::typeof(*),x::DualVector,y::DualVector)
-    newval = x.value .* y.value
-    newjac = x.value .* y.jacobian + y.value .* x.jacobian
-    DualVector(newval,newjac)
-end
-
-function sum(x::DualVector)
-    n = length(x.value)
-    Dual(sum(x.value), vec(sum(x.jacobian; dims=1)))
-end
-
-_jacobian(d::Dual) = permutedims(d.partials)
-_jacobian(d::DualVector, ::Int) = d.jacobian
-_jacobian(x::Number, N::Int) = zeros(typeof(x), 1, N)
+export DualVector, DualArray, DualMatrix, BlockMatrixTensor
+export dropzeros
-_value(d::DualVector) = d.value
-_value(x::Number) = x
-
-function vcat(x::Union{Dual, DualVector}...)
-    if length(x) == 1
-        return x[1]
-    end
-    value = vcat((d.value for d in x)...)
-    jacobian = vcat((_jacobian(d) for d in x)...)
- DualVector(value,jacobian) -end - -_size(x::Real) = 1 -_size(x::DualVector) = size(x) +import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum, + vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero, + reshape, setindex! +using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays, SparseArrays +import ChainRules: frule, ZeroTangent -function vcat(x::Union{Real, DualVector}...) - cols = max((_size(i) for i in x)...) - val = vcat((_value(i) for i in x)...) - jac = vcat((_jacobian(i, cols) for i in x)...) - DualVector(val, jac) -end +include("BlockMatrixTensor.jl") +include("DualArray.jl") -show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) end -# module DualArrays +# module DualArrays \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d1cba3c..2374b14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,42 +1,141 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, +BlockArrays using DualArrays: Dual +using Lux: relu @testset "DualArrays" begin - @test_throws ArgumentError DualVector([1,2],I(3)) - v = DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - @test v[1] isa Dual - @test v[1] == Dual(1,[1,2,3]) - @test v[2] == Dual(2,[4,5,6]) - @test v[3] == Dual(3,[7,8,9]) - @test_throws BoundsError v[4] - @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - - w = v + v - @test w == DualVector([2,4,6],[2 4 6;8 10 12;14 16 18]) - @test w.jacobian == 2v.jacobian - - @test sin.(v) isa DualVector - @test sin.(v).value == [sin(1), sin(2), sin(3)] - @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian - - x,y = v[1:2],v[2:3] - @test x == DualVector([1,2],[1 2 3;4 5 6]) - @test y == DualVector([2,3],[4 5 6;7 8 9]) - @test x .* y == DualVector([2,6],[6 9 12;26 31 36]) + v = DualArray([1.,2, 3], [1 2 3; 4 5 6;7 8 9]) + @testset "constructors" begin + @test v isa DualVector + @test eltype(v) == DualArrays.Dual{Float64} + w = DualArray(ones(2,2),ones(2,2,2,2)) + @test w isa DualMatrix + @test_throws TypeError DualArray([1,2], [1,2]) + @test_throws ArgumentError DualVector([1,2],I(3)) + end + + @testset "indexing" begin + @test v[1] isa Dual + @test v[1] == Dual(1,[1,2,3]) + @test v[2] == Dual(2,[4,5,6]) + @test v[3] == Dual(3,[7,8,9]) + @test_throws BoundsError v[4] + @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) + + x,y = v[1:2],v[2:3] + @test x == DualVector([1,2],[1 2 3;4 5 6]) + @test y == DualVector([2,3],[4 5 6;7 8 9]) + + n = 10 + w = DualVector(1:n, I(n)) + @test w[2:end].jacobian isa BandedMatrix + end + + @testset "basic_operations" begin + w = v + v + @test w == DualVector([2.,4.,6.],[2. 
4 6;8 10 12;14 16 18])
+        @test w.jacobian == 2v.jacobian
+
+        u = v - w
+        @test u == -v
+
+        x = v.value + v - v.value
+        @test x == v
+
+        @test v .^ 2 == DualVector([1, 4, 9], [2 4 6;16 20 24; 42 48 54])
+
+        x,y = v[1:2],v[2:3]
+        @test x .* y == DualVector([2,6],[6 9 12;26 31 36])
+        @test sum(x .* y) isa Dual
+        @test sum(x .* y) == Dual(8,[32,40,48])
+    end
+
+    @testset "broadcasting" begin
+        @test sin.(v) isa DualVector
+        @test sin.(v).value == [sin(1), sin(2), sin(3)]
+        @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian
+
+        @test exp.(v) isa DualVector
+        @test exp.(v).value == [exp(1), exp(2), exp(3)]
+        @test exp.(v).jacobian == Diagonal(exp.(v.value)) * v.jacobian
-
-    @test sum(x .* y) isa Dual
-    @test sum(x .* y) == Dual(8,[32,40,48])
+        @test relu.(v) isa DualVector
+        @test relu.(v).value == [relu(1), relu(2), relu(3)]
+        @test relu.(v).jacobian == Diagonal(0.5 * (sign.(v.value) .+ 1)) * v.jacobian
+    end
+
+    @testset "misc" begin
+        n = 10
+        v = DualVector(1:n, I(n))
+        @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    end
+
+    @testset "vcat" begin
+        x = Dual(1, [1, 2, 3])
+        y = DualVector([2, 3], [4 5 6;7 8 9])
+        @test vcat(x) == x
+        @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
+        @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+        @test vcat(1, y, 2) == DualVector([1,2,3,2], [0 0 0; 4 5 6; 7 8 9; 0 0 0])
+    end
+
+    @testset "reshape" begin
+        x = DualVector([1,2,3,4], I(4))
+        @test vec(reshape(x, 2, 2)) == x
+    end
 
-    n = 10
-    v = DualVector(1:n, I(n))
-    @test v[2:end].jacobian isa BandedMatrix
+    @testset "DualMatrix" begin
+        x = DualVector(ones(6), I(6))
+        y = reshape(x[1:4], 2, 2)
+        z = x[5:6]
+        @test (y*z).value == [2,2]
+        @test (y*z).jacobian == [1 0 1 0 1 1; 0 1 0 1 1 1]
+        for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2, 6)))
+            x = DualVector(t[1], I(6))
+            y = reshape(x[1:4], 2, 2)
+            z = x[5:6]
+            @test relu.(y*[1,-1] + z).jacobian == t[2]
+        end
+    end
 
-    @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    @testset "BlockMatrixTensor" begin
+        @testset "constructors" begin
+            x = BlockMatrixTensor(BlockArray(ones(4, 4), [2, 2], [2, 2]))
+            y = BlockMatrixTensor(ones(2, 2, 2, 2))
+            @test x isa BlockMatrixTensor
+            @test y isa BlockMatrixTensor
+            @test x.data == y.data
+        end
+        @testset "broadcasting" begin
+            y = BlockMatrixTensor(ones(2, 2, 2, 2))
+            z = BlockArray(
+                [fill(1,2,2) fill(2,2,2); fill(3,2,2) fill(4,2,2)],
+                [2, 2],
+                [2, 2]
+            )
+            @test ([1 2;3 4] .* y).data == z
+            @test (reshape([3], 1, 1) .* y).data == (3 * y).data
+            @test ([1 2] .* y).data == BlockArray([fill(1, 4, 2) fill(2, 4, 2)], [2, 2], [2, 2])
+            @test ([1 2]' .* y).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2])
+            @test (y .* [1, 2]).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2])
+            @test_throws DimensionMismatch [1 2;3 4;5 6] .* y
+        end
+        @testset "sum" begin
+            r = rand(2, 2, 2, 2)
+            b = BlockMatrixTensor(r)
+            #TODO: Fix traversal so that dims = 1 and dims = 2 can be tested the same way
+            @test sum(r; dims = 1:2)[1, 1, :, :] ≈ sum(b; dims = 1:2)
+        end
 
-    x = Dual(1, [1, 2, 3])
-    y = DualVector([2, 3], [4 5 6;7 8 9])
-    @test vcat(x) == x
-    @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
-    @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+        @testset "reshape" begin
+            r = rand(3, 4, 2, 2)
+            b = BlockMatrixTensor(r)
+            for d in ((2, 6), (1, 12), (6, 2), (12, 1))
@test BlockMatrixTensor(reshape(r, d..., 2, 2)).data == reshape(b, d..., :, :).data + end + end + end end \ No newline at end of file
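To close, an end-to-end sketch of the intended workflow, mirroring the "misc" testset above (the function name g is made up for illustration): seed a DualVector with an identity jacobian, push it through a vector-valued function, and read off the full jacobian in a single forward pass, checked against ForwardDiff:

using DualArrays, LinearAlgebra, ForwardDiff, Test

g(v) = v[1:end-1] .* v[2:end]
x = collect(1.0:5.0)
J = g(DualVector(x, Matrix(1.0I, 5, 5))).jacobian   # 4×5 jacobian in one pass
@test J ≈ ForwardDiff.jacobian(g, x)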