diff --git a/Project.toml b/Project.toml
index 0375150..a78c37d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,10 +6,26 @@ version = "0.1.0"
 [deps]
 ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
-DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[compat]
+ArrayLayouts = "1"
+ForwardDiff = "0.10.36"
+Lux = "0.5.62"
+Plots = "1.40.5"
+
+[extras]
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Lux", "ForwardDiff", "Test"]
diff --git a/examples/convlayer.jl b/examples/convlayer.jl
new file mode 100644
index 0000000..2e16d64
--- /dev/null
+++ b/examples/convlayer.jl
@@ -0,0 +1,65 @@
+using LinearAlgebra, MLDatasets, Plots, DualArrays, Random, FillArrays
+
+#GOAL: Implement and differentiate a convolutional neural network layer
+function convlayer(img, ker, xstride = 1, ystride = 1)
+    n, m = size(ker)
+    t = eltype(ker)
+
+    n2, m2 = size(img)
+    n3, m3 = fld(n2 - n, xstride) + 1, fld(m2 - m, ystride) + 1
+    fmap = zeros(promote_type(eltype(img), t), n3, m3)
+
+    #Apply kernel to each strided section of the image
+    for i = 1:n3, j = 1:m3
+        ii, jj = (i - 1) * xstride + 1, (j - 1) * ystride + 1
+        ft = img[ii:ii+n-1, jj:jj+m-1] .* ker
+        fmap[i, j] = sum(ft)
+    end
+
+    fmap
+end
+
+function softmax(x)
+    s = sum(exp.(x))
+    exp.(x) / s
+end
+
+function dense_layer(f, W, b, x)
+    f(W*x + b)
+end
+
+function cross_entropy(x, y)
+    -sum(y .* log.(x))
+end
+
+function model_loss(x, y, w)
+    ker = reshape(w[1:9], 3, 3)
+    weights = reshape(w[10:6769], 10, 676)
+    biases = w[6770:6779]
+    l1 = vec(DualMatrix(convlayer(x, ker)))
+    l2 = dense_layer(softmax, weights, biases, l1)
+    target = OneElement(1, y+1, 10)
+    loss = cross_entropy(l2, target)
+    loss.value, loss.partials
+end
+
+#TODO: Implement remaining layers upon more support for DualMatrix
+
+function train_model()
+    p = rand(6779)
+    epochs = 1000
+    lr = 0.02
+    dataset = MNIST(:train)
+
+    for i = 1:epochs
+        img, label = dataset[i]
+        d = DualVector(p, I(6779))
+
+        loss, grads = model_loss(img, label, d)
+        println(loss)
+        p = p - lr * grads
+    end
+end
+
+train_model()
diff --git a/examples/heatequation.jl b/examples/heatequation.jl
index e4a61b3..09b4542 100644
--- a/examples/heatequation.jl
+++ b/examples/heatequation.jl
@@ -9,7 +9,7 @@
 # Compare performance and results.
 ##
 
-using LinearAlgebra, BandedMatrices, DifferentialEquations, Plots,
+using LinearAlgebra, BandedMatrices, OrdinaryDiffEq, Plots,
     ForwardDiff
 
 U = 500
diff --git a/examples/neuralnet.jl b/examples/neuralnet.jl
new file mode 100644
index 0000000..ff4e403
--- /dev/null
+++ b/examples/neuralnet.jl
@@ -0,0 +1,45 @@
+import Lux: relu
+using DualArrays, LinearAlgebra, Plots, FillArrays, Test
+
+#GOAL: Learn exp() using a simple one-layer relu neural network.
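+#
+# The model is a width-N elementwise layer, model(a, b, x) = relu.(a .* x + b),
+# fitted by full-batch gradient descent. The gradient comes from seeding a
+# DualVector with an identity jacobian, so model_loss returns a Dual whose
+# partials are exactly ∇loss; the sparse (Eye) and dense (Matrix) seedings
+# below should agree.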
+
+N = 20
+d = 0.1
+
+#Domain over which to learn exp
+data = collect(d:d:N*d)
+
+function model(a, b, x)
+    return relu.(a .* x + b)
+end
+
+function model_loss(w)
+    sum((model(w[1:N], w[(N+1):end], data) - exp.(data)) .^ 2)
+end
+
+function gradient_descent_sparse(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Eye{Float64}(2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+function gradient_descent_dense(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Matrix(I, 2*N, 2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+@time densesol = gradient_descent_dense(500)
+@time sparsesol = gradient_descent_sparse(500)
+@test densesol == sparsesol
+@test sparsesol ≈ exp.(data) rtol = 1e-2
diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl
index 81d727b..1d753b6 100644
--- a/examples/newtonpendulum.jl
+++ b/examples/newtonpendulum.jl
@@ -6,24 +6,36 @@
 # via discretisation and Newton's method.
 ##
 
-using LinearAlgebra, ForwardDiff, Plots, DualArrays
+using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, Test
 
 #Boundary Conditions
 a = 0.1
 b = 0.0
 
 #Time step, Time period and number of x for discretisation.
-ts = 0.001
+ts = 0.1
+
 Tmax = 5.0
 N = Int(Tmax/ts) - 1
 
 #LHS of ode
 function f(x)
     n = length(x)
-    D = Tridiagonal([ones(Float64, n) / ts ; 0.0], [1.0; -2ones(Float64, n) / ts; 1.0], [0.0; ones(Float64, n) / ts])
+    D = Tridiagonal([ones(n) / ts^2 ; 0.0], [1.0; -2ones(n) / ts^2; 1.0], [0.0; ones(n) / ts^2])
     (D * [a; x; b])[2:end-1] + sin.(x)
 end
+
+#Boundary-inclusive formulation: the residual for the full vector u = [a; x; b]
+function f(u, a, b, Tmax)
+    h = Tmax/(length(u)-1)
+    [u[1] - a;
+     (u[1:end-2] - 2u[2:end-1] + u[3:end])/h^2 + sin.(u[2:end-1]);
+     u[end] - b]
+end
 
 #Newton's method using ForwardDiff.jl
 function newton_method_forwarddiff(f, x0, n)
     x = x0
@@ -38,18 +50,33 @@ function newton_method_dualvector(f, x0, n)
     x = x0
     l = length(x0)
     for i = 1:n
-        ∇f = f(DualVector(x, Matrix(I, l, l))).jacobian
+        ∇f = f(DualVector(x, Eye(l))).jacobian
         x = x - ∇f \ f(x)
     end
     x
 end
+
+function newton_method_dualvector2(f, x0, n)
+    x = x0
+    l = length(x0)
+    for i = 1:n
+        ∇f = f(DualVector(x, Eye(l)), a, b, Tmax).jacobian
+        x = x - ∇f \ f(x, a, b, Tmax)
+    end
+    x
+end
 
 #Initial guess
-x0 = zeros(Float64, N)
+x0 = zeros(N)
 
 #Solve and plot both solution and LHS ('deviation' from system)
-@time sol = newton_method_forwarddiff(f, x0, 100)
-@time sol = newton_method_dualvector(f, x0, 100)
+@time sol1 = newton_method_forwarddiff(f, x0, 100);
+@time sol2 = newton_method_dualvector(f, x0, 100);
+@test sol1 ≈ sol2
+
+#The boundary-inclusive system is solved on the full grid, endpoints included
+x0full = zeros(N + 2); x0full[1] = a; x0full[end] = b;
+@time sol3 = newton_method_dualvector2(f, x0full, 100);
+@test sol3[2:end-1] ≈ sol2
+
+sol = sol2
 plot(0:ts:Tmax, [a; sol; b])
 plot!(0:ts:Tmax, [0; f(sol); 0])
diff --git a/src/DualArrays.jl b/src/DualArrays.jl
index e4074d5..56d705d 100644
--- a/src/DualArrays.jl
+++ b/src/DualArrays.jl
@@ -1,80 +1,236 @@
 module DualArrays
-export DualVector
+export DualVector, DualArray, DualMatrix
 
 import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum,
-            vcat, convert, *
-using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays
+            vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero,
+            reshape
+using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays
+import ChainRules: frule, ZeroTangent
 
-struct Dual{T, Partials <: AbstractVector{T}} <: Real
+# TODO: support non-banded
+sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...)
+
+# Should Partials necessarily be a vector?
+# For example, if we index a DualMatrix, should partials retain the shape
+# of the perturbations, thereby giving a matrix of partials?
+struct Dual{T, Partials <: AbstractArray{T}} <: Real
     value::T
     partials::Partials
 end
 
+zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1))
+promote_rule(::Type{Dual{T}},::Type{S}) where {T,S<:Real} = Dual{promote_type(T,S)}
+
 ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials
+
+for op in (:+, :-)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials)
+end
+for op in (:*, :/)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials,k))
+end
+
+-(x::Dual) = Dual(-x.value, -x.partials)
+
+*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials)
+/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2
 
 sparse_getindex(a...) = layout_getindex(a...)
 sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
 sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))
 
 """
-reprents a vector of duals given by
+represents an array of duals given by
 
-    values + jacobian * [ε_1,…,ε_n].
+    values + jacobian * 𝛜,
 
-For now the entries just return the values.
+where 𝛜 is an array of dual parts corresponding to each entry of values.
 """
-struct DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}}
-    value::Vector{T}
-    jacobian::M
-    function DualVector(value::Vector{T},jacobian::M) where {T, M <: AbstractMatrix{T}}
-        if(size(jacobian)[1] != length(value))
-            x,y = length(value),size(jacobian)[1]
-            throw(ArgumentError("vector length must match number of rows in jacobian.\n
-            vector length: $x \n
-            no. of jacobian rows: $y"))
-        end
-        new{T,M}(value,jacobian)
-    end
-end
+#Helper function to check valid dimensions of DualArray
+function _checkdims(val, jac)
+    s1, s2 = size(val), size(jac)
+    if length(s1) > length(s2)
+        return false
+    end
+    for (i, x) in enumerate(s1)
+        if x != s2[i]
+            return false
+        end
+    end
+    return true
+end
 
-function DualVector(value::AbstractVector, jacobian::AbstractMatrix)
-    T = promote_type(eltype(value), eltype(jacobian))
-    DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian))
+struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N}
+    value::AbstractArray{T, N}
+    jacobian::J
+    function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{T,M}) where {T, N, M}
+        if !_checkdims(value, jacobian)
+            s1, s2 = size(value), size(jacobian)
+            throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian.\n\nGot $s1, $s2"))
+        end
+        new{T,N,2*N,typeof(jacobian)}(value, jacobian)
+    end
+end
+
+function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M}
+    t = promote_type(T, S)
+    DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t,M}, jacobian))
 end
+
+#Aliases and Constructors
+const DualVector{T, M <: AbstractMatrix{T}} = DualArray{T, 1, 2, M}
+const DualMatrix{T, M <: AbstractArray{T, 4}} = DualArray{T, 2, 4, M}
+
+DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j)
+DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j)
+DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j)
+
+function DualMatrix(x::AbstractMatrix{Dual{T}}) where {T}
+    val = zeros(T, size(x)...)
+    jac = zeros(T, size(x)..., size(x[1,1].partials)...)
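+    #Copy each entry's value and its matrix-shaped partials into the 4-D jacobian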
+    n, m = size(x)
+    for i = 1:n, j = 1:m
+        val[i,j] = x[i,j].value
+        jac[i,j,:,:] = x[i,j].partials
+    end
+    DualMatrix(val, jac)
+end
 
 function getindex(x::DualVector, y::Int)
     Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
 end
 
 function getindex(x::DualVector, y::UnitRange)
     newval = x.value[y]
-    newjac = sparse_getindex(x.jacobian,y,:)
+    newjac = layout_getindex(x.jacobian,y,:)
     DualVector(newval, newjac)
 end
 
-size(x::DualVector) = length(x.value)
-axes(x::DualVector) = axes(x.value)
+getindex(x::DualVector, ::Colon) = x
+
+function getindex(x::DualArray, y::Int...)
+    idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x))
+    Dual(x.value[y...], x.jacobian[idx...])
+end
+
+for op in (:size, :axes, :ndims)
+    @eval $op(x::DualArray) = $op(x.value)
+end
 
++(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
+-(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian)
++(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian)
+-(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian)
++(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian)
+-(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian)
 *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials')
+/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian / y)
+/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2
 
-broadcasted(::typeof(sin),x::DualVector) = DualVector(sin.(x.value),Diagonal(cos.(x.value))*x.jacobian)
+##
+# Note about 4-tensor by matrix multiplication:
+# We can consider matrix-vector multiplication as follows:
+#
+#     (Ax)[i] = A[i,:] . x
+#
+# This can be generalised to higher dimensions (B a 4-tensor, X a matrix):
+#
+#     (BX)[i,j] = B[i,j,:,:] . X
+#
+# This is useful since if we consider 4-tensors of size n × m × s × t as
+# linear maps from s × t matrices to n × m matrices, this gives us a
+# multiplication that applies the linear transformation to a matrix in the
+# same way that matrix-vector multiplication applies a linear transformation
+# to a vector. It also gives us a way to 'expand' the dual part of a
+# DualMatrix.
+##
+
+# Assumes that the total number of epsilons is equal, i.e. the arrays of
+# perturbations are reshapes of each other.
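+# As an illustrative sketch (not part of the package API), this contraction
+# could be written as:
+#
+#     tensormul(B, X) = [sum(B[i, j, :, :] .* X) for i in axes(B, 1), j in axes(B, 2)]
+#
+# The two * methods below apply the same contraction row by row, fused with
+# the product rule, instead of materialising it explicitly.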
+function *(x::DualMatrix, y::DualVector)
+    val = x.value * y.value
+    t = promote_type(eltype(x.value), eltype(y.value))
+    jac = zeros(t, length(val), size(y.jacobian, 2))
+    n, m = size(jac, 1), size(x.value, 2)
+    for i = 1:n
+        new_row = zeros(t, size(jac, 2))
+        for j = 1:m
+            new_row += x.value[i,j] * y.jacobian[j,:] + y.value[j] * vec(x.jacobian[i, j, :, :])
+        end
+        jac[i,:] = new_row
+    end
+    DualVector(val, jac)
+end
+
+function *(x::DualMatrix, y::AbstractVector)
+    val = x.value * y
+    t = promote_type(eltype(x.value), eltype(y))
+    jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4))
+    n, m = size(jac, 1), size(x.value, 2)
+    for i = 1:n
+        new_row = zeros(t, size(jac, 2))
+        for j = 1:m
+            new_row += y[j] * vec(x.jacobian[i, j, :, :])
+        end
+        jac[i,:] = new_row
+    end
+    DualVector(val, jac)
+end
+
+#Generic unary broadcast: differentiate f elementwise via its ChainRules frule
+function broadcasted(f::Function, d::DualVector)
+    jvals = zeros(eltype(d.value), length(d.value))
+    for (i, x) = enumerate(d.value)
+        _, df = frule((ZeroTangent(), 1), f, x)
+        jvals[i] = df
+    end
+    DualVector(f.(d.value), Diagonal(jvals)*d.jacobian)
+end
 
 function broadcasted(::typeof(*),x::DualVector,y::DualVector)
     newval = x.value .* y.value
     newjac = x.value .* y.jacobian + y.value .* x.jacobian
-    DualVector(newval,newjac)
+    DualArray(newval,newjac)
+end
+
+function broadcasted(::typeof(*),x::AbstractArray,y::DualArray)
+    newval = x .* y.value
+    newjac = x .* y.jacobian
+    DualArray(newval,newjac)
+end
+
+function broadcasted(::typeof(*),x::DualArray,y::AbstractArray)
+    newval = x.value .* y
+    newjac = y .* x.jacobian
+    DualArray(newval,newjac)
 end
+
+function broadcasted(::typeof(^),x::DualVector,n::Int)
+    newval = x.value .^ n
+    newjac = n * x.value .^ (n - 1) .* x.jacobian
+    DualVector(newval,newjac)
+end
+
+broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n)
 
+###
+# sum (higher-dimensional case assuming that shape of perturbations is preserved)
+###
 function sum(x::DualVector)
-    n = length(x.value)
     Dual(sum(x.value), vec(sum(x.jacobian; dims=1)))
 end
 
+function sum(x::DualArray)
+    #Drop redundant leading dimensions of 1 in the summed jacobian
+    idx = ntuple(i -> i > ndims(x) ? Colon() : 1, 2*ndims(x))
+    Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))[idx...])
+end
+
 _jacobian(d::Dual) = permutedims(d.partials)
+_jacobian(d::DualVector) = d.jacobian
 _jacobian(d::DualVector, ::Int) = d.jacobian
-_jacobian(x::Number, N::Int) = zeros(typeof(x), 1, N)
+_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N)
 
 _value(d::DualVector) = d.value
 _value(x::Number) = x
@@ -88,16 +244,45 @@ function vcat(x::Union{Dual, DualVector}...)
     DualVector(value,jacobian)
 end
 
-_size(x::Real) = 1
-_size(x::DualVector) = size(x)
+function vcat(a::Dual, x::DualVector, b::Dual)
+    val = vcat(a.value, x.value, b.value)
+    jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b))
+    DualVector(val, jac)
+end
+
+function vcat(a::Real, x::DualVector, b::Real)
+    cols = size(x.jacobian,2)
+    val = vcat(a, x.value, b)
+    jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols))
+    DualVector(val, jac)
+end
 
+###
+# enable converting between DualVector and DualMatrix.
+# the 4D jacobian can be thought of as a generalisation of the 2D jacobian:
+#
+#     n × m 2D jacobian:         J[i,j] = ∂fᵢ / ∂xⱼ
+#     n × m × s × t 4D jacobian: J[i,j,k,l] = ∂f[i,j] / ∂x[k,l]
+#
+###
+
+# reshape does not preserve the shape of the perturbations (since the
+# jacobian can have any number of columns)
+function reshape(x::DualVector, dims::Vararg{Int})
+    val = reshape(x.value, dims...)
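+    #The jacobian's column count becomes the flattened perturbation shape; a
+    #trailing singleton dimension makes this a valid 4-D DualMatrix jacobian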
+    jac = reshape(x.jacobian, dims..., size(x.jacobian, 2), 1)
+    DualMatrix(val, jac)
+end
 
-function vcat(x::Union{Real, DualVector}...)
-    cols = max((_size(i) for i in x)...)
-    val = vcat((_value(i) for i in x)...)
-    jac = vcat((_jacobian(i, cols) for i in x)...)
+function vec(x::DualArray)
+    val = vec(x.value)
+    jac = reshape(x.jacobian, length(val), prod(size(x.jacobian)[ndims(x)+1:end]))
     DualVector(val, jac)
 end
 
-show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜"))
+show(io::IO,::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜"))
+show(io::IO,::MIME"text/plain", x::DualVector) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜"))
+show(io::IO,::MIME"text/plain", x::Dual) = (print(io, x.value); print(io, " + "); print(io, x.partials); print(io, "𝛜"))
 
 end
-# module DualArrays
+# module DualArrays
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index d1cba3c..9f10864 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,42 +1,100 @@
-using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices
+using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, ComponentArrays
 using DualArrays: Dual
+using Lux: relu
 
 @testset "DualArrays" begin
-    @test_throws ArgumentError DualVector([1,2],I(3))
-    v = DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9])
-    @test v[1] isa Dual
-    @test v[1] == Dual(1,[1,2,3])
-    @test v[2] == Dual(2,[4,5,6])
-    @test v[3] == Dual(3,[7,8,9])
-    @test_throws BoundsError v[4]
-    @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9])
-
-    w = v + v
-    @test w == DualVector([2,4,6],[2 4 6;8 10 12;14 16 18])
-    @test w.jacobian == 2v.jacobian
-
-    @test sin.(v) isa DualVector
-    @test sin.(v).value == [sin(1), sin(2), sin(3)]
-    @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian
-
-    x,y = v[1:2],v[2:3]
-    @test x == DualVector([1,2],[1 2 3;4 5 6])
-    @test y == DualVector([2,3],[4 5 6;7 8 9])
-    @test x .* y == DualVector([2,6],[6 9 12;26 31 36])
+    v = DualArray([1.,2, 3], [1 2 3; 4 5 6;7 8 9])
+    @testset "constructors" begin
+        @test v isa DualVector
+        @test eltype(v) == DualArrays.Dual{Float64}
+        w = DualArray(ones(2,2),ones(2,2,2,2))
+        @test w isa DualMatrix
+        @test_throws TypeError DualArray([1,2], [1,2])
+        @test_throws ArgumentError DualVector([1,2],I(3))
+    end
+
+    @testset "indexing" begin
+        @test v[1] isa Dual
+        @test v[1] == Dual(1,[1,2,3])
+        @test v[2] == Dual(2,[4,5,6])
+        @test v[3] == Dual(3,[7,8,9])
+        @test_throws BoundsError v[4]
+        @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9])
+
+        x,y = v[1:2],v[2:3]
+        @test x == DualVector([1,2],[1 2 3;4 5 6])
+        @test y == DualVector([2,3],[4 5 6;7 8 9])
+
+        n = 10
+        w = DualVector(1:n, I(n))
+        @test w[2:end].jacobian isa BandedMatrix
+    end
+
+    @testset "basic_operations" begin
+        w = v + v
+        @test w == DualVector([2.,4.,6.],[2. 4 6;8 10 12;14 16 18])
+        @test w.jacobian == 2v.jacobian
+
+        u = v - w
+        @test u == -v
+
+        x = v.value + v - v.value
+        @test x == v
+
+        @test v .^ 2 == DualVector([1, 4, 9], [2 4 6;16 20 24; 42 48 54])
+
+        x,y = v[1:2],v[2:3]
+        @test x .* y == DualVector([2,6],[6 9 12;26 31 36])
+        @test sum(x .* y) isa Dual
+        @test sum(x .* y) == Dual(8,[32,40,48])
+    end
+
+    @testset "broadcasting" begin
+        @test sin.(v) isa DualVector
+        @test sin.(v).value == [sin(1), sin(2), sin(3)]
+        @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian
 
-    @test sum(x .* y) isa Dual
-    @test sum(x .* y) == Dual(8,[32,40,48])
+        @test exp.(v) isa DualVector
+        @test exp.(v).value == [exp(1), exp(2), exp(3)]
+        @test exp.(v).jacobian == Diagonal(exp.(v.value)) * v.jacobian
+
+        @test relu.(v) isa DualVector
+        @test relu.(v).value == [relu(1), relu(2), relu(3)]
+        @test relu.(v).jacobian == Diagonal(0.5 * (sign.(v.value) .+ 1)) * v.jacobian
+    end
+
+    @testset "misc" begin
+        n = 10
+        v = DualVector(1:n, I(n))
+        @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    end
 
-    n = 10
-    v = DualVector(1:n, I(n))
-    @test v[2:end].jacobian isa BandedMatrix
+    @testset "vcat" begin
+        x = Dual(1, [1, 2, 3])
+        y = DualVector([2, 3], [4 5 6;7 8 9])
+        @test vcat(x) == x
+        @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
+        @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+        @test vcat(1, y, 2) == DualVector([1,2,3,2], [0 0 0; 4 5 6; 7 8 9; 0 0 0])
+    end
 
-    @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    @testset "reshape" begin
+        x = DualVector([1,2,3,4], I(4))
+        @test vec(reshape(x, 2, 2)) == x
+    end
 
-    x = Dual(1, [1, 2, 3])
-    y = DualVector([2, 3], [4 5 6;7 8 9])
-    @test vcat(x) == x
-    @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
-    @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+    @testset "DualMatrix" begin
+        x = DualVector(ones(6), I(6))
+        y = reshape(x[1:4], 2, 2)
+        z = x[5:6]
+        @test (y*z).value == [2,2]
+        @test (y*z).jacobian == [1 0 1 0 1 1; 0 1 0 1 1 1]
+        for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2,6)))
+            x = DualVector(t[1], I(6))
+            y = reshape(x[1:4], 2, 2)
+            z = x[5:6]
+            @test relu.(y*[1,-1] + z).jacobian == t[2]
+        end
+    end
 end
\ No newline at end of file