Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,37 @@
name = "DualArrays"
uuid = "429e4e16-f749-45f3-beec-30742fae38ce"
version = "0.2.0"
authors = ["Sheehan Olver <[email protected]>"]
version = "0.1.0"

[deps]
ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
ChainRules = "1.72.6"
ChainRulesCore = "1.26.0"
DifferentialEquations = "7.17.0"
ForwardDiff = "1.2.2"
Plots = "1.41.1"
SparseArrays = "1.10"

[extras]
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[targets]
dev = ["Revise"]
examples = ["Plots", "DifferentialEquations", "ForwardDiff", "BandedMatrices"]
test = ["Test", "ForwardDiff", "BandedMatrices", "SparseArrays"]
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
# DualArrays.jl
A package for working with arrays of dual numbers with sparsity
DualArrays.jl is a package that provides the `DualVector` structure for use in forward mode automatic differentiation (autodiff). Existing forward mode autodiff implementations such as `ForwardDiff.jl` make use of a `Dual` structure with a vector of dual parts.

There are some limitations of this when differentiating vector valued functions, as the dual components of each element will be each treated as a separate (dense) vector rather than a Jacobian. This misses the opportunity to exploit sparse Jacobian structures such as banded structures that appear when solving systems of ODEs.

`DualArrays.jl` provides a new structure, `DualVector`, consisting of a vector of real parts and a jacobian that can be any matrix structure in the Julia ecosystem. This carries over many of the optimisations provided by sparse matrix structures to the forward-mode autodiff process. The package also comes with its own `Dual` type for elementwise indexing or vector -> scalar functions. An efficient implementation of differentiating a function might be as follows:

```julia
using FillArrays

function gradient(f::Function, x::Vector)
dx = DualVector(x, Eye(length(x)))
return f(dx).jacobian
end
```

See the examples folder for more use cases.

Differentiation rules are mostly provided by the `ChainRules.jl` autodiff backend.
4 changes: 2 additions & 2 deletions examples/newtonpendulum.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ end
x0 = zeros(Float64, N)

#Solve and plot both solution and LHS ('deviation' from system)
@time sol = newton_method_forwarddiff(f, x0, 100)
@time sol = newton_method_dualvector(f, x0, 100)
@time sol = newton_method_forwarddiff(f, x0, 10)
@time sol = newton_method_dualvector(f, x0, 10)
plot(0:ts:Tmax, [a; sol; b])
plot!(0:ts:Tmax, [0; f(sol); 0])

108 changes: 15 additions & 93 deletions src/DualArrays.jl
Original file line number Diff line number Diff line change
@@ -1,103 +1,25 @@
module DualArrays
export DualVector

import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum,
vcat, convert, *
using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays

struct Dual{T, Partials <: AbstractVector{T}} <: Real
value::T
partials::Partials
end

==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials

sparse_getindex(a...) = layout_getindex(a...)
sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))

"""
reprents a vector of duals given by

values + jacobian * [ε_1,…,ε_n].

For now the entries just return the values.
"""

struct DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}}
value::Vector{T}
jacobian::M
function DualVector(value::Vector{T},jacobian::M) where {T, M <: AbstractMatrix{T}}
if(size(jacobian)[1] != length(value))
x,y = length(value),size(jacobian)[1]
throw(ArgumentError("vector length must match number of rows in jacobian.\n
vector length: $x \n
no. of jacobian rows: $y"))
end
new{T,M}(value,jacobian)
end
end

function DualVector(value::AbstractVector, jacobian::AbstractMatrix)
T = promote_type(eltype(value), eltype(jacobian))
DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian))
end
DualArrays

A Julia package for efficient automatic differentiation using dual numbers and dual arrays.

This package provides:
- `Dual`: A dual number type for storing values and their derivatives
- `DualVector`: A vector of dual numbers represented with a Jacobian matrix

function getindex(x::DualVector, y::Int)
Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
end

function getindex(x::DualVector, y::UnitRange)
newval = x.value[y]
newjac = sparse_getindex(x.jacobian,y,:)
DualVector(newval, newjac)
end
size(x::DualVector) = length(x.value)
axes(x::DualVector) = axes(x.value)
+(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)

broadcasted(::typeof(sin),x::DualVector) = DualVector(sin.(x.value),Diagonal(cos.(x.value))*x.jacobian)

function broadcasted(::typeof(*),x::DualVector,y::DualVector)
newval = x.value .* y.value
newjac = x.value .* y.jacobian + y.value .* x.jacobian
DualVector(newval,newjac)
end

function sum(x::DualVector)
n = length(x.value)
Dual(sum(x.value), vec(sum(x.jacobian; dims=1)))
end

_jacobian(d::Dual) = permutedims(d.partials)
_jacobian(d::DualVector, ::Int) = d.jacobian
_jacobian(x::Number, N::Int) = zeros(typeof(x), 1, N)
Differentiation rules are mostly provided by ChainRules.jl.
"""
module DualArrays

_value(d::DualVector) = d.value
_value(x::Number) = x
export DualVector, Dual

function vcat(x::Union{Dual, DualVector}...)
if length(x) == 1
return x[1]
end
value = vcat((d.value for d in x)...)
jacobian = vcat((_jacobian(d) for d in x)...)
DualVector(value,jacobian)
end
import Base: +, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *

_size(x::Real) = 1
_size(x::DualVector) = size(x)
using LinearAlgebra, ArrayLayouts, FillArrays

function vcat(x::Union{Real, DualVector}...)
cols = max((_size(i) for i in x)...)
val = vcat((_value(i) for i in x)...)
jac = vcat((_jacobian(i, cols) for i in x)...)
DualVector(val, jac)
include("types.jl")
include("indexing.jl")
include("arithmetic.jl")
include("utilities.jl")
end

show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜"))
end
# module DualArrays
170 changes: 170 additions & 0 deletions src/arithmetic.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Arithmetic operations for DualArrays.jl

using LinearAlgebra, ChainRules

"""
Addition of DualVectors.
"""
Base.:+(x::DualVector, y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
Base.:+(x::DualVector, y::AbstractVector) = DualVector(x.value + y, x.jacobian)
Base.:+(x::AbstractVector, y::DualVector) = DualVector(x + y.value, y.jacobian)

"""
Matrix multiplication with a DualVector.
"""
Base.:*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)

"""
Broadcasted multiplication of two DualVectors using the product rule.
"""
function Base.broadcasted(::typeof(*), x::DualVector, y::DualVector)
newval = x.value .* y.value
newjac = Diagonal(x.value) * y.jacobian + Diagonal(y.value) * x.jacobian
DualVector(newval, newjac)
end

"""

this section attempts to define broadcasting rules on DualVectors for functions
that either:

- take a single real argument (function applied to each element)
- are binary operations (binary operation applied to scalar and each element)

This implementation is loosely based on
https://juliadiff.org/ChainRulesOverloadGeneration.jl/dev/examples/forward_mode.html

"""

"""
Defines how a frule in ChainRules.jl for a scalar function f should be broadcasted over a DualVector.
"""
function broadcast_rule(f, d::DualVector)
val = similar(d.value)
jac = similar(d.jacobian)

@inbounds for (i, x) in pairs(d.value)
y, dy = ChainRules.frule((ChainRules.NoTangent(), d.jacobian[i, :]), f, x)
val[i] = y
jac[i, :] = dy
end

return DualVector(val, jac)
end

"""
Defines how a frule in ChainRules.jl for a binary
operation a (+) b on reals a, b should be broadcasted over:
- a (+) d (d a DualVector)
- d (+) a (d a DualVector)
"""

function broadcast_rule(f, d::DualVector, x::Real)
val = similar(d.value)
jac = similar(d.jacobian)
z = zero(jac[1, :])

@inbounds for (i, y) in pairs(d.value)
yval, dy = ChainRules.frule(
(ChainRules.NoTangent(), d.jacobian[i, :], z),
f, y, x)
val[i] = yval
jac[i, :] = dy
end

return DualVector(val, jac)
end

function broadcast_rule(f, x::Real, d::DualVector)
val = similar(d.value)
jac = similar(d.jacobian)
z = zero(jac[1, :])

@inbounds for (i, y) in pairs(d.value)
yval, dy = ChainRules.frule(
(ChainRules.NoTangent(), z, d.jacobian[i, :]),
f, x, y)
val[i] = yval
jac[i, :] = dy
end

return DualVector(val, jac)
end

"""
Extend for broadcasting Dual and DualVector
"""

function broadcast_rule(f, d::DualVector, x::Dual)
val = similar(d.value)
jac = similar(d.jacobian)
z = x.partials

@inbounds for (i, y) in pairs(d.value)
yval, dy = ChainRules.frule(
(ChainRules.NoTangent(), d.jacobian[i, :], z),
f, y, x.value)
val[i] = yval
jac[i, :] = dy
end

return DualVector(val, jac)
end

function broadcast_rule(f, x::Dual, d::DualVector)
val = similar(d.value)
jac = similar(d.jacobian)
z = x.partials

@inbounds for (i, y) in pairs(d.value)
yval, dy = ChainRules.frule(
(ChainRules.NoTangent(), z, d.jacobian[i, :]),
f, x.value, y)
val[i] = yval
jac[i, :] = dy
end

return DualVector(val, jac)
end

# a set of defined broadcasts to avoid duplicate definitions
defined = Set{DataType}()

# Get all applicable frules defined in ChainRules
# and define broadcasted versions for DualVector using vector_rule

frules = methods(ChainRules.frule)
for f in frules
# get signatures for each function with a frule
sig = Base.unwrap_unionall(Base.tuple_type_tail(f.sig))

# split into operation and args, filter for
# single argument functions and binary operations that can act on real numbers
op, args = sig.parameters[2], sig.parameters[3:end]

isconcretetype(op) || continue
op in defined && continue

# if it is a single argument function...
if length(args) == 1
args[1] isa Type || continue
Real <: args[1] || continue

@eval Base.broadcasted(fn::$op, d::DualVector) = broadcast_rule(fn, d)
push!(defined, op)
end

# if it is a binary operation...
if length(args) == 2
args[1] isa Type || continue
args[2] isa Type || continue
Real <: args[1] && Real <: args[2] || continue


@eval Base.broadcasted(fn::$op, d::DualVector, y::Real) = broadcast_rule(fn, d, y)
@eval Base.broadcasted(fn::$op, y::Real, d::DualVector) = broadcast_rule(fn, y, d)
@eval Base.broadcasted(fn::$op, d::DualVector, du::Dual) = broadcast_rule(fn, d, du)
@eval Base.broadcasted(fn::$op, du::Dual, d::DualVector) = broadcast_rule(fn, du, d)
push!(defined, op)
end
end
33 changes: 33 additions & 0 deletions src/indexing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Indexing operations for DualArrays.jl

using ArrayLayouts, FillArrays

sparse_getindex(a...) = layout_getindex(a...)
sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2))
sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1))

"""
Extract a single Dual number from a DualVector at position y.
"""
function Base.getindex(x::DualVector, y::Int)
Dual(x.value[y], sparse_getindex(x.jacobian, y, :))
end

"""
Extract a sub-DualVector from a DualVector using a range.
"""
function Base.getindex(x::DualVector, y::UnitRange)
newval = x.value[y]
newjac = sparse_getindex(x.jacobian, y, :)
DualVector(newval, newjac)
end

"""
Return the size of the DualVector (length of the value vector).
"""
Base.size(x::DualVector) = (length(x.value),)

"""
Return the axes of the DualVector.
"""
Base.axes(x::DualVector) = axes(x.value)
Loading
Loading