diff --git a/Project.toml b/Project.toml
index 0375150..0cf0764 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,10 +6,30 @@ version = "0.1.0"
 [deps]
 ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
-DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
+ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
+ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[compat]
+ArrayLayouts = "1"
+ForwardDiff = "0.10.36"
+Lux = "0.5.62"
+Plots = "1.40.5"
+
+[extras]
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Lux", "ForwardDiff", "Test"]
diff --git a/examples/convlayer.jl b/examples/convlayer.jl
new file mode 100644
index 0000000..390cc75
--- /dev/null
+++ b/examples/convlayer.jl
@@ -0,0 +1,69 @@
+using LinearAlgebra, MLDatasets, Plots, DualArrays, Random, FillArrays
+
+#GOAL: Implement and differentiate a convolutional neural network layer
+function convlayer(img, ker, xstride = 1, ystride = 1)
+    n, m = size(ker)
+    t = eltype(ker)
+
+    n2, m2 = size(img)
+    n3, m3 = div(n2 - n, xstride) + 1, div(m2 - m, ystride) + 1
+    fmap = zeros(promote_type(eltype(img), t), n3, m3)
+    #Apply kernel to each (strided) window of the image
+    for i = 1:n3, j = 1:m3
+        i0, j0 = (i - 1) * xstride + 1, (j - 1) * ystride + 1
+        ft = img[i0:i0+n-1, j0:j0+m-1] .* ker
+        fmap[i, j] = sum(ft)
+    end
+    fmap
+end
+
+function softmax(x)
+    s = sum(exp.(x))
+    exp.(x) / s
+end
+
+function dense_layer(W, b, x, f::Function = identity)
+    ret = W*x
+    println("Multiplication complete")
+    ret += b
+    println("Addition complete")
+    f(ret)
+end
+
+function cross_entropy(x, y)
+    -sum(y .* log.(x))
+end
+
+function model_loss(x, y, w)
+    ker = reshape(w[1:9], 3, 3)
+    weights = reshape(w[10:6769], 10, 676)
+    biases = w[6770:6779]
+    println("Reshape complete")
+    l1 = vec(DualMatrix(convlayer(x, ker)))
+    println("Conv layer complete")
+    l2 = dense_layer(weights, biases, l1, softmax)
+    println("Dense layer complete")
+    target = OneElement(1, y+1, 10)
+    loss = cross_entropy(l2, target)
+    println("Loss complete")
+    loss.value, loss.partials
+end
+
+function train_model()
+    p = rand(6779)
+    epochs = 1000
+    lr = 0.02
+    dataset = MNIST(:train)
+
+    for i = 1:epochs
+        img, label = dataset[i]
+        d = DualVector(p, I(6779))
+
+        loss, grads = model_loss(img, label, d)
+        println(loss)
+        p = p - lr * grads
+    end
+end
+
+train_model()
+
+
diff --git a/examples/heatequation.jl b/examples/heatequation.jl
index e4a61b3..09b4542 100644
--- a/examples/heatequation.jl
+++ b/examples/heatequation.jl
@@ -9,7 +9,7 @@
 # Compare performance and results.
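A quick sanity check of the convlayer routine added above — a minimal sketch assuming convlayer from examples/convlayer.jl is in scope (the 3×3 image and 2×2 kernel are made-up toy inputs). With an all-ones kernel and unit strides, each feature-map entry is just the sum of the window it covers:

using Test

img = [1 2 3; 4 5 6; 7 8 9]
ker = ones(Int, 2, 2)
# Window sums: [1+2+4+5  2+3+5+6; 4+5+7+8  5+6+8+9]
@test convlayer(img, ker) == [12 16; 24 28]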
 ##
 
-using LinearAlgebra, BandedMatrices, DifferentialEquations, Plots,
+using LinearAlgebra, BandedMatrices, OrdinaryDiffEq, Plots,
     ForwardDiff
 
 U = 500
diff --git a/examples/neuralnet.jl b/examples/neuralnet.jl
new file mode 100644
index 0000000..ff4e403
--- /dev/null
+++ b/examples/neuralnet.jl
@@ -0,0 +1,45 @@
+import Lux: relu
+using DualArrays, LinearAlgebra, Plots, FillArrays, Test
+
+#GOAL: Learn exp() using a simple one-layer relu neural network.
+
+N = 20
+d = 0.1
+
+#Domain over which to learn exp
+data = collect(d:d:N*d)
+
+function model(a, b, x)
+    return relu.(a .* x + b)
+end
+
+function model_loss(w)
+    sum((model(w[1:N], w[(N+1):end], data) - exp.(data)) .^ 2)
+end
+
+function gradient_descent_sparse(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Eye{Float64}(2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+function gradient_descent_dense(n, lr = 0.01)
+    weights = ones(2 * N)
+    for i = 1:n
+        dw = DualVector(weights, Matrix(I, 2*N, 2*N))
+        grads = model_loss(dw).partials
+        weights -= lr * grads
+    end
+    model(weights[1:N], weights[(N + 1):end], data)
+end
+
+@time densesol = gradient_descent_dense(500)
+@time sparsesol = gradient_descent_sparse(500)
+@test densesol == sparsesol
+@test sparsesol ≈ exp.(data) rtol = 1e-2
+
+
diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl
index 81d727b..1d753b6 100644
--- a/examples/newtonpendulum.jl
+++ b/examples/newtonpendulum.jl
@@ -6,24 +6,36 @@
 # via discretisation and Newton's method.
 ##
 
-using LinearAlgebra, ForwardDiff, Plots, DualArrays
+
+using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, Test
 
 #Boundary Conditions
 a = 0.1
 b = 0.0
 
 #Time step, Time period and number of x for discretisation.
-ts = 0.001
+ts = 0.1
+
 Tmax = 5.0
 N = Int(Tmax/ts) - 1
 
 #LHS of ode
 function f(x)
     n = length(x)
-    D = Tridiagonal([ones(Float64, n) / ts ; 0.0], [1.0; -2ones(Float64, n) / ts; 1.0], [0.0; ones(Float64, n) / ts])
+    D = Tridiagonal([ones(n) / ts ; 0.0], [1.0; -2ones(n) / ts; 1.0], [0.0; ones(n) / ts])
     (D * [a; x; b])[2:end-1] + sin.(x)
 end
+
+function f(u, a, b, Tmax)
+    h = Tmax/(length(u)-1)
+    [u[1] - a;
+     (u[1:end-2] - 2u[2:end-1] + u[3:end])/h^2 + sin.(u[2:end-1]);
+     u[end] - b]
+end
+
+
+
 #Newton's method using ForwardDiff.jl
 function newton_method_forwarddiff(f, x0, n)
     x = x0
@@ -38,18 +50,33 @@ function newton_method_dualvector(f, x0, n)
     x = x0
     l = length(x0)
     for i = 1:n
-        ∇f = f(DualVector(x, Matrix(I, l, l))).jacobian
+        ∇f = f(DualVector(x, Eye(l))).jacobian
+        x = x - ∇f \ f(x)
+    end
+    x
+end
+
+function newton_method_dualvector2(f, x0, n)
+    x = x0
+    l = length(x0)
+    for i = 1:n
+        ∇f = f(DualVector(x, Eye(l)), a, b, Tmax).jacobian
-        x = x - ∇f \ f(x)
+        x = x - ∇f \ f(x, a, b, Tmax)
     end
     x
 end
 
 #Initial guess
-x0 = zeros(Float64, N)
+x0 = zeros(N)
 
 #Solve and plot both solution and LHS ('deviation' from system)
-@time sol = newton_method_forwarddiff(f, x0, 100)
-@time sol = newton_method_dualvector(f, x0, 100)
+@time sol1 = newton_method_forwarddiff(f, x0, 100);
+@time sol2 = newton_method_dualvector(f, x0, 100);
+@test sol1 ≈ sol2
+
+#Initial guess including the boundary values, for the second formulation
+x0 = zeros(N); x0[1] = a; x0[end] = b;
+@time sol = newton_method_dualvector2(f, x0, 100);
 
 plot(0:ts:Tmax, [a; sol; b])
 plot!(0:ts:Tmax, [0; f(sol); 0])
diff --git a/src/BlockMatrixTensor.jl b/src/BlockMatrixTensor.jl
new file mode 100644
index 0000000..3801337
--- /dev/null
+++ b/src/BlockMatrixTensor.jl
@@ -0,0 +1,76 @@
+#4-Tensor with data stored in a BlockMatrix
+#Enables sparsity for DualMatrix jacobian.
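Before the implementation, a sketch of the storage layout the header comment describes, using only BlockArrays (the name J and the block sizes are made up for illustration). The jacobian of an n × m matrix of duals is an n × m × s × t tensor of partials; holding it as an n × m grid of s × t blocks means structurally zero blocks can later be stored sparsely:

using BlockArrays

# A 2×2 matrix of dual entries, each with a 1×3 row of partials:
# block (i, j) holds the partials of entry (i, j).
J = BlockMatrix(zeros(2, 6), [1, 1], [3, 3])
view(J, Block(1, 1)) .= [1.0 0.0 0.0]   # entry (1,1) depends only on ε₁
view(J, Block(2, 2)) .= [0.0 1.0 0.0]   # entry (2,2) depends only on ε₂
blocks(J)[1, 1]                         # recover the 1×3 partials block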
+using BlockArrays, SparseArrays
+
+struct BlockMatrixTensor{T} <: AbstractArray{T, 4}
+    data::BlockMatrix{T}
+end
+
+#Construct BlockMatrixTensor from a 4-tensor. Useful for testing purposes.
+function BlockMatrixTensor(x::AbstractArray{T, 4}) where {T}
+    n, m, s, t = size(x)
+    data = BlockMatrix(zeros(T, n * s, m * t), fill(s, n), fill(t, m))
+    for i = 1:n, j = 1:m
+        view(data, Block(i, j)) .= x[i, j, :, :]
+    end
+    BlockMatrixTensor(data)
+end
+
+# Useful for creating a BlockMatrixTensor from blocks()
+BlockMatrixTensor(x::Matrix{T}) where {T <: AbstractMatrix} = BlockMatrixTensor(mortar(x))
+
+size(x::BlockMatrixTensor) = (blocksize(x.data)..., blocksizes(x.data)[1,1]...)
+
+#Indexing entire blocks
+getindex(x::BlockMatrixTensor, a::Int, b::Int, c, d) = blocks(x.data)[a, b][c, d]
+getindex(x::BlockMatrixTensor, a::Int, b::Int, ::Colon, ::Colon) = blocks(x.data)[a, b]
+getindex(x::BlockMatrixTensor, a::Int, b, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], 1, :))
+getindex(x::BlockMatrixTensor, a, b::Int, ::Colon, ::Colon) = BlockMatrixTensor(reshape(blocks(x.data)[a, b], :, 1))
+getindex(x::BlockMatrixTensor, a, b, ::Colon, ::Colon) = BlockMatrixTensor(blocks(x.data)[a,b])
+
+
+# For populating a BlockMatrixTensor block by block
+function setindex!(A::BlockMatrixTensor, value, a::Int, b::Int, ::Colon, ::Colon)
+    blocks(A.data)[a, b] = value
+end
+
+function show(io::IO, m::MIME"text/plain", x::BlockMatrixTensor)
+    print(io, "BlockMatrixTensor containing: \n")
+    show(io, m, x.data)
+end
+show(io::IO, x::BlockMatrixTensor) = show(io, x.data)
+
+for op in (:*, :/)
+    @eval $op(x::BlockMatrixTensor, y::Number) = BlockMatrixTensor($op(x.data, y))
+    @eval $op(x::Number, y::BlockMatrixTensor) = BlockMatrixTensor($op(x, y.data))
+end
+
+#Block-wise broadcast
+broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractMatrix) = BlockMatrixTensor(f.(blocks(x.data), y))
+broadcasted(f::Function, x::BlockMatrixTensor, y::AbstractVector) = BlockMatrixTensor(f.(x, reshape(y, :, 1)))
+#Swap the arguments back so that non-commutative f still broadcasts in the right order
+broadcasted(f::Function, x::AbstractVecOrMat, y::BlockMatrixTensor) = broadcasted((a, b) -> f(b, a), y, x)
+
+function sum(x::BlockMatrixTensor; dims = Colon())
+    # Blockwise sum
+    if dims == 1:2
+        sum(blocks(x.data))
+    elseif dims == 1 || dims == 2
+        BlockMatrixTensor(sum(blocks(x.data); dims))
+    else
+        # For now, treat all other cases as if summing the 4-Tensor
+        sum(Array(x); dims = dims)
+    end
+end
+
+function reshape(x::BlockMatrixTensor, dims::Vararg{Union{Colon, Int}, 4})
+    #Reshape block-wise
+    #TODO: Implement non-blockwise reshape
+    if dims[3] isa Colon && dims[4] isa Colon
+        BlockMatrixTensor(reshape(blocks(x.data), dims[1], dims[2]))
+    else
+        throw(ArgumentError("only block-wise reshape (the last two dimensions must be colons) is supported for now"))
+    end
+end
+
+#'Flatten': converts BlockMatrixTensor to Matrix by removing block structure
+# Mimics the reshape achieved by this for general 4-Tensors
+flatten(x::BlockMatrixTensor) = hcat((vcat((x[i, j, :, :] for i = 1:size(x, 1))...) for j = 1:size(x, 2))...)
+flatten(x::AbstractArray{T, 4}) where {T} = reshape(x, size(x, 1) * size(x, 3), size(x, 2) * size(x, 4))
diff --git a/src/DualArray.jl b/src/DualArray.jl
new file mode 100644
index 0000000..32faf3e
--- /dev/null
+++ b/src/DualArray.jl
@@ -0,0 +1,278 @@
+# TODO: support non-banded jacobians
+sparsevcat(As...) = vcat(convert.(BandedMatrix, As)...)
+
+# Should Partials necessarily be a vector?
+# For example, if we index a DualMatrix, should partials retain the shape
+# of the perturbations, thereby giving a matrix of partials?
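As a primer for the scalar type defined next, a minimal sketch of the dual-number algebra it implements (this assumes the Dual constructor and the * method from further down this file):

x = Dual(3.0, [1.0, 0.0])   # seed: ∂x/∂x = 1, ∂x/∂y = 0
y = Dual(2.0, [0.0, 1.0])   # seed: ∂y/∂x = 0, ∂y/∂y = 1
x * y                       # Dual(6.0, [2.0, 3.0]): product rule x⋅∂y + y⋅∂x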
+struct Dual{T, Partials <: AbstractArray{T}} <: Real
+    value::T
+    partials::Partials
+end
+
+zero(::Type{Dual{T}}) where {T} = Dual(zero(T), zeros(T,1))
+promote_rule(::Type{Dual{T}},::Type{S}) where {T,S} = Dual{promote_type(T,S)}
+
+==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials
+for op in (:+, :-)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), a.partials)
+end
+for op in (:*, :/)
+    @eval $op(a::Dual, k::Real) = Dual($op(a.value, k), $op(a.partials, k))
+end
+
+-(x::Dual) = Dual(-1 * x.value, -1 * x.partials)
+
+*(x::Dual, y::Dual) = Dual(x.value*y.value, x.value*y.partials + y.value*x.partials)
+/(x::Dual, y::Dual) = x*Dual(y.value, -y.partials) / y.value^2
+
+sparse_getindex(a...) = layout_getindex(a...)
+sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
+sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))
+sparse_getindex(s::AbstractSparseArray, idx...) = s[idx...]
+
+sparse_getindex(b::BandedMatrix, k::Integer, ::Colon) = sparsevec(
+        rowrange(b, k), b[k, rowrange(b, k)], size(b, 2)
+    )
+sparse_getindex(b::BandedMatrix, ::Colon, k::Integer) = sparsevec(
+        colrange(b, k), b[colrange(b, k), k], size(b, 1)
+    )
+
+sparse_transpose(s::SparseVector) = sparse(fill(1, length(s.nzind)), s.nzind, s.nzval, 1, length(s))
+sparse_transpose(o::OneElement{T,1}) where {T} = reshape(o, 1, :)
+sparse_transpose(x::AbstractVector) = x'
+
+#Helper function to check valid dimensions of DualArray: the value must match
+#the first N dimensions of the jacobian
+function _checkdims(val, jac)
+    s1, s2 = size(val), size(jac)
+    if length(s1) > length(s2)
+        return false
+    end
+    for (i, x) in enumerate(s1)
+        if(x != s2[i])
+            return false
+        end
+    end
+    return true
+end
+
+"""
+represents an array of duals given by
+
+    values + jacobian * 𝛜,
+
+where 𝛜 is an array with dual parts corresponding to each entry of values.
+"""
+struct DualArray{T, N, M, J <: AbstractArray{T, M}} <: AbstractArray{Dual{T}, N}
+    value::AbstractArray{T, N}
+    jacobian::J
+    function DualArray(value::AbstractArray{T,N},jacobian::AbstractArray{T, M}) where {T, N, M}
+        if(!_checkdims(value,jacobian))
+            s1, s2 = size(value), size(jacobian)
+            throw(ArgumentError("N-dimensional array must be of equal size to first N dimensions of jacobian. \n\nGot $s1, $s2"))
+        end
+        new{T,N,2*N,typeof(jacobian)}(value,jacobian)
+    end
+end
+
+function DualArray(value::AbstractArray{T,N}, jacobian::AbstractArray{S,M}) where {S,T,N,M}
+    t = promote_type(T, S)
+    DualArray(convert(AbstractArray{t,N}, value), convert(AbstractArray{t, M}, jacobian))
+end
+
+#Aliases and Constructors
+const DualVector{T, M} = DualArray{T, 1, 2, M} where {T, M <: AbstractMatrix{T}}
+const DualMatrix{T, M} = DualArray{T, 2, 4, M} where {T, M <: AbstractArray{T, 4}}
+
+DualVector(x::AbstractVector, j::AbstractMatrix) = DualArray(x, j)
+DualVector(x::AbstractVector, j::ComponentArray) = DualArray(x, j)
+DualMatrix(x::AbstractMatrix, j::AbstractArray{T,4}) where {T} = DualArray(x, j)
+
+#Construct a DualMatrix from a matrix of Duals
+function DualMatrix(x::AbstractMatrix)
+    val = [y.value for y in x]
+    jac_blocks = [y.partials for y in x]
+    DualMatrix(val, BlockMatrixTensor(jac_blocks))
+end
+
+function getindex(x::DualVector, y::Int)
+    Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
+end
+function getindex(x::DualVector, y::UnitRange)
+    newval = x.value[y]
+    newjac = sparse_getindex(x.jacobian,y,:)
+    DualVector(newval, newjac)
+end
+getindex(x::DualVector, ::Colon) = x
+
+function getindex(x::DualArray, y::Int...)
+    idx = ntuple(i -> i > length(y) ? Colon() : y[i], 2*ndims(x))
+    Dual(x.value[y...], x.jacobian[idx...])
+end
+
+for op in (:size, :axes, :ndims)
+    @eval $op(x::DualArray) = $op(x.value)
+end
+
++(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
+-(x::DualVector,y::DualVector) = DualVector(x.value - y.value, x.jacobian - y.jacobian)
++(x::DualVector,y::Vector) = DualVector(x.value + y, x.jacobian)
+-(x::DualVector,y::Vector) = DualVector(x.value - y, x.jacobian)
++(x::Vector,y::DualVector) = DualVector(y.value + x, y.jacobian)
+-(x::Vector,y::DualVector) = DualVector(x - y.value, -y.jacobian)
+*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::Number, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::DualVector, y::Dual) = DualVector(x.value * y.value, y.value * x.jacobian + x.value * y.partials')
+/(x::DualVector, y::Number) = DualVector(x.value / y, x.jacobian / y)
+/(x::DualVector, y::Dual) = x * Dual(y.value, -y.partials) / y.value^2
+
+##
+# Note about 4-tensor by matrix multiplication:
+# We can consider matrix-vector multiplication as follows:
+#
+#   (Ax)[i] = A[i,:] . x
+#
+# This can be generalised to higher dimensions (B a 4-tensor, X a matrix):
+#
+#   (BX)[i,j] = B[i,j,:,:] . X
+#
+# This is useful: if we consider a 4-tensor of dimension n × m × s × t as a
+# linear map from s × t matrices to n × m matrices, this multiplication
+# applies the linear map to a matrix in the same way that matrix-vector
+# multiplication applies a linear transformation to a vector. It also gives
+# us a way to 'expand' the dual part of a DualMatrix.
+##
+
+# Assumes that the total number of epsilons is equal, i.e. the arrays of
+# perturbations are reshapes of each other.
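A standalone numerical check of the contraction described in the note above, using only Base and Test rather than the package's code path (B and X are random stand-ins):

using Test

B = rand(2, 3, 4, 5)
X = rand(4, 5)
# Entry-wise contraction: (BX)[i,j] = B[i,j,:,:] . X
BX = [sum(B[i, j, :, :] .* X) for i = 1:2, j = 1:3]
# The same map in flattened form: B reshaped (column-major) into a 6×20 matrix acting on vec(X).
@test BX ≈ reshape(reshape(B, 2 * 3, :) * vec(X), 2, 3)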
+function *(x::DualMatrix, y::DualVector) + val = x.value * y.value + jac = x.value * y.jacobian + flatten(sum(x.jacobian .* y.value'; dims = 2)) + DualVector(val, jac) +end + +function *(x::DualMatrix, y::AbstractVector) + val = x.value * y + t = promote_type(eltype(x.value), eltype(y)) + jac = zeros(t, length(val), size(x.jacobian, 3) * size(x.jacobian, 4)) + n, m = size(jac, 1), size(x.value, 2) + for i = 1:n + new_row = zeros(size(jac, 2)) + for j = 1:m + new_row += y[j] * vec(x.jacobian[i, j, :, :]) + end + jac[i,:] = new_row + end + DualVector(val, jac) +end + +function broadcasted(f::Function,d::DualVector) + jvals = zeros(eltype(d.value), length(d.value)) + for (i, x) = enumerate(d.value) + _, df = frule((ZeroTangent(), 1),f, x) + jvals[i] = df + end + DualVector(f.(d.value), Diagonal(jvals)*d.jacobian) +end + +function broadcasted(::typeof(*),x::DualVector,y::DualVector) + newval = x.value .* y.value + newjac = x.value .* y.jacobian + y.value .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::AbstractArray,y::DualArray) + newval = x .* y.value + newjac = x .* y.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(*),x::DualArray,y::AbstractArray) + newval = x.value .* y + newjac = y .* x.jacobian + DualArray(newval,newjac) +end + +function broadcasted(::typeof(^),x::DualVector,n::Int) + newval = x.value .^ n + newjac = n * x.value .^ (n - 1) .* x.jacobian + DualVector(newval,newjac) +end + +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{n}) where n = broadcasted(^, x, n) + +### +# sum (higher-dimensional case assuming that shape of perturbations is preserved) +### +function sum(x::DualVector) + Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) +end + +function sum(x::DualArray) + Dual(sum(x.value), sum(x.jacobian; dims=1:ndims(x))) +end + +_jacobian(d::Dual) = permutedims(d.partials) +_jacobian(d::DualVector) = d.jacobian +_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(x::Number, N::Int) = Zeros{typeof(x)}(1, N) + +_value(d::DualVector) = d.value +_value(x::Number) = x + +function vcat(x::Union{Dual, DualVector}...) + if length(x) == 1 + return x[1] + end + value = vcat((d.value for d in x)...) + jacobian = vcat((_jacobian(d) for d in x)...) + DualVector(value,jacobian) +end + +function vcat(a::Dual ,x::DualVector, b::Dual) + val = vcat(a.value, x.value, b.value) + jac = sparsevcat(_jacobian(a), x.jacobian, _jacobian(b)) + DualVector(val, jac) +end + +function vcat(a::Real ,x::DualVector, b::Real) + cols = size(x.jacobian,2) + val = vcat(a, x.value, b) + jac = sparsevcat(_jacobian(a, cols), x.jacobian, _jacobian(b, cols)) + DualVector(val, jac) +end + +### +# +# enable converting between DualVector and DualMatrix. +# the 4D jacobian can be thought of as a generalisation of the 2D jacobian: +# +# n × m 2D jacobian: J[i,j] = ∂fᵢ / ∂xⱼ +# n × m × s × t 4D jacobian: J[i, j, k, l] = ∂f[i, j] / δx[k, l] +# +### + +# reshape does not preserve shape of perturbations (since jacobian can have +# any number of columns) +_tomatrix(v::AbstractVector) = reshape(v, :, 1) + +function reshape(x::DualVector,dims::Vararg{Int, 2}) + val = reshape(x.value, dims...) + blocked_jac = BlockMatrix(x.jacobian, fill(1, length(x)), [size(x.jacobian, 2)]) + jac = reshape(BlockMatrixTensor(blocked_jac), dims..., :, :) + DualMatrix(val, jac) +end + +_blockvec(x::AbstractArray{T,4}) where {T} = reshape(x, size(x, 1) * size(x, 2), size(x, 3) * size(x, 4)) +_blockvec(x::BlockMatrixTensor) = vcat(blocks(x.data)...) 
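The reshape above and the vec defined next are designed to be mutual inverses on both values and jacobians; a small round trip mirroring the test suite (assumes the DualArrays package is loaded):

using DualArrays, LinearAlgebra

x = DualVector([1.0, 2.0, 3.0, 4.0], Matrix(1.0I, 4, 4))
X = reshape(x, 2, 2)   # DualMatrix whose jacobian is a BlockMatrixTensor
vec(X) == x            # true: the round trip preserves value and jacobian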
+
+function vec(x::DualArray)
+    val = vec(x.value)
+    jac = flatten(reshape(x.jacobian, length(val), 1, :, :))
+    DualVector(val, jac)
+end
+
+show(io::IO,::MIME"text/plain", x::DualArray) = (print(io,x.value); print(io," + "); print(io,x.jacobian); print(io,"𝛜"))
+show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian); print(io,"𝛜"))
+show(io::IO,::MIME"text/plain", x::Dual) = (print(io,x.value); print(io," + "); print(io,x.partials); print(io,"𝛜"))
\ No newline at end of file
diff --git a/src/DualArrays.jl b/src/DualArrays.jl
index e4074d5..bedc76f 100644
--- a/src/DualArrays.jl
+++ b/src/DualArrays.jl
@@ -1,103 +1,16 @@
 module DualArrays
-export DualVector
-import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum,
-    vcat, convert, *
-using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays
-
-struct Dual{T, Partials <: AbstractVector{T}} <: Real
-    value::T
-    partials::Partials
-end
-
-==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials
-
-sparse_getindex(a...) = layout_getindex(a...)
-sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D,2))
-sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D,1))
-
-"""
-reprents a vector of duals given by
-
-    values + jacobian * [ε_1,…,ε_n].
-
-For now the entries just return the values.
-"""
-
-struct DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}}
-    value::Vector{T}
-    jacobian::M
-    function DualVector(value::Vector{T},jacobian::M) where {T, M <: AbstractMatrix{T}}
-        if(size(jacobian)[1] != length(value))
-            x,y = length(value),size(jacobian)[1]
-            throw(ArgumentError("vector length must match number of rows in jacobian.\n
-            vector length: $x \n
-            no. of jacobian rows: $y"))
-        end
-        new{T,M}(value,jacobian)
-    end
-end
-
-function DualVector(value::AbstractVector, jacobian::AbstractMatrix)
-    T = promote_type(eltype(value), eltype(jacobian))
-    DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian))
-end
-
-
-
-function getindex(x::DualVector, y::Int)
-    Dual(x.value[y], sparse_getindex(x.jacobian,y,:))
-end
-
-function getindex(x::DualVector, y::UnitRange)
-    newval = x.value[y]
-    newjac = sparse_getindex(x.jacobian,y,:)
-    DualVector(newval, newjac)
-end
-size(x::DualVector) = length(x.value)
-axes(x::DualVector) = axes(x.value)
-+(x::DualVector,y::DualVector) = DualVector(x.value + y.value, x.jacobian + y.jacobian)
-*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
-
-broadcasted(::typeof(sin),x::DualVector) = DualVector(sin.(x.value),Diagonal(cos.(x.value))*x.jacobian)
-
-function broadcasted(::typeof(*),x::DualVector,y::DualVector)
-    newval = x.value .* y.value
-    newjac = x.value .* y.jacobian + y.value .* x.jacobian
-    DualVector(newval,newjac)
-end
-
-function sum(x::DualVector)
-    n = length(x.value)
-    Dual(sum(x.value), vec(sum(x.jacobian; dims=1)))
-end
-
-_jacobian(d::Dual) = permutedims(d.partials)
-_jacobian(d::DualVector, ::Int) = d.jacobian
-_jacobian(x::Number, N::Int) = zeros(typeof(x), 1, N)
+export DualVector, DualArray, DualMatrix, BlockMatrixTensor
+export dropzeros
-_value(d::DualVector) = d.value
-_value(x::Number) = x
-
-function vcat(x::Union{Dual, DualVector}...)
-    if length(x) == 1
-        return x[1]
-    end
-    value = vcat((d.value for d in x)...)
-    jacobian = vcat((_jacobian(d) for d in x)...)
- DualVector(value,jacobian) -end - -_size(x::Real) = 1 -_size(x::DualVector) = size(x) +import Base: +, ==, getindex, size, broadcast, axes, broadcasted, show, sum, + vcat, convert, *, -, ^, /, ndims, hcat, vec, promote_rule, zero, + reshape, setindex! +using LinearAlgebra, ArrayLayouts, BandedMatrices, FillArrays, ComponentArrays, SparseArrays +import ChainRules: frule, ZeroTangent -function vcat(x::Union{Real, DualVector}...) - cols = max((_size(i) for i in x)...) - val = vcat((_value(i) for i in x)...) - jac = vcat((_jacobian(i, cols) for i in x)...) - DualVector(val, jac) -end +include("BlockMatrixTensor.jl") +include("DualArray.jl") -show(io::IO,::MIME"text/plain", x::DualVector) = (print(io,x.value); print(io," + "); print(io,x.jacobian);print("𝛜")) end -# module DualArrays +# module DualArrays \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d1cba3c..2374b14 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,42 +1,141 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, BandedMatrices, ForwardDiff, +BlockArrays using DualArrays: Dual +using Lux: relu @testset "DualArrays" begin - @test_throws ArgumentError DualVector([1,2],I(3)) - v = DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - @test v[1] isa Dual - @test v[1] == Dual(1,[1,2,3]) - @test v[2] == Dual(2,[4,5,6]) - @test v[3] == Dual(3,[7,8,9]) - @test_throws BoundsError v[4] - @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) - - w = v + v - @test w == DualVector([2,4,6],[2 4 6;8 10 12;14 16 18]) - @test w.jacobian == 2v.jacobian - - @test sin.(v) isa DualVector - @test sin.(v).value == [sin(1), sin(2), sin(3)] - @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian - - x,y = v[1:2],v[2:3] - @test x == DualVector([1,2],[1 2 3;4 5 6]) - @test y == DualVector([2,3],[4 5 6;7 8 9]) - @test x .* y == DualVector([2,6],[6 9 12;26 31 36]) + v = DualArray([1.,2, 3], [1 2 3; 4 5 6;7 8 9]) + @testset "constructors" begin + @test v isa DualVector + @test eltype(v) == DualArrays.Dual{Float64} + w = DualArray(ones(2,2),ones(2,2,2,2)) + @test w isa DualMatrix + @test_throws TypeError DualArray([1,2], [1,2]) + @test_throws ArgumentError DualVector([1,2],I(3)) + end + + @testset "indexing" begin + @test v[1] isa Dual + @test v[1] == Dual(1,[1,2,3]) + @test v[2] == Dual(2,[4,5,6]) + @test v[3] == Dual(3,[7,8,9]) + @test_throws BoundsError v[4] + @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) + + x,y = v[1:2],v[2:3] + @test x == DualVector([1,2],[1 2 3;4 5 6]) + @test y == DualVector([2,3],[4 5 6;7 8 9]) + + n = 10 + w = DualVector(1:n, I(n)) + @test w[2:end].jacobian isa BandedMatrix + end + + @testset "basic_operations" begin + w = v + v + @test w == DualVector([2.,4.,6.],[2. 
4 6;8 10 12;14 16 18])
+        @test w.jacobian == 2v.jacobian
+
+        u = v - w
+        @test u == -v
+
+        x = v.value + v - v.value
+        @test x == v
+
+        @test v .^ 2 == DualVector([1, 4, 9], [2 4 6;16 20 24; 42 48 54])
+
+        x,y = v[1:2],v[2:3]
+        @test x .* y == DualVector([2,6],[6 9 12;26 31 36])
+        @test sum(x .* y) isa Dual
+        @test sum(x .* y) == Dual(8,[32,40,48])
+    end
+
+    @testset "broadcasting" begin
+        @test sin.(v) isa DualVector
+        @test sin.(v).value == [sin(1), sin(2), sin(3)]
+        @test sin.(v).jacobian == Diagonal(cos.(v.value)) * v.jacobian
+
+        @test exp.(v) isa DualVector
+        @test exp.(v).value == [exp(1), exp(2), exp(3)]
+        @test exp.(v).jacobian == Diagonal(exp.(v.value)) * v.jacobian
-
-    @test sum(x .* y) isa Dual
-    @test sum(x .* y) == Dual(8,[32,40,48])
+        @test relu.(v) isa DualVector
+        @test relu.(v).value == [relu(1), relu(2), relu(3)]
+        @test relu.(v).jacobian == Diagonal(0.5 * (sign.(v.value) .+ 1)) * v.jacobian
+    end
+
+    @testset "misc" begin
+        n = 10
+        v = DualVector(1:n, I(n))
+        @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    end
+
+    @testset "vcat" begin
+        x = Dual(1, [1, 2, 3])
+        y = DualVector([2, 3], [4 5 6;7 8 9])
+        @test vcat(x) == x
+        @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
+        @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+        @test vcat(1, y, 2) == DualVector([1,2,3,2], [0 0 0; 4 5 6; 7 8 9; 0 0 0])
+    end
+
+    @testset "reshape" begin
+        x = DualVector([1,2,3,4], I(4))
+        @test vec(reshape(x, 2, 2)) == x
+    end
 
-    n = 10
-    v = DualVector(1:n, I(n))
-    @test v[2:end].jacobian isa BandedMatrix
+    @testset "DualMatrix" begin
+        x = DualVector(ones(6), I(6))
+        y = reshape(x[1:4], 2, 2)
+        z = x[5:6]
+        @test (y*z).value == [2,2]
+        @test (y*z).jacobian == [1 0 1 0 1 1; 0 1 0 1 1 1]
+        for t in (([1, 0, 0, 1, 0, 0], [1 0 -1 0 1 0;0 0 0 0 0 0]), ([1, 1, 1, 1, 1, 1], [1 0 -1 0 1 0;0 1 0 -1 0 1]), ([0, 1, 1, 0, -1, -1], zeros(2, 6)))
+            x = DualVector(t[1], I(6))
+            y = reshape(x[1:4], 2, 2)
+            z = x[5:6]
+            @test relu.(y*[1,-1] + z).jacobian == t[2]
+        end
+    end
 
-    @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n)
+    @testset "BlockMatrixTensor" begin
+        @testset "constructors" begin
+            x = BlockMatrixTensor(BlockArray(ones(4, 4), [2, 2], [2, 2]))
+            y = BlockMatrixTensor(ones(2, 2, 2, 2))
+            @test x isa BlockMatrixTensor
+            @test y isa BlockMatrixTensor
+            @test x.data == y.data
+        end
+        @testset "broadcasting" begin
+            y = BlockMatrixTensor(ones(2, 2, 2, 2))
+            z = BlockArray(
+                [fill(1,2,2) fill(2,2,2); fill(3,2,2) fill(4,2,2)],
+                [2, 2],
+                [2, 2]
+            )
+            @test ([1 2;3 4] .* y).data == z
+            @test (reshape([3], 1, 1) .* y).data == (3 * y).data
+            @test ([1 2] .* y).data == BlockArray([fill(1, 4, 2) fill(2, 4, 2)], [2, 2], [2, 2])
+            @test ([1 2]' .* y).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2])
+            @test (y .* [1, 2]).data == BlockArray([fill(1, 2, 4); fill(2, 2, 4)], [2, 2], [2, 2])
+            @test_throws DimensionMismatch [1 2;3 4;5 6] .* y
+        end
+        @testset "sum" begin
+            r = rand(2, 2, 2, 2)
+            b = BlockMatrixTensor(r)
+            #TODO: Fix traversal so that dims = 1 and dims = 2 can be tested the same way
+            @test sum(r; dims = 1:2)[1, 1, :, :] ≈ sum(b; dims = 1:2)
+        end
 
-    x = Dual(1, [1, 2, 3])
-    y = DualVector([2, 3], [4 5 6;7 8 9])
-    @test vcat(x) == x
-    @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3])
-    @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9])
+        @testset "reshape" begin
+            r = rand(3, 4, 2, 2)
+            b = BlockMatrixTensor(r)
+            for d in ((2, 6), (1, 12), (6, 2), (12, 1))
@test BlockMatrixTensor(reshape(r, d..., 2, 2)).data == reshape(b, d..., :, :).data + end + end + end end \ No newline at end of file
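To close, an end-to-end sketch of the intended workflow, mirroring the "misc" testset above (the function name g is made up for illustration): seed a DualVector with an identity jacobian, push it through a vector-valued function, and read off the full jacobian in a single forward pass, checked against ForwardDiff:

using DualArrays, LinearAlgebra, ForwardDiff, Test

g(v) = v[1:end-1] .* v[2:end]
x = collect(1.0:5.0)
J = g(DualVector(x, Matrix(1.0I, 5, 5))).jacobian   # 4×5 jacobian in one pass
@test J ≈ ForwardDiff.jacobian(g, x)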