diff --git a/docs/Manifest.toml b/docs/Manifest.toml
index 036320b5ea..31bf764c5b 100644
--- a/docs/Manifest.toml
+++ b/docs/Manifest.toml
@@ -1,8 +1,8 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.12.1"
+julia_version = "1.12.2"
 manifest_format = "2.0"
-project_hash = "d27b93eebadb7b949d59f30f43496bc5d02daae3"
+project_hash = "c5207eab8c38920eccf3e9776e5686ce1b2e94e9"
 
 [[deps.ADTypes]]
 git-tree-sha1 = "27cecae79e5cc9935255f90c53bb831cc3c870d7"
@@ -437,9 +437,9 @@ version = "1.15.1"
 
 [[deps.DifferentiationInterface]]
 deps = ["ADTypes", "LinearAlgebra"]
-git-tree-sha1 = "6d5153dc500d644d4d672723aa27a614ee84ab3b"
+git-tree-sha1 = "80bd15222b3e8d0bc70d921d2201aa0084810ce5"
 uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
-version = "0.7.11"
+version = "0.7.12"
 
     [deps.DifferentiationInterface.extensions]
     DifferentiationInterfaceChainRulesCoreExt = "ChainRulesCore"
@@ -521,7 +521,7 @@ version = "1.4.1"
 [[deps.Downloads]]
 deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
 uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
-version = "1.6.0"
+version = "1.7.0"
 
 [[deps.EnumX]]
 git-tree-sha1 = "bddad79635af6aec424f53ed8aad5d7555dc6f00"
@@ -1066,7 +1066,7 @@ version = "0.6.4"
 [[deps.LibCURL_jll]]
 deps = ["Artifacts", "LibSSH2_jll", "Libdl", "OpenSSL_jll", "Zlib_jll", "nghttp2_jll"]
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-version = "8.11.1+1"
+version = "8.15.0+0"
 
 [[deps.LibGit2]]
 deps = ["LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
@@ -1440,7 +1440,7 @@ version = "1.6.0"
 [[deps.OpenSSL_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
-version = "3.5.1+0"
+version = "3.5.4+0"
 
 [[deps.OpenSpecFun_jll]]
 deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl"]
@@ -2419,9 +2419,9 @@ uuid = "1317d2d5-d96f-522e-a858-c73665f53c3e"
 version = "2022.0.0+1"
 
 [[deps.p7zip_jll]]
-deps = ["Artifacts", "Libdl"]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
 uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
-version = "17.5.0+2"
+version = "17.7.0+0"
 
 [[deps.x264_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl"]
diff --git a/docs/Project.toml b/docs/Project.toml
index f9ab78f0ac..ea7a68caf3 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,6 +3,7 @@ Bibliography = "f1be7e48-bf82-45af-a471-ae754a193061"
 BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 Changelog = "5217a498-cd5d-4ec6-b8c2-9b85a09b6e3e"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 Ferrite = "c061ca5d-56c9-439f-9c0e-210fe06d3992"
@@ -27,3 +28,6 @@ Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
+
+[sources]
+Ferrite = {path = ".."}
diff --git a/docs/src/literate-gallery/landau.jl b/docs/src/literate-gallery/landau.jl
index 94fc6bd485..f3c00c1447 100644
--- a/docs/src/literate-gallery/landau.jl
+++ b/docs/src/literate-gallery/landau.jl
@@ -9,8 +9,9 @@
 # Optimized
 
 # In this example a basic Ginzburg-Landau model is solved.
-# This example gives an idea of how the API together with ForwardDiff can be leveraged to
-# performantly solve non standard problems on a FEM grid.
+# This example gives an idea of how the API, together with DifferentiationInterface.jl
+# (using ForwardDiff as the backend), can be leveraged to efficiently solve non-standard
+# problems on a FEM grid.
 # A large portion of the code is there only for performance reasons,
 # but since this usually really matters and is what takes the most time to optimize,
 # it is included.
@@ -21,7 +22,8 @@
 # This means that they are performed for each cell separately instead of for the
 # grid as a whole.
 
-using ForwardDiff: ForwardDiff, GradientConfig, HessianConfig, Chunk
+using DifferentiationInterface
+using ForwardDiff: ForwardDiff
 using Ferrite
 using Optim, LineSearches
 using SparseArrays
@@ -48,7 +50,7 @@ end
 
 # ### ThreadCache
 # This holds the values that each thread will use during the assembly.
-struct ThreadCache{CV, T, DIM, F <: Function, GC <: GradientConfig, HC <: HessianConfig}
+struct ThreadCache{CV, T, DIM, F <: Function, GP, HP, GB, HB}
     cvP::CV
     element_indices::Vector{Int}
     element_dofs::Vector{T}
@@ -56,19 +58,21 @@ struct ThreadCache{CV, T, DIM, F <: Function, GC <: GradientConfig, HC <: Hessia
     element_hessian::Matrix{T}
     element_coords::Vector{Vec{DIM, T}}
     element_potential::F
-    gradconf::GC
-    hessconf::HC
+    grad_prep::GP
+    hess_prep::HP
+    grad_backend::GB
+    hess_backend::HB
 end
-function ThreadCache(dpc::Int, nodespercell, cvP::CellValues, modelparams, elpotential)
+function ThreadCache(dpc::Int, nodespercell, cvP::CellValues, modelparams, elpotential, grad_backend, hess_backend)
     element_indices = zeros(Int, dpc)
     element_dofs = zeros(dpc)
     element_gradient = zeros(dpc)
     element_hessian = zeros(dpc, dpc)
     element_coords = zeros(Vec{3, Float64}, nodespercell)
     potfunc = x -> elpotential(x, cvP, modelparams)
-    gradconf = GradientConfig(potfunc, zeros(dpc), Chunk{12}())
-    hessconf = HessianConfig(potfunc, zeros(dpc), Chunk{4}())
-    return ThreadCache(cvP, element_indices, element_dofs, element_gradient, element_hessian, element_coords, potfunc, gradconf, hessconf)
+    grad_prep = prepare_gradient(potfunc, grad_backend, zeros(dpc))
+    hess_prep = prepare_hessian(potfunc, hess_backend, zeros(dpc))
+    return ThreadCache(cvP, element_indices, element_dofs, element_gradient, element_hessian, element_coords, potfunc, grad_prep, hess_prep, grad_backend, hess_backend)
 end
 
 # ## The Model
@@ -81,7 +85,7 @@ mutable struct LandauModel{T, DH <: DofHandler, CH <: ConstraintHandler, TC <: T
     threadcaches::Vector{TC}
 end
 
-function LandauModel(α, G, gridsize, left::Vec{DIM, T}, right::Vec{DIM, T}, elpotential) where {DIM, T}
+function LandauModel(α, G, gridsize, left::Vec{DIM, T}, right::Vec{DIM, T}, elpotential, grad_backend, hess_backend) where {DIM, T}
     grid = generate_grid(Tetrahedron, gridsize, left, right)
     threadindices = Ferrite.create_coloring(grid)
 
@@ -106,7 +110,7 @@ function LandauModel(α, G, gridsize, left::Vec{DIM, T}, right::Vec{DIM, T}, elp
 
     dpc = ndofs_per_cell(dofhandler)
     cpc = length(grid.cells[1].nodes)
-    caches = [ThreadCache(dpc, cpc, copy(cvP), ModelParams(α, G), elpotential) for t in 1:Threads.maxthreadid()]
+    caches = [ThreadCache(dpc, cpc, copy(cvP), ModelParams(α, G), elpotential, grad_backend, hess_backend) for t in 1:Threads.maxthreadid()]
     return LandauModel(dofvector, dofhandler, boundaryconds, threadindices, caches)
 end
 
@@ -159,7 +163,7 @@ end
 function ∇F!(∇f::Vector{T}, dofvector::Vector{T}, model::LandauModel{T}) where {T}
     fill!(∇f, zero(T))
     @assemble! begin
-        ForwardDiff.gradient!(cache.element_gradient, cache.element_potential, eldofs, cache.gradconf)
+        gradient!(cache.element_potential, cache.element_gradient, cache.grad_prep, cache.grad_backend, eldofs)
         @inbounds assemble!(∇f, cache.element_indices, cache.element_gradient)
     end
     return
@@ -169,7 +173,7 @@ end
 function ∇²F!(∇²f::SparseMatrixCSC, dofvector::Vector{T}, model::LandauModel{T}) where {T}
     assemblers = [start_assemble(∇²f) for t in 1:Threads.maxthreadid()]
     @assemble! begin
-        ForwardDiff.hessian!(cache.element_hessian, cache.element_potential, eldofs, cache.hessconf)
+        hessian!(cache.element_potential, cache.element_hessian, cache.hess_prep, cache.hess_backend, eldofs)
         @inbounds assemble!(assemblers[Threads.threadid()], cache.element_indices, cache.element_hessian)
     end
     return
@@ -182,8 +186,7 @@ function calcall(∇²f::SparseMatrixCSC, ∇f::Vector{T}, dofvector::Vector{T},
     assemblers = [start_assemble(∇²f, ∇f) for t in 1:Threads.maxthreadid()]
     @assemble! begin
-        outs[Threads.threadid()] += cache.element_potential(eldofs)
-        ForwardDiff.hessian!(cache.element_hessian, cache.element_potential, eldofs, cache.hessconf)
-        ForwardDiff.gradient!(cache.element_gradient, cache.element_potential, eldofs, cache.gradconf)
+        # The energy is returned together with the derivatives, so it is not evaluated separately.
+        outs[Threads.threadid()] += first(value_gradient_and_hessian!(cache.element_potential, cache.element_gradient, cache.element_hessian, cache.hess_prep, cache.hess_backend, eldofs))
         @inbounds assemble!(assemblers[Threads.threadid()], cache.element_indices, cache.element_gradient, cache.element_hessian)
     end
     return sum(outs)
@@ -222,7 +225,7 @@ end
 
 # ## Testing it
 # This calculates the contribution of each element to the total energy,
-# it is also the function that will be put through ForwardDiff for the gradient and Hessian.
+# and it is also the function that is differentiated to obtain the gradient and Hessian.
 function element_potential(eldofs::AbstractVector{T}, cvP, params) where {T}
     energy = zero(T)
     for qp in 1:getnquadpoints(cvP)
@@ -255,7 +258,9 @@ G = V2T(1.0e2, 0.0, 1.0e2)
 α = Vec{3}((-1.0, 1.0, 1.0))
 left = Vec{3}((-75.0, -25.0, -2.0))
 right = Vec{3}((75.0, 25.0, 2.0))
-model = LandauModel(α, G, (50, 50, 2), left, right, element_potential)
+grad_backend = AutoForwardDiff(; chunksize = 12)
+hess_backend = AutoForwardDiff(; chunksize = 4)
+model = LandauModel(α, G, (50, 50, 2), left, right, element_potential, grad_backend, hess_backend)
 
 save_landau("landauorig", model)
 @time minimize!(model)