From 8aa5e0a7d7a8b0efd3513f90eb4eea42b0ac96fe Mon Sep 17 00:00:00 2001 From: kagalenko-m-b <16374215+kagalenko-m-b@users.noreply.github.com> Date: Sat, 12 Sep 2020 19:34:57 +0300 Subject: [PATCH 1/4] Avoid normal equations by using QR decomposition --- src/levenberg_marquardt.jl | 96 +++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 1861fbf..32e6bf3 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -94,13 +94,13 @@ function levenberg_marquardt( lower::AbstractVector{T}=Array{T}(undef, 0), upper::AbstractVector{T}=Array{T}(undef, 0), avv!::Union{Function,Nothing,Avv}=nothing, -) where {T} +) where {T<:Real} # First evaluation value_jacobian!!(df, initial_x) - + if isfinite(tau) - lambda = tau * maximum(jacobian(df)' * jacobian(df)) + lambda = tau*maximum(real, jacobian(df)'*jacobian(df)) end @@ -152,8 +152,8 @@ function levenberg_marquardt( # and an alias for the jacobian J = jacobian(df) - dir_deriv = Array{T}(undef, m) - v = Array{T}(undef, n) + dir_deriv = Array{T}(undef,m) + v = Array{T}(undef,n) # Maintain a trace of the system. tr = LMTrace{LevenbergMarquardt}() @@ -162,8 +162,8 @@ function levenberg_marquardt( os = LMState{LevenbergMarquardt}(iterCt, sum(abs2, value(df)), NaN, d) push!(tr, os) if show_trace - println(os) - end + println(os) + end end startTime = time() @@ -181,35 +181,45 @@ function levenberg_marquardt( # It is additionally useful to bound the elements of DtD below to help # prevent "parameter evaporation". - DtD = vec(sum(abs2, J, dims=1)) - for i = 1:length(DtD) - if DtD[i] <= MIN_DIAGONAL - DtD[i] = MIN_DIAGONAL - end - end - - # delta_x = ( J'*J + lambda * Diagonal(DtD) ) \ ( -J'*value(df) ) - mul!(JJ, transpose(J), J) - @simd for i = 1:n - @inbounds JJ[i, i] += lambda * DtD[i] + # DtD = vec(sum(abs2, J, dims=1)) + # for i in 1:length(DtD) + # if DtD[i] <= MIN_DIAGONAL + # DtD[i] = MIN_DIAGONAL + # end + # end + + # # delta_x = ( J'*J + lambda * Diagonal(DtD) ) \ ( -J'*value(df) ) + # mul!(JJ, J', J) + # @simd for i in 1:n + # @inbounds JJ[i, i] += lambda * DtD[i] + # end + # #n_buffer is delta C, JJ is g compared to Mark's code + # mul!(n_buffer, J', value(df)) + # rmul!(n_buffer, -1) + + # v .= JJ \ n_buffer + Q,R,p = qr(J, Val(true)) + rhs = -Matrix(Q)'*value(df) + if isreal(R) + RR = vcat(R, lambda*I) + rhs = vcat(rhs, zeros(T, n)) + else + RR = vcat(real.(R), imag.(R), lambda*I) # nonlinear parameters are real + rhs = vcat(real.(rhs), imag.(rhs), zeros(T, n)) end - # n_buffer is delta C, JJ is g compared to Mark's code - mul!(n_buffer, transpose(J), value(df)) - rmul!(n_buffer, -1) - - v .= JJ \ n_buffer - + v[p] = (RR\rhs) - if avv! != nothing - # GEODESIC ACCELERATION PART + if avv! != nothing && isreal(J) # Geodesic acceleration for complex Jacobian + # is not implemented yet + #GEODESIC ACCELERATION PART avv!(dir_deriv, x, v) mul!(a, transpose(J), dir_deriv) - rmul!(a, -1) # we multiply by -1 before the decomposition/division - LAPACK.potrf!('U', JJ) # in place cholesky decomposition - LAPACK.potrs!('U', JJ, a) # divides a by JJ, taking into account the fact that JJ is now the `U` cholesky decoposition of what it was before + rmul!(a, -1) #we multiply by -1 before the decomposition/division + LAPACK.potrf!('U', JJ) #in place cholesky decomposition + LAPACK.potrs!('U', JJ, a) #divides a by JJ, taking into account the fact that JJ is now the `U` cholesky decoposition of what it was before rmul!(a, 0.5) delta_x .= v .+ a - # end of the GEODESIC ACCELERATION PART + #end of the GEODESIC ACCELERATION PART else delta_x = v end @@ -220,13 +230,13 @@ function levenberg_marquardt( # apply box constraints if !isempty(lower) - @simd for i = 1:n - @inbounds delta_x[i] = max(x[i] + delta_x[i], lower[i]) - x[i] + @simd for i in 1:n + @inbounds delta_x[i] = max(x[i] + delta_x[i], lower[i]) - x[i] end end if !isempty(upper) - @simd for i = 1:n - @inbounds delta_x[i] = min(x[i] + delta_x[i], upper[i]) - x[i] + @simd for i in 1:n + @inbounds delta_x[i] = min(x[i] + delta_x[i], upper[i]) - x[i] end end @@ -259,11 +269,11 @@ function levenberg_marquardt( residual = trial_residual if rho > good_step_quality # increase trust region radius - lambda = max(lambda_decrease * lambda, MIN_LAMBDA) + lambda = max(lambda_decrease*lambda, MIN_LAMBDA) end else # decrease trust region radius - lambda = min(lambda_increase * lambda, MAX_LAMBDA) + lambda = min(lambda_increase*lambda, MAX_LAMBDA) end iterCt += 1 @@ -275,8 +285,8 @@ function levenberg_marquardt( os = LMState{LevenbergMarquardt}(iterCt, sum(abs2, value(df)), g_norm, d) push!(tr, os) if show_trace - println(os) - end + println(os) + end end # check convergence criteria: @@ -285,24 +295,24 @@ function levenberg_marquardt( if norm(J' * value(df), Inf) < g_tol g_converged = true end - if norm(delta_x) < x_tol * (x_tol + norm(x)) + if norm(delta_x) < x_tol*(x_tol + norm(x)) x_converged = true end converged = g_converged | x_converged end LMResults( - LevenbergMarquardt(), # method + LevenbergMarquardt(), # method initial_x, # initial_x x, # minimizer - sum(abs2, value(df)), # minimum + sum(abs2, value(df)), # minimum iterCt, # iterations !converged, # iteration_converged x_converged, # x_converged g_converged, # g_converged Tval(g_tol), # g_tol tr, # trace - first(df.f_calls), # f_calls - first(df.df_calls), # g_calls + first(df.f_calls), # f_calls + first(df.df_calls), # g_calls ) end From 12d7af3cc0c11639df1f614b996e9035a67f308b Mon Sep 17 00:00:00 2001 From: kagalenko-m-b Date: Sat, 12 Sep 2020 22:34:04 +0300 Subject: [PATCH 2/4] Fix geodesic acceleration to work with QR decomposition --- src/levenberg_marquardt.jl | 40 +++++--------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 32e6bf3..7f25db1 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -98,7 +98,7 @@ function levenberg_marquardt( # First evaluation value_jacobian!!(df, initial_x) - + if isfinite(tau) lambda = tau*maximum(real, jacobian(df)'*jacobian(df)) end @@ -172,32 +172,8 @@ function levenberg_marquardt( # jacobian! will check if x is new or not, so it is only actually # evaluated if x was updated last iteration. jacobian!(df, x) # has alias J - - # we want to solve: + # Use QR with column pivoting to solve the regularized least squares problem # argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2 - # Solving for the minimum gives: - # (J'*J + lambda*diagm(DtD)) * delta_x == -J' * f(x), where DtD = sum(abs2, J,1) - # Where we have used the equivalence: diagm(J'*J) = diagm(sum(abs2, J,1)) - # It is additionally useful to bound the elements of DtD below to help - # prevent "parameter evaporation". - - # DtD = vec(sum(abs2, J, dims=1)) - # for i in 1:length(DtD) - # if DtD[i] <= MIN_DIAGONAL - # DtD[i] = MIN_DIAGONAL - # end - # end - - # # delta_x = ( J'*J + lambda * Diagonal(DtD) ) \ ( -J'*value(df) ) - # mul!(JJ, J', J) - # @simd for i in 1:n - # @inbounds JJ[i, i] += lambda * DtD[i] - # end - # #n_buffer is delta C, JJ is g compared to Mark's code - # mul!(n_buffer, J', value(df)) - # rmul!(n_buffer, -1) - - # v .= JJ \ n_buffer Q,R,p = qr(J, Val(true)) rhs = -Matrix(Q)'*value(df) if isreal(R) @@ -210,13 +186,11 @@ function levenberg_marquardt( v[p] = (RR\rhs) if avv! != nothing && isreal(J) # Geodesic acceleration for complex Jacobian - # is not implemented yet #GEODESIC ACCELERATION PART avv!(dir_deriv, x, v) - mul!(a, transpose(J), dir_deriv) - rmul!(a, -1) #we multiply by -1 before the decomposition/division - LAPACK.potrf!('U', JJ) #in place cholesky decomposition - LAPACK.potrs!('U', JJ, a) #divides a by JJ, taking into account the fact that JJ is now the `U` cholesky decoposition of what it was before + mul!(a, J', dir_deriv) + rmul!(a, -1) + LAPACK.potrs!('U', R, a) rmul!(a, 0.5) delta_x .= v .+ a #end of the GEODESIC ACCELERATION PART @@ -224,10 +198,6 @@ function levenberg_marquardt( delta_x = v end - - - - # apply box constraints if !isempty(lower) @simd for i in 1:n From 5256e212e2b1203dabb0605e90bf4a1f5e05385e Mon Sep 17 00:00:00 2001 From: kagalenko-m-b Date: Sun, 13 Sep 2020 16:49:49 +0300 Subject: [PATCH 3/4] Correct the linear solution in geodesic acceleration --- src/levenberg_marquardt.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 7f25db1..1ff4447 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -190,7 +190,8 @@ function levenberg_marquardt( avv!(dir_deriv, x, v) mul!(a, J', dir_deriv) rmul!(a, -1) - LAPACK.potrs!('U', R, a) + a = (R\(R'\a[p])) + a[p] = a rmul!(a, 0.5) delta_x .= v .+ a #end of the GEODESIC ACCELERATION PART From 609ac95cb3784e4d52006484846be75eb043db55 Mon Sep 17 00:00:00 2001 From: kagalenko-m-b <16374215+kagalenko-m-b@users.noreply.github.com> Date: Wed, 28 Oct 2020 16:50:22 +0300 Subject: [PATCH 4/4] Implement stylistic suggestions by antoine-levitt --- .gitignore | 1 + src/levenberg_marquardt.jl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 90debbd..a474617 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +Manifest.toml benchmarks/graphs/* *~ *.kate-swp diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl index 1ff4447..492b197 100644 --- a/src/levenberg_marquardt.jl +++ b/src/levenberg_marquardt.jl @@ -174,9 +174,9 @@ function levenberg_marquardt( jacobian!(df, x) # has alias J # Use QR with column pivoting to solve the regularized least squares problem # argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2 - Q,R,p = qr(J, Val(true)) + Q,R,p = qr(J, ColumnNorm()) rhs = -Matrix(Q)'*value(df) - if isreal(R) + if eltype(J) <: Real RR = vcat(R, lambda*I) rhs = vcat(rhs, zeros(T, n)) else @@ -185,7 +185,7 @@ function levenberg_marquardt( end v[p] = (RR\rhs) - if avv! != nothing && isreal(J) # Geodesic acceleration for complex Jacobian + if avv! != nothing && isreal(J) # Geodesic acceleration for complex Jacobian needs to be verified for correctness. It might work as is. #GEODESIC ACCELERATION PART avv!(dir_deriv, x, v) mul!(a, J', dir_deriv)