From b7a07b19a83d2df0fc9713d5ea992792dd0b6fce Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 5 Jan 2026 13:45:37 +0100 Subject: [PATCH 1/3] add opnorm_upper_bound function --- src/utils.jl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/utils.jl b/src/utils.jl index ee257198..a900fa3b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -15,6 +15,52 @@ function power_method!(B::M, v₀::S, v₁::S, max_iter::Int = 1) where {M, S} return abs(dot(v₀, v₁)) end +# Compute upper bounds for μ‖B‖₂, where μ ∈ (0, 1]. + +# For matrices, we compute the Frobenius norm. +function opnorm_upper_bound(B::AbstractMatrix) + return norm(B, 2) +end + +# For LBFGS, using the formula Bₖ = B\_{k-1} - aₖaₖᵀ + bₖbₖᵀ, we compute +# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂² +function opnorm_upper_bound(B::LBFGSOperator{T}) where{T} + data = B.data + approx = data.scaling ? 1/data.scaling_factor : T(1) + approx += norm(data.b, 2)^2 + return approx +end + +# For LSR1, we use the formula Bₖ = B\_{k-1} + σₖaₖaₖᵀ, we compute +# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂² +function opnorm_upper_bound(B::LSR1Operator{T}) where{T} + data = B.data + approx = data.scaling ? 1/data.scaling_factor : T(1) + @inbounds for i = 1:data.mem + if data.as[i] != 0 + approx += norm(data.a[i])^2/abs(data.as[i]) + end + end + return approx +end + +# For diagonal operators, we compute the exact operator norm +function opnorm_upper_bound(B::AbstractDiagonalQuasiNewtonOperator) + return norm(B.d, Inf) +end + +# In the general case, we either use the power_method or Arpack, +# Note: Arpack allocates and the power method might be unreliable. +function opnorm_upper_bound(B::AbstractLinearOperator; v₀ = nothing, v₁ = nothing, max_iter = -1) + # Fallback to either the power_method or arpack + if max_iter ≥ 1 + @assert !(isnothing(v₀) && isnothing(v₁)) + return power_method!(B, v₀, v₁, max_iter = max_iter) + else + return opnorm(B) + end +end + # use Arpack to obtain largest eigenvalue in magnitude with a minimum of robustness function LinearAlgebra.opnorm(B; kwargs...) m, n = size(B) From 429d03673debe9d3f1f733b8d0ab07de13c56e5f Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 5 Jan 2026 14:14:52 +0100 Subject: [PATCH 2/3] add opnorm_upper_bound in TR and R2N and add !isnan tests for norm methods --- src/R2N.jl | 10 +++++----- src/TR_alg.jl | 4 ++-- src/utils.jl | 45 ++++++++++++++++++++++----------------------- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/R2N.jl b/src/R2N.jl index 14d47075..ea95da7b 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -139,7 +139,7 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory - `η2::T = T(0.9)`: very successful iteration threshold; - `γ::T = T(3)`: regularization parameter multiplier, σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful; - `θ::T = 1/(1 + eps(T)^(1 / 5))`: is the model decrease fraction with respect to the decrease of the Cauchy model; -- `opnorm_maxiter::Int = 5`: how many iterations of the power method to use to compute the operator norm of Bₖ. If a negative number is provided, then Arpack is used instead; +- `opnorm_maxiter::Int = 5`: how many iterations of the power method to use to compute the operator norm of Bₖ. If a negative number is provided, an upper bound of the operator norm is computed: see `opnorm_upper_bound`. - `m_monotone::Int = 1`: monotonicity parameter. By default, R2N is monotone but the non-monotone variant will be used if `m_monotone > 1`; - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. @@ -295,9 +295,9 @@ function SolverCore.solve!( solver.subpb.model.B = hess_op(nlp, xk) if opnorm_maxiter ≤ 0 - λmax, found_λ = opnorm(solver.subpb.model.B) + λmax, found_λ = opnorm_upper_bound(solver.subpb.model.B) else - λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) + λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) end found_λ || error("operator norm computation failed") @@ -448,9 +448,9 @@ function SolverCore.solve!( solver.subpb.model.B = hess_op(nlp, xk) if opnorm_maxiter ≤ 0 - λmax, found_λ = opnorm(solver.subpb.model.B) + λmax, found_λ = opnorm_upper_bound(solver.subpb.model.B) else - λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) + λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) end found_λ || error("operator norm computation failed") end diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 0b75df70..b7893a0f 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -299,7 +299,7 @@ function SolverCore.solve!( if opnorm_maxiter ≤ 0 λmax, found_λ = opnorm(solver.subpb.model.B) else - λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) + λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) end found_λ || error("operator norm computation failed") @@ -460,7 +460,7 @@ function SolverCore.solve!( if opnorm_maxiter ≤ 0 λmax, found_λ = opnorm(solver.subpb.model.B) else - λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) + λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter) end found_λ || error("operator norm computation failed") diff --git a/src/utils.jl b/src/utils.jl index a900fa3b..c7a43526 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -12,53 +12,52 @@ function power_method!(B::M, v₀::S, v₁::S, max_iter::Int = 1) where {M, S} normalize!(v₁) end mul!(v₁, B, v₀) - return abs(dot(v₀, v₁)) + approx = abs(dot(v₀, v₁)) + return approx, !isnan(approx) end # Compute upper bounds for μ‖B‖₂, where μ ∈ (0, 1]. # For matrices, we compute the Frobenius norm. -function opnorm_upper_bound(B::AbstractMatrix) - return norm(B, 2) +function opnorm_upper_bound(B::AbstractMatrix) + bound = norm(B, 2) + return bound, !isnan(bound) end # For LBFGS, using the formula Bₖ = B\_{k-1} - aₖaₖᵀ + bₖbₖᵀ, we compute -# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂² +# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂². function opnorm_upper_bound(B::LBFGSOperator{T}) where{T} data = B.data - approx = data.scaling ? 1/data.scaling_factor : T(1) - approx += norm(data.b, 2)^2 - return approx + bound = data.scaling ? 1/data.scaling_factor : T(1) + @inbounds for i = 1:data.mem + bound += norm(data.b[i], 2)^2 + end + return bound, !isnan(bound) end # For LSR1, we use the formula Bₖ = B\_{k-1} + σₖaₖaₖᵀ, we compute -# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂² +# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂². function opnorm_upper_bound(B::LSR1Operator{T}) where{T} data = B.data - approx = data.scaling ? 1/data.scaling_factor : T(1) + bound = data.scaling ? 1/data.scaling_factor : T(1) @inbounds for i = 1:data.mem if data.as[i] != 0 - approx += norm(data.a[i])^2/abs(data.as[i]) + bound += norm(data.a[i])^2/abs(data.as[i]) end end - return approx + return bound, !isnan(bound) end -# For diagonal operators, we compute the exact operator norm +# For diagonal operators, we compute the exact operator norm. function opnorm_upper_bound(B::AbstractDiagonalQuasiNewtonOperator) - return norm(B.d, Inf) + bound = norm(B.d, Inf) + return bound, !isnan(bound) end -# In the general case, we either use the power_method or Arpack, -# Note: Arpack allocates and the power method might be unreliable. -function opnorm_upper_bound(B::AbstractLinearOperator; v₀ = nothing, v₁ = nothing, max_iter = -1) - # Fallback to either the power_method or arpack - if max_iter ≥ 1 - @assert !(isnothing(v₀) && isnothing(v₁)) - return power_method!(B, v₀, v₁, max_iter = max_iter) - else - return opnorm(B) - end +# In the general case, we use Arpack. +# Note: Arpack allocates. +function opnorm_upper_bound(B::AbstractLinearOperator) + return opnorm(B) end # use Arpack to obtain largest eigenvalue in magnitude with a minimum of robustness From 4732ee7140234ddef263d4d002ec9dd004e8825c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 5 Jan 2026 15:05:33 +0100 Subject: [PATCH 3/3] add tests --- src/utils.jl | 4 +-- test/runtests.jl | 2 ++ test/test-utils.jl | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/test-utils.jl diff --git a/src/utils.jl b/src/utils.jl index c7a43526..51a24d96 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -28,7 +28,7 @@ end # ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂². function opnorm_upper_bound(B::LBFGSOperator{T}) where{T} data = B.data - bound = data.scaling ? 1/data.scaling_factor : T(1) + bound = data.scaling && !iszero(data.scaling_factor) ? 1/abs(data.scaling_factor) : T(1) @inbounds for i = 1:data.mem bound += norm(data.b[i], 2)^2 end @@ -39,7 +39,7 @@ end # ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂². function opnorm_upper_bound(B::LSR1Operator{T}) where{T} data = B.data - bound = data.scaling ? 1/data.scaling_factor : T(1) + bound = data.scaling && !iszero(data.scaling_factor) ? 1/abs(data.scaling_factor) : T(1) @inbounds for i = 1:data.mem if data.as[i] != 0 bound += norm(data.a[i])^2/abs(data.as[i]) diff --git a/test/runtests.jl b/test/runtests.jl index 071616c0..81fdcded 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using LinearAlgebra: length using LinearAlgebra, Random, Test using ProximalOperators using ADNLPModels, + LinearOperators, OptimizationProblems, OptimizationProblems.ADNLPProblems, NLPModels, @@ -19,6 +20,7 @@ const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true) const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10 include("test_AL.jl") +include("test-utils.jl") for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * compound), "B0")) diff --git a/test/test-utils.jl b/test/test-utils.jl new file mode 100644 index 00000000..220e1345 --- /dev/null +++ b/test/test-utils.jl @@ -0,0 +1,71 @@ +function test_opnorm_upper_bound(B, m, n) + T = eltype(B) + is_upper_bound = true + for _ = 1:m + upper_bound, found = RegularizedOptimization.opnorm_upper_bound(B) + if opnorm(Matrix(B)) > upper_bound || !found + is_upper_bound = false + break + end + + push!(B, randn(T, n), randn(T, n)) + end + + nallocs = @allocated RegularizedOptimization.opnorm_upper_bound(B) + @test nallocs == 0 + @test is_upper_bound == true +end + +# Test norm functions + +@testset "Test opnorm upper bound functions" begin + n = 10 + m = 40 + @testset "LBFGS" begin + B = LBFGSOperator(Float64, n, scaling = false) + test_opnorm_upper_bound(B, m, n) + + B = LBFGSOperator(Float64, n, scaling = true) + test_opnorm_upper_bound(B, m, n) + + B = LBFGSOperator(Float32, n, scaling = false) + test_opnorm_upper_bound(B, m, n) + + B = LBFGSOperator(Float32, n, scaling = true) + test_opnorm_upper_bound(B, m, n) + end + + @testset "LSR1" begin + B = LSR1Operator(Float64, n, scaling = false) + test_opnorm_upper_bound(B, m, n) + + B = LSR1Operator(Float64, n, scaling = true) + test_opnorm_upper_bound(B, m, n) + + B = LSR1Operator(Float32, n, scaling = false) + test_opnorm_upper_bound(B, m, n) + + B = LSR1Operator(Float32, n, scaling = true) + test_opnorm_upper_bound(B, m, n) + end + + @testset "Diagonal" begin + B = SpectralGradient(randn(Float64), n) + test_opnorm_upper_bound(B, m, n) + + B = SpectralGradient(randn(Float32), n) + test_opnorm_upper_bound(B, m, n) + + B = DiagonalPSB(randn(Float64, n)) + test_opnorm_upper_bound(B, m, n) + + B = DiagonalPSB(randn(Float32, n)) + test_opnorm_upper_bound(B, m, n) + + B = DiagonalAndrei(randn(Float64, n)) + test_opnorm_upper_bound(B, m, n) + + B = DiagonalAndrei(randn(Float32, n)) + test_opnorm_upper_bound(B, m, n) + end +end \ No newline at end of file