diff --git a/src/R2N.jl b/src/R2N.jl
index 14d47075..ea95da7b 100644
--- a/src/R2N.jl
+++ b/src/R2N.jl
@@ -139,7 +139,7 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory
 - `η2::T = T(0.9)`: very successful iteration threshold;
 - `γ::T = T(3)`: regularization parameter multiplier, σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful;
 - `θ::T = 1/(1 + eps(T)^(1 / 5))`: is the model decrease fraction with respect to the decrease of the Cauchy model;
-- `opnorm_maxiter::Int = 5`: how many iterations of the power method to use to compute the operator norm of Bₖ. If a negative number is provided, then Arpack is used instead;
+- `opnorm_maxiter::Int = 5`: how many iterations of the power method to use to compute the operator norm of Bₖ. If a negative number is provided, an upper bound of the operator norm is computed instead; see `opnorm_upper_bound`;
 - `m_monotone::Int = 1`: monotonicity parameter. By default, R2N is monotone but the non-monotone variant will be used if `m_monotone > 1`;
 - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
@@ -295,9 +295,9 @@ function SolverCore.solve!(
     solver.subpb.model.B = hess_op(nlp, xk)
 
     if opnorm_maxiter ≤ 0
-      λmax, found_λ = opnorm(solver.subpb.model.B)
+      λmax, found_λ = opnorm_upper_bound(solver.subpb.model.B)
     else
-      λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+      λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
     end
     found_λ || error("operator norm computation failed")
 
@@ -448,9 +448,9 @@ function SolverCore.solve!(
       solver.subpb.model.B = hess_op(nlp, xk)
 
       if opnorm_maxiter ≤ 0
-        λmax, found_λ = opnorm(solver.subpb.model.B)
+        λmax, found_λ = opnorm_upper_bound(solver.subpb.model.B)
      else
-        λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+        λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
       end
       found_λ || error("operator norm computation failed")
     end
diff --git a/src/TR_alg.jl b/src/TR_alg.jl
index 0b75df70..b7893a0f 100644
--- a/src/TR_alg.jl
+++ b/src/TR_alg.jl
@@ -299,7 +299,7 @@ function SolverCore.solve!(
     if opnorm_maxiter ≤ 0
       λmax, found_λ = opnorm(solver.subpb.model.B)
     else
-      λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+      λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
     end
     found_λ || error("operator norm computation failed")
 
@@ -460,7 +460,7 @@ function SolverCore.solve!(
     if opnorm_maxiter ≤ 0
       λmax, found_λ = opnorm(solver.subpb.model.B)
     else
-      λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+      λmax, found_λ = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
     end
     found_λ || error("operator norm computation failed")
 
diff --git a/src/utils.jl b/src/utils.jl
index ee257198..51a24d96 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -12,7 +12,52 @@ function power_method!(B::M, v₀::S, v₁::S, max_iter::Int = 1) where {M, S}
     normalize!(v₁)
   end
   mul!(v₁, B, v₀)
-  return abs(dot(v₀, v₁))
+  approx = abs(dot(v₀, v₁))
+  return approx, !isnan(approx)
 end
+
+# Compute an upper bound of ‖B‖₂ (hence of μ‖B‖₂ for any μ ∈ (0, 1]).
+
+# For matrices, we compute the Frobenius norm.
+function opnorm_upper_bound(B::AbstractMatrix)
+  bound = norm(B, 2)
+  return bound, !isnan(bound)
+end
+
+# For LBFGS, using the formula Bₖ = Bₖ₋₁ - aₖaₖᵀ + bₖbₖᵀ, we compute
+# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂².
+function opnorm_upper_bound(B::LBFGSOperator{T}) where {T}
+  data = B.data
+  bound = data.scaling && !iszero(data.scaling_factor) ? 1 / abs(data.scaling_factor) : T(1)
+  @inbounds for i = 1:data.mem
+    bound += norm(data.b[i], 2)^2
+  end
+  return bound, !isnan(bound)
+end
+
+# For LSR1, using the formula Bₖ = Bₖ₋₁ + σₖaₖaₖᵀ, we compute
+# ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂².
+function opnorm_upper_bound(B::LSR1Operator{T}) where {T}
+  data = B.data
+  bound = data.scaling && !iszero(data.scaling_factor) ? 1 / abs(data.scaling_factor) : T(1)
+  @inbounds for i = 1:data.mem
+    if data.as[i] != 0
+      bound += norm(data.a[i])^2 / abs(data.as[i])
+    end
+  end
+  return bound, !isnan(bound)
+end
+
+# For diagonal operators, we compute the exact operator norm.
+function opnorm_upper_bound(B::AbstractDiagonalQuasiNewtonOperator)
+  bound = norm(B.d, Inf)
+  return bound, !isnan(bound)
+end
+
+# In the general case, we use Arpack.
+# Note: Arpack allocates.
+function opnorm_upper_bound(B::AbstractLinearOperator)
+  return opnorm(B)
+end
 
 # use Arpack to obtain largest eigenvalue in magnitude with a minimum of robustness
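Review note (not part of the patch): the LBFGS bound above is easy to sanity-check numerically. A minimal sketch, assuming LinearOperators.jl's `LBFGSOperator` with the same `data.mem`/`data.b` fields the new code reads:

```julia
using LinearAlgebra, LinearOperators

# Check ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂² on a randomly updated LBFGS operator.
n = 10
B = LBFGSOperator(Float64, n, scaling = false)  # scaling off, so B₀ = I and ‖B₀‖₂ = 1
for _ = 1:5
  push!(B, randn(n), randn(n))  # pairs failing the curvature test are skipped internally
end
bound = 1.0 + sum(norm(B.data.b[i])^2 for i = 1:B.data.mem)
@assert opnorm(Matrix(B)) ≤ bound
```

The same pattern applies to the LSR1 bound, with `|σᵢ|‖aᵢ‖₂²` in place of `‖bᵢ‖₂²`.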
diff --git a/test/runtests.jl b/test/runtests.jl
index 071616c0..81fdcded 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,6 +2,7 @@ using LinearAlgebra: length
 using LinearAlgebra, Random, Test
 using ProximalOperators
 using ADNLPModels,
+  LinearOperators,
   OptimizationProblems,
   OptimizationProblems.ADNLPProblems,
   NLPModels,
@@ -19,6 +20,7 @@ const global bpdn2, bpdn_nls2, sol2 = bpdn_model(compound, bounds = true)
 const global λ = norm(grad(bpdn, zeros(bpdn.meta.nvar)), Inf) / 10
 
 include("test_AL.jl")
+include("test-utils.jl")
 
 for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "lbfgs"))
   for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * compound), "B0"))
diff --git a/test/test-utils.jl b/test/test-utils.jl
new file mode 100644
index 00000000..220e1345
--- /dev/null
+++ b/test/test-utils.jl
@@ -0,0 +1,71 @@
+function test_opnorm_upper_bound(B, m, n)
+  T = eltype(B)
+  is_upper_bound = true
+  for _ = 1:m
+    upper_bound, found = RegularizedOptimization.opnorm_upper_bound(B)
+    if opnorm(Matrix(B)) > upper_bound || !found
+      is_upper_bound = false
+      break
+    end
+
+    push!(B, randn(T, n), randn(T, n))
+  end
+
+  nallocs = @allocated RegularizedOptimization.opnorm_upper_bound(B)
+  @test nallocs == 0
+  @test is_upper_bound
+end
+
+# Test norm functions
+
+@testset "Test opnorm upper bound functions" begin
+  n = 10
+  m = 40
+  @testset "LBFGS" begin
+    B = LBFGSOperator(Float64, n, scaling = false)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LBFGSOperator(Float64, n, scaling = true)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LBFGSOperator(Float32, n, scaling = false)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LBFGSOperator(Float32, n, scaling = true)
+    test_opnorm_upper_bound(B, m, n)
+  end
+
+  @testset "LSR1" begin
+    B = LSR1Operator(Float64, n, scaling = false)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LSR1Operator(Float64, n, scaling = true)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LSR1Operator(Float32, n, scaling = false)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = LSR1Operator(Float32, n, scaling = true)
+    test_opnorm_upper_bound(B, m, n)
+  end
+
+  @testset "Diagonal" begin
+    B = SpectralGradient(randn(Float64), n)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = SpectralGradient(randn(Float32), n)
+    test_opnorm_upper_bound(B, m, n)
+
+    B = DiagonalPSB(randn(Float64, n))
+    test_opnorm_upper_bound(B, m, n)
+
+    B = DiagonalPSB(randn(Float32, n))
+    test_opnorm_upper_bound(B, m, n)
+
+    B = DiagonalAndrei(randn(Float64, n))
+    test_opnorm_upper_bound(B, m, n)
+
+    B = DiagonalAndrei(randn(Float32, n))
+    test_opnorm_upper_bound(B, m, n)
+  end
+end
\ No newline at end of file
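Review note (not part of the patch): for reference, here is roughly how the two branches in `solve!` consume these helpers after this change. A sketch, assuming the unexported names keep the signatures introduced above:

```julia
using LinearAlgebra, LinearOperators
using RegularizedOptimization

n = 10
B = LSR1Operator(Float64, n)
push!(B, randn(n), randn(n))

# Branch opnorm_maxiter ≤ 0: cheap upper bound on ‖B‖₂, no power iterations.
λmax, found_λ = RegularizedOptimization.opnorm_upper_bound(B)
found_λ || error("operator norm computation failed")

# Branch opnorm_maxiter > 0: power-method estimate, which now also returns a success flag.
v₀ = rand(n)
v₁ = zeros(n)
λmax, found_λ = RegularizedOptimization.power_method!(B, v₀, v₁, 5)
found_λ || error("operator norm computation failed")
```

Since `power_method!` previously returned only the scalar estimate, any external caller still binding a single return value needs the same two-value adjustment made in R2N.jl and TR_alg.jl above.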