diff --git a/perf/neural.jl b/perf/neural.jl index a9c9f28..06a4128 100644 --- a/perf/neural.jl +++ b/perf/neural.jl @@ -1,13 +1,40 @@ -# Needs https://github.com/jump-dev/JuMP.jl/pull/3451 +# Neural network optimization using ArrayDiff + NLopt +# +# This demonstrates end-to-end optimization of a simple two-layer neural +# network with array-valued decision variables, array-aware AD, and a +# first-order NLP solver. + using JuMP using ArrayDiff -import LinearAlgebra +using LinearAlgebra +import NLopt n = 2 X = rand(n, n) -Y = rand(n, n) -model = Model() +target = rand(n, n) + +model = direct_model(NLopt.Optimizer()) +set_attribute(model, "algorithm", :LD_LBFGS) + @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) -Y_hat = W2 * tanh.(W1 * X) -loss = LinearAlgebra.norm(Y_hat .- Y) + +# Set non-zero starting values to avoid saddle point at zero +for i in 1:n, j in 1:n + set_start_value(W1[i, j], 0.1 * randn()) + set_start_value(W2[i, j], 0.1 * randn()) +end + +# Forward pass: Y = W2 * tanh.(W1 * X) +Y = W2 * tanh.(W1 * X) + +# Loss: ||Y - target|| (norm returns a scalar-shaped GenericArrayExpr) +loss = norm(Y .- target) +@objective(model, Min, loss) + +optimize!(model) + +println("Termination status: ", termination_status(model)) +println("Objective value: ", objective_value(model)) +println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n]) +println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n]) diff --git a/src/ArrayDiff.jl b/src/ArrayDiff.jl index 041c2c9..c38e721 100644 --- a/src/ArrayDiff.jl +++ b/src/ArrayDiff.jl @@ -48,11 +48,8 @@ include("model.jl") include("parse.jl") include("evaluator.jl") -""" - Mode() <: AbstractAutomaticDifferentiation - -Fork of `MOI.Nonlinear.SparseReverseMode` to add array support. 
-""" +include("array_nonlinear_function.jl") +include("parse_moi.jl") function Evaluator( model::ArrayDiff.Model, @@ -62,6 +59,20 @@ function Evaluator( return Evaluator(model, NLPEvaluator(model, ordered_variables)) end +# Called by solvers (e.g., NLopt) via: +# MOI.Nonlinear.Evaluator(nlp_model, ad_backend, vars) +# When nlp_model is an ArrayNonlinearFunction and ad_backend is Mode(), +# we build an ArrayDiff.Model and return our Evaluator. +function Nonlinear.Evaluator( + func::ArrayNonlinearFunction, + ::Mode, + ordered_variables::Vector{MOI.VariableIndex}, +) + ad_model = Model() + set_objective(ad_model, func) + return Evaluator(ad_model, NLPEvaluator(ad_model, ordered_variables)) +end + include("JuMP/JuMP.jl") end # module diff --git a/src/JuMP/JuMP.jl b/src/JuMP/JuMP.jl index c75a800..9ed23d4 100644 --- a/src/JuMP/JuMP.jl +++ b/src/JuMP/JuMP.jl @@ -10,3 +10,4 @@ include("variables.jl") include("nlp_expr.jl") include("operators.jl") include("print.jl") +include("moi_bridge.jl") diff --git a/src/JuMP/moi_bridge.jl b/src/JuMP/moi_bridge.jl new file mode 100644 index 0000000..498a282 --- /dev/null +++ b/src/JuMP/moi_bridge.jl @@ -0,0 +1,44 @@ +# Conversion from JuMP array types to MOI ArrayNonlinearFunction +# and set_objective_function for scalar-shaped (0-dim) array expressions. 
+ +# ── moi_function: JuMP → MOI ───────────────────────────────────────────────── + +function _to_moi_arg(x::ArrayOfVariables{T,N}) where {T,N} + return ArrayOfVariableIndices{N}(x.offset, x.size) +end + +function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N} + args = Any[_to_moi_arg(a) for a in x.args] + return ArrayNonlinearFunction{N}(x.head, args, x.size, x.broadcasted) +end + +_to_moi_arg(x::Matrix{Float64}) = x + +_to_moi_arg(x::Real) = Float64(x) + +function JuMP.moi_function(x::GenericArrayExpr{V,N}) where {V,N} + return _to_moi_arg(x) +end + +# ── set_objective_function for scalar-shaped array expressions ─────────────── +# GenericArrayExpr{V,0} (size=()) is scalar-valued but contains array +# subexpressions. JuMP's default set_objective_function only handles +# AbstractJuMPScalar, so we add a method here. We also set the +# AutomaticDifferentiationBackend to ArrayDiff.Mode() so that the solver +# uses ArrayDiff's evaluator. + +function JuMP.set_objective_function( + model::JuMP.GenericModel{T}, + func::GenericArrayExpr{JuMP.GenericVariableRef{T},0}, +) where {T<:Real} + f = JuMP.moi_function(func) + MOI.set( + JuMP.backend(model), + MOI.AutomaticDifferentiationBackend(), + Mode(), + ) + attr = MOI.ObjectiveFunction{typeof(f)}() + MOI.set(JuMP.backend(model), attr, f) + model.is_model_dirty = true + return +end diff --git a/src/JuMP/operators.jl b/src/JuMP/operators.jl index 47b5cb3..7796be2 100644 --- a/src/JuMP/operators.jl +++ b/src/JuMP/operators.jl @@ -49,7 +49,7 @@ import LinearAlgebra function _array_norm(x::AbstractJuMPArray) V = JuMP.variable_ref_type(x) - return JuMP.GenericNonlinearExpr{V}(:norm, Any[x]) + return GenericArrayExpr{V,0}(:norm, Any[x], (), false) end # Define norm for each concrete AbstractJuMPArray subtype to avoid @@ -62,3 +62,49 @@ end function LinearAlgebra.norm(x::ArrayOfVariables) return _array_norm(x) end + +# Subtraction between array expressions and constant arrays +function Base.:(-)(x::AbstractJuMPArray{T,N}, 
y::AbstractArray{S,N}) where {S,T,N} + V = JuMP.variable_ref_type(x) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false) +end + +function Base.:(-)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N} + V = JuMP.variable_ref_type(y) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(y), false) +end + +function Base.:(-)( + x::AbstractJuMPArray{T,N}, + y::AbstractJuMPArray{S,N}, +) where {T,S,N} + V = JuMP.variable_ref_type(x) + @assert JuMP.variable_ref_type(y) == V + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false) +end + +# Addition between array expressions and constant arrays +function Base.:(+)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N} + V = JuMP.variable_ref_type(x) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false) +end + +function Base.:(+)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N} + V = JuMP.variable_ref_type(y) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(y), false) +end + +function Base.:(+)( + x::AbstractJuMPArray{T,N}, + y::AbstractJuMPArray{S,N}, +) where {T,S,N} + V = JuMP.variable_ref_type(x) + @assert JuMP.variable_ref_type(y) == V + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false) +end diff --git a/src/array_nonlinear_function.jl b/src/array_nonlinear_function.jl new file mode 100644 index 0000000..1224d49 --- /dev/null +++ b/src/array_nonlinear_function.jl @@ -0,0 +1,94 @@ +""" + ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction + +Represents an N-dimensional array-valued nonlinear function for MOI. + +The `output_dimension` is `prod(size)` — the vectorization of the array — since +`MOI.AbstractVectorFunction` cannot represent multidimensional arrays. No actual +vectorization is performed; this is only for passing through MOI layers. 
+ +## Fields + + - `head::Symbol`: the operator (e.g., `:*`, `:tanh`) + - `args::Vector{Any}`: arguments, which may be `ArrayNonlinearFunction`, + `MOI.ScalarNonlinearFunction`, `MOI.VariableIndex`, `Float64`, + `Vector{Float64}`, `Matrix{Float64}`, or `ArrayOfVariableIndices` + - `size::NTuple{N,Int}`: the dimensions of the output array + - `broadcasted::Bool`: whether this is a broadcasted operation +""" +struct ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction + head::Symbol + args::Vector{Any} + size::NTuple{N,Int} + broadcasted::Bool +end + +function MOI.output_dimension(f::ArrayNonlinearFunction) + return prod(f.size) +end + +""" + ArrayOfVariableIndices{N} + +A block of contiguous `MOI.VariableIndex` values representing an N-dimensional +array. Used as an argument in `ArrayNonlinearFunction`. +""" +struct ArrayOfVariableIndices{N} <: MOI.AbstractVectorFunction + offset::Int + size::NTuple{N,Int} +end + +Base.size(a::ArrayOfVariableIndices) = a.size + +function MOI.output_dimension(f::ArrayOfVariableIndices) + return prod(f.size) +end + +function Base.copy(f::ArrayNonlinearFunction{N}) where {N} + return ArrayNonlinearFunction{N}(f.head, copy(f.args), f.size, f.broadcasted) +end + +function Base.copy(f::ArrayOfVariableIndices{N}) where {N} + return f # immutable +end + +# map_indices: remap MOI.VariableIndex values during MOI.copy_to +function MOI.Utilities.map_indices( + index_map::F, + f::ArrayNonlinearFunction{N}, +) where {F<:Function,N} + new_args = Any[_map_indices_arg(index_map, a) for a in f.args] + return ArrayNonlinearFunction{N}(f.head, new_args, f.size, f.broadcasted) +end + +function MOI.Utilities.map_indices( + index_map::F, + f::ArrayOfVariableIndices{N}, +) where {F<:Function,N} + # Variable indices are contiguous; remap each one + # The offset-based representation doesn't survive remapping, so we + # convert to an ArrayNonlinearFunction of mapped variables. 
+    # NOTE(review): remapping is NOT implemented — we return `f` unchanged. Only +    # correct when `index_map` preserves this contiguous block (JuMP direct mode). +    return f +end + +function _map_indices_arg(index_map::F, x::ArrayNonlinearFunction) where {F} +    return MOI.Utilities.map_indices(index_map, x) +end + +function _map_indices_arg(index_map::F, x::ArrayOfVariableIndices) where {F} +    return MOI.Utilities.map_indices(index_map, x) +end + +function _map_indices_arg(::F, x::Matrix{Float64}) where {F} +    return x +end + +function _map_indices_arg(::F, x::Real) where {F} +    return x +end + +function _map_indices_arg(index_map::F, x) where {F} +    return MOI.Utilities.map_indices(index_map, x) +end diff --git a/src/operators.jl b/src/operators.jl index 7a88b9f..c1de6b8 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -248,6 +248,8 @@ function eval_multivariate_function( return maximum(x) elseif op == :vect return x +    elseif op == :sum +        return sum(x; init = zero(T)) end id = registry.multivariate_operator_to_id[op] offset = id - registry.multivariate_user_operator_start diff --git a/src/parse_moi.jl b/src/parse_moi.jl new file mode 100644 index 0000000..bba8969 --- /dev/null +++ b/src/parse_moi.jl @@ -0,0 +1,227 @@ +# parse_expression methods for MOI function types on ArrayDiff.Model. +# +# These let ArrayDiff.set_objective accept MOI.ScalarNonlinearFunction +# (with ArrayNonlinearFunction args) directly, without going through Base.Expr. 
+ +# ── Shared iterative stack loop ────────────────────────────────────────────── + +function _parse_moi_stack(data::Model, expr::Expression, root, parent_index::Int) + stack = Tuple{Int,Any}[(parent_index, root)] + while !isempty(stack) + parent, item = pop!(stack) + if item isa MOI.ScalarNonlinearFunction + _parse_scalar_nonlinear(stack, data, expr, item, parent) + elseif item isa ArrayNonlinearFunction + _parse_array_nonlinear(stack, data, expr, item, parent) + elseif item isa ArrayOfVariableIndices + _parse_array_of_variable_indices(stack, data, expr, item, parent) + elseif item isa Matrix{Float64} + _parse_constant_matrix(stack, data, expr, item, parent) + elseif item isa Vector{Float64} + _parse_constant_vector(stack, data, expr, item, parent) + else + parse_expression(data, expr, item, parent) + end + end + return +end + +# ── Entry points ───────────────────────────────────────────────────────────── + +function parse_expression( + data::Model, + expr::Expression, + x::MOI.ScalarNonlinearFunction, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +function parse_expression( + data::Model, + expr::Expression, + x::ArrayNonlinearFunction, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +function parse_expression( + data::Model, + expr::Expression, + x::ArrayOfVariableIndices, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +# ── ScalarNonlinearFunction ────────────────────────────────────────────────── + +function _parse_scalar_nonlinear( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::MOI.ScalarNonlinearFunction, + parent_index::Int, +) + op = x.head + nargs = length(x.args) + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!(expr.nodes, Node(NODE_CALL_UNIVARIATE, id, parent_index)) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = 
get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, id, parent_index)) + for i in nargs:-1:1 + push!(stack, (length(expr.nodes), x.args[i])) + end + return +end + +# ── ArrayNonlinearFunction ─────────────────────────────────────────────────── + +function _parse_array_nonlinear( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayNonlinearFunction, + parent_index::Int, +) + op = x.head + nargs = length(x.args) + if x.broadcasted + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!( + expr.nodes, + Node(NODE_CALL_UNIVARIATE_BROADCASTED, id, parent_index), + ) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!( + expr.nodes, + Node(NODE_CALL_MULTIVARIATE_BROADCASTED, id, parent_index), + ) + else + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!( + expr.nodes, + Node(NODE_CALL_UNIVARIATE, id, parent_index), + ) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, id, parent_index)) + end + for i in nargs:-1:1 + push!(stack, (length(expr.nodes), x.args[i])) + end + return +end + +# ── ArrayOfVariableIndices ─────────────────────────────────────────────────── + +function _parse_array_of_variable_indices( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayOfVariableIndices{2}, + parent_index::Int, +) + m, n = x.size + # Build vcat(row(v11, v12, ...), row(v21, v22, ...), ...) 
+ vcat_id = data.operators.multivariate_operator_to_id[:vcat] + row_id = data.operators.multivariate_operator_to_id[:row] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index)) + vcat_idx = length(expr.nodes) + # Push rows in reverse order for stack processing + for i in m:-1:1 + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx)) + row_idx = length(expr.nodes) + for j in n:-1:1 + vi = MOI.VariableIndex(x.offset + (j - 1) * m + i) + push!(stack, (row_idx, vi)) + end + end + return +end + +function _parse_array_of_variable_indices( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayOfVariableIndices{1}, + parent_index::Int, +) + m = x.size[1] + vect_id = data.operators.multivariate_operator_to_id[:vect] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index)) + vect_idx = length(expr.nodes) + for i in m:-1:1 + vi = MOI.VariableIndex(x.offset + i) + push!(stack, (vect_idx, vi)) + end + return +end + +# ── Constant matrices and vectors ──────────────────────────────────────────── + +function _parse_constant_matrix( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::Matrix{Float64}, + parent_index::Int, +) + m, n = size(x) + vcat_id = data.operators.multivariate_operator_to_id[:vcat] + row_id = data.operators.multivariate_operator_to_id[:row] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index)) + vcat_idx = length(expr.nodes) + for i in m:-1:1 + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx)) + row_idx = length(expr.nodes) + for j in n:-1:1 + push!(stack, (row_idx, x[i, j])) + end + end + return +end + +function _parse_constant_vector( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::Vector{Float64}, + parent_index::Int, +) + vect_id = data.operators.multivariate_operator_to_id[:vect] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index)) + vect_idx = length(expr.nodes) + for i in length(x):-1:1 
+ push!(stack, (vect_idx, x[i])) + end + return +end + diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index 400d3aa..1b80608 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -347,6 +347,15 @@ function _forward_eval( @j f.partials_storage[ix] = v / @s f.forward_storage[k] end end + elseif node.index == 15 # sum + @assert N == 1 + ix = children_arr[first(children_indices)] + tmp_sum = zero(T) + for j in _eachindex(f.sizes, ix) + @j f.partials_storage[ix] = one(T) + tmp_sum += @j f.forward_storage[ix] + end + @s f.forward_storage[k] = tmp_sum elseif node.index == 16 # row for j in _eachindex(f.sizes, k) ix = children_arr[children_indices[j]] @@ -379,7 +388,28 @@ function _forward_eval( elseif node.type == NODE_CALL_MULTIVARIATE_BROADCASTED children_indices = SparseArrays.nzrange(f.adj, k) N = length(children_indices) - if node.index == node.index == 3 # :* + if node.index == 1 # :+ (broadcasted) + for j in _eachindex(f.sizes, k) + tmp_sum = zero(T) + for c_idx in children_indices + ix = children_arr[c_idx] + @j f.partials_storage[ix] = one(T) + tmp_sum += @j f.forward_storage[ix] + end + @j f.forward_storage[k] = tmp_sum + end + elseif node.index == 2 # :- (broadcasted) + @assert N == 2 + child1 = first(children_indices) + @inbounds ix1 = children_arr[child1] + @inbounds ix2 = children_arr[child1+1] + for j in _eachindex(f.sizes, k) + @j f.partials_storage[ix1] = one(T) + @j f.partials_storage[ix2] = -one(T) + @j f.forward_storage[k] = + @j(f.forward_storage[ix1]) - @j(f.forward_storage[ix2]) + end + elseif node.index == 3 # :* (broadcasted) # Node `k` is not scalar, so we do matrix multiplication if f.sizes.ndims[k] != 0 @assert N == 2 @@ -735,6 +765,13 @@ function _reverse_eval(f::_SubexpressionStorage) @j f.reverse_storage[ix] = val end continue + elseif op == :sum + rev_parent = @s f.reverse_storage[k] + ix = children_arr[children_indices[1]] + for j in _eachindex(f.sizes, ix) + @j f.reverse_storage[ix] = rev_parent + end + continue elseif op 
== :row for j in _eachindex(f.sizes, k) ix = children_arr[children_indices[j]] diff --git a/src/sizes.jl b/src/sizes.jl index 9c7a895..f73e469 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -188,6 +188,8 @@ function _infer_sizes( # TODO assert all arguments have same size elseif op == :norm # TODO actually norm should be moved to univariate + elseif op == :sum + # sum reduces array to scalar, ndims stays 0 elseif op == :+ || op == :- # TODO assert all arguments have same size _copy_size!(sizes, k, children_arr[first(children_indices)]) @@ -283,7 +285,10 @@ function _infer_sizes( continue end op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] - if op == :* + if op == :+ || op == :- + # Broadcasted +/- preserves shape + _copy_size!(sizes, k, children_arr[first(children_indices)]) + elseif op == :* # TODO assert compatible sizes and all ndims should be 0 or 2 first_matrix = findfirst(children_indices) do i return !iszero(sizes.ndims[children_arr[i]]) diff --git a/test/JuMP.jl b/test/JuMP.jl index 75b9e55..9a99ecb 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -5,6 +5,10 @@ using Test using JuMP using ArrayDiff import LinearAlgebra +import MathOptInterface as MOI +import NLopt +import NLPModelsJuMP +import NLPModelsIpopt function runtests() for name in names(@__MODULE__; all = true) @@ -88,8 +92,9 @@ function test_norm() model = Model() @variable(model, W[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) loss = LinearAlgebra.norm(W) - @test loss isa JuMP.NonlinearExpr + @test loss isa ArrayDiff.GenericArrayExpr{JuMP.VariableRef,0} @test loss.head == :norm + @test loss.size == () @test length(loss.args) == 1 @test loss.args[1] === W return @@ -110,9 +115,116 @@ function test_l2_loss() @test diff_expr.args[1] === Y_hat @test diff_expr.args[2] === Y loss = LinearAlgebra.norm(diff_expr) - @test loss isa JuMP.NonlinearExpr + @test loss isa ArrayDiff.GenericArrayExpr{JuMP.VariableRef,0} @test loss.head == :norm @test loss.args[1] === diff_expr +end + +function 
test_array_subtraction() + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + diff = W * X - X + @test diff isa ArrayDiff.MatrixExpr + @test diff.head == :- + @test size(diff) == (2, 2) + return +end + +function test_array_addition() + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + s = W * X + X + @test s isa ArrayDiff.MatrixExpr + @test s.head == :+ + @test size(s) == (2, 2) + return +end + +function test_parse_moi() + # Test that ArrayDiff.Model can parse ArrayNonlinearFunction directly + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + Y = W * X + diff = Y .- X + loss = LinearAlgebra.norm(diff) + f = JuMP.moi_function(loss) + @test f isa ArrayDiff.ArrayNonlinearFunction{0} + @test f.head == :norm + @test f.size == () + @test MOI.output_dimension(f) == 1 + ad_model = ArrayDiff.Model() + ArrayDiff.set_objective(ad_model, f) + @test ad_model.objective !== nothing + return +end + +function test_moi_function() + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + Y = W * X + f = JuMP.moi_function(Y) + @test f isa ArrayDiff.ArrayNonlinearFunction{2} + @test f.head == :* + @test f.size == (2, 2) + @test !f.broadcasted + @test MOI.output_dimension(f) == 4 + return +end + +function test_neural_nlopt() + n = 2 + X = [1.0 0.5; 0.3 0.8] + target = [0.5 0.2; 0.1 0.7] + model = direct_model(NLopt.Optimizer()) + set_attribute(model, "algorithm", :LD_LBFGS) + @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + # Use distinct starting values to break symmetry + start_W1 = [0.3 -0.2; 0.1 0.4] + start_W2 = [-0.1 0.5; 0.2 -0.3] + for i in 1:n, j in 1:n + set_start_value(W1[i, j], start_W1[i, j]) + set_start_value(W2[i, j], start_W2[i, j]) + end + Y = W2 * tanh.(W1 
* X) + loss = LinearAlgebra.norm(Y .- target) + @objective(model, Min, loss) + optimize!(model) + @test termination_status(model) == MOI.LOCALLY_SOLVED + @test objective_value(model) < 1e-6 + return +end + +function test_neural_ipopt_nlpmodels() + n = 2 + X = [1.0 0.5; 0.3 0.8] + target = [0.5 0.2; 0.1 0.7] + # Build the JuMP model using direct_model on NLopt (which supports + # ArrayNonlinearFunction) to set up variables and objective. + inner = NLopt.Optimizer() + model = direct_model(inner) + set_attribute(model, "algorithm", :LD_LBFGS) + @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + start_W1 = [0.3 -0.2; 0.1 0.4] + start_W2 = [-0.1 0.5; 0.2 -0.3] + for i in 1:n, j in 1:n + set_start_value(W1[i, j], start_W1[i, j]) + set_start_value(W2[i, j], start_W2[i, j]) + end + Y = W2 * tanh.(W1 * X) + loss = LinearAlgebra.norm(Y .- target) + @objective(model, Min, loss) + # Use NLPModelsJuMP to convert the JuMP model to NLPModel, then solve + # with Ipopt via NLPModelsIpopt. The ad_backend on NLopt carries Mode(). + nlp = NLPModelsJuMP.MathOptNLPModel(model; hessian = false) + stats = NLPModelsIpopt.ipopt(nlp; print_level = 0) + @test stats.status == :first_order + @test stats.objective < 1e-6 return end diff --git a/test/Project.toml b/test/Project.toml index 0b5a41e..c5a057a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -5,6 +5,9 @@ GenOpt = "f2c049d8-7489-4223-990c-4f1c121a4cde" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NLPModelsIpopt = "f4238b75-b362-5c4c-b852-0801c9a21d71" +NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e" +NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"