Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions perf/neural.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,40 @@
# Needs https://github.com/jump-dev/JuMP.jl/pull/3451
# Neural network optimization using ArrayDiff + NLopt
#
# This demonstrates end-to-end optimization of a simple two-layer neural
# network with array-valued decision variables, array-aware AD, and a
# first-order NLP solver.

using JuMP
using ArrayDiff
import LinearAlgebra
using LinearAlgebra
import NLopt

n = 2
X = rand(n, n)
Y = rand(n, n)
model = Model()
target = rand(n, n)

model = direct_model(NLopt.Optimizer())
set_attribute(model, "algorithm", :LD_LBFGS)

@variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
@variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
Y_hat = W2 * tanh.(W1 * X)
loss = LinearAlgebra.norm(Y_hat .- Y)

# Set non-zero starting values to avoid saddle point at zero
for i in 1:n, j in 1:n
set_start_value(W1[i, j], 0.1 * randn())
set_start_value(W2[i, j], 0.1 * randn())
end

# Forward pass: Y = W2 * tanh.(W1 * X)
Y = W2 * tanh.(W1 * X)

# Loss: ||Y - target|| (norm returns a scalar-shaped GenericArrayExpr)
loss = norm(Y .- target)
@objective(model, Min, loss)

optimize!(model)

println("Termination status: ", termination_status(model))
println("Objective value: ", objective_value(model))
println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n])
println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n])
21 changes: 16 additions & 5 deletions src/ArrayDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,8 @@ include("model.jl")
include("parse.jl")
include("evaluator.jl")

"""
Mode() <: AbstractAutomaticDifferentiation

Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
"""
include("array_nonlinear_function.jl")
include("parse_moi.jl")

function Evaluator(
model::ArrayDiff.Model,
Expand All @@ -62,6 +59,20 @@ function Evaluator(
return Evaluator(model, NLPEvaluator(model, ordered_variables))
end

# Called by solvers (e.g., NLopt) via:
# MOI.Nonlinear.Evaluator(nlp_model, ad_backend, vars)
# When nlp_model is an ArrayNonlinearFunction and ad_backend is Mode(),
# we build an ArrayDiff.Model and return our Evaluator.
function Nonlinear.Evaluator(
    func::ArrayNonlinearFunction,
    ::Mode,
    ordered_variables::Vector{MOI.VariableIndex},
)
    # Wrap `func` in a fresh ArrayDiff model whose sole objective is this
    # function, then return ArrayDiff's evaluator for that model.
    inner = Model()
    set_objective(inner, func)
    return Evaluator(inner, NLPEvaluator(inner, ordered_variables))
end

include("JuMP/JuMP.jl")

end # module
1 change: 1 addition & 0 deletions src/JuMP/JuMP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ include("variables.jl")
include("nlp_expr.jl")
include("operators.jl")
include("print.jl")
include("moi_bridge.jl")
44 changes: 44 additions & 0 deletions src/JuMP/moi_bridge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Conversion from JuMP array types to MOI ArrayNonlinearFunction
# and set_objective_function for scalar-shaped (0-dim) array expressions.

# ── moi_function: JuMP → MOI ─────────────────────────────────────────────────

# Lower a JuMP variable block to its MOI counterpart, preserving the
# contiguous offset/shape encoding.
function _to_moi_arg(x::ArrayOfVariables{T,N}) where {T,N}
    return ArrayOfVariableIndices{N}(x.offset, x.size)
end

function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N}
    # Recursively lower every child, keeping the heterogeneous `Any`
    # vector that `ArrayNonlinearFunction` stores.
    lowered = Any[_to_moi_arg(child) for child in x.args]
    return ArrayNonlinearFunction{N}(x.head, lowered, x.size, x.broadcasted)
end

# Constant matrices pass through to the MOI representation unchanged.
_to_moi_arg(x::Matrix{Float64}) = x

# Scalar constants are normalized to Float64.
_to_moi_arg(x::Real) = Float64(x)

# Entry point used by JuMP when an array expression must cross into the
# MOI layer: delegate to the recursive lowering helper.
JuMP.moi_function(x::GenericArrayExpr) = _to_moi_arg(x)

# ── set_objective_function for scalar-shaped array expressions ───────────────
# GenericArrayExpr{V,0} (size=()) is scalar-valued but contains array
# subexpressions. JuMP's default set_objective_function only handles
# AbstractJuMPScalar, so we add a method here. We also set the
# AutomaticDifferentiationBackend to ArrayDiff.Mode() so that the solver
# uses ArrayDiff's evaluator.

function JuMP.set_objective_function(
    model::JuMP.GenericModel{T},
    func::GenericArrayExpr{JuMP.GenericVariableRef{T},0},
) where {T<:Real}
    # Lower the scalar-shaped array expression to its MOI counterpart.
    moi_func = JuMP.moi_function(func)
    inner = JuMP.backend(model)
    # Make the solver evaluate this objective with ArrayDiff's AD backend.
    MOI.set(inner, MOI.AutomaticDifferentiationBackend(), Mode())
    MOI.set(inner, MOI.ObjectiveFunction{typeof(moi_func)}(), moi_func)
    model.is_model_dirty = true
    return
end
48 changes: 47 additions & 1 deletion src/JuMP/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ import LinearAlgebra

# Build a scalar-shaped (0-dimensional) array expression representing
# `norm(x)`. Returning a `GenericArrayExpr{V,0}` (rather than a scalar
# `GenericNonlinearExpr`) keeps the array subexpression visible to the
# array-aware AD machinery.
#
# Fix: the diff left the superseded `GenericNonlinearExpr` return in place
# above the new one, making the second return unreachable; only the
# current (array-expression) return is kept.
function _array_norm(x::AbstractJuMPArray)
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,0}(:norm, Any[x], (), false)
end

# Define norm for each concrete AbstractJuMPArray subtype to avoid
Expand All @@ -62,3 +62,49 @@ end
# `norm` of a raw variable block: delegate to the shared helper.
LinearAlgebra.norm(x::ArrayOfVariables) = _array_norm(x)

# Subtraction between array expressions and constant arrays
# Elementwise subtraction: array expression minus constant array.
function Base.:(-)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    # Validate shapes with a real exception: `@assert` can be disabled at
    # higher optimization levels and raises the wrong exception type.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

# Elementwise subtraction: constant array minus array expression.
function Base.:(-)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    # Validate shapes with a real exception: `@assert` can be disabled at
    # higher optimization levels and raises the wrong exception type.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(y), false)
end

# Elementwise subtraction between two array expressions.
function Base.:(-)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    # Mixing variable reference types (expressions from different model
    # value types) is not supported.
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("cannot subtract expressions with different variable reference types"),
    )
    # Use a real exception for shape errors; `@assert` may be compiled out.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

# Addition between array expressions and constant arrays
# Elementwise addition: array expression plus constant array.
function Base.:(+)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    # Validate shapes with a real exception: `@assert` can be disabled at
    # higher optimization levels and raises the wrong exception type.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end

# Elementwise addition: constant array plus array expression.
function Base.:(+)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    # Validate shapes with a real exception: `@assert` can be disabled at
    # higher optimization levels and raises the wrong exception type.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(y), false)
end

# Elementwise addition between two array expressions.
function Base.:(+)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    # Mixing variable reference types (expressions from different model
    # value types) is not supported.
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("cannot add expressions with different variable reference types"),
    )
    # Use a real exception for shape errors; `@assert` may be compiled out.
    size(x) == size(y) ||
        throw(DimensionMismatch("sizes $(size(x)) and $(size(y)) do not match"))
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end
94 changes: 94 additions & 0 deletions src/array_nonlinear_function.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction

Represents an N-dimensional array-valued nonlinear function for MOI.

The `output_dimension` is `prod(size)` — the vectorization of the array — since
`MOI.AbstractVectorFunction` cannot represent multidimensional arrays. No actual
vectorization is performed; this is only for passing through MOI layers.

## Fields

- `head::Symbol`: the operator (e.g., `:*`, `:tanh`)
- `args::Vector{Any}`: arguments, which may be `ArrayNonlinearFunction`,
`MOI.ScalarNonlinearFunction`, `MOI.VariableIndex`, `Float64`,
`Vector{Float64}`, `Matrix{Float64}`, or `ArrayOfVariableIndices`
- `size::NTuple{N,Int}`: the dimensions of the output array
- `broadcasted::Bool`: whether this is a broadcasted operation
"""
struct ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction
    head::Symbol        # operator, e.g. :*, :tanh (see docstring above)
    args::Vector{Any}   # heterogeneous child nodes; legal types listed above
    size::NTuple{N,Int} # shape of the output array
    broadcasted::Bool   # true when the operator is applied elementwise
end

# Flattened length of the output array: MOI vector functions are
# one-dimensional, so the N-d shape is reported as its element count.
MOI.output_dimension(f::ArrayNonlinearFunction) = prod(f.size)

"""
ArrayOfVariableIndices{N}

A block of contiguous `MOI.VariableIndex` values representing an N-dimensional
array. Used as an argument in `ArrayNonlinearFunction`.
"""
struct ArrayOfVariableIndices{N} <: MOI.AbstractVectorFunction
    offset::Int          # start offset of the contiguous index block
    size::NTuple{N,Int}  # logical N-dimensional shape of the block
end

# Report the logical N-dimensional shape of the variable block.
Base.size(a::ArrayOfVariableIndices) = a.size

# Flattened element count, matching the convention used for
# ArrayNonlinearFunction.
MOI.output_dimension(f::ArrayOfVariableIndices) = prod(f.size)

# Copy an ArrayNonlinearFunction. The `args` vector itself is copied so the
# two functions do not share it, but the elements are NOT copied recursively.
# NOTE(review): nested ArrayNonlinearFunction arguments therefore still share
# their own `args` vectors with the original — confirm callers never mutate
# nested arguments in place.
function Base.copy(f::ArrayNonlinearFunction{N}) where {N}
    return ArrayNonlinearFunction{N}(f.head, copy(f.args), f.size, f.broadcasted)
end

# ArrayOfVariableIndices is an immutable struct of bits fields (Int plus an
# NTuple of Int), so sharing the same instance is a valid copy.
Base.copy(f::ArrayOfVariableIndices) = f

# map_indices: remap MOI.VariableIndex values during MOI.copy_to
# map_indices: remap MOI.VariableIndex values during MOI.copy_to by
# rebuilding the node with every argument pushed through `index_map`.
function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayNonlinearFunction{N},
) where {F<:Function,N}
    mapped = Any[_map_indices_arg(index_map, arg) for arg in f.args]
    return ArrayNonlinearFunction{N}(f.head, mapped, f.size, f.broadcasted)
end

function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayOfVariableIndices{N},
) where {F<:Function,N}
    # NOTE(review): known limitation, not a full implementation. `f` encodes
    # a contiguous block of variable indices via an offset, and that
    # representation cannot express an arbitrary `index_map`. Returning `f`
    # unchanged is only correct when `index_map` preserves the contiguous
    # block (the common JuMP direct-model case). An `index_map` that actually
    # moves indices would silently produce wrong variable references here —
    # TODO: detect the non-identity case and throw, or convert to an explicit
    # list of mapped variables.
    return f
end

# Recurse into nested array function nodes.
_map_indices_arg(m::F, x::ArrayNonlinearFunction) where {F} =
    MOI.Utilities.map_indices(m, x)

# Delegate variable blocks to their dedicated map_indices method.
_map_indices_arg(m::F, x::ArrayOfVariableIndices) where {F} =
    MOI.Utilities.map_indices(m, x)

# Constant numeric arrays contain no variable indices, so remapping is the
# identity. Generalized from Matrix{Float64} to AbstractArray{Float64}: the
# ArrayNonlinearFunction docstring lists Vector{Float64} as a legal argument,
# and it must not fall through to the generic map_indices fallback.
function _map_indices_arg(::F, x::AbstractArray{Float64}) where {F}
    return x
end

# Scalar constants carry no variable indices; remapping is the identity.
_map_indices_arg(::F, x::Real) where {F} = x

# Fallback for every other argument kind: defer to MOI's generic index
# remapping.
_map_indices_arg(m::F, x) where {F} = MOI.Utilities.map_indices(m, x)
2 changes: 2 additions & 0 deletions src/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ function eval_multivariate_function(
return maximum(x)
elseif op == :vect
return x
elseif op == :sum
return sum(x; init = zero(T))
end
id = registry.multivariate_operator_to_id[op]
offset = id - registry.multivariate_user_operator_start
Expand Down
Loading
Loading