Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,577 changes: 1,814 additions & 763 deletions Cargo.Bazel.json.lock

Large diffs are not rendered by default.

366 changes: 271 additions & 95 deletions Cargo.Bazel.toml.lock

Large diffs are not rendered by default.

312 changes: 253 additions & 59 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ members = [
"packages/icrc-ledger-types",
"packages/pocket-ic",
"packages/pocket-ic/test_canister",
"rs/ai_agent",
"rs/artifact_pool",
"rs/backup",
"rs/bitcoin/adapter",
Expand Down
64 changes: 64 additions & 0 deletions bazel/rust.MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,10 @@ crate.spec(
package = "memchr",
version = "2.7",
)
crate.spec(
package = "meval",
version = "^0.2.0",
)
crate.spec(
# When updating this, please make sure that the built
# binary exports metrics http_cache_* after one
Expand Down Expand Up @@ -1221,6 +1225,21 @@ crate.spec(
package = "quinn",
version = "^0.11.5",
)
crate.spec(
# Pinned at =0.11.7 deliberately. Newer point releases (0.11.13+) added
# an unconditional `ring = { features = ["wasm32_unknown_unknown_js"] }`
# and `getrandom = { features = ["wasm_js"] }` for `wasm32-unknown-unknown`.
# Bazel's crate_universe target-unifies features per package, so those
# transitively activate `getrandom 0.2.10/js` (= wasm-bindgen + js-sys)
# for *every* wasm32 crate in the workspace -- which causes IC canister
# wasms to fail Wasm-validation with `Module imports function
# '__wbindgen_describe' from '__wbindgen_placeholder__' that is not
# exported by the runtime`. Holding quinn-proto at 0.11.7 keeps the
# NNS canister Wasm clean while letting the host-side reqwest 0.12
# (which transitively pulls quinn) keep working.
package = "quinn-proto",
version = "=0.11.7",
)
crate.spec(
package = "quinn-udp",
version = "^0.5.5",
Expand Down Expand Up @@ -1304,6 +1323,18 @@ crate.spec(
package = "ring",
version = "^0.17.7",
)
crate.spec(
# See ai_agent docs / commit history for the full saga. Short version:
# we keep rig at default-features = false, features = ["reqwest"], and
# supply the missing TLS plumbing for reqwest 0.13 via the
# `rustls-no-provider` feature -- not by enabling rig's `rustls`
# feature (that drags in aws-lc-rs and conflicts with our pinned
# quinn 0.11.5).
default_features = False,
features = ["reqwest"],
package = "rig-core",
version = "^0.36.0",
)
crate.spec(
package = "ripemd",
version = "^0.1.1",
Expand Down Expand Up @@ -2061,6 +2092,39 @@ crate.annotation(
crate = "libz-sys",
crate_features = ["static"],
)
crate.annotation(
# rig 0.36 transitively builds reqwest 0.13, but rig only enables
# reqwest's `charset, http2, system-proxy` features -- never any TLS
# backend. As a result, reqwest 0.13's `Client::builder().build()`
# falls back to the plain-HTTP hyper connector and every https://
# request fails synchronously with `client error (Connect) ->
# invalid URL, scheme is not http`.
#
# Activate `rustls-no-provider` (which gates the rustls connector
# code in reqwest source) *and* also inject the rustls deps the
# feature would normally pull in -- `crate.annotation crate_features`
# only sets compile-time `cfg(feature = ...)` flags; it does NOT
# re-trigger Cargo's dep resolution, so we have to wire the deps in
# by hand via the `deps` attribute. We deliberately use
# `rustls-no-provider` (not `rustls`) so that aws-lc-rs is not
# pulled in alongside the workspace's existing `ring` rustls
# provider; the ai_agent main() installs the ring provider at
# startup so reqwest's rustls path picks it up.
crate = "reqwest",
crate_features = [
"__rustls",
"__tls",
"rustls-no-provider",
],
version = "0.13.3",
deps = [
"@crate_index__hyper-rustls-0.27.7//:hyper_rustls",
"@crate_index__rustls-0.23.27//:rustls",
"@crate_index__rustls-pki-types-1.12.0//:rustls_pki_types",
"@crate_index__rustls-platform-verifier-0.6.2//:rustls_platform_verifier",
"@crate_index__tokio-rustls-0.26.0//:tokio_rustls",
],
)
crate.annotation(
crate = "curve25519-dalek",
rustc_flags = [
Expand Down
10 changes: 10 additions & 0 deletions ic-os/components/guestos.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def component_files(mode):
Label("guestos/ollama/generate-ollama-tls-cert.service"): "/etc/systemd/system/generate-ollama-tls-cert.service",
Label("guestos/ollama/ollama-tls.conf"): "/etc/stunnel/ollama-tls.conf",
Label("guestos/ollama/ollama-tls.service"): "/etc/systemd/system/ollama-tls.service",
# IC AI agent orchestration HTTP API (started on AI nodes only,
# alongside ollama). Same TLS-via-stunnel pattern: the agent
# listens on 127.0.0.1:11501 and stunnel terminates TLS on
# :::11500. All three units are explicitly disabled in the
# GuestOS Dockerfile so non-AI nodes never run them.
Label("guestos/ai-agent/ic-ai-agent.service"): "/etc/systemd/system/ic-ai-agent.service",
Label("guestos/ai-agent/ic-ai-agent-tls.service"): "/etc/systemd/system/ic-ai-agent-tls.service",
Label("guestos/ai-agent/ic-ai-agent-tls.conf"): "/etc/stunnel/ic-ai-agent-tls.conf",
Label("guestos/ai-agent/generate-ic-ai-agent-tls-cert.sh"): "/opt/ic/bin/generate-ic-ai-agent-tls-cert.sh",
Label("guestos/ai-agent/generate-ic-ai-agent-tls-cert.service"): "/etc/systemd/system/generate-ic-ai-agent-tls-cert.service",
Label("guestos/remote-attestation-server.service"): "/etc/systemd/system/remote-attestation-server.service",
Label("guestos/generate-ic-config/generate-ic-config.service"): "/etc/systemd/system/generate-ic-config.service",
Label("guestos/share/ic-boundary.env"): "/opt/ic/share/ic-boundary.env",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[Unit]
Description=Generate self-signed TLS cert for the ic-ai-agent stunnel proxy
Documentation=man:openssl-req(1)

After=var.mount local-fs.target
RequiresMountsFor=/var/lib

ConditionPathExists=!/var/lib/ic-ai-agent-tls/stunnel.pem

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/opt/ic/bin/generate-ic-ai-agent-tls-cert.sh

[Install]
# Disabled by default in the GuestOS Dockerfile. Started on demand by
# manage-ollama.sh (which brings the ic-ai-agent units up alongside ollama),
# itself driven by the orchestrator's AiNodeManager.
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/sh
# Generate a self-signed TLS certificate used by stunnel to terminate TLS in
# front of the local ic-ai-agent backend.
#
# Mirrors generate-ollama-tls-cert.sh in shape; idempotent oneshot.
#
# Files produced under CERT_DIR:
#   cert.pem     self-signed X.509 certificate
#   key.pem      unencrypted RSA private key
#   stunnel.pem  cert + key concatenated (the file stunnel's `cert =` reads)
#
# Exit status: 0 if the material already exists or was generated, non-zero
# if any step fails (`set -e`) or `openssl req` reports an error.

set -eu

CERT_DIR=/var/lib/ic-ai-agent-tls
CERT_FILE="${CERT_DIR}/cert.pem"
KEY_FILE="${CERT_DIR}/key.pem"
COMBINED_FILE="${CERT_DIR}/stunnel.pem"

mkdir -p "${CERT_DIR}"
chmod 0750 "${CERT_DIR}"

# Idempotency guard: all three files must be present and non-empty.
if [ -s "${CERT_FILE}" ] && [ -s "${KEY_FILE}" ] && [ -s "${COMBINED_FILE}" ]; then
    echo "TLS material already present at ${CERT_DIR}, nothing to do." >&2
    exit 0
fi

# Stable Subject CN derived from the machine-id.
CN="ic-ai-agent"
if [ -s /etc/machine-id ]; then
    CN="ic-ai-agent-$(cat /etc/machine-id)"
fi

# Everything created from here on (key, cert, scratch files) is owner-only
# until ownership and mode are set explicitly below.
umask 077

# Scratch files are created with mktemp inside CERT_DIR instead of at a
# predictable /tmp/<name>.$$ path: this script runs as root, and a guessable
# name in a world-writable directory can be pre-planted as a symlink.
OPENSSL_LOG=""
COMBINED_TMP=""
cleanup() {
    [ -z "${OPENSSL_LOG}" ] || rm -f "${OPENSSL_LOG}"
    [ -z "${COMBINED_TMP}" ] || rm -f "${COMBINED_TMP}"
}
trap cleanup EXIT

OPENSSL_LOG="$(mktemp "${CERT_DIR}/.openssl-stderr.XXXXXX")"

# openssl's stderr is captured and only surfaced when the command fails.
if ! openssl req \
    -x509 \
    -newkey rsa:2048 \
    -keyout "${KEY_FILE}" \
    -out "${CERT_FILE}" \
    -days 3650 \
    -nodes \
    -subj "/CN=${CN}" \
    -addext "subjectAltName=DNS:${CN},DNS:localhost,IP:127.0.0.1,IP:0.0.0.0" \
    2>"${OPENSSL_LOG}"; then
    echo "openssl req failed:" >&2
    cat "${OPENSSL_LOG}" >&2 || true
    exit 1
fi
rm -f "${OPENSSL_LOG}"
OPENSSL_LOG=""

# Build the combined PEM in a scratch file first. It is renamed into place
# only once it is complete and has its final owner/mode, so a partially
# written stunnel.pem can never satisfy the idempotency guard above or the
# ConditionPathExists check in the generator unit.
COMBINED_TMP="$(mktemp "${CERT_DIR}/.stunnel.pem.XXXXXX")"
cat "${CERT_FILE}" "${KEY_FILE}" >"${COMBINED_TMP}"

TARGET_GROUP="root"
if getent group stunnel4 >/dev/null 2>&1; then
    TARGET_GROUP="stunnel4"
fi
# The directory gets the same group as the files; with mode 0750 the group
# would otherwise be unable to traverse into it and the 0640 group-read bit
# on the files would have no effect.
chown "root:${TARGET_GROUP}" "${CERT_DIR}"
chown "root:${TARGET_GROUP}" "${KEY_FILE}" "${CERT_FILE}" "${COMBINED_TMP}"
chmod 0640 "${KEY_FILE}" "${CERT_FILE}" "${COMBINED_TMP}"

# Same-directory rename: atomic on POSIX filesystems.
mv -f "${COMBINED_TMP}" "${COMBINED_FILE}"
COMBINED_TMP=""

echo "Generated self-signed TLS cert at ${CERT_FILE} for CN=${CN} (group=${TARGET_GROUP})." >&2
24 changes: 24 additions & 0 deletions ic-os/components/guestos/ai-agent/ic-ai-agent-tls.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# stunnel configuration for the IC AI agent TLS reverse proxy.
#
# Terminates TLS on :::11500 with a self-signed certificate generated at
# first boot by generate-ic-ai-agent-tls-cert.service, and forwards
# plaintext traffic to the local agent backend on 127.0.0.1:11501.

# Run in foreground so systemd supervises the main PID directly.
foreground = yes
pid =

# Drop privileges after binding to the listener.
setuid = stunnel4
setgid = stunnel4

# Combined PEM (cert + key).
cert = /var/lib/ic-ai-agent-tls/stunnel.pem

# We don't authenticate clients (untrusted self-signed cert anyway).
verify = 0

[ic-ai-agent]
# IPv6 wildcard with dual-stack enabled accepts both v4 and v6.
accept = :::11500
connect = 127.0.0.1:11501
27 changes: 27 additions & 0 deletions ic-os/components/guestos/ai-agent/ic-ai-agent-tls.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[Unit]
Description=stunnel TLS terminator in front of the IC AI agent
Documentation=man:stunnel(8)

# Mirrors the design of ollama-tls.service: stunnel runs independently of
# ic-ai-agent.service and just listens on 11500/tcp, forwarding plaintext
# to 127.0.0.1:11501. If the agent isn't running, clients get a refused
# upstream connection but the TLS listener stays up.
#
# Do NOT use BindsTo=ic-ai-agent.service; the agent service is explicitly
# disabled in the GuestOS Dockerfile and BindsTo would cause systemd to
# stop this unit at boot.
After=network-online.target generate-ic-ai-agent-tls-cert.service
Wants=network-online.target generate-ic-ai-agent-tls-cert.service

# The cert is mandatory. If the generator failed and the file is missing,
# this condition makes systemd skip the unit (a skipped start, not a failure).
ConditionPathExists=/var/lib/ic-ai-agent-tls/stunnel.pem

[Service]
Type=simple
ExecStart=/usr/bin/stunnel /etc/stunnel/ic-ai-agent-tls.conf

Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
49 changes: 49 additions & 0 deletions ic-os/components/guestos/ai-agent/ic-ai-agent.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[Unit]
Description=IC AI agent orchestration HTTP API (disabled by default)
Documentation=https://github.com/anomalyco/opencode

# Disabled by default in the GuestOS Dockerfile. Started on demand by
# manage-ollama.sh (driven by the orchestrator's AiNodeManager) together
# with the TLS cert generator and the stunnel proxy. Regular non-AI nodes
# never bring this up.

After=network-online.target
Wants=network-online.target

[Service]
Type=simple

# Run as the dedicated `ic-ai-agent` system user. That user is set up in
# the GuestOS Dockerfile and joined to:
#
# * `nonconfidential` — read access to /var/lib/ic/data/ic_state
# * `ic-registry-local-store` — read access to the local registry store
#
# We deliberately do NOT use DynamicUser=yes here: the IC observability
# tools (`ic_state`, `ic_metrics`, `ic_logs`) need to read directories
# owned by `ic-replica:nonconfidential`, which a dynamic UID (with no
# static group memberships) cannot do.
User=ic-ai-agent
Group=ic-ai-agent

# Bind to loopback only on a non-privileged port. External clients reach
# the service through the stunnel TLS terminator on :::11500 (see
# /etc/systemd/system/ic-ai-agent-tls.service), which forwards plaintext to
# 127.0.0.1:11501.
Environment=IC_AI_AGENT_ADDR=127.0.0.1:11501

ExecStart=/opt/ic/bin/ic-ai-agent --addr 127.0.0.1:11501

Restart=on-failure
RestartSec=5s

# The agent is a stateless HTTP server: no on-disk state of its own.
# Light hardening that doesn't conflict with reading the replica's
# state/registry trees.
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
NoNewPrivileges=yes

[Install]
WantedBy=multi-user.target
20 changes: 16 additions & 4 deletions ic-os/components/guestos/ollama/manage-ollama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,28 @@ ACTION="$1"
# of this up.
case "$ACTION" in
start)
# Cert must exist before stunnel starts; service is `Type=oneshot`
# with `RemainAfterExit=yes`, so `start` is idempotent.
# Cert must exist before stunnel starts; cert services are
# `Type=oneshot` with `RemainAfterExit=yes`, so `start` is
# idempotent.
/bin/systemctl start generate-ollama-tls-cert.service
/bin/systemctl start ollama-tls.service
/bin/systemctl start ollama.service

# The IC AI agent service runs alongside ollama on AI nodes,
# exposing an HTTP orchestration API on a separate TLS port
# (11500). It's started with the same lifecycle as ollama: any
# node that flips to AI mode brings both up, any node that flips
# away brings both down.
/bin/systemctl start generate-ic-ai-agent-tls-cert.service
/bin/systemctl start ic-ai-agent-tls.service
/bin/systemctl start ic-ai-agent.service
;;
stop)
# Stop in reverse order. The cert generator is `RemainAfterExit=yes`
# and has nothing to tear down; leave it active so the cert remains
# Stop in reverse order. Cert generators are `RemainAfterExit=yes`
# and have nothing to tear down; leave them active so certs remain
# valid for the next start.
/bin/systemctl stop ic-ai-agent.service
/bin/systemctl stop ic-ai-agent-tls.service
/bin/systemctl stop ollama.service
/bin/systemctl stop ollama-tls.service
;;
Expand Down
25 changes: 24 additions & 1 deletion ic-os/guestos/context/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,10 @@ RUN systemctl disable \
fstrim.timer \
ollama.service \
ollama-tls.service \
generate-ollama-tls-cert.service
generate-ollama-tls-cert.service \
ic-ai-agent.service \
ic-ai-agent-tls.service \
generate-ic-ai-agent-tls-cert.service

# ------ GUESTOS WORK --------------------------------------------

Expand Down Expand Up @@ -254,6 +257,26 @@ RUN addgroup --system ollama && \
chown ollama:ollama /var/lib/ollama && \
chmod 0750 /var/lib/ollama

# The "ic-ai-agent" account. Used to run the `ic-ai-agent` HTTP service
# (started on AI nodes by manage-ollama.sh, driven by the
# orchestrator's AiNodeManager).
#
# The agent's IC-observability tools need read access to two on-disk
# trees written by the state-sync replica:
#
# * /var/lib/ic/data/ic_state (group nonconfidential)
# * /var/lib/ic/data/ic_registry_local_store
# (group ic-registry-local-store)
#
# Both are set up by setup-permissions.sh as group-readable. We add
# `ic-ai-agent` to those groups here so the unix permission check passes;
# without this the service runs under DynamicUser/an isolated UID and
# gets EACCES walking the checkpoints directory.
RUN addgroup --system ic-ai-agent && \
adduser --system --disabled-password --shell /usr/sbin/nologin --no-create-home --ingroup ic-ai-agent -c "IC AI Agent" ic-ai-agent && \
adduser ic-ai-agent nonconfidential && \
adduser ic-ai-agent ic-registry-local-store

# ------ INSTALL SCRIPTS -----------------------------------------

# Install IC binaries and other data late -- this means everything above
Expand Down
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def image_deps(mode, malicious = False):
"//rs/ic_os/release:custom_metrics": "/opt/ic/bin/custom_metrics:0755", # Collects and reports custom metrics.
"//rs/ic_os/remote_attestation/server": "/opt/ic/bin/remote_attestation_server:0755", # Remote Attestation service
"//rs/ic_os/guest_upgrade/client": "/opt/ic/bin/guest_upgrade_client:0755", # Disk encryption key exchange client
"//rs/ai_agent:ic-ai-agent": "/opt/ic/bin/ic-ai-agent:0755", # AI agent orchestration HTTP API (started on AI nodes)

# additional libraries to install
"//rs/ic_os/release:nss_icos": "/usr/lib/x86_64-linux-gnu/libnss_icos.so.2:0644", # Allows referring to the guest IPv6 by name guestos from host, and host as hostos from guest.
Expand Down
1 change: 1 addition & 0 deletions publish/binaries/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ALL_BINARIES = {
# Keep sorted
"canister_sandbox": "//rs/canister_sandbox",
"compiler_sandbox": "//rs/canister_sandbox:compiler_sandbox",
"ic-ai-agent": "//rs/ai_agent:ic-ai-agent",
"ic-btc-adapter": "//rs/bitcoin/adapter:ic-btc-adapter",
"replica": "//rs/replica:replica",
"rate-limiting-canister-client": "//rs/boundary_node/rate_limits/canister_client:rate-limiting-canister-client",
Expand Down
Loading
Loading