Skip to content

Commit 979f700

Browse files
committed
feat(orchestrator): Implement Phase 1 ADF cost optimisation with token and latency tracking Refs #116
- Extend CostTracker with ExecutionMetrics for per-run token/latency/cost capture - Add AgentMetrics for aggregated per-agent statistics - Create metrics_persistence module for terraphim_persistence integration - Implement InMemoryMetricsPersistence for testing - Add fleet-wide metrics aggregation - Export new types from lib.rs Phase 1 complete: Per-agent cost/token/latency metrics captured and ready for storage.
1 parent 8b1f896 commit 979f700

3 files changed

Lines changed: 718 additions & 3 deletions

File tree

crates/terraphim_orchestrator/src/cost_tracker.rs

Lines changed: 342 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use chrono::{Datelike, Utc};
1+
use chrono::{DateTime, Datelike, Utc};
22
use serde::{Deserialize, Serialize};
33
use std::collections::HashMap;
44
use std::fmt;
@@ -53,6 +53,191 @@ impl fmt::Display for BudgetVerdict {
5353
}
5454
}
5555

56+
/// Per-execution metrics for an agent run.
57+
#[derive(Debug, Clone, Serialize, Deserialize)]
58+
pub struct ExecutionMetrics {
59+
/// Timestamp when the execution started.
60+
pub started_at: DateTime<Utc>,
61+
/// Timestamp when the execution completed.
62+
pub completed_at: DateTime<Utc>,
63+
/// Input token count (prompt tokens).
64+
pub input_tokens: u64,
65+
/// Output token count (completion tokens).
66+
pub output_tokens: u64,
67+
/// Total token count (input + output).
68+
pub total_tokens: u64,
69+
/// Latency in milliseconds.
70+
pub latency_ms: u64,
71+
/// Estimated cost in USD for this execution.
72+
pub estimated_cost_usd: f64,
73+
/// Whether the execution succeeded.
74+
pub success: bool,
75+
/// Optional error message if execution failed.
76+
pub error_message: Option<String>,
77+
/// Model used for this execution.
78+
pub model: Option<String>,
79+
/// Provider used for this execution.
80+
pub provider: Option<String>,
81+
}
82+
83+
impl ExecutionMetrics {
84+
/// Create a new execution metrics record.
85+
pub fn new(started_at: DateTime<Utc>) -> Self {
86+
Self {
87+
started_at,
88+
completed_at: started_at,
89+
input_tokens: 0,
90+
output_tokens: 0,
91+
total_tokens: 0,
92+
latency_ms: 0,
93+
estimated_cost_usd: 0.0,
94+
success: true,
95+
error_message: None,
96+
model: None,
97+
provider: None,
98+
}
99+
}
100+
101+
/// Mark the execution as completed with metrics.
102+
pub fn complete(
103+
mut self,
104+
input_tokens: u64,
105+
output_tokens: u64,
106+
cost_usd: f64,
107+
success: bool,
108+
) -> Self {
109+
self.completed_at = Utc::now();
110+
self.input_tokens = input_tokens;
111+
self.output_tokens = output_tokens;
112+
self.total_tokens = input_tokens + output_tokens;
113+
self.latency_ms = (self.completed_at - self.started_at).num_milliseconds() as u64;
114+
self.estimated_cost_usd = cost_usd;
115+
self.success = success;
116+
self
117+
}
118+
119+
/// Mark execution as failed with error message.
120+
pub fn fail(mut self, error: String) -> Self {
121+
self.completed_at = Utc::now();
122+
self.success = false;
123+
self.error_message = Some(error);
124+
self.latency_ms = (self.completed_at - self.started_at).num_milliseconds() as u64;
125+
self
126+
}
127+
128+
/// Set model and provider information.
129+
pub fn with_model(mut self, model: String, provider: String) -> Self {
130+
self.model = Some(model);
131+
self.provider = Some(provider);
132+
self
133+
}
134+
}
135+
136+
/// Aggregated metrics for an agent over time.
137+
#[derive(Debug, Clone, Serialize, Deserialize)]
138+
pub struct AgentMetrics {
139+
/// Agent name.
140+
pub agent_name: String,
141+
/// Total number of executions.
142+
pub total_executions: u64,
143+
/// Number of successful executions.
144+
pub successful_executions: u64,
145+
/// Number of failed executions.
146+
pub failed_executions: u64,
147+
/// Total input tokens across all executions.
148+
pub total_input_tokens: u64,
149+
/// Total output tokens across all executions.
150+
pub total_output_tokens: u64,
151+
/// Total tokens across all executions.
152+
pub total_tokens: u64,
153+
/// Total latency in milliseconds across all executions.
154+
pub total_latency_ms: u64,
155+
/// Total estimated cost in USD across all executions.
156+
pub total_cost_usd: f64,
157+
/// Average tokens per execution.
158+
pub avg_tokens_per_execution: f64,
159+
/// Average latency per execution in milliseconds.
160+
pub avg_latency_ms: f64,
161+
/// Average cost per execution in USD.
162+
pub avg_cost_usd: f64,
163+
/// Success rate (0.0 - 1.0).
164+
pub success_rate: f64,
165+
/// Timestamp of first execution.
166+
pub first_execution_at: Option<DateTime<Utc>>,
167+
/// Timestamp of most recent execution.
168+
pub last_execution_at: Option<DateTime<Utc>>,
169+
/// Recent execution history (last 100 executions).
170+
pub recent_executions: Vec<ExecutionMetrics>,
171+
}
172+
173+
impl AgentMetrics {
174+
/// Create new agent metrics for the given agent.
175+
pub fn new(agent_name: String) -> Self {
176+
Self {
177+
agent_name,
178+
total_executions: 0,
179+
successful_executions: 0,
180+
failed_executions: 0,
181+
total_input_tokens: 0,
182+
total_output_tokens: 0,
183+
total_tokens: 0,
184+
total_latency_ms: 0,
185+
total_cost_usd: 0.0,
186+
avg_tokens_per_execution: 0.0,
187+
avg_latency_ms: 0.0,
188+
avg_cost_usd: 0.0,
189+
success_rate: 0.0,
190+
first_execution_at: None,
191+
last_execution_at: None,
192+
recent_executions: Vec::with_capacity(100),
193+
}
194+
}
195+
196+
/// Record a new execution and update aggregated metrics.
197+
pub fn record_execution(&mut self, execution: ExecutionMetrics) {
198+
self.total_executions += 1;
199+
200+
if execution.success {
201+
self.successful_executions += 1;
202+
self.total_input_tokens += execution.input_tokens;
203+
self.total_output_tokens += execution.output_tokens;
204+
self.total_tokens += execution.total_tokens;
205+
self.total_cost_usd += execution.estimated_cost_usd;
206+
} else {
207+
self.failed_executions += 1;
208+
}
209+
210+
self.total_latency_ms += execution.latency_ms;
211+
212+
// Update timestamps
213+
if self.first_execution_at.is_none() {
214+
self.first_execution_at = Some(execution.started_at);
215+
}
216+
self.last_execution_at = Some(execution.completed_at);
217+
218+
// Update averages
219+
self.avg_tokens_per_execution = self.total_tokens as f64 / self.total_executions as f64;
220+
self.avg_latency_ms = self.total_latency_ms as f64 / self.total_executions as f64;
221+
self.avg_cost_usd = self.total_cost_usd / self.total_executions as f64;
222+
self.success_rate = self.successful_executions as f64 / self.total_executions as f64;
223+
224+
// Add to recent executions, keeping only last 100
225+
self.recent_executions.push(execution);
226+
if self.recent_executions.len() > 100 {
227+
self.recent_executions.remove(0);
228+
}
229+
}
230+
231+
/// Get cost efficiency metric (tokens per dollar).
232+
pub fn tokens_per_dollar(&self) -> f64 {
233+
if self.total_cost_usd > 0.0 {
234+
self.total_tokens as f64 / self.total_cost_usd
235+
} else {
236+
0.0
237+
}
238+
}
239+
}
240+
56241
/// Internal cost tracking for a single agent.
57242
struct AgentCost {
58243
/// Spend in hundredths-of-a-cent (1 USD = 10_000 sub-cents).
@@ -137,16 +322,19 @@ pub struct CostSnapshot {
137322
pub verdict: String,
138323
}
139324

140-
/// Tracks per-agent spend with budget enforcement.
325+
/// Tracks per-agent spend with budget enforcement and detailed metrics.
141326
pub struct CostTracker {
142327
agents: HashMap<String, AgentCost>,
328+
/// Per-agent detailed metrics.
329+
metrics: HashMap<String, AgentMetrics>,
143330
}
144331

145332
impl CostTracker {
146333
/// Create a new empty CostTracker.
147334
pub fn new() -> Self {
148335
Self {
149336
agents: HashMap::new(),
337+
metrics: HashMap::new(),
150338
}
151339
}
152340

@@ -155,6 +343,10 @@ impl CostTracker {
155343
pub fn register(&mut self, agent_name: &str, budget_monthly_cents: Option<u64>) {
156344
self.agents
157345
.insert(agent_name.to_string(), AgentCost::new(budget_monthly_cents));
346+
self.metrics.insert(
347+
agent_name.to_string(),
348+
AgentMetrics::new(agent_name.to_string()),
349+
);
158350
}
159351

160352
/// Record a cost for an agent and return the budget verdict.
@@ -166,6 +358,32 @@ impl CostTracker {
166358
}
167359
}
168360

361+
/// Record execution metrics for an agent.
362+
pub fn record_execution(&mut self, agent_name: &str, execution: ExecutionMetrics) {
363+
// Record the cost
364+
let _ = self.record_cost(agent_name, execution.estimated_cost_usd);
365+
366+
// Record the detailed metrics
367+
if let Some(metrics) = self.metrics.get_mut(agent_name) {
368+
metrics.record_execution(execution);
369+
}
370+
}
371+
372+
/// Get metrics for a specific agent.
373+
pub fn get_metrics(&self, agent_name: &str) -> Option<&AgentMetrics> {
374+
self.metrics.get(agent_name)
375+
}
376+
377+
/// Get mutable metrics for a specific agent.
378+
pub fn get_metrics_mut(&mut self, agent_name: &str) -> Option<&mut AgentMetrics> {
379+
self.metrics.get_mut(agent_name)
380+
}
381+
382+
/// Get all agent metrics.
383+
pub fn all_metrics(&self) -> &HashMap<String, AgentMetrics> {
384+
&self.metrics
385+
}
386+
169387
/// Check budget status for a specific agent.
170388
/// Returns Uncapped for unregistered agents.
171389
pub fn check(&self, agent_name: &str) -> BudgetVerdict {
@@ -222,6 +440,32 @@ impl CostTracker {
222440
.map(|agent_cost| agent_cost.spent_usd())
223441
.sum()
224442
}
443+
444+
/// Get total fleet metrics.
445+
pub fn fleet_metrics(&self) -> AgentMetrics {
446+
let mut fleet = AgentMetrics::new("fleet".to_string());
447+
448+
for metrics in self.metrics.values() {
449+
fleet.total_executions += metrics.total_executions;
450+
fleet.successful_executions += metrics.successful_executions;
451+
fleet.failed_executions += metrics.failed_executions;
452+
fleet.total_input_tokens += metrics.total_input_tokens;
453+
fleet.total_output_tokens += metrics.total_output_tokens;
454+
fleet.total_tokens += metrics.total_tokens;
455+
fleet.total_latency_ms += metrics.total_latency_ms;
456+
fleet.total_cost_usd += metrics.total_cost_usd;
457+
}
458+
459+
if fleet.total_executions > 0 {
460+
fleet.avg_tokens_per_execution =
461+
fleet.total_tokens as f64 / fleet.total_executions as f64;
462+
fleet.avg_latency_ms = fleet.total_latency_ms as f64 / fleet.total_executions as f64;
463+
fleet.avg_cost_usd = fleet.total_cost_usd / fleet.total_executions as f64;
464+
fleet.success_rate = fleet.successful_executions as f64 / fleet.total_executions as f64;
465+
}
466+
467+
fleet
468+
}
225469
}
226470

227471
impl Default for CostTracker {
@@ -477,4 +721,100 @@ mod tests {
477721
snapshot.spent_usd
478722
);
479723
}
724+
725+
#[test]
726+
fn test_execution_metrics_recording() {
727+
let mut tracker = CostTracker::new();
728+
tracker.register("test-agent", Some(10000));
729+
730+
let execution = ExecutionMetrics::new(Utc::now())
731+
.complete(100, 50, 0.005, true)
732+
.with_model("gpt-4".to_string(), "openai".to_string());
733+
734+
tracker.record_execution("test-agent", execution);
735+
736+
let metrics = tracker.get_metrics("test-agent").unwrap();
737+
assert_eq!(metrics.total_executions, 1);
738+
assert_eq!(metrics.successful_executions, 1);
739+
assert_eq!(metrics.total_input_tokens, 100);
740+
assert_eq!(metrics.total_output_tokens, 50);
741+
assert_eq!(metrics.total_tokens, 150);
742+
assert!((metrics.total_cost_usd - 0.005).abs() < 0.0001);
743+
assert_eq!(metrics.success_rate, 1.0);
744+
assert_eq!(metrics.recent_executions.len(), 1);
745+
}
746+
747+
#[test]
748+
fn test_agent_metrics_aggregation() {
749+
let mut tracker = CostTracker::new();
750+
tracker.register("test-agent", Some(10000));
751+
752+
// Record multiple executions
753+
for i in 0..5 {
754+
let execution = ExecutionMetrics::new(Utc::now()).complete(
755+
100 + i as u64 * 10,
756+
50 + i as u64 * 5,
757+
0.005,
758+
true,
759+
);
760+
tracker.record_execution("test-agent", execution);
761+
}
762+
763+
let metrics = tracker.get_metrics("test-agent").unwrap();
764+
assert_eq!(metrics.total_executions, 5);
765+
assert_eq!(metrics.successful_executions, 5);
766+
assert_eq!(metrics.total_input_tokens, 100 + 110 + 120 + 130 + 140);
767+
assert_eq!(metrics.total_output_tokens, 50 + 55 + 60 + 65 + 70);
768+
assert!(metrics.avg_tokens_per_execution > 0.0);
769+
assert!(metrics.avg_cost_usd > 0.0);
770+
assert_eq!(metrics.success_rate, 1.0);
771+
}
772+
773+
#[test]
774+
fn test_failed_execution_recording() {
775+
let mut tracker = CostTracker::new();
776+
tracker.register("test-agent", Some(10000));
777+
778+
let execution = ExecutionMetrics::new(Utc::now()).fail("API timeout".to_string());
779+
780+
tracker.record_execution("test-agent", execution);
781+
782+
let metrics = tracker.get_metrics("test-agent").unwrap();
783+
assert_eq!(metrics.total_executions, 1);
784+
assert_eq!(metrics.successful_executions, 0);
785+
assert_eq!(metrics.failed_executions, 1);
786+
assert_eq!(metrics.success_rate, 0.0);
787+
}
788+
789+
#[test]
790+
fn test_fleet_metrics() {
791+
let mut tracker = CostTracker::new();
792+
tracker.register("agent-1", Some(10000));
793+
tracker.register("agent-2", Some(10000));
794+
795+
let execution1 = ExecutionMetrics::new(Utc::now()).complete(100, 50, 0.01, true);
796+
let execution2 = ExecutionMetrics::new(Utc::now()).complete(200, 100, 0.02, true);
797+
798+
tracker.record_execution("agent-1", execution1);
799+
tracker.record_execution("agent-2", execution2);
800+
801+
let fleet = tracker.fleet_metrics();
802+
assert_eq!(fleet.total_executions, 2);
803+
assert_eq!(fleet.total_input_tokens, 300);
804+
assert_eq!(fleet.total_output_tokens, 150);
805+
assert!((fleet.total_cost_usd - 0.03).abs() < 0.001);
806+
}
807+
808+
#[test]
809+
fn test_tokens_per_dollar() {
810+
let mut metrics = AgentMetrics::new("test".to_string());
811+
metrics.total_tokens = 1000;
812+
metrics.total_cost_usd = 0.01;
813+
814+
assert_eq!(metrics.tokens_per_dollar(), 100000.0);
815+
816+
// Test zero cost case
817+
metrics.total_cost_usd = 0.0;
818+
assert_eq!(metrics.tokens_per_dollar(), 0.0);
819+
}
480820
}

0 commit comments

Comments
 (0)