@@ -262,15 +262,43 @@ type APIConfig struct {
262262type ObservabilityConfig struct {
263263 // Tracing holds the settings for distributed tracing (YAML key "tracing").
264264 Tracing TracingConfig `yaml:"tracing"`
265- // Metrics configuration for Prometheus metrics endpoint
265+
266+ // Metrics holds the metrics settings (YAML key "metrics"): the Prometheus endpoint toggle and windowed metrics collection.
266267 Metrics MetricsConfig `yaml:"metrics"`
267268}
268269
269- // MetricsConfig represents configuration for metrics endpoint
270+ // MetricsConfig groups the metrics settings: whether the Prometheus metrics endpoint is served, plus windowed metrics collection.
270271type MetricsConfig struct {
271272 // Enabled controls whether the Prometheus metrics endpoint is served.
272273 // When omitted, defaults to true. NOTE(review): presumably a pointer so that an omitted key (nil) can be told apart from an explicit false; the default is not applied in this struct — confirm where it is set.
273274 Enabled * bool `yaml:"enabled,omitempty"`
275+
276+ // WindowedMetrics holds the settings for windowed metrics collection for load balancing; collection itself is switched on by its own Enabled field.
277+ WindowedMetrics WindowedMetricsConfig `yaml:"windowed_metrics"`
278+ }
279+
280+ // WindowedMetricsConfig holds the settings for time-windowed metrics collection.
281+ type WindowedMetricsConfig struct {
282+ // Enabled turns windowed metrics collection on or off. It is a plain bool, so its zero value is false.
283+ Enabled bool `yaml:"enabled"`
284+
285+ // TimeWindows lists the time windows to track, each written as a duration string (e.g., "1m", "5m", "15m", "1h", "24h").
286+ // Default: ["1m", "5m", "15m", "1h", "24h"]. NOTE(review): the default is not applied in this struct, and the strings are presumably parsed with time.ParseDuration — confirm both at the point of use.
287+ TimeWindows []string `yaml:"time_windows,omitempty"`
288+
289+ // UpdateInterval is how often the windowed metrics are recomputed, written as a duration string (e.g., "10s", "30s").
290+ // Default: "10s". NOTE(review): the default is not applied in this struct — confirm where an empty value is replaced.
291+ UpdateInterval string `yaml:"update_interval,omitempty"`
292+
293+ // ModelMetrics enables model-level metrics tracking.
294+ ModelMetrics bool `yaml:"model_metrics"`
295+
296+ // QueueDepthEstimation enables queue depth estimation.
297+ QueueDepthEstimation bool `yaml:"queue_depth_estimation"`
298+
299+ // MaxModels is the maximum number of models to track (to prevent cardinality explosion).
300+ // Default: 100. NOTE(review): presumably 0 means "use the default" — confirm, since the default is not applied in this struct.
301+ MaxModels int `yaml:"max_models,omitempty"`
274302}
275303
276304// TracingConfig represents configuration for distributed tracing
0 commit comments