Skip to content

Commit 0c3245a

Browse files
committed
feat(metrics): add histogram metric type for packet size distribution
Replace 20 individual counter metrics for packet size buckets with two native Prometheus histograms (RX/TX). This maps SAI port stat fields to cumulative histogram buckets via a new `histogram` transform in the metrics config.
1 parent f9f1525 commit 0c3245a

5 files changed

Lines changed: 254 additions & 189 deletions

File tree

docs/usage/metrics.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ These are defined in YAML and can be customized or extended by operators. The de
6262
| `sonic_switch_interface_fec_frames_total` | counter | `interface`, `type` | FEC frame counters (correctable, uncorrectable, symbol_errors) |
6363
| `sonic_switch_interface_queue_length` | gauge | `interface` | Current output queue length |
6464
| `sonic_switch_interface_pfc_packets_total` | counter | `interface`, `direction`, `priority` | PFC packets per priority (0-7) |
65-
| `sonic_switch_interface_packet_size_total` | counter | `interface`, `direction`, `size` | Packets by size bucket |
65+
| `sonic_switch_interface_rx_packet_size_bytes` | histogram | `interface` | RX packet size distribution (buckets: 64, 127, 255, 511, 1023, 1518, 2047, 4095, 9216, 16383) |
66+
| `sonic_switch_interface_tx_packet_size_bytes` | histogram | `interface` | TX packet size distribution (buckets: 64, 127, 255, 511, 1023, 1518, 2047, 4095, 9216, 16383) |
6667
| `sonic_switch_interface_anomaly_packets_total` | counter | `interface`, `type` | Anomalous packets (undersize, oversize, fragments, jabbers, unknown_protos) |
6768

6869
## Metrics configuration schema
@@ -99,7 +100,7 @@ Each entry maps a Redis hash field (or set of fields) to a Prometheus metric.
99100
| `field` | no | — | Specific Redis hash field name. Mutually exclusive with `field_pattern` |
100101
| `field_pattern` | no | — | Set to `*` to iterate all hash fields. Mutually exclusive with `field` |
101102
| `metric` | yes | — | Prometheus metric name |
102-
| `type` | yes | — | `gauge` or `counter` |
103+
| `type` | yes | — | `gauge`, `counter`, or `histogram` |
103104
| `help` | no | — | Metric help string |
104105
| `value` | no | — | Fixed metric value (ignores field value). Use for `_info` pattern metrics |
105106
| `labels` | no | — | Map of label names to [value templates](#label-value-templates) |
@@ -191,6 +192,29 @@ transform:
191192
dom_flag_severity: true
192193
```
193194

195+
#### `histogram`
196+
197+
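Maps multiple Redis hash fields to a single Prometheus histogram. Each entry in `buckets` maps an upper bound (a number, parsed as float64) to the Redis hash field that holds the count for that bucket. The transform reads each field, parses its value as an unsigned integer, and sums the per-bucket counts in ascending order of upper bound to produce the cumulative bucket counts Prometheus expects. A field that is missing from the hash or whose value does not parse contributes zero. The resulting histogram always reports a `_sum` of 0 because SAI counters don't provide total bytes, so average-size queries based on `_sum` are not meaningful — but bucket-based percentile queries and heatmap visualizations still work. Requires `type: "histogram"`.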
198+
199+
```yaml
200+
- metric: sonic_switch_interface_rx_packet_size_bytes
201+
type: histogram
202+
help: "RX packet size distribution"
203+
labels:
204+
interface: "$port_name"
205+
transform:
206+
histogram:
207+
buckets:
208+
64: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
209+
127: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
210+
255: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
211+
511: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
212+
1023: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
213+
1518: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
214+
```
215+
216+
This emits `_bucket`, `_count`, and `_sum` series automatically — Prometheus handles the histogram suffixes.
217+
194218
## Examples
195219

196220
### Adding a new counter from COUNTERS_DB

internal/agent/metrics/config.go

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ package metrics
55

66
import (
	"embed"
	"encoding/json"
	"fmt"
	"math"
	"os"
	"regexp"
	"strconv"

	"sigs.k8s.io/yaml"
)
@@ -76,6 +78,8 @@ type Transform struct {
7678
RegexCapture *RegexCapture `json:"regex_capture,omitempty"`
7779
// DOMFlagSeverity computes a severity rollup (0=ok, 1=warning, 2=alarm) from all hash fields.
7880
DOMFlagSeverity bool `json:"dom_flag_severity,omitempty"`
81+
// Histogram maps upper bounds to Redis field names, emitting a Prometheus histogram.
82+
Histogram *HistogramBuckets `json:"histogram,omitempty"`
7983
}
8084

8185
// RegexCapture defines a regex-based field name matching transform.
@@ -86,6 +90,35 @@ type RegexCapture struct {
8690
Pattern string `json:"pattern"`
8791
}
8892

93+
// HistogramBuckets defines a histogram transform that maps Redis field names to
// Prometheus histogram bucket upper bounds.
type HistogramBuckets struct {
	// Buckets maps upper bounds (in bytes, seconds, etc.) to Redis hash field names.
	// Values are read, parsed as uint64, and accumulated into cumulative histogram buckets.
	Buckets map[float64]string `json:"buckets"`
}

// UnmarshalJSON implements json.Unmarshaler for HistogramBuckets.
//
// sigs.k8s.io/yaml converts YAML→JSON, so numeric YAML keys arrive as JSON
// string keys. This method parses those string keys back to float64 and
// validates each entry. It returns an error when:
//   - a key is not a valid number,
//   - a key is NaN or ±Inf (strconv.ParseFloat accepts those spellings, but a
//     NaN map key can never be looked up again and +Inf would collide with the
//     bucket Prometheus adds implicitly),
//   - a bucket has an empty field name,
//   - two keys parse to the same upper bound (e.g. "64" and "64.0"), which
//     would otherwise be resolved by map iteration order.
//
// On error hb is left unmodified.
func (hb *HistogramBuckets) UnmarshalJSON(data []byte) error {
	var raw struct {
		Buckets map[string]string `json:"buckets"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Build into a local map so a failure part-way through does not leave the
	// receiver half populated.
	buckets := make(map[float64]string, len(raw.Buckets))
	for k, v := range raw.Buckets {
		f, err := strconv.ParseFloat(k, 64)
		if err != nil {
			return fmt.Errorf("histogram bucket key %q is not a valid number: %w", k, err)
		}
		if math.IsNaN(f) || math.IsInf(f, 0) {
			return fmt.Errorf("histogram bucket key %q must be a finite number", k)
		}
		if v == "" {
			return fmt.Errorf("histogram bucket key %q has an empty field name", k)
		}
		if _, dup := buckets[f]; dup {
			return fmt.Errorf("histogram bucket key %q duplicates upper bound %v", k, f)
		}
		buckets[f] = v
	}

	hb.Buckets = buckets
	return nil
}
121+
89122
// effectiveSeparator returns the key separator, defaulting to "|".
90123
func (m *MetricMapping) effectiveSeparator() string {
91124
if m.KeySeparator != "" {
@@ -135,8 +168,13 @@ func validateConfig(cfg *MetricsConfig) error {
135168
if f.Metric == "" {
136169
return fmt.Errorf("metrics[%d].fields[%d]: metric is required", i, j)
137170
}
138-
if f.Type != "gauge" && f.Type != "counter" {
139-
return fmt.Errorf("metrics[%d].fields[%d]: type must be 'gauge' or 'counter', got %q", i, j, f.Type)
171+
if f.Type != metricTypeGauge && f.Type != metricTypeCounter && f.Type != metricTypeHistogram {
172+
return fmt.Errorf("metrics[%d].fields[%d]: type must be 'gauge', 'counter', or 'histogram', got %q", i, j, f.Type)
173+
}
174+
if f.Type == metricTypeHistogram {
175+
if f.Transform == nil || f.Transform.Histogram == nil || len(f.Transform.Histogram.Buckets) == 0 {
176+
return fmt.Errorf("metrics[%d].fields[%d]: histogram type requires transform.histogram.buckets", i, j)
177+
}
140178
}
141179
if f.Field != "" && f.FieldPattern != "" {
142180
return fmt.Errorf("metrics[%d].fields[%d]: field and field_pattern are mutually exclusive", i, j)

internal/agent/metrics/config_collector.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ func (c *ConfigCollector) Collect(ch chan<- prometheus.Metric) {
136136
continue
137137
}
138138

139+
// histogram operates on the whole hash — reads specific bucket fields
140+
if fm.Type == "histogram" && fm.Transform != nil && fm.Transform.Histogram != nil {
141+
desc := c.descs[fm.Metric]
142+
labels := resolveLabels(fm.Labels, keySuffix, portName, "", fields)
143+
collectHistogram(ch, desc, fm.Transform.Histogram, fields, labels)
144+
continue
145+
}
146+
139147
if fm.FieldPattern == "*" {
140148
// Iterate all fields
141149
c.collectAllFields(ch, fi, fm, fields, keySuffix, portName)
@@ -311,3 +319,47 @@ func appendUnique(slice []string, items ...string) []string {
311319
}
312320
return slice
313321
}
322+
323+
// collectHistogram reads bucket fields from the hash, accumulates cumulative counts,
324+
// and emits a prometheus.MustNewConstHistogram.
325+
func collectHistogram(
326+
ch chan<- prometheus.Metric,
327+
desc *prometheus.Desc,
328+
hb *HistogramBuckets,
329+
hashFields map[string]string,
330+
labels []string,
331+
) {
332+
// Sort upper bounds
333+
bounds := make([]float64, 0, len(hb.Buckets))
334+
for ub := range hb.Buckets {
335+
bounds = append(bounds, ub)
336+
}
337+
sort.Float64s(bounds)
338+
339+
// Read non-cumulative counts from Redis and accumulate into cumulative buckets.
340+
var totalCount uint64
341+
cumBuckets := make(map[float64]uint64, len(bounds))
342+
var cumulative uint64
343+
for _, ub := range bounds {
344+
fieldName := hb.Buckets[ub]
345+
val, ok := hashFields[fieldName]
346+
if !ok {
347+
cumBuckets[ub] = cumulative
348+
continue
349+
}
350+
n, err := strconv.ParseUint(val, 10, 64)
351+
if err != nil {
352+
cumBuckets[ub] = cumulative
353+
continue
354+
}
355+
cumulative += n
356+
cumBuckets[ub] = cumulative
357+
}
358+
totalCount = cumulative
359+
360+
// +Inf bucket count equals totalCount (Prometheus adds it automatically).
361+
// sum is 0 — SAI doesn't provide total bytes, only bucket counts.
362+
ch <- prometheus.MustNewConstHistogram(
363+
desc, totalCount, 0, cumBuckets, labels...,
364+
)
365+
}

internal/agent/metrics/default_config.yaml

Lines changed: 34 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -440,169 +440,45 @@ metrics:
440440
direction: "tx"
441441
priority: "7"
442442

443-
# Packet size distribution (RX)
444-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
445-
metric: sonic_switch_interface_packet_size_total
446-
type: counter
447-
help: "Total packets by size bucket"
448-
labels:
449-
interface: "$port_name"
450-
direction: "rx"
451-
size: "64"
452-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
453-
metric: sonic_switch_interface_packet_size_total
454-
type: counter
455-
help: "Total packets by size bucket"
456-
labels:
457-
interface: "$port_name"
458-
direction: "rx"
459-
size: "65_to_127"
460-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
461-
metric: sonic_switch_interface_packet_size_total
462-
type: counter
463-
help: "Total packets by size bucket"
464-
labels:
465-
interface: "$port_name"
466-
direction: "rx"
467-
size: "128_to_255"
468-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
469-
metric: sonic_switch_interface_packet_size_total
470-
type: counter
471-
help: "Total packets by size bucket"
472-
labels:
473-
interface: "$port_name"
474-
direction: "rx"
475-
size: "256_to_511"
476-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
477-
metric: sonic_switch_interface_packet_size_total
478-
type: counter
479-
help: "Total packets by size bucket"
443+
# Packet size distribution (RX) — Prometheus histogram
444+
- metric: sonic_switch_interface_rx_packet_size_bytes
445+
type: histogram
446+
help: "RX packet size distribution"
480447
labels:
481448
interface: "$port_name"
482-
direction: "rx"
483-
size: "512_to_1023"
484-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
485-
metric: sonic_switch_interface_packet_size_total
486-
type: counter
487-
help: "Total packets by size bucket"
488-
labels:
489-
interface: "$port_name"
490-
direction: "rx"
491-
size: "1024_to_1518"
492-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_1519_TO_2047_OCTETS
493-
metric: sonic_switch_interface_packet_size_total
494-
type: counter
495-
help: "Total packets by size bucket"
496-
labels:
497-
interface: "$port_name"
498-
direction: "rx"
499-
size: "1519_to_2047"
500-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_2048_TO_4095_OCTETS
501-
metric: sonic_switch_interface_packet_size_total
502-
type: counter
503-
help: "Total packets by size bucket"
504-
labels:
505-
interface: "$port_name"
506-
direction: "rx"
507-
size: "2048_to_4095"
508-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_4096_TO_9216_OCTETS
509-
metric: sonic_switch_interface_packet_size_total
510-
type: counter
511-
help: "Total packets by size bucket"
512-
labels:
513-
interface: "$port_name"
514-
direction: "rx"
515-
size: "4096_to_9216"
516-
- field: SAI_PORT_STAT_ETHER_IN_PKTS_9217_TO_16383_OCTETS
517-
metric: sonic_switch_interface_packet_size_total
518-
type: counter
519-
help: "Total packets by size bucket"
520-
labels:
521-
interface: "$port_name"
522-
direction: "rx"
523-
size: "9217_to_16383"
449+
transform:
450+
histogram:
451+
buckets:
452+
64: SAI_PORT_STAT_ETHER_IN_PKTS_64_OCTETS
453+
127: SAI_PORT_STAT_ETHER_IN_PKTS_65_TO_127_OCTETS
454+
255: SAI_PORT_STAT_ETHER_IN_PKTS_128_TO_255_OCTETS
455+
511: SAI_PORT_STAT_ETHER_IN_PKTS_256_TO_511_OCTETS
456+
1023: SAI_PORT_STAT_ETHER_IN_PKTS_512_TO_1023_OCTETS
457+
1518: SAI_PORT_STAT_ETHER_IN_PKTS_1024_TO_1518_OCTETS
458+
2047: SAI_PORT_STAT_ETHER_IN_PKTS_1519_TO_2047_OCTETS
459+
4095: SAI_PORT_STAT_ETHER_IN_PKTS_2048_TO_4095_OCTETS
460+
9216: SAI_PORT_STAT_ETHER_IN_PKTS_4096_TO_9216_OCTETS
461+
16383: SAI_PORT_STAT_ETHER_IN_PKTS_9217_TO_16383_OCTETS
524462

525-
# Packet size distribution (TX)
526-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_64_OCTETS
527-
metric: sonic_switch_interface_packet_size_total
528-
type: counter
529-
help: "Total packets by size bucket"
530-
labels:
531-
interface: "$port_name"
532-
direction: "tx"
533-
size: "64"
534-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_65_TO_127_OCTETS
535-
metric: sonic_switch_interface_packet_size_total
536-
type: counter
537-
help: "Total packets by size bucket"
538-
labels:
539-
interface: "$port_name"
540-
direction: "tx"
541-
size: "65_to_127"
542-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_128_TO_255_OCTETS
543-
metric: sonic_switch_interface_packet_size_total
544-
type: counter
545-
help: "Total packets by size bucket"
546-
labels:
547-
interface: "$port_name"
548-
direction: "tx"
549-
size: "128_to_255"
550-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_256_TO_511_OCTETS
551-
metric: sonic_switch_interface_packet_size_total
552-
type: counter
553-
help: "Total packets by size bucket"
554-
labels:
555-
interface: "$port_name"
556-
direction: "tx"
557-
size: "256_to_511"
558-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_512_TO_1023_OCTETS
559-
metric: sonic_switch_interface_packet_size_total
560-
type: counter
561-
help: "Total packets by size bucket"
463+
# Packet size distribution (TX) — Prometheus histogram
464+
- metric: sonic_switch_interface_tx_packet_size_bytes
465+
type: histogram
466+
help: "TX packet size distribution"
562467
labels:
563468
interface: "$port_name"
564-
direction: "tx"
565-
size: "512_to_1023"
566-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_1024_TO_1518_OCTETS
567-
metric: sonic_switch_interface_packet_size_total
568-
type: counter
569-
help: "Total packets by size bucket"
570-
labels:
571-
interface: "$port_name"
572-
direction: "tx"
573-
size: "1024_to_1518"
574-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_1519_TO_2047_OCTETS
575-
metric: sonic_switch_interface_packet_size_total
576-
type: counter
577-
help: "Total packets by size bucket"
578-
labels:
579-
interface: "$port_name"
580-
direction: "tx"
581-
size: "1519_to_2047"
582-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_2048_TO_4095_OCTETS
583-
metric: sonic_switch_interface_packet_size_total
584-
type: counter
585-
help: "Total packets by size bucket"
586-
labels:
587-
interface: "$port_name"
588-
direction: "tx"
589-
size: "2048_to_4095"
590-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_4096_TO_9216_OCTETS
591-
metric: sonic_switch_interface_packet_size_total
592-
type: counter
593-
help: "Total packets by size bucket"
594-
labels:
595-
interface: "$port_name"
596-
direction: "tx"
597-
size: "4096_to_9216"
598-
- field: SAI_PORT_STAT_ETHER_OUT_PKTS_9217_TO_16383_OCTETS
599-
metric: sonic_switch_interface_packet_size_total
600-
type: counter
601-
help: "Total packets by size bucket"
602-
labels:
603-
interface: "$port_name"
604-
direction: "tx"
605-
size: "9217_to_16383"
469+
transform:
470+
histogram:
471+
buckets:
472+
64: SAI_PORT_STAT_ETHER_OUT_PKTS_64_OCTETS
473+
127: SAI_PORT_STAT_ETHER_OUT_PKTS_65_TO_127_OCTETS
474+
255: SAI_PORT_STAT_ETHER_OUT_PKTS_128_TO_255_OCTETS
475+
511: SAI_PORT_STAT_ETHER_OUT_PKTS_256_TO_511_OCTETS
476+
1023: SAI_PORT_STAT_ETHER_OUT_PKTS_512_TO_1023_OCTETS
477+
1518: SAI_PORT_STAT_ETHER_OUT_PKTS_1024_TO_1518_OCTETS
478+
2047: SAI_PORT_STAT_ETHER_OUT_PKTS_1519_TO_2047_OCTETS
479+
4095: SAI_PORT_STAT_ETHER_OUT_PKTS_2048_TO_4095_OCTETS
480+
9216: SAI_PORT_STAT_ETHER_OUT_PKTS_4096_TO_9216_OCTETS
481+
16383: SAI_PORT_STAT_ETHER_OUT_PKTS_9217_TO_16383_OCTETS
606482

607483
# Anomaly counters
608484
- field: SAI_PORT_STAT_ETHER_STATS_UNDERSIZE_PKTS

0 commit comments

Comments
 (0)