-
Notifications
You must be signed in to change notification settings - Fork 2
[RPC Metric Part 1] Support two basic metrics in RPC client : Latency and error rate #89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a3174e7
24a83d3
2f3fab6
b892ff4
b5471e0
0b3bc02
65f6952
0e4e902
cd5fc94
2925e01
7a913aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,16 +1,23 @@ | ||
| package metrics | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "strconv" | ||
| "time" | ||
|
|
||
| "github.com/prometheus/client_golang/prometheus" | ||
| "github.com/prometheus/client_golang/prometheus/promauto" | ||
| "go.opentelemetry.io/otel/attribute" | ||
| "go.opentelemetry.io/otel/metric" | ||
|
|
||
| "github.com/smartcontractkit/chainlink-common/pkg/beholder" | ||
| ) | ||
|
|
||
| var ( | ||
| RPCCallLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{ | ||
| Name: "rpc_call_latency", | ||
| Help: "The duration of an RPC call in milliseconds", | ||
| Help: "The duration of an RPC call in seconds", | ||
| Buckets: []float64{ | ||
| float64(50 * time.Millisecond), | ||
| float64(100 * time.Millisecond), | ||
|
|
@@ -22,4 +29,100 @@ var ( | |
| float64(8 * time.Second), | ||
| }, | ||
| }, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "success", "rpcCallName"}) | ||
|
|
||
| RPCCallErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ | ||
| Name: "rpc_call_errors_total", | ||
| Help: "The total number of failed RPC calls", | ||
| }, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "rpcCallName"}) | ||
| ) | ||
|
|
||
| const ( | ||
| rpcCallLatencyBeholder = "rpc_call_latency" | ||
| rpcCallErrorsTotalBeholder = "rpc_call_errors_total" | ||
|
Comment on lines
+40
to
+41
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we're defining these, let's use them above. |
||
| ) | ||
|
|
||
// RPCClientMetrics is the recorder an RPC client uses to report per-call
// latency and failures. The implementations in this package either publish to
// Prometheus and Beholder or discard the observation entirely.
type RPCClientMetrics interface {
	// RecordRequest reports one finished RPC call identified by callName.
	// latency is the duration of the call; a non-nil err marks the call as
	// failed, which additionally counts it towards rpc_call_errors_total.
	RecordRequest(ctx context.Context, callName string, latency time.Duration, err error)
}
|
|
||
| var _ RPCClientMetrics = (*rpcClientMetrics)(nil) | ||
|
|
||
| type rpcClientMetrics struct { | ||
| chainFamily string | ||
| chainID string | ||
| rpcURL string | ||
| isSendOnly bool | ||
| latency metric.Float64Histogram | ||
| errorsTotal metric.Int64Counter | ||
| } | ||
|
|
||
| // RPCClientMetricsConfig holds fixed labels for an RPC client instance. | ||
| type RPCClientMetricsConfig struct { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this is going to be called from |
||
| ChainFamily string | ||
| ChainID string | ||
| RPCURL string | ||
| IsSendOnly bool | ||
| } | ||
|
|
||
| // NewRPCClientMetrics creates RPC client metrics that publish to Prometheus and Beholder. | ||
| func NewRPCClientMetrics(cfg RPCClientMetricsConfig) (RPCClientMetrics, error) { | ||
| latency, err := beholder.GetMeter().Float64Histogram(rpcCallLatencyBeholder) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to register RPC call latency metric: %w", err) | ||
| } | ||
| errorsTotal, err := beholder.GetMeter().Int64Counter(rpcCallErrorsTotalBeholder) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to register RPC call errors metric: %w", err) | ||
| } | ||
| return &rpcClientMetrics{ | ||
| chainFamily: cfg.ChainFamily, | ||
| chainID: cfg.ChainID, | ||
| rpcURL: cfg.RPCURL, | ||
| isSendOnly: cfg.IsSendOnly, | ||
| latency: latency, | ||
| errorsTotal: errorsTotal, | ||
|
Comment on lines
+85
to
+86
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. maybe call them |
||
| }, nil | ||
| } | ||
|
|
||
| func (m *rpcClientMetrics) RecordRequest(ctx context.Context, callName string, latency time.Duration, err error) { | ||
| successStr := "true" | ||
| if err != nil { | ||
| successStr = "false" | ||
| } | ||
| sendStr := strconv.FormatBool(m.isSendOnly) | ||
| sec := latency.Seconds() | ||
|
|
||
| RPCCallLatency.WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendStr, successStr, callName).Observe(sec) | ||
|
|
||
| latAttrs := metric.WithAttributes( | ||
| attribute.String("chainFamily", m.chainFamily), | ||
| attribute.String("chainID", m.chainID), | ||
| attribute.String("rpcUrl", m.rpcURL), | ||
| attribute.String("isSendOnly", sendStr), | ||
| attribute.String("success", successStr), | ||
| attribute.String("rpcCallName", callName), | ||
| ) | ||
| m.latency.Record(ctx, sec, latAttrs) | ||
|
|
||
| if err != nil { | ||
| RPCCallErrorsTotal.WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendStr, callName).Inc() | ||
| errAttrs := metric.WithAttributes( | ||
| attribute.String("chainFamily", m.chainFamily), | ||
| attribute.String("chainID", m.chainID), | ||
| attribute.String("rpcUrl", m.rpcURL), | ||
| attribute.String("isSendOnly", sendStr), | ||
| attribute.String("rpcCallName", callName), | ||
| ) | ||
| m.errorsTotal.Add(ctx, 1, errAttrs) | ||
| } | ||
| } | ||
|
|
||
// NoopRPCClientMetrics discards every observation. Use it in place of a real
// recorder when metrics are disabled.
type NoopRPCClientMetrics struct{}

// RecordRequest takes the same arguments as any other recorder and
// intentionally does nothing with them.
func (NoopRPCClientMetrics) RecordRequest(_ context.Context, _ string, _ time.Duration, _ error) {}
|
|
||
| var _ RPCClientMetrics = NoopRPCClientMetrics{} | ||
|
Comment on lines
+123
to
+128
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not exactly a golang whiz, but is this necessary?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's a compile-time interface check, recommended here. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| package metrics | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| func TestNewRPCClientMetrics(t *testing.T) { | ||
| m, err := NewRPCClientMetrics(RPCClientMetricsConfig{ | ||
| ChainFamily: "evm", | ||
| ChainID: "1", | ||
| RPCURL: "http://localhost:8545", | ||
| IsSendOnly: false, | ||
| }) | ||
| require.NoError(t, err) | ||
| require.NotNil(t, m) | ||
|
|
||
| ctx := context.Background() | ||
| m.RecordRequest(ctx, "latest_block", 100*time.Millisecond, nil) | ||
| m.RecordRequest(ctx, "latest_block", 50*time.Millisecond, errors.New("rpc error")) | ||
| } | ||
|
|
||
| func TestNoopRPCClientMetrics_RecordRequest(t *testing.T) { | ||
| var m NoopRPCClientMetrics | ||
| ctx := context.Background() | ||
| m.RecordRequest(ctx, "latest_block", 100*time.Millisecond, nil) | ||
| m.RecordRequest(ctx, "latest_block", 50*time.Millisecond, errors.New("rpc error")) | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You sure about this?