Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 104 additions & 1 deletion metrics/client.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package metrics

import (
"context"
"fmt"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"

"github.com/smartcontractkit/chainlink-common/pkg/beholder"
)

var (
RPCCallLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "rpc_call_latency",
Help: "The duration of an RPC call in milliseconds",
Help: "The duration of an RPC call in seconds",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You sure about this?

Buckets: []float64{
float64(50 * time.Millisecond),
float64(100 * time.Millisecond),
Expand All @@ -22,4 +29,100 @@ var (
float64(8 * time.Second),
},
}, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "success", "rpcCallName"})

RPCCallErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "rpc_call_errors_total",
Help: "The total number of failed RPC calls",
}, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "rpcCallName"})
)

const (
rpcCallLatencyBeholder = "rpc_call_latency"
rpcCallErrorsTotalBeholder = "rpc_call_errors_total"
Comment on lines +40 to +41
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're defining these, let's use them above.

)

// RPCClientMetrics records RPC latency and errors to Prometheus and Beholder (same pattern as multinode metrics).
// Implementations in this file: rpcClientMetrics (real recorder) and NoopRPCClientMetrics (discards everything).
type RPCClientMetrics interface {
// RecordRequest records latency for an RPC call (observed in seconds for Prometheus).
// callName identifies the RPC method and is attached as the "rpcCallName" label.
// If err is non-nil, the call is labelled as unsuccessful and rpc_call_errors_total is incremented.
RecordRequest(ctx context.Context, callName string, latency time.Duration, err error)
}

// Compile-time check that *rpcClientMetrics satisfies RPCClientMetrics.
var _ RPCClientMetrics = (*rpcClientMetrics)(nil)

// rpcClientMetrics is the RPCClientMetrics implementation that reports each call to the
// package-level Prometheus collectors and to the Beholder instruments stored on the struct.
type rpcClientMetrics struct {
// Fixed label values attached to every observation made through this instance.
chainFamily string
chainID string
rpcURL string
// isSendOnly is rendered with strconv.FormatBool before being used as a label value.
isSendOnly bool
// Beholder instruments obtained from beholder.GetMeter() in NewRPCClientMetrics.
latency metric.Float64Histogram
errorsTotal metric.Int64Counter
}

// RPCClientMetricsConfig holds fixed labels for an RPC client instance.
type RPCClientMetricsConfig struct {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is going to be called from chainlink-evm's logResult then, if I'm reading this correctly, the only "static" fields are ChainFamily and ChainID, no? The others come from the request and would be passed on each increment, not when creating the metrics instance. Did you test locally with logResult calling RecordRequest instead of RPCCallLatency directly?

ChainFamily string
ChainID string
RPCURL string
IsSendOnly bool
}

// NewRPCClientMetrics creates RPC client metrics that publish to Prometheus and Beholder.
func NewRPCClientMetrics(cfg RPCClientMetricsConfig) (RPCClientMetrics, error) {
latency, err := beholder.GetMeter().Float64Histogram(rpcCallLatencyBeholder)
if err != nil {
return nil, fmt.Errorf("failed to register RPC call latency metric: %w", err)
}
errorsTotal, err := beholder.GetMeter().Int64Counter(rpcCallErrorsTotalBeholder)
if err != nil {
return nil, fmt.Errorf("failed to register RPC call errors metric: %w", err)
}
return &rpcClientMetrics{
chainFamily: cfg.ChainFamily,
chainID: cfg.ChainID,
rpcURL: cfg.RPCURL,
isSendOnly: cfg.IsSendOnly,
latency: latency,
errorsTotal: errorsTotal,
Comment on lines +85 to +86
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe call them latencyHist and errorsCounter for clarity?

}, nil
}

// RecordRequest publishes one RPC call observation to Prometheus and Beholder.
//
// The latency is converted to seconds and observed on both rpc_call_latency
// histograms, together with a "success" label derived from err. When err is
// non-nil, both rpc_call_errors_total counters are incremented as well.
//
// NOTE(review): the visible RPCCallLatency buckets are built with
// float64(time.Duration) values, i.e. nanosecond magnitudes, while the value
// observed here is in seconds — confirm the bucket boundaries match the
// intended unit.
func (m *rpcClientMetrics) RecordRequest(ctx context.Context, callName string, latency time.Duration, err error) {
	var (
		failed     = err != nil
		succeeded  = strconv.FormatBool(!failed)
		sendOnly   = strconv.FormatBool(m.isSendOnly)
		elapsedSec = latency.Seconds()
	)

	// Latency is always recorded, whatever the outcome of the call.
	RPCCallLatency.
		WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendOnly, succeeded, callName).
		Observe(elapsedSec)
	m.latency.Record(ctx, elapsedSec, metric.WithAttributes(
		attribute.String("chainFamily", m.chainFamily),
		attribute.String("chainID", m.chainID),
		attribute.String("rpcUrl", m.rpcURL),
		attribute.String("isSendOnly", sendOnly),
		attribute.String("success", succeeded),
		attribute.String("rpcCallName", callName),
	))

	if !failed {
		return
	}

	// Failed calls additionally bump the error counters, which carry no "success" label.
	RPCCallErrorsTotal.
		WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendOnly, callName).
		Inc()
	m.errorsTotal.Add(ctx, 1, metric.WithAttributes(
		attribute.String("chainFamily", m.chainFamily),
		attribute.String("chainID", m.chainID),
		attribute.String("rpcUrl", m.rpcURL),
		attribute.String("isSendOnly", sendOnly),
		attribute.String("rpcCallName", callName),
	))
}

// NoopRPCClientMetrics is a no-op implementation for when metrics are disabled.
// It has no fields, so the zero value can be used directly.
type NoopRPCClientMetrics struct{}

// RecordRequest implements RPCClientMetrics; it ignores all arguments and records nothing.
func (NoopRPCClientMetrics) RecordRequest(context.Context, string, time.Duration, error) {}

// Compile-time check that NoopRPCClientMetrics satisfies RPCClientMetrics.
var _ RPCClientMetrics = NoopRPCClientMetrics{}
Comment on lines +123 to +128
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not exactly a Golang whiz, but is this necessary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a compile-time interface check; it's recommended here.

32 changes: 32 additions & 0 deletions metrics/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package metrics

import (
"context"
"errors"
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestNewRPCClientMetrics(t *testing.T) {
m, err := NewRPCClientMetrics(RPCClientMetricsConfig{
ChainFamily: "evm",
ChainID: "1",
RPCURL: "http://localhost:8545",
IsSendOnly: false,
})
require.NoError(t, err)
require.NotNil(t, m)

ctx := context.Background()
m.RecordRequest(ctx, "latest_block", 100*time.Millisecond, nil)
m.RecordRequest(ctx, "latest_block", 50*time.Millisecond, errors.New("rpc error"))
}

// TestNoopRPCClientMetrics_RecordRequest calls the no-op recorder with both a
// nil and a non-nil error; the test passes as long as neither call panics.
func TestNoopRPCClientMetrics_RecordRequest(t *testing.T) {
	noop := NoopRPCClientMetrics{}

	calls := []struct {
		latency time.Duration
		err     error
	}{
		{latency: 100 * time.Millisecond},
		{latency: 50 * time.Millisecond, err: errors.New("rpc error")},
	}

	ctx := context.Background()
	for _, call := range calls {
		noop.RecordRequest(ctx, "latest_block", call.latency, call.err)
	}
}
48 changes: 24 additions & 24 deletions metrics/multinode.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,29 +135,29 @@ type GenericMultiNodeMetrics interface {
var _ GenericMultiNodeMetrics = &multiNodeMetrics{}

type multiNodeMetrics struct {
network string
chainID string
nodeStates metric.Int64Gauge
nodeClientVersion metric.Int64Gauge
nodeVerifies metric.Int64Counter
nodeVerifiesFailed metric.Int64Counter
nodeVerifiesSuccess metric.Int64Counter
nodeTransitionsToAlive metric.Int64Counter
nodeTransitionsToInSync metric.Int64Counter
nodeTransitionsToOutOfSync metric.Int64Counter
nodeTransitionsToUnreachable metric.Int64Counter
nodeTransitionsToInvalidChainID metric.Int64Counter
nodeTransitionsToUnusable metric.Int64Counter
nodeTransitionsToSyncing metric.Int64Counter
highestSeenBlock metric.Int64Gauge
highestFinalizedBlock metric.Int64Gauge
seenBlocks metric.Int64Counter
polls metric.Int64Counter
pollsFailed metric.Int64Counter
pollsSuccess metric.Int64Counter
finalizedStateFailed metric.Int64Counter
nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter
invariantViolations metric.Int64Counter
network string
chainID string
nodeStates metric.Int64Gauge
nodeClientVersion metric.Int64Gauge
nodeVerifies metric.Int64Counter
nodeVerifiesFailed metric.Int64Counter
nodeVerifiesSuccess metric.Int64Counter
nodeTransitionsToAlive metric.Int64Counter
nodeTransitionsToInSync metric.Int64Counter
nodeTransitionsToOutOfSync metric.Int64Counter
nodeTransitionsToUnreachable metric.Int64Counter
nodeTransitionsToInvalidChainID metric.Int64Counter
nodeTransitionsToUnusable metric.Int64Counter
nodeTransitionsToSyncing metric.Int64Counter
highestSeenBlock metric.Int64Gauge
highestFinalizedBlock metric.Int64Gauge
seenBlocks metric.Int64Counter
polls metric.Int64Counter
pollsFailed metric.Int64Counter
pollsSuccess metric.Int64Counter
finalizedStateFailed metric.Int64Counter
nodeTransitionsToFinalizedStateNotAvailable metric.Int64Counter
invariantViolations metric.Int64Counter
}

func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNodeMetrics, error) {
Expand Down Expand Up @@ -289,7 +289,7 @@ func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNod
pollsSuccess: pollsSuccess,
finalizedStateFailed: finalizedStateFailed,
nodeTransitionsToFinalizedStateNotAvailable: nodeTransitionsToFinalizedStateNotAvailable,
invariantViolations: invariantViolations,
invariantViolations: invariantViolations,
}, nil
}

Expand Down
Loading