Skip to content

Commit 23e2f7b

Browse files
committed
make metrics poll interval configurable
1 parent 137c05e commit 23e2f7b

8 files changed

Lines changed: 67 additions & 16 deletions

File tree

e2e-tests/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ client_trusts_lsp = true
143143
144144
[metrics]
145145
enabled = true
146+
poll_metrics_interval = 1
146147
"#,
147148
storage_dir = storage_dir.display(),
148149
);

e2e-tests/tests/e2e.rs

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -856,10 +856,36 @@ async fn test_metrics_endpoint() {
856856
assert!(metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0"));
857857
assert!(metrics.contains("ldk_server_total_lightning_balance_sats 0"));
858858

859-
// Set up channel and make a payment to trigger metric update
859+
// Set up channel and make a payment to trigger metrics update
860860
setup_funded_channel(&bitcoind, &server_a, &server_b, 100_000).await;
861861

862-
// Make a payment to trigger payment metric updates.
862+
// Poll for channel, peer and balance metrics.
863+
let timeout = Duration::from_secs(10);
864+
let start = std::time::Instant::now();
865+
loop {
866+
let metrics = client.get_metrics().await.unwrap();
867+
if metrics.contains("ldk_server_total_peers_count 1")
868+
&& metrics.contains("ldk_server_total_channels_count 1")
869+
&& metrics.contains("ldk_server_total_public_channels_count 1")
870+
&& metrics.contains("ldk_server_total_payments_count 2")
871+
&& !metrics.contains("ldk_server_total_lightning_balance_sats 0")
872+
&& !metrics.contains("ldk_server_total_onchain_balance_sats 0")
873+
&& !metrics.contains("ldk_server_spendable_onchain_balance_sats 0")
874+
&& !metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0")
875+
{
876+
break;
877+
}
878+
879+
if start.elapsed() > timeout {
880+
let current_metrics = client.get_metrics().await.unwrap();
881+
panic!(
882+
"Timed out waiting for channel, peer and balance metrics to update. Current metrics:\n{}",
883+
current_metrics
884+
);
885+
}
886+
tokio::time::sleep(Duration::from_secs(1)).await;
887+
}
888+
863889
let invoice_resp = server_b
864890
.client()
865891
.bolt11_receive(Bolt11ReceiveRequest {
@@ -880,7 +906,6 @@ async fn test_metrics_endpoint() {
880906
loop {
881907
let metrics = client.get_metrics().await.unwrap();
882908
if metrics.contains("ldk_server_total_successful_payments_count 1")
883-
&& metrics.contains("ldk_server_total_channels_count 1")
884909
&& !metrics.contains("ldk_server_total_lightning_balance_sats 0")
885910
&& !metrics.contains("ldk_server_total_onchain_balance_sats 0")
886911
&& !metrics.contains("ldk_server_spendable_onchain_balance_sats 0")

ldk-server-client/src/client.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ use ldk_server_protos::endpoints::{
4343
GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
4444
LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH,
4545
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
46-
SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH,
47-
VERIFY_SIGNATURE_PATH,
46+
SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
4847
};
4948
use ldk_server_protos::error::{ErrorCode, ErrorResponse};
5049
use prost::bytes::Bytes;

ldk-server/ldk-server-config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,4 @@ client_trusts_lsp = false
9292
# Metrics settings
9393
[metrics]
9494
enabled = false
95+
poll_metrics_interval = 60 # Metrics polling interval, in seconds. Defaults to 60 seconds if unset while metrics are enabled.

ldk-server/src/main.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::fs;
1616
use std::os::unix::fs::PermissionsExt;
1717
use std::path::{Path, PathBuf};
1818
use std::sync::Arc;
19-
use std::time::{SystemTime, UNIX_EPOCH};
19+
use std::time::{Duration, SystemTime, UNIX_EPOCH};
2020

2121
use clap::Parser;
2222
use hex::DisplayHex;
@@ -50,7 +50,7 @@ use crate::io::persist::{
5050
use crate::service::NodeService;
5151
use crate::util::config::{load_config, ArgsConfig, ChainSource};
5252
use crate::util::logger::ServerLogger;
53-
use crate::util::metrics::{Metrics, BUILD_METRICS_INTERVAL};
53+
use crate::util::metrics::Metrics;
5454
use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
5555
use crate::util::systemd;
5656
use crate::util::tls::get_or_generate_tls_config;
@@ -276,8 +276,9 @@ fn main() {
276276
let event_node = Arc::clone(&node);
277277

278278
let metrics: Option<Arc<Metrics>> = if config_file.metrics_enabled {
279+
let poll_metrics_interval = Duration::from_secs(config_file.poll_metrics_interval.unwrap_or(60));
279280
let metrics_node = Arc::clone(&node);
280-
let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
281+
let mut interval = tokio::time::interval(poll_metrics_interval);
281282
let metrics = Arc::new(Metrics::new());
282283
let metrics_bg = Arc::clone(&metrics);
283284

ldk-server/src/service.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ use ldk_server_protos::endpoints::{
2828
GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
2929
LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH,
3030
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
31-
SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH,
32-
VERIFY_SIGNATURE_PATH,
31+
SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
3332
};
3433
use prost::Message;
3534

ldk-server/src/util/config.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub struct Config {
5959
pub log_file_path: Option<String>,
6060
pub pathfinding_scores_source_url: Option<String>,
6161
pub metrics_enabled: bool,
62+
pub poll_metrics_interval: Option<u64>,
6263
}
6364

6465
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -105,6 +106,7 @@ struct ConfigBuilder {
105106
log_file_path: Option<String>,
106107
pathfinding_scores_source_url: Option<String>,
107108
metrics_enabled: Option<bool>,
109+
poll_metrics_interval: Option<u64>,
108110
}
109111

110112
impl ConfigBuilder {
@@ -167,6 +169,8 @@ impl ConfigBuilder {
167169

168170
if let Some(metrics) = toml.metrics {
169171
self.metrics_enabled = metrics.enabled.or(self.metrics_enabled);
172+
self.poll_metrics_interval =
173+
metrics.poll_metrics_interval.or(self.poll_metrics_interval);
170174
}
171175
}
172176

@@ -214,6 +218,10 @@ impl ConfigBuilder {
214218
if args.metrics_enabled {
215219
self.metrics_enabled = Some(true);
216220
}
221+
222+
if let Some(poll_metrics_interval) = &args.poll_metrics_interval {
223+
self.poll_metrics_interval = Some(*poll_metrics_interval);
224+
}
217225
}
218226

219227
fn build(self) -> io::Result<Config> {
@@ -374,6 +382,8 @@ impl ConfigBuilder {
374382

375383
let metrics_enabled = self.metrics_enabled.unwrap_or(false);
376384

385+
let poll_metrics_interval = self.poll_metrics_interval;
386+
377387
Ok(Config {
378388
network,
379389
listening_addrs,
@@ -392,6 +402,7 @@ impl ConfigBuilder {
392402
log_file_path: self.log_file_path,
393403
pathfinding_scores_source_url,
394404
metrics_enabled,
405+
poll_metrics_interval,
395406
})
396407
}
397408
}
@@ -471,6 +482,7 @@ struct TomlTlsConfig {
471482
#[derive(Deserialize, Serialize)]
472483
struct MetricsTomlConfig {
473484
enabled: Option<bool>,
485+
poll_metrics_interval: Option<u64>,
474486
}
475487

476488
#[derive(Deserialize, Serialize)]
@@ -638,6 +650,14 @@ pub struct ArgsConfig {
638650
help = "The option to enable the metrics endpoint. WARNING: This endpoint is unauthenticated."
639651
)]
640652
metrics_enabled: bool,
653+
654+
#[arg(
655+
long,
656+
env = "LDK_SERVER_POLL_METRICS_INTERVAL",
657+
help = "The polling interval for metrics in seconds. Required when
658+
metrics is enabled, but defaults to 60secs if unset."
659+
)]
660+
poll_metrics_interval: Option<u64>,
641661
}
642662

643663
pub fn load_config(args: &ArgsConfig) -> io::Result<Config> {
@@ -772,6 +792,7 @@ mod tests {
772792
node_alias: Some(String::from("LDK Server CLI")),
773793
pathfinding_scores_source_url: Some(String::from("https://example.com/")),
774794
metrics_enabled: false,
795+
poll_metrics_interval: None,
775796
}
776797
}
777798

@@ -789,6 +810,7 @@ mod tests {
789810
storage_dir_path: None,
790811
pathfinding_scores_source_url: None,
791812
metrics_enabled: false,
813+
poll_metrics_interval: None,
792814
}
793815
}
794816

@@ -866,6 +888,7 @@ mod tests {
866888
log_file_path: Some("/var/log/ldk-server.log".to_string()),
867889
pathfinding_scores_source_url: None,
868890
metrics_enabled: false,
891+
poll_metrics_interval: None,
869892
};
870893

871894
assert_eq!(config.listening_addrs, expected.listening_addrs);
@@ -1192,6 +1215,7 @@ mod tests {
11921215
log_file_path: Some("/var/log/ldk-server.log".to_string()),
11931216
pathfinding_scores_source_url: Some("https://example.com/".to_string()),
11941217
metrics_enabled: false,
1218+
poll_metrics_interval: None,
11951219
};
11961220

11971221
assert_eq!(config.listening_addrs, expected.listening_addrs);
@@ -1306,6 +1330,7 @@ mod tests {
13061330
log_file_path: Some("/var/log/ldk-server.log".to_string()),
13071331
pathfinding_scores_source_url: Some("https://example.com/".to_string()),
13081332
metrics_enabled: false,
1333+
poll_metrics_interval: None,
13091334
};
13101335

13111336
assert_eq!(config.listening_addrs, expected.listening_addrs);

ldk-server/src/util/metrics.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,25 @@
1414
//!
1515
//! The metrics are updated through two main mechanisms:
1616
//! 1. **Periodic Polling**: The `update_all_pollable_metrics` function is called at a regular
17-
//! interval (`BUILD_METRICS_INTERVAL`) to perform a full recount of metrics like peer count,
18-
//! channels count, and balances.
17+
//! interval (`poll_metrics_interval`, configurable via the config file; defaults to 60 seconds if unset) to perform a full recount of metrics like peer count,
18+
//! payments count, and channels metrics.
1919
//! 2. **Event-Driven Updates**: For metrics that can change frequently and where a full recount
20-
//! would be inefficient (e.g., total_successful_payments_count), a hybrid approach is used.
20+
//! would be inefficient (e.g., total_successful_payments_count, balances), a hybrid approach is used.
2121
//! - `initialize_payment_metrics` is called once at startup to get the accurate persisted state.
2222
//! - `update_payments_count` is called incrementally whenever a relevant event (like
2323
//! `PaymentSuccessful` or `PaymentFailed`) occurs.
24+
//! - `update_all_balances` is called when we receive a `PaymentSuccessful` event to update all balance metrics.
25+
//! - `update_channels_count` is called when we receive a `ChannelReady` or `ChannelClosed` event to update the channels metrics.
2426
//!
2527
//! The `gather_metrics` function collects all current metric values and formats them into the
2628
//! plain-text format that Prometheus scrapers expect. This output is exposed via an
2729
//! unauthenticated `/metrics` HTTP endpoint on the rest service address.
2830
2931
use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
30-
use std::time::Duration;
3132

3233
use ldk_node::payment::PaymentStatus;
3334
use ldk_node::Node;
3435

35-
pub const BUILD_METRICS_INTERVAL: Duration = Duration::from_secs(60);
36-
3736
/// Holds all the metrics that are tracked for LDK Server.
3837
///
3938
/// These metrics are exposed in a Prometheus-compatible format. The values are stored
@@ -153,6 +152,7 @@ impl Metrics {
153152
self.total_private_channels_count.store(private_channels_count, Ordering::Relaxed);
154153

155154
self.update_peer_count(node);
155+
self.update_all_balances(node);
156156
}
157157

158158
/// Gathers all metrics and formats them into the Prometheus text-based format.

0 commit comments

Comments
 (0)