// This file is Copyright its original authors, visible in version control
// history.
//
// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
// You may not use this file except in accordance with one or both of these
// licenses.

use std::fmt::Write as _;
use std::sync::atomic::{AtomicI64, Ordering};
use std::time::Duration;

use ldk_node::Node;

/// A fixed interval of 60 seconds.
///
/// NOTE(review): nothing in this file reads the constant; judging by its name it is
/// the period between two metrics rebuilds — confirm against the caller.
pub const BUILD_METRICS_INTERVAL: Duration = Duration::from_secs(60);

/// An integer gauge: a [`Metrics`] value that can go up and down.
///
/// The value is kept in an [`AtomicI64`], so it can be read and written through a
/// shared reference. Every access uses [`Ordering::Relaxed`]: each load and store is
/// atomic on its own, but no ordering with surrounding memory operations is implied.
#[derive(Debug, Default)]
pub struct IntGauge {
    inner: AtomicI64,
}

impl IntGauge {
    /// Creates a gauge whose initial value is `0`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Overwrites the current value of the gauge with `value`.
    pub fn set(&self, value: i64) {
        self.inner.store(value, Ordering::Relaxed);
    }

    /// Returns the current value of the gauge.
    pub fn get(&self) -> i64 {
        self.inner.load(Ordering::Relaxed)
    }
}

/// The textual pieces of one [`Metrics`] sample: its name, help text, type and value.
///
/// All four parts are stored as owned strings exactly as they were passed to
/// [`MetricsOutput::new`]; no validation or escaping is applied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetricsOutput {
    /// Name of the metric.
    name: String,
    /// Human-readable description of the metric.
    help_text: String,
    /// Type of the metric as text (for example `"gauge"`).
    metric_type: String,
    /// Value of the metric, already rendered as text.
    value: String,
}

impl MetricsOutput {
    /// Builds a `MetricsOutput` by copying each argument into an owned `String`.
    pub fn new(name: &str, help_text: &str, metric_type: &str, value: &str) -> Self {
        Self {
            name: name.to_owned(),
            help_text: help_text.to_owned(),
            metric_type: metric_type.to_owned(),
            value: value.to_owned(),
        }
    }
}

| 55 | +pub struct Metrics { |
| 56 | + pub service_health_score: IntGauge, |
| 57 | +} |
| 58 | + |
| 59 | +impl Metrics { |
| 60 | + pub fn new() -> Self { |
| 61 | + Self { service_health_score: IntGauge::new() } |
| 62 | + } |
| 63 | + |
| 64 | + pub fn update_service_health_score(&self, node: &Node) { |
| 65 | + let score = self.calculate_ldk_server_health_score(node); |
| 66 | + self.service_health_score.set(score); |
| 67 | + } |
| 68 | + |
| 69 | + /// The health score computation is pretty basic for now and simply |
| 70 | + /// calculated based on the impacted events on the components of the |
| 71 | + /// `Node`. The events severity and weightage value are as follows: |
| 72 | + /// |
| 73 | + /// - Critical: 0 (Total failure) |
| 74 | + /// - Major: 35% |
| 75 | + /// - Minor: 25% |
| 76 | + /// |
| 77 | + /// Using the assigned score above, the health score of the `Node` is |
| 78 | + /// computed as: |
| 79 | + /// |
| 80 | + /// Health score = Maximum health score - Sum(Event severity score) |
| 81 | + /// |
| 82 | + /// Where: |
| 83 | + /// |
| 84 | + /// - Maximum health score = 100 |
| 85 | + /// |
| 86 | + /// If the `Node` is not running/online, i.e `is_running` is false, |
| 87 | + /// the severity is critical with a weightage value of -100%. |
| 88 | + /// |
| 89 | + /// If the `Node` is running but isn't connected to any peer yet, |
| 90 | + /// the severity is major with a weightage value of -35%. |
| 91 | + /// |
| 92 | + /// If the `Node` is running but the Lightning Wallet hasn't been synced |
| 93 | + /// yet, the severity is minor with a weightage value of -25%. |
| 94 | + pub fn calculate_ldk_server_health_score(&self, node: &Node) -> i64 { |
| 95 | + Self::compute_health_score( |
| 96 | + node.status().is_running, |
| 97 | + !node.list_peers().is_empty(), |
| 98 | + node.status().latest_lightning_wallet_sync_timestamp.is_some(), |
| 99 | + ) |
| 100 | + } |
| 101 | + |
| 102 | + pub fn format_metrics_output(&self, buffer: &mut String, options: &MetricsOutput) { |
| 103 | + buffer.push_str(&format!("# HELP {} {}\n", options.name, options.help_text)); |
| 104 | + buffer.push_str(&format!("# TYPE {} {}\n", options.name, options.metric_type)); |
| 105 | + buffer.push_str(&format!("{} {}\n", options.name, options.value)); |
| 106 | + } |
| 107 | + |
| 108 | + pub fn gather_metrics(&self) -> String { |
| 109 | + let mut buffer = String::new(); |
| 110 | + let options = &MetricsOutput::new( |
| 111 | + "ldk_server_health_score", |
| 112 | + "Current health score (0-100)", |
| 113 | + "gauge", |
| 114 | + &self.service_health_score.get().to_string(), |
| 115 | + ); |
| 116 | + |
| 117 | + self.format_metrics_output(&mut buffer, options); |
| 118 | + |
| 119 | + buffer |
| 120 | + } |
| 121 | + |
| 122 | + fn compute_health_score(is_running: bool, has_peers: bool, is_wallet_synced: bool) -> i64 { |
| 123 | + if !is_running { |
| 124 | + return 0; |
| 125 | + } |
| 126 | + |
| 127 | + let mut health_score = 100; |
| 128 | + |
| 129 | + if !has_peers { |
| 130 | + health_score -= 35; |
| 131 | + } |
| 132 | + |
| 133 | + if !is_wallet_synced { |
| 134 | + health_score -= 25; |
| 135 | + } |
| 136 | + |
| 137 | + health_score |
| 138 | + } |
| 139 | +} |
| 140 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the scoring rule for every combination that matters.
    #[test]
    fn test_compute_health_score() {
        // (is_running, has_peers, is_wallet_synced, expected score)
        let cases = [
            // Node is not running: always 0, whatever the other inputs are.
            (false, true, true, 0),
            (false, false, false, 0),
            // Node is running, connected to a peer and wallet is synced.
            (true, true, true, 100),
            // Node is running, not connected to a peer but wallet is synced.
            (true, false, true, 65),
            // Node is running, connected to a peer but wallet is not synced.
            (true, true, false, 75),
            // Node is running, not connected to a peer and wallet is not synced.
            (true, false, false, 40),
        ];

        for &(is_running, has_peers, is_wallet_synced, expected) in cases.iter() {
            assert_eq!(
                Metrics::compute_health_score(is_running, has_peers, is_wallet_synced),
                expected,
                "is_running={}, has_peers={}, is_wallet_synced={}",
                is_running,
                has_peers,
                is_wallet_synced
            );
        }
    }

    /// Pins the exact text produced for the health score metric, not just its name.
    #[test]
    fn test_gather_metrics_format() {
        let metrics = Metrics::new();

        // A fresh gauge reports 0.
        assert_eq!(
            metrics.gather_metrics(),
            "# HELP ldk_server_health_score Current health score (0-100)\n\
             # TYPE ldk_server_health_score gauge\n\
             ldk_server_health_score 0\n"
        );

        // The sample line follows the value stored in the gauge.
        metrics.service_health_score.set(65);
        assert!(metrics.gather_metrics().ends_with("\nldk_server_health_score 65\n"));
    }
}