Skip to content

Commit 5c92ad8

Browse files
committed
Fix stats server to reduce websocket memory churn
* Stop mutating shared stats payloads in unpause flow:
  - keep one shared JSON payload for normal clients
  - build one isolated unpause payload per tick using `deep_clone`.
* Trim history arrays in place (write-index) instead of copy-trim to reduce transient allocations and RSS high-water pressure.
* Change history persistence to chunked append with file locking, plus periodic compaction (every 30 saves or when file exceeds 5MB).
* Update history loading to read/merge chunk lines under shared lock.
* Fix network `/proc` aggregation loops to iterate over hash keys explicitly (Thanks to Joe).
* Localize startup ALRM handler to the server startup scope and cancel it after start.
* Replace per-connection SIG{ALRM} usage with a per-connection `connect_deadline` checked in `on_tick`, and clear it after verification.
1 parent 9309895 commit 5c92ad8

2 files changed

Lines changed: 326 additions & 197 deletions

File tree

stats-lib-funcs.pl

Lines changed: 142 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#
66
use strict;
77
use feature 'state';
8+
use Fcntl qw(:flock);
89

910
our $json;
1011
eval "use JSON::XS";
@@ -43,6 +44,7 @@
4344
temp => acl_system_status('temp'),
4445
};
4546

47+
# Return localized stats text with placeholders expanded
4648
sub stats_text
4749
{
4850
my $rv = $stats_text{ $_[0] };
@@ -73,6 +75,7 @@ sub jsonify
7375
return (ref($json_obj) eq 'HASH' && keys %{$json_obj}) ? $json_obj : {};
7476
}
7577

78+
# Build an empty stats payload with default graph points
7679
sub get_stats_empty
7780
{
7881
my $time = time();
@@ -93,6 +96,7 @@ sub get_stats_empty
9396
return $stats;
9497
}
9598

99+
# Collect a current snapshot of system stats and graph points
96100
sub get_stats_now
97101
{
98102
my %data;
@@ -109,12 +113,17 @@ sub get_stats_now
109113
if ($foreign_check_proc) {
110114
# CPU stats
111115
if ($acl_system_status->{'cpu'}) {
112-
my @cpuinfo = defined(&proc::get_cpu_info) ? proc::get_cpu_info() : ();
113-
my @cpuusage = defined(&proc::get_cpu_io_usage) ? proc::get_cpu_io_usage() : ();
116+
my @cpuinfo = defined(&proc::get_cpu_info)
117+
? proc::get_cpu_info()
118+
: ();
119+
my @cpuusage = defined(&proc::get_cpu_io_usage)
120+
? proc::get_cpu_io_usage()
121+
: ();
114122
if (@cpuinfo && @cpuusage) {
115123
# CPU load
116124
my $cpu = int($cpuusage[0] + $cpuusage[1] + $cpuusage[3]);
117-
$data{'cpu'} = [$cpu, stats_text('body_load', ($cpuinfo[0], $cpuinfo[1], $cpuinfo[2]))];
125+
$data{'cpu'} = [$cpu, stats_text('body_load',
126+
($cpuinfo[0], $cpuinfo[1], $cpuinfo[2]))];
118127
$gadd->('cpu', $cpu);
119128
# Disk I/O
120129
my $in = $cpuusage[5];
@@ -131,27 +140,31 @@ sub get_stats_now
131140
}
132141
# Memory stats
133142
if ($acl_system_status->{'mem'}) {
134-
my @memory = defined(&proc::get_memory_info) ? proc::get_memory_info() : ();
143+
my @memory = defined(&proc::get_memory_info)
144+
? proc::get_memory_info()
145+
: ();
135146
if (@memory) {
136147
$data{'mem'} = [];
137148
$data{'virt'} = [];
138149
if (@memory && $memory[0] && $memory[0] > 0) {
139150
my $mem = (100 - int(($memory[1] / $memory[0]) * 100));
140-
$data{'mem'} = [$mem,
141-
stats_text(($memory[4] ? 'body_used_cached_total' : 'body_used'),
142-
nice_size($memory[0] * 1024),
143-
nice_size(($memory[0] - $memory[1]) * 1024),
144-
($memory[4] ? nice_size($memory[4] * 1024) : undef)
145-
)];
151+
$data{'mem'} =
152+
[$mem, stats_text(($memory[4]
153+
? 'body_used_cached_total'
154+
: 'body_used'),
155+
nice_size($memory[0] * 1024),
156+
nice_size(($memory[0] - $memory[1]) * 1024),
157+
($memory[4] ? nice_size($memory[4] * 1024) : undef)
158+
)];
146159
$gadd->('mem', $mem);
147160
}
148161
if (@memory && $memory[2] && $memory[2] > 0) {
149162
my $virt = (100 - int(($memory[3] / $memory[2]) * 100));
150-
$data{'virt'} = [$virt,
151-
stats_text('body_used',
152-
nice_size(($memory[2]) * 1024),
153-
nice_size(($memory[2] - $memory[3]) * 1024)
154-
)];
163+
$data{'virt'} =
164+
[$virt, stats_text('body_used',
165+
nice_size(($memory[2]) * 1024),
166+
nice_size(($memory[2] - $memory[3]) * 1024)
167+
)];
155168
$gadd->('virt', $virt);
156169
}
157170
# Release memory
@@ -204,16 +217,53 @@ sub get_stats_now
204217
return \%data;
205218
}
206219

220+
# Return the on-disk path for persisted real-time history
207221
sub get_stats_history_file
208222
{
209223
return "$var_directory/modules/$current_theme/real-time-monitoring.json";
210224
}
211225

226+
# Deep clone given data structures
227+
sub deep_clone
228+
{
229+
my ($value) = @_;
230+
my $reftype = ref($value);
231+
return $value if (!$reftype);
232+
if ($reftype eq 'ARRAY') {
233+
return [map { deep_clone($_) } @{$value}];
234+
}
235+
if ($reftype eq 'HASH') {
236+
my %copy = map { $_ => deep_clone($value->{$_}) } keys %{$value};
237+
return \%copy;
238+
}
239+
return $value;
240+
}
241+
242+
# Read persisted history chunks and return merged graph data
212243
sub get_stats_history
213244
{
214245
my ($noempty) = @_;
215246
my $file = get_stats_history_file();
216-
my $graphs = jsonify(theme_read_file_contents($file));
247+
my $graphs = {};
248+
if (-r $file) {
249+
my $fh;
250+
if (!open($fh, '<', $file)) {
251+
error_stderr("Failed to open file '$file' for reading: $!");
252+
} elsif (!flock($fh, LOCK_SH)) {
253+
error_stderr("Failed to acquire shared lock on file '$file': $!");
254+
close($fh);
255+
} else {
256+
while (my $line = <$fh>) {
257+
$line =~ s/^\s+|\s+$//g;
258+
next if (!length($line));
259+
my $chunk = jsonify($line);
260+
next if (!keys %{$chunk});
261+
$graphs = merge_stats($graphs, $chunk);
262+
}
263+
flock($fh, LOCK_UN);
264+
close($fh);
265+
}
266+
}
217267
# No data yet
218268
if (!keys %{$graphs}) {
219269
unlink($file);
@@ -249,6 +299,7 @@ sub get_stats_history
249299
return { graphs => $graphs };
250300
}
251301

302+
# Keep only graph points within configured retention window
252303
sub trim_stats_history
253304
{
254305
my ($graphs) = @_;
@@ -263,17 +314,23 @@ sub trim_stats_history
263314
$n = 1200;
264315
}
265316
foreach my $k (keys %{$graphs}) {
266-
my @new_array;
267-
foreach my $entry (@{ $graphs->{$k} }) {
317+
my $graph = $graphs->{$k};
318+
if (ref($graph) ne 'ARRAY') {
319+
$graphs->{$k} = $get_default_graph->($k, $time);
320+
next;
321+
}
322+
my $write_idx = 0;
323+
foreach my $entry (@{$graph}) {
268324
if (defined($entry) && ($time - $entry->{'x'}) <= $n) {
269-
push(@new_array, $entry);
325+
$graph->[$write_idx++] = $entry;
270326
}
271327
}
272-
$graphs->{$k} =
273-
@new_array ? \@new_array : $get_default_graph->($k, $time);
328+
$#{$graph} = $write_idx - 1;
329+
$graphs->{$k} = $write_idx ? $graph : $get_default_graph->($k, $time);
274330
}
275331
}
276332

333+
# Merge graph arrays from second hash into first hash in-place
277334
sub merge_stats {
278335
my ($graphs1, $graphs2) = @_;
279336
foreach my $key (keys %{$graphs2}) {
@@ -288,25 +345,69 @@ sub merge_stats {
288345
return $graphs1;
289346
}
290347

348+
# Append one JSON graph chunk line to the history file
349+
sub append_stats_history_chunk
350+
{
351+
my ($file, $graphs_chunk) = @_;
352+
my $chunk_json = $json->encode($graphs_chunk);
353+
my $fh;
354+
if (!open($fh, '+>>', $file)) {
355+
error_stderr("Failed to open file '$file' for appending: $!");
356+
return 0;
357+
}
358+
if (!flock($fh, LOCK_EX)) {
359+
error_stderr("Failed to acquire exclusive lock on file '$file': $!");
360+
close($fh);
361+
return 0;
362+
}
363+
my $size = -s $fh;
364+
if ($size) {
365+
seek($fh, -1, 2);
366+
my $last_char = '';
367+
read($fh, $last_char, 1);
368+
print $fh "\n" if ($last_char ne "\n");
369+
}
370+
if (!print $fh $chunk_json . "\n") {
371+
error_stderr("Failed to append data to file '$file': $!");
372+
flock($fh, LOCK_UN);
373+
close($fh);
374+
return 0;
375+
}
376+
flock($fh, LOCK_UN);
377+
close($fh);
378+
return 1;
379+
}
380+
381+
# Compact chunked history into one merged snapshot line
382+
sub compact_stats_history_file
383+
{
384+
my ($file) = @_;
385+
my $history = get_stats_history(1);
386+
if ($history && $history->{'graphs'} && keys %{$history->{'graphs'}}) {
387+
theme_write_file_contents(
388+
$file, $json->encode($history->{'graphs'}) . "\n");
389+
} else {
390+
unlink($file);
391+
}
392+
}
393+
394+
# Persist one history chunk and trigger periodic file compaction
291395
sub save_stats_history
292396
{
293-
# Store complete dataset
397+
# Append chunk and periodically compact full history dataset
294398
my ($graphs_chunk) = @_;
295-
# Load stored data
296-
my $all_stats_histoy = get_stats_history()->{'graphs'};
297-
# Merge data
298-
my $graphs = merge_stats($all_stats_histoy, $graphs_chunk);
299-
# Trim dataset
300-
trim_stats_history($graphs);
301-
# Save data
302-
my $file = "$var_directory/modules/$current_theme/real-time-monitoring.json";
303-
theme_write_file_contents($file, $json->encode($graphs));
399+
my $file = get_stats_history_file();
400+
append_stats_history_chunk($file, $graphs_chunk);
401+
state $save_counter = 0;
402+
$save_counter++;
403+
if ($save_counter % 30 == 0 || (-s $file || 0) > 5 * 1024 * 1024) {
404+
compact_stats_history_file($file);
405+
}
304406
# Release memory
305407
undef($graphs_chunk);
306-
undef($all_stats_histoy);
307-
undef($graphs);
308408
}
309409

410+
# Persist non-graph "current stats" snapshot for quick reads
310411
sub save_stats_now
311412
{
312413
# Store stats now data
@@ -317,6 +418,7 @@ sub save_stats_now
317418
$json->encode($stats_now));
318419
}
319420

421+
# Cache dynamic feature availability checks per module/type pair
320422
sub has_stats
321423
{
322424
my ($mod, $type) = @_;
@@ -329,6 +431,7 @@ sub has_stats
329431
return $cache{$cached_func};
330432
}
331433

434+
# Detect whether virtual memory metrics are available
332435
sub has_stats_virt
333436
{
334437
state $has_virt_memory;
@@ -344,11 +447,13 @@ sub has_stats_virt
344447
return 0;
345448
}
346449

450+
# Prefer /proc stats and fall back to netstat stats collection
347451
sub stats_network
348452
{
349453
return stats_network_proc() || stats_network_netstat();
350454
}
351455

456+
# Read network I/O from /proc/net/dev and compute per-second rates
352457
sub stats_network_proc
353458
{
354459
# Return if not available
@@ -390,7 +495,7 @@ sub stats_network_proc
390495
my $wait_interval = 0.25;
391496
select(undef, undef, undef, $wait_interval);
392497
$results = stats_network_proc(1);
393-
# Parse data after dalay
498+
# Parse data after delay
394499
foreach (keys %$results) {
395500
$rbytes2 += $results->{$_}->{'rbytes'};
396501
$tbytes2 += $results->{$_}->{'tbytes'};
@@ -405,6 +510,7 @@ sub stats_network_proc
405510
return \%result;
406511
}
407512

513+
# Fallback network I/O collector using netstat snapshots
408514
sub stats_network_netstat
409515
{
410516
state $no_stats_network_netstat = 0;
@@ -439,7 +545,8 @@ sub stats_network_netstat
439545
# Capture network statistics after the interval
440546
my $after_stats = $get_net_stats->();
441547
# Calculate the total received and transmitted bytes
442-
my ($total_rx_before, $total_tx_before, $total_rx_after, $total_tx_after) = (0, 0, 0, 0);
548+
my ($total_rx_before, $total_tx_before,
549+
$total_rx_after, $total_tx_after) = (0, 0, 0, 0);
443550
foreach my $iface (keys %$before_stats) {
444551
$total_rx_before += $before_stats->{$iface}->[0];
445552
$total_tx_before += $before_stats->{$iface}->[1];

0 commit comments

Comments
 (0)