Skip to content

Commit b7a5455

Browse files
hyperpolymathclaude
andcommitted
feat: Add port-endoscope — automatic port recovery tool
Rust CLI tool that detects stuck, orphaned, or zombie processes holding TCP/UDP ports and reclaims them. Reads /proc/net/tcp and /proc/*/fd directly (no external dependencies on ss/lsof). Commands: - check <port>: show PID, process name, state, age, fd - free <port>: SIGTERM → grace period → SIGKILL, verify release - watch <port> --allow <process>: auto-reclaim on zombie/TIME_WAIT - status: all listening ports with stuck-holder detection Fits the hospital model as an Operating Room diagnostic instrument. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8a3502d commit b7a5455

6 files changed

Lines changed: 806 additions & 1 deletion

File tree

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-License-Identifier: PMPL-1.0-or-later
22
[workspace]
3-
members = ["hardware-crash-team", "clinician", "contracts-rust"]
3+
members = ["hardware-crash-team", "clinician", "contracts-rust", "port-endoscope"]
44
exclude = ["personal-sysadmin", "displace", "panoptes", "_pathroot/rust/mustfile-orchestrator", "ambulances/network"]
55
resolver = "2"

port-endoscope/Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-License-Identifier: PMPL-1.0-or-later
2+
[package]
3+
name = "port-endoscope"
4+
version = "0.1.0"
5+
edition = "2021"
6+
authors = ["Jonathan D.A. Jewell <j.d.a.jewell@open.ac.uk>"]
7+
description = "Automatic port recovery daemon — detects stuck/orphaned ports and reclaims them"
8+
license = "PMPL-1.0-or-later"
9+
repository = "https://github.com/hyperpolymath/ambientops"
10+
11+
[dependencies]
12+
clap = { version = "4", features = ["derive"] }
13+
nix = { version = "0.29", features = ["signal", "process"] }
14+
serde = { version = "1", features = ["derive"] }
15+
serde_json = "1"
16+
anyhow = "1"
17+
libc = "0.2"

port-endoscope/src/main.rs

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
// SPDX-License-Identifier: PMPL-1.0-or-later
2+
// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
3+
4+
//! # port-endoscope
5+
//!
6+
//! Automatic port recovery tool. Detects stuck, orphaned, or zombie processes
7+
//! holding TCP/UDP ports and reclaims them — either on demand or as a background
8+
//! watcher.
9+
//!
10+
//! ## Problem
11+
//!
12+
//! When you close a dev server (deno, node, python, etc.), the port often stays
13+
//! locked: TIME_WAIT state, zombie process, or a child that didn't get SIGTERM.
14+
//! Restarting the server fails with "address already in use". Every developer
15+
//! hits this; no tool solves it automatically.
16+
//!
17+
//! ## Solution
18+
//!
19+
//! port-endoscope provides three modes, plus a `status` overview of all listening ports:
20+
//!
21+
//! - **check**: show what holds a port (PID, process name, state, age)
22+
//! - **free**: kill the holder and reclaim the port
23+
//! - **watch**: continuously monitor a port and auto-reclaim when the expected
24+
//! process dies but the port stays locked
25+
//!
26+
//! ## Usage
27+
//!
28+
//! ```bash
29+
//! port-endoscope check 6860 # Who holds port 6860?
30+
//! port-endoscope free 6860 # Kill it and reclaim
31+
//! port-endoscope free 6860 --grace 5 # SIGTERM, wait 5s, then SIGKILL
32+
//! port-endoscope watch 6860 --allow deno # Reclaim the port from anything that isn't deno
33+
//! port-endoscope status # Show all listening ports (add --stuck for stuck holders only)
34+
//! ```
35+
36+
use anyhow::{Context, Result};
37+
use clap::{Parser, Subcommand};
38+
use std::collections::HashMap;
39+
40+
mod port;
41+
mod process;
42+
43+
// Root of the command-line interface: `main` calls `Cli::parse()` and dispatches on `command`.
// NOTE(review): `about` is given without a value in the `#[command(...)]` attribute, so the
// help description may come from the doc comment below or from the crate description in
// Cargo.toml — confirm against clap's derive reference before editing either.
/// Automatic port recovery tool — never suffer "address already in use" again
44+
#[derive(Parser)]
45+
#[command(name = "port-endoscope", version, about)]
46+
struct Cli {
47+
#[command(subcommand)]
48+
command: Command,
49+
}
50+
51+
#[derive(Subcommand)]
52+
enum Command {
53+
/// Show what process holds a port
54+
Check {
55+
/// Port number to inspect
56+
port: u16,
57+
/// Protocol: tcp (default) or udp
58+
#[arg(long, default_value = "tcp")]
59+
proto: String,
60+
},
61+
62+
/// Kill the process holding a port and reclaim it
63+
Free {
64+
/// Port number to free
65+
port: u16,
66+
/// Seconds to wait between SIGTERM and SIGKILL (0 = immediate SIGKILL)
67+
#[arg(long, default_value = "3")]
68+
grace: u64,
69+
/// Protocol: tcp (default) or udp
70+
#[arg(long, default_value = "tcp")]
71+
proto: String,
72+
/// Don't actually kill — just show what would happen
73+
#[arg(long)]
74+
dry_run: bool,
75+
},
76+
77+
/// Watch a port and auto-reclaim when the expected process dies
78+
Watch {
79+
/// Port number to watch
80+
port: u16,
81+
/// Only allow this process name on the port (kill anything else)
82+
#[arg(long, value_name = "PROCESS")]
83+
allow: Option<String>,
84+
/// Poll interval in seconds
85+
#[arg(long, default_value = "2")]
86+
interval: u64,
87+
/// Grace period before SIGKILL (seconds)
88+
#[arg(long, default_value = "3")]
89+
grace: u64,
90+
},
91+
92+
/// Show all listening ports with process info and detect stuck holders
93+
Status {
94+
/// Only show ports that appear stuck (TIME_WAIT, zombie, or no listener)
95+
#[arg(long)]
96+
stuck: bool,
97+
},
98+
}
99+
100+
fn main() -> Result<()> {
101+
let cli = Cli::parse();
102+
103+
match cli.command {
104+
Command::Check { port, proto } => cmd_check(port, &proto),
105+
Command::Free { port, grace, proto, dry_run } => cmd_free(port, grace, &proto, dry_run),
106+
Command::Watch { port, allow, interval, grace } => cmd_watch(port, allow, interval, grace),
107+
Command::Status { stuck } => cmd_status(stuck),
108+
}
109+
}
110+
111+
/// Show what process holds a port.
112+
fn cmd_check(port: u16, proto: &str) -> Result<()> {
113+
let holders = port::find_port_holders(port, proto)
114+
.context("Failed to query port holders")?;
115+
116+
if holders.is_empty() {
117+
println!("Port {}/{} is free — no process holds it.", port, proto);
118+
return Ok(());
119+
}
120+
121+
println!("Port {}/{}:", port, proto);
122+
for h in &holders {
123+
let proc_info = process::get_process_info(h.pid);
124+
let zombie_marker = if proc_info.is_zombie { " [ZOMBIE]" } else { "" };
125+
let age_str = match proc_info.age_secs {
126+
Some(age) => format!(" (running {}s)", age),
127+
None => String::new(),
128+
};
129+
130+
println!(
131+
" PID {:<8} {:<20} state={:<12} fd={}{}{} ",
132+
h.pid,
133+
proc_info.name,
134+
h.socket_state,
135+
h.fd.map_or("-".to_string(), |fd| fd.to_string()),
136+
age_str,
137+
zombie_marker,
138+
);
139+
}
140+
141+
Ok(())
142+
}
143+
144+
/// Kill the process holding a port and reclaim it.
145+
fn cmd_free(port: u16, grace_secs: u64, proto: &str, dry_run: bool) -> Result<()> {
146+
let holders = port::find_port_holders(port, proto)
147+
.context("Failed to query port holders")?;
148+
149+
if holders.is_empty() {
150+
println!("Port {}/{} is already free.", port, proto);
151+
return Ok(());
152+
}
153+
154+
// Deduplicate by PID (a process may hold multiple FDs on the same port)
155+
let mut seen_pids: HashMap<u32, bool> = HashMap::new();
156+
157+
for h in &holders {
158+
if seen_pids.contains_key(&h.pid) {
159+
continue;
160+
}
161+
seen_pids.insert(h.pid, true);
162+
163+
let proc_info = process::get_process_info(h.pid);
164+
165+
if dry_run {
166+
println!(
167+
"[dry-run] Would kill PID {} ({}) holding port {}/{}",
168+
h.pid, proc_info.name, port, proto
169+
);
170+
continue;
171+
}
172+
173+
println!(
174+
"Freeing port {}/{}: killing PID {} ({})...",
175+
port, proto, h.pid, proc_info.name
176+
);
177+
178+
match process::kill_gracefully(h.pid, grace_secs) {
179+
Ok(()) => println!(" PID {} terminated.", h.pid),
180+
Err(e) => eprintln!(" Failed to kill PID {}: {}", h.pid, e),
181+
}
182+
}
183+
184+
// Verify the port is now free
185+
if !dry_run {
186+
std::thread::sleep(std::time::Duration::from_millis(500));
187+
let remaining = port::find_port_holders(port, proto).unwrap_or_default();
188+
if remaining.is_empty() {
189+
println!("Port {}/{} is now free.", port, proto);
190+
} else {
191+
eprintln!(
192+
"Warning: port {}/{} still has {} holder(s) — may be in TIME_WAIT.",
193+
port, proto, remaining.len()
194+
);
195+
}
196+
}
197+
198+
Ok(())
199+
}
200+
201+
/// Watch a port and auto-reclaim when the expected process dies.
202+
fn cmd_watch(port: u16, allowed_process: Option<String>, interval_secs: u64, grace: u64) -> Result<()> {
203+
println!(
204+
"Watching port {} (poll every {}s, grace {}s{})",
205+
port,
206+
interval_secs,
207+
grace,
208+
allowed_process
209+
.as_ref()
210+
.map_or(String::new(), |p| format!(", allow: {}", p)),
211+
);
212+
213+
let interval = std::time::Duration::from_secs(interval_secs);
214+
215+
loop {
216+
let holders = port::find_port_holders(port, "tcp").unwrap_or_default();
217+
218+
for h in &holders {
219+
let info = process::get_process_info(h.pid);
220+
221+
// Check if this holder is a zombie or TIME_WAIT orphan
222+
let is_zombie = info.is_zombie;
223+
let is_time_wait = h.socket_state == "TIME-WAIT" || h.socket_state == "TIME_WAIT";
224+
let is_wrong_process = allowed_process.as_ref().map_or(false, |allowed| {
225+
!info.name.contains(allowed.as_str())
226+
});
227+
228+
if is_zombie || is_time_wait || is_wrong_process {
229+
let reason = if is_zombie {
230+
"zombie process"
231+
} else if is_time_wait {
232+
"TIME_WAIT orphan"
233+
} else {
234+
"unauthorized process"
235+
};
236+
237+
eprintln!(
238+
"[port-endoscope] Port {} held by PID {} ({}) — {} — reclaiming...",
239+
port, h.pid, info.name, reason
240+
);
241+
242+
if !is_time_wait {
243+
// TIME_WAIT sockets don't have a killable process
244+
match process::kill_gracefully(h.pid, grace) {
245+
Ok(()) => eprintln!("[port-endoscope] PID {} terminated.", h.pid),
246+
Err(e) => eprintln!("[port-endoscope] Failed to kill PID {}: {}", h.pid, e),
247+
}
248+
} else {
249+
eprintln!(
250+
"[port-endoscope] TIME_WAIT on port {} — will clear in ~60s (kernel handles this).",
251+
port
252+
);
253+
}
254+
}
255+
}
256+
257+
std::thread::sleep(interval);
258+
}
259+
}
260+
261+
/// Show all listening ports with process info.
262+
fn cmd_status(stuck_only: bool) -> Result<()> {
263+
let all_ports = port::find_all_listening()
264+
.context("Failed to enumerate listening ports")?;
265+
266+
if all_ports.is_empty() {
267+
println!("No listening ports found.");
268+
return Ok(());
269+
}
270+
271+
println!(
272+
"{:<8} {:<8} {:<20} {:<12} {:<10} {}",
273+
"PORT", "PID", "PROCESS", "STATE", "AGE", "FLAGS"
274+
);
275+
println!("{}", "-".repeat(72));
276+
277+
for h in &all_ports {
278+
let info = process::get_process_info(h.pid);
279+
let age_str = info.age_secs.map_or("-".to_string(), |a| format!("{}s", a));
280+
281+
let mut flags = Vec::new();
282+
if info.is_zombie {
283+
flags.push("ZOMBIE");
284+
}
285+
if h.socket_state == "TIME-WAIT" || h.socket_state == "TIME_WAIT" {
286+
flags.push("TIME_WAIT");
287+
}
288+
289+
let is_stuck = info.is_zombie
290+
|| h.socket_state == "TIME-WAIT"
291+
|| h.socket_state == "TIME_WAIT";
292+
293+
if stuck_only && !is_stuck {
294+
continue;
295+
}
296+
297+
println!(
298+
"{:<8} {:<8} {:<20} {:<12} {:<10} {}",
299+
h.local_port,
300+
h.pid,
301+
info.name,
302+
h.socket_state,
303+
age_str,
304+
flags.join(", "),
305+
);
306+
}
307+
308+
Ok(())
309+
}

0 commit comments

Comments
 (0)