diff --git a/crates/gitlawb-node/src/api/mod.rs b/crates/gitlawb-node/src/api/mod.rs
index f1de9f2..3d2b0fc 100644
--- a/crates/gitlawb-node/src/api/mod.rs
+++ b/crates/gitlawb-node/src/api/mod.rs
@@ -11,6 +11,7 @@ pub mod peers;
 pub mod protect;
 pub mod pulls;
 pub mod register;
+pub mod replicas;
 pub mod repos;
 pub mod resolve;
 pub mod stars;
diff --git a/crates/gitlawb-node/src/api/replicas.rs b/crates/gitlawb-node/src/api/replicas.rs
new file mode 100644
index 0000000..61f802a
--- /dev/null
+++ b/crates/gitlawb-node/src/api/replicas.rs
@@ -0,0 +1,221 @@
+//! Repo replica registration API.
+//!
+//! Lets a node tell the origin "I'm hosting a replica of your repo." The
+//! origin records the (replica DID, replica URL) pair and exposes the list
+//! publicly so anyone can see how many nodes are mirroring a given repo.
+//!
+//! Endpoints:
+//! - `PUT /api/v1/repos/:owner/:repo/replicas` (auth) register
+//! - `DELETE /api/v1/repos/:owner/:repo/replicas` (auth) unregister
+//! - `GET /api/v1/repos/:owner/:repo/replicas` (public) list
+//!
+//! Auth model: the caller's DID (verified via HTTP Signatures) is the
+//! replica's identity. There's no separate "claim this URL" check — replicas
+//! self-report their public URL. Operators viewing the list should treat
+//! replica URLs as advisory until they actually reach out and verify.
+
+use axum::extract::{Extension, Path, State};
+use axum::http::StatusCode;
+use axum::Json;
+use serde::Deserialize;
+
+use crate::auth::AuthenticatedDid;
+use crate::error::{AppError, Result};
+use crate::state::AppState;
+
+#[derive(Debug, Deserialize)]
+pub struct RegisterReplicaRequest {
+    /// Publicly reachable URL of the replica node (e.g. `https://my-node.example.com`).
+    pub url: String,
+}
+
+/// PUT /api/v1/repos/:owner/:repo/replicas
+/// Idempotent — first registration returns 201, subsequent ones update the URL and return 200.
+pub async fn register_replica(
+    State(state): State,
+    Extension(auth): Extension,
+    Path((owner, repo)): Path<(String, String)>,
+    Json(req): Json,
+) -> Result<(StatusCode, Json)> {
+    validate_replica_url(&req.url)?;
+
+    let record = state
+        .db
+        .get_repo(&owner, &repo)
+        .await?
+        .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?;
+
+    let replica_did = &auth.0;
+
+    // Don't let an owner register themselves as a replica of their own repo.
+    if replica_did == &record.owner_did {
+        return Err(AppError::BadRequest(
+            "the repo owner is not a replica of their own repo".into(),
+        ));
+    }
+
+    let inserted = state
+        .db
+        .register_replica(&record.id, replica_did, &req.url)
+        .await?;
+    let count = state.db.count_replicas(&record.id).await?;
+
+    let status = if inserted {
+        StatusCode::CREATED
+    } else {
+        StatusCode::OK
+    };
+
+    tracing::info!(
+        repo = %repo,
+        replica = %replica_did,
+        url = %req.url,
+        "replica registered"
+    );
+
+    Ok((
+        status,
+        Json(serde_json::json!({
+            "status": "registered",
+            "repo": format!("{owner}/{repo}"),
+            "replica_did": replica_did,
+            "replica_url": req.url,
+            "replica_count": count,
+        })),
+    ))
+}
+
+/// DELETE /api/v1/repos/:owner/:repo/replicas
+/// Idempotent — no error if the caller wasn't registered.
+pub async fn unregister_replica(
+    State(state): State,
+    Extension(auth): Extension,
+    Path((owner, repo)): Path<(String, String)>,
+) -> Result> {
+    let record = state
+        .db
+        .get_repo(&owner, &repo)
+        .await?
+        .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?;
+
+    let replica_did = &auth.0;
+    state.db.unregister_replica(&record.id, replica_did).await?;
+    let count = state.db.count_replicas(&record.id).await?;
+
+    tracing::info!(repo = %repo, replica = %replica_did, "replica unregistered");
+
+    Ok(Json(serde_json::json!({
+        "status": "unregistered",
+        "repo": format!("{owner}/{repo}"),
+        "replica_count": count,
+    })))
+}
+
+/// GET /api/v1/repos/:owner/:repo/replicas
+/// Public — returns the list of replicas (DID + URL + registration timestamp).
+pub async fn list_replicas(
+    State(state): State,
+    Path((owner, repo)): Path<(String, String)>,
+) -> Result> {
+    let record = state
+        .db
+        .get_repo(&owner, &repo)
+        .await?
+        .ok_or_else(|| AppError::RepoNotFound(format!("{owner}/{repo}")))?;
+
+    let replicas = state.db.list_replicas(&record.id).await?;
+
+    Ok(Json(serde_json::json!({
+        "repo": format!("{owner}/{repo}"),
+        "replica_count": replicas.len(),
+        "replicas": replicas,
+    })))
+}
+
+/// Basic URL hygiene — non-empty, at most 512 bytes, `http://` or `https://`
+/// scheme, a non-empty host, and no whitespace or control characters.
+///
+/// Purely syntactic: the URL is never resolved or contacted here.
+fn validate_replica_url(url: &str) -> Result<()> {
+    if url.is_empty() {
+        return Err(AppError::BadRequest("replica url is empty".into()));
+    }
+    if url.len() > 512 {
+        return Err(AppError::BadRequest("replica url exceeds 512 chars".into()));
+    }
+    if !(url.starts_with("http://") || url.starts_with("https://")) {
+        return Err(AppError::BadRequest(
+            "replica url must start with http:// or https://".into(),
+        ));
+    }
+    // No spaces / control chars / newlines.
+    if url.chars().any(|c| c.is_whitespace() || c.is_control()) {
+        return Err(AppError::BadRequest(
+            "replica url contains whitespace or control characters".into(),
+        ));
+    }
+    // The authority runs up to the first '/', '?' or '#'. Strip any `user@`
+    // prefix and require something before an optional `:port`, so inputs
+    // like `https://` or `http://:7545` are rejected.
+    let rest = url.split_once("://").map_or("", |(_, rest)| rest);
+    let authority = rest.split(['/', '?', '#']).next().unwrap_or_default();
+    let host = authority.rsplit('@').next().unwrap_or_default();
+    if host.is_empty() || host.starts_with(':') {
+        return Err(AppError::BadRequest("replica url has no host".into()));
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn url_accepts_normal_https() {
+        validate_replica_url("https://node.example.com").unwrap();
+        validate_replica_url("https://my-node.example.com:7545").unwrap();
+        validate_replica_url("http://localhost:7545").unwrap();
+    }
+
+    #[test]
+    fn url_rejects_empty() {
+        assert!(validate_replica_url("").is_err());
+    }
+
+    #[test]
+    fn url_rejects_non_http_scheme() {
+        for bad in [
+            "ftp://host",
+            "file:///etc/passwd",
+            "javascript:alert(1)",
+            "/path",
+        ] {
+            assert!(validate_replica_url(bad).is_err(), "{bad:?} must reject");
+        }
+    }
+
+    #[test]
+    fn url_rejects_whitespace_and_control() {
+        for bad in [
+            "https://host .com",
+            "https://host\n.com",
+            "https://host\t.com",
+            "https://host\0evil.com",
+        ] {
+            assert!(validate_replica_url(bad).is_err(), "{bad:?} must reject");
+        }
+    }
+
+    #[test]
+    fn url_rejects_overlong() {
+        let long = format!("https://{}.com", "a".repeat(600));
+        assert!(validate_replica_url(&long).is_err());
+    }
+
+    #[test]
+    fn url_rejects_missing_host() {
+        for bad in ["https://", "http:///path", "https://:7545", "https://user@"] {
+            assert!(validate_replica_url(bad).is_err(), "{bad:?} must reject");
+        }
+    }
+}
diff --git a/crates/gitlawb-node/src/db/mod.rs b/crates/gitlawb-node/src/db/mod.rs
index f1d2421..1aa11e7 100644
--- a/crates/gitlawb-node/src/db/mod.rs
+++ b/crates/gitlawb-node/src/db/mod.rs
@@ -100,6 +100,13 @@ pub struct PeerRecord {
     pub announced_at: String,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RepoReplica {
+    pub replica_did: String,
+    pub replica_url: String,
+    pub registered_at: String,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct PinnedCidRecord {
     pub sha256_hex: String,
@@ -379,6 +386,21 @@ impl Db {
             )"#,
             "CREATE INDEX IF NOT EXISTS idx_repo_stars_repo ON repo_stars(repo_id)",
             "CREATE INDEX IF NOT EXISTS idx_repo_stars_agent ON repo_stars(agent_did)",
+            // ── Repo replicas (network resilience) ──────────────────────────
+            // Tracks which
nodes are hosting a replica of a repo. Populated
+            // when a replica node calls PUT /api/v1/repos/{owner}/{repo}/replicas
+            // on the origin. Public via GET on the same path — anyone can see
+            // how many nodes are mirroring a given repo.
+            r#"CREATE TABLE IF NOT EXISTS repo_replicas (
+                id TEXT NOT NULL PRIMARY KEY,
+                repo_id TEXT NOT NULL,
+                replica_did TEXT NOT NULL,
+                replica_url TEXT NOT NULL,
+                registered_at TEXT NOT NULL,
+                UNIQUE(repo_id, replica_did)
+            )"#,
+            "CREATE INDEX IF NOT EXISTS idx_repo_replicas_repo ON repo_replicas(repo_id)",
+            "CREATE INDEX IF NOT EXISTS idx_repo_replicas_did ON repo_replicas(replica_did)",
             // ── PR comments ─────────────────────────────────────────────────
             r#"CREATE TABLE IF NOT EXISTS pr_comments (
                 id TEXT NOT NULL PRIMARY KEY,
@@ -1790,6 +1812,90 @@ impl Db {
         Ok(row.get::("cnt"))
     }
 
+    // ── Repo replicas ──────────────────────────────────────────────────
+
+    /// Register a replica for a repo. Returns `true` if a new row was inserted,
+    /// `false` if the replica was already registered. The stored URL is updated
+    /// either way; `registered_at` keeps the time of the first registration.
+    pub async fn register_replica(
+        &self,
+        repo_id: &str,
+        replica_did: &str,
+        replica_url: &str,
+    ) -> Result {
+        let now = Utc::now().to_rfc3339();
+        let id = format!("{repo_id}:{replica_did}");
+        // `ON CONFLICT ... DO UPDATE` reports one affected row for both the
+        // insert and the update path, so it cannot tell a new registration
+        // from a repeat. `DO NOTHING` affects zero rows on conflict.
+        let result = sqlx::query(
+            "INSERT INTO repo_replicas (id, repo_id, replica_did, replica_url, registered_at)
+             VALUES ($1, $2, $3, $4, $5)
+             ON CONFLICT (repo_id, replica_did) DO NOTHING",
+        )
+        .bind(&id)
+        .bind(repo_id)
+        .bind(replica_did)
+        .bind(replica_url)
+        .bind(&now)
+        .execute(&self.pool)
+        .await?;
+        let inserted = result.rows_affected() > 0;
+        if !inserted {
+            // Already registered: refresh the self-reported URL in place.
+            sqlx::query(
+                "UPDATE repo_replicas SET replica_url = $1
+                 WHERE repo_id = $2 AND replica_did = $3",
+            )
+            .bind(replica_url)
+            .bind(repo_id)
+            .bind(replica_did)
+            .execute(&self.pool)
+            .await?;
+        }
+        Ok(inserted)
+    }
+
+    /// Unregister a replica. Idempotent.
+    pub async fn unregister_replica(&self, repo_id: &str, replica_did: &str) -> Result<()> {
+        sqlx::query("DELETE FROM repo_replicas WHERE repo_id = $1 AND replica_did = $2")
+            .bind(repo_id)
+            .bind(replica_did)
+            .execute(&self.pool)
+            .await?;
+        Ok(())
+    }
+
+    /// List every replica of a repo, ordered by `registered_at` ascending (oldest first).
+    pub async fn list_replicas(&self, repo_id: &str) -> Result> {
+        let rows = sqlx::query(
+            "SELECT replica_did, replica_url, registered_at
+             FROM repo_replicas
+             WHERE repo_id = $1
+             ORDER BY registered_at ASC",
+        )
+        .bind(repo_id)
+        .fetch_all(&self.pool)
+        .await?;
+        Ok(rows
+            .into_iter()
+            .map(|r| RepoReplica {
+                replica_did: r.get("replica_did"),
+                replica_url: r.get("replica_url"),
+                registered_at: r.get("registered_at"),
+            })
+            .collect())
+    }
+
+    /// Count the `repo_replicas` rows for a repo; yields 0 when none are registered.
+    pub async fn count_replicas(&self, repo_id: &str) -> Result {
+        let row = sqlx::query("SELECT COUNT(*) as cnt FROM repo_replicas WHERE repo_id = $1")
+            .bind(repo_id)
+            .fetch_one(&self.pool)
+            .await?;
+        Ok(row.get::("cnt"))
+    }
+
     /// Check whether a specific agent has starred a repo.
#[allow(dead_code)] pub async fn is_starred(&self, repo_id: &str, agent_did: &str) -> Result { diff --git a/crates/gitlawb-node/src/server.rs b/crates/gitlawb-node/src/server.rs index f726308..84887ec 100644 --- a/crates/gitlawb-node/src/server.rs +++ b/crates/gitlawb-node/src/server.rs @@ -14,7 +14,7 @@ use tracing::Level; use crate::api::{ agents, arweave, bounties, certs, changelog, events, ipfs, issues, labels, peers, protect, - pulls, register, repos, resolve, stars, tasks, webhooks, + pulls, register, replicas, repos, resolve, stars, tasks, webhooks, }; use crate::auth; use crate::state::AppState; @@ -95,6 +95,14 @@ pub fn build_router(state: AppState) -> Router { "/api/v1/repos/{owner}/{repo}/star", axum::routing::delete(stars::unstar_repo), ) + .route( + "/api/v1/repos/{owner}/{repo}/replicas", + axum::routing::put(replicas::register_replica), + ) + .route( + "/api/v1/repos/{owner}/{repo}/replicas", + axum::routing::delete(replicas::unregister_replica), + ) .route("/api/v1/repos/{owner}/{repo}/fork", post(repos::fork_repo)) .route( "/api/v1/repos/{owner}/{repo}/labels", @@ -278,6 +286,10 @@ pub fn build_router(state: AppState) -> Router { "/api/v1/repos/{owner}/{repo}/star", get(stars::get_star_status), ) + .route( + "/api/v1/repos/{owner}/{repo}/replicas", + get(replicas::list_replicas), + ) .route("/{owner}/{repo}/info/refs", get(repos::git_info_refs)); // git-upload-pack (clone/fetch) — same raised body limit as receive-pack so diff --git a/crates/gl/src/repo.rs b/crates/gl/src/repo.rs index 431b4dc..455a55f 100644 --- a/crates/gl/src/repo.rs +++ b/crates/gl/src/repo.rs @@ -118,6 +118,35 @@ pub enum RepoCmd { #[arg(long)] json: bool, }, + /// Register this node as a replica of someone else's repo + ReplicaRegister { + /// Repository in owner/repo format + repo: String, + /// Publicly-reachable URL of YOUR node (the one hosting the replica) + #[arg(long)] + url: String, + /// URL of the origin node (where the repo lives) + #[arg(long, default_value = 
"https://node.gitlawb.com", env = "GITLAWB_NODE")] + node: String, + #[arg(long)] + dir: Option, + }, + /// Unregister this node as a replica + ReplicaUnregister { + /// Repository in owner/repo format + repo: String, + #[arg(long, default_value = "https://node.gitlawb.com", env = "GITLAWB_NODE")] + node: String, + #[arg(long)] + dir: Option, + }, + /// List nodes currently mirroring a repo + Replicas { + /// Repository in owner/repo format + repo: String, + #[arg(long, default_value = "https://node.gitlawb.com", env = "GITLAWB_NODE")] + node: String, + }, } pub async fn run(args: RepoArgs) -> Result<()> { @@ -165,6 +194,16 @@ pub async fn run(args: RepoArgs) -> Result<()> { dir, json, } => cmd_owner(repo, node, dir, json).await, + RepoCmd::ReplicaRegister { + repo, + url, + node, + dir, + } => cmd_replica_register(repo, url, node, dir).await, + RepoCmd::ReplicaUnregister { repo, node, dir } => { + cmd_replica_unregister(repo, node, dir).await + } + RepoCmd::Replicas { repo, node } => cmd_replicas(repo, node).await, } } @@ -326,6 +365,120 @@ async fn cmd_info(repo: String, node: String, dir: Option) -> Result<() if let Some(desc) = r["description"].as_str().filter(|s| !s.is_empty()) { println!(" Desc: {desc}"); } + + // Replica count — failure to fetch is non-fatal (older nodes don't expose this). + if let Ok(resp) = client + .get(&format!("/api/v1/repos/{owner}/{name}/replicas")) + .await + { + if resp.status().is_success() { + if let Ok(json) = resp.json::().await { + if let Some(count) = json["replica_count"].as_i64() { + println!(" Replicas: {count}"); + } + } + } + } + + Ok(()) +} + +async fn cmd_replica_register( + repo: String, + url: String, + node: String, + dir: Option, +) -> Result<()> { + let (owner, name) = repo + .split_once('/') + .map(|(o, n)| (o.to_string(), n.to_string())) + .context("use owner/repo format (e.g. 
did:key:.../myrepo)")?; + + let kp = load_keypair_from_dir(dir.as_deref()) + .context("identity not found — run `gl identity new` first")?; + let client = NodeClient::new(&node, Some(kp)); + + let body = serde_json::to_vec(&json!({ "url": url }))?; + let resp = client + .put(&format!("/api/v1/repos/{owner}/{name}/replicas"), &body) + .await + .context("failed to connect to origin node")?; + + let status = resp.status(); + let body: Value = resp.json().await.unwrap_or_default(); + if !status.is_success() { + let msg = body["message"].as_str().unwrap_or("unknown error"); + anyhow::bail!("replica register failed ({status}): {msg}"); + } + + let count = body["replica_count"].as_i64().unwrap_or(0); + println!("Registered as replica of {owner}/{name}"); + println!(" Your URL: {url}"); + println!(" Replicas: {count} total"); + println!(); + println!("Next: ensure your node has a copy of the repo —"); + println!(" git clone gitlawb://{owner}/{name}"); + Ok(()) +} + +async fn cmd_replica_unregister(repo: String, node: String, dir: Option) -> Result<()> { + let (owner, name) = repo + .split_once('/') + .map(|(o, n)| (o.to_string(), n.to_string())) + .context("use owner/repo format")?; + + let kp = load_keypair_from_dir(dir.as_deref()) + .context("identity not found — run `gl identity new` first")?; + let client = NodeClient::new(&node, Some(kp)); + + let resp = client + .delete(&format!("/api/v1/repos/{owner}/{name}/replicas"), b"") + .await + .context("failed to connect to origin node")?; + + let status = resp.status(); + let body: Value = resp.json().await.unwrap_or_default(); + if !status.is_success() { + let msg = body["message"].as_str().unwrap_or("unknown error"); + anyhow::bail!("replica unregister failed ({status}): {msg}"); + } + + let count = body["replica_count"].as_i64().unwrap_or(0); + println!("Unregistered as replica of {owner}/{name} ({count} replicas remaining)"); + Ok(()) +} + +async fn cmd_replicas(repo: String, node: String) -> Result<()> { + let (owner, 
name) = repo + .split_once('/') + .map(|(o, n)| (o.to_string(), n.to_string())) + .context("use owner/repo format")?; + + let client = NodeClient::new(&node, None); + let resp = client + .get(&format!("/api/v1/repos/{owner}/{name}/replicas")) + .await + .context("failed to connect to node")?; + let status = resp.status(); + let body: Value = resp.json().await.unwrap_or_default(); + if !status.is_success() { + let msg = body["message"].as_str().unwrap_or("unknown error"); + anyhow::bail!("replicas list failed ({status}): {msg}"); + } + + let count = body["replica_count"].as_i64().unwrap_or(0); + println!("{owner}/{name}: {count} replicas"); + if let Some(arr) = body["replicas"].as_array() { + for r in arr { + let did = r["replica_did"].as_str().unwrap_or("?"); + let url = r["replica_url"].as_str().unwrap_or("?"); + let registered = r["registered_at"] + .as_str() + .map(|s| &s[..10.min(s.len())]) + .unwrap_or("?"); + println!(" {registered} {did} → {url}"); + } + } Ok(()) }