Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/gitlawb-node/src/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod peers;
pub mod protect;
pub mod pulls;
pub mod register;
pub mod replicas;
pub mod repos;
pub mod resolve;
pub mod stars;
Expand Down
202 changes: 202 additions & 0 deletions crates/gitlawb-node/src/api/replicas.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
//! Repo replica registration API.
//!
//! Lets a node tell the origin "I'm hosting a replica of your repo." The
//! origin records the (replica DID, replica URL) pair and exposes the list
//! publicly so anyone can see how many nodes are mirroring a given repo.
//!
//! Endpoints:
//! - `PUT /api/v1/repos/:owner/:repo/replicas` (auth) register
//! - `DELETE /api/v1/repos/:owner/:repo/replicas` (auth) unregister
//! - `GET /api/v1/repos/:owner/:repo/replicas` (public) list
//!
//! Auth model: the caller's DID (verified via HTTP Signatures) is the
//! replica's identity. There's no separate "claim this URL" check — replicas
//! self-report their public URL. Operators viewing the list should treat
//! replica URLs as advisory until they actually reach out and verify.

use axum::extract::{Extension, Path, State};
use axum::http::StatusCode;
use axum::Json;
use serde::Deserialize;

use crate::auth::AuthenticatedDid;
use crate::error::{AppError, Result};
use crate::state::AppState;

/// JSON request body for `PUT /api/v1/repos/:owner/:repo/replicas`.
///
/// Only the replica's URL travels in the body. The replica's identity is not
/// part of it: the `register_replica` handler takes that from the authenticated
/// DID attached to the request.
#[derive(Debug, Deserialize)]
pub struct RegisterReplicaRequest {
/// Publicly reachable URL of the replica node (e.g. `https://my-node.example.com`).
/// Self-reported by the caller; `register_replica` runs it through
/// `validate_replica_url` before storing it.
pub url: String,
}

/// PUT /api/v1/repos/:owner/:repo/replicas
///
/// Records the authenticated caller as a replica of `owner/repo`, storing the
/// URL supplied in the request body. Answers 201 when the database layer
/// reports a newly inserted row and 200 otherwise. The response body echoes
/// the repo, the replica DID and URL, and the current replica count.
///
/// Fails with `BadRequest` when the URL does not pass validation or when the
/// caller is the repo's owner, and with `RepoNotFound` when the repo is unknown.
pub async fn register_replica(
    State(state): State<AppState>,
    Extension(auth): Extension<AuthenticatedDid>,
    Path((owner, repo)): Path<(String, String)>,
    Json(req): Json<RegisterReplicaRequest>,
) -> Result<(StatusCode, Json<serde_json::Value>)> {
    // Reject a malformed URL before touching the database.
    validate_replica_url(&req.url)?;

    let record = match state.db.get_repo(&owner, &repo).await? {
        Some(found) => found,
        None => return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))),
    };

    // The verified DID of the caller is the replica's identity.
    let caller = &auth.0;

    // An owner hosting their own repo is the origin, not a replica.
    if caller == &record.owner_did {
        return Err(AppError::BadRequest(
            "the repo owner is not a replica of their own repo".into(),
        ));
    }

    let db = &state.db;
    let newly_added = db.register_replica(&record.id, caller, &req.url).await?;
    let total = db.count_replicas(&record.id).await?;

    let status = match newly_added {
        true => StatusCode::CREATED,
        false => StatusCode::OK,
    };

    tracing::info!(
        repo = %repo,
        replica = %caller,
        url = %req.url,
        "replica registered"
    );

    let body = serde_json::json!({
        "status": "registered",
        "repo": format!("{owner}/{repo}"),
        "replica_did": caller,
        "replica_url": req.url,
        "replica_count": total,
    });
    Ok((status, Json(body)))
}

/// DELETE /api/v1/repos/:owner/:repo/replicas
///
/// Removes the authenticated caller's replica registration for `owner/repo`
/// and reports how many replicas remain. Idempotent: a caller that was never
/// registered still receives a success response.
///
/// Fails with `RepoNotFound` when the repo is unknown.
pub async fn unregister_replica(
    State(state): State<AppState>,
    Extension(auth): Extension<AuthenticatedDid>,
    Path((owner, repo)): Path<(String, String)>,
) -> Result<Json<serde_json::Value>> {
    let record = match state.db.get_repo(&owner, &repo).await? {
        Some(found) => found,
        None => return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))),
    };

    // Only the caller's own registration can be removed.
    let caller = &auth.0;
    let db = &state.db;
    db.unregister_replica(&record.id, caller).await?;
    let remaining = db.count_replicas(&record.id).await?;

    tracing::info!(repo = %repo, replica = %caller, "replica unregistered");

    let body = serde_json::json!({
        "status": "unregistered",
        "repo": format!("{owner}/{repo}"),
        "replica_count": remaining,
    });
    Ok(Json(body))
}

/// GET /api/v1/repos/:owner/:repo/replicas
///
/// Public endpoint (no authenticated-DID extractor). Responds with the repo
/// name, the number of registered replicas, and the replica records themselves
/// (DID, URL, registration timestamp).
///
/// Fails with `RepoNotFound` when the repo is unknown.
pub async fn list_replicas(
    State(state): State<AppState>,
    Path((owner, repo)): Path<(String, String)>,
) -> Result<Json<serde_json::Value>> {
    let record = match state.db.get_repo(&owner, &repo).await? {
        Some(found) => found,
        None => return Err(AppError::RepoNotFound(format!("{owner}/{repo}"))),
    };

    let replicas = state.db.list_replicas(&record.id).await?;
    let total = replicas.len();

    let body = serde_json::json!({
        "repo": format!("{owner}/{repo}"),
        "replica_count": total,
        "replicas": replicas,
    });
    Ok(Json(body))
}

/// Basic URL hygiene for a self-reported replica URL.
///
/// A URL is accepted only when it
/// - is non-empty and at most 512 bytes long,
/// - starts with `http://` or `https://` (lower-case scheme),
/// - contains no whitespace or control characters, and
/// - names a non-empty host (so `https://`, `https:///path` and
///   `https://:7545` are refused).
///
/// This is a syntactic sanity check only: it does not fully parse the URL and
/// says nothing about whether the replica is reachable.
///
/// # Errors
/// Returns `AppError::BadRequest` describing the first rule that failed.
fn validate_replica_url(url: &str) -> Result<()> {
    const MAX_URL_LEN: usize = 512;

    if url.is_empty() {
        return Err(AppError::BadRequest("replica url is empty".into()));
    }
    if url.len() > MAX_URL_LEN {
        return Err(AppError::BadRequest("replica url exceeds 512 chars".into()));
    }

    // Everything after the scheme; `None` means the scheme is not http(s).
    let after_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"));
    let rest = match after_scheme {
        Some(rest) => rest,
        None => {
            return Err(AppError::BadRequest(
                "replica url must start with http:// or https://".into(),
            ))
        }
    };

    // No spaces / control chars / newlines anywhere in the URL.
    if url.chars().any(|c| c.is_whitespace() || c.is_control()) {
        return Err(AppError::BadRequest(
            "replica url contains whitespace or control characters".into(),
        ));
    }

    // The authority ends at the first path, query, or fragment delimiter.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");
    // Drop optional `user:pass@` userinfo, keeping `host[:port]`.
    let host_port = authority.rsplit('@').next().unwrap_or("");
    // A bracketed IPv6 literal contains colons itself, so only strip `:port`
    // from unbracketed hosts.
    let host = match host_port.strip_prefix('[') {
        Some(v6) => v6.split(']').next().unwrap_or(""),
        None => host_port.split(':').next().unwrap_or(""),
    };
    if host.is_empty() {
        return Err(AppError::BadRequest("replica url is missing a host".into()));
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `validate_replica_url` refuses every URL in `urls`.
    fn assert_all_rejected(urls: &[&str]) {
        for bad in urls {
            assert!(validate_replica_url(bad).is_err(), "{bad:?} must reject");
        }
    }

    // Plain hosts, an explicit port, and plain-http localhost all pass.
    #[test]
    fn url_accepts_normal_https() {
        let accepted = [
            "https://node.example.com",
            "https://my-node.example.com:7545",
            "http://localhost:7545",
        ];
        for good in accepted {
            validate_replica_url(good).unwrap();
        }
    }

    #[test]
    fn url_rejects_empty() {
        let outcome = validate_replica_url("");
        assert!(outcome.is_err());
    }

    // Anything that is not http:// or https:// is refused, including
    // scheme-less paths.
    #[test]
    fn url_rejects_non_http_scheme() {
        assert_all_rejected(&[
            "ftp://host",
            "file:///etc/passwd",
            "javascript:alert(1)",
            "/path",
        ]);
    }

    // Spaces, newlines, tabs and NUL bytes must never reach storage.
    #[test]
    fn url_rejects_whitespace_and_control() {
        assert_all_rejected(&[
            "https://host .com",
            "https://host\n.com",
            "https://host\t.com",
            "https://host\0evil.com",
        ]);
    }

    // 600 host characters push the URL past the 512-byte limit.
    #[test]
    fn url_rejects_overlong() {
        let host = "a".repeat(600);
        let long = format!("https://{}.com", host);
        assert!(validate_replica_url(&long).is_err());
    }
}
90 changes: 90 additions & 0 deletions crates/gitlawb-node/src/db/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ pub struct PeerRecord {
pub announced_at: String,
}

/// One replica registration for a repo, as read from the `repo_replicas`
/// table by `Db::list_replicas`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoReplica {
/// DID of the node hosting the replica.
pub replica_did: String,
/// URL stored for the replica; overwritten when the same DID registers again.
pub replica_url: String,
/// RFC 3339 timestamp written when the row was first inserted; a later
/// re-registration leaves it untouched.
pub registered_at: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PinnedCidRecord {
pub sha256_hex: String,
Expand Down Expand Up @@ -379,6 +386,21 @@ impl Db {
)"#,
"CREATE INDEX IF NOT EXISTS idx_repo_stars_repo ON repo_stars(repo_id)",
"CREATE INDEX IF NOT EXISTS idx_repo_stars_agent ON repo_stars(agent_did)",
// ── Repo replicas (network resilience) ──────────────────────────
// Tracks which nodes are hosting a replica of a repo. Populated
// when a replica node calls PUT /api/v1/repos/{owner}/{repo}/replicas
// on the origin. Public via GET on the same path — anyone can see
// how many nodes are mirroring a given repo.
r#"CREATE TABLE IF NOT EXISTS repo_replicas (
id TEXT NOT NULL PRIMARY KEY,
repo_id TEXT NOT NULL,
replica_did TEXT NOT NULL,
replica_url TEXT NOT NULL,
registered_at TEXT NOT NULL,
UNIQUE(repo_id, replica_did)
)"#,
"CREATE INDEX IF NOT EXISTS idx_repo_replicas_repo ON repo_replicas(repo_id)",
"CREATE INDEX IF NOT EXISTS idx_repo_replicas_did ON repo_replicas(replica_did)",
// ── PR comments ─────────────────────────────────────────────────
r#"CREATE TABLE IF NOT EXISTS pr_comments (
id TEXT NOT NULL PRIMARY KEY,
Expand Down Expand Up @@ -1790,6 +1812,74 @@ impl Db {
Ok(row.get::<i64, _>("cnt"))
}

// ── Repo replicas ──────────────────────────────────────────────────

/// Register a replica for a repo, or refresh its URL if it is already registered.
///
/// Returns `true` when a new row was inserted and `false` when an existing
/// registration had its URL updated.
///
/// `rows_affected()` cannot tell the two cases apart, because an upsert
/// reports one affected row either way. Instead the statement returns the
/// stored `registered_at`: that column is written only on insert, so it equals
/// the timestamp bound by this call exactly when the row is new.
pub async fn register_replica(
    &self,
    repo_id: &str,
    replica_did: &str,
    replica_url: &str,
) -> Result<bool> {
    let now = Utc::now().to_rfc3339();
    let id = format!("{repo_id}:{replica_did}");
    let row = sqlx::query(
        "INSERT INTO repo_replicas (id, repo_id, replica_did, replica_url, registered_at)
         VALUES ($1, $2, $3, $4, $5)
         ON CONFLICT (repo_id, replica_did) DO UPDATE
         SET replica_url = EXCLUDED.replica_url
         RETURNING registered_at",
    )
    .bind(&id)
    .bind(repo_id)
    .bind(replica_did)
    .bind(replica_url)
    .bind(&now)
    .fetch_one(&self.pool)
    .await?;

    // On conflict the original timestamp is kept, so a mismatch means "updated".
    let stored_at: String = row.get("registered_at");
    Ok(stored_at == now)
}

/// Remove the registration of `replica_did` for `repo_id`.
///
/// Idempotent: the number of deleted rows is not inspected, so removing a
/// registration that does not exist succeeds as well.
pub async fn unregister_replica(&self, repo_id: &str, replica_did: &str) -> Result<()> {
    let delete = sqlx::query("DELETE FROM repo_replicas WHERE repo_id = $1 AND replica_did = $2")
        .bind(repo_id)
        .bind(replica_did);
    delete.execute(&self.pool).await?;
    Ok(())
}

/// Fetch every replica registered for `repo_id`, ordered by ascending
/// `registered_at` (oldest registration first).
pub async fn list_replicas(&self, repo_id: &str) -> Result<Vec<RepoReplica>> {
    let select = sqlx::query(
        "SELECT replica_did, replica_url, registered_at
FROM repo_replicas
WHERE repo_id = $1
ORDER BY registered_at ASC",
    )
    .bind(repo_id);
    let rows = select.fetch_all(&self.pool).await?;

    // Map each row onto the public record type, preserving query order.
    let mut replicas = Vec::with_capacity(rows.len());
    for row in rows {
        replicas.push(RepoReplica {
            replica_did: row.get("replica_did"),
            replica_url: row.get("replica_url"),
            registered_at: row.get("registered_at"),
        });
    }
    Ok(replicas)
}

/// Return how many replicas are currently registered for `repo_id`.
pub async fn count_replicas(&self, repo_id: &str) -> Result<i64> {
    let count_query =
        sqlx::query("SELECT COUNT(*) as cnt FROM repo_replicas WHERE repo_id = $1").bind(repo_id);
    let row = count_query.fetch_one(&self.pool).await?;
    let total: i64 = row.get("cnt");
    Ok(total)
}

/// Check whether a specific agent has starred a repo.
#[allow(dead_code)]
pub async fn is_starred(&self, repo_id: &str, agent_did: &str) -> Result<bool> {
Expand Down
14 changes: 13 additions & 1 deletion crates/gitlawb-node/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use tracing::Level;

use crate::api::{
agents, arweave, bounties, certs, changelog, events, ipfs, issues, labels, peers, protect,
pulls, register, repos, resolve, stars, tasks, webhooks,
pulls, register, replicas, repos, resolve, stars, tasks, webhooks,
};
use crate::auth;
use crate::state::AppState;
Expand Down Expand Up @@ -95,6 +95,14 @@ pub fn build_router(state: AppState) -> Router {
"/api/v1/repos/{owner}/{repo}/star",
axum::routing::delete(stars::unstar_repo),
)
.route(
"/api/v1/repos/{owner}/{repo}/replicas",
axum::routing::put(replicas::register_replica),
)
.route(
"/api/v1/repos/{owner}/{repo}/replicas",
axum::routing::delete(replicas::unregister_replica),
)
.route("/api/v1/repos/{owner}/{repo}/fork", post(repos::fork_repo))
.route(
"/api/v1/repos/{owner}/{repo}/labels",
Expand Down Expand Up @@ -278,6 +286,10 @@ pub fn build_router(state: AppState) -> Router {
"/api/v1/repos/{owner}/{repo}/star",
get(stars::get_star_status),
)
.route(
"/api/v1/repos/{owner}/{repo}/replicas",
get(replicas::list_replicas),
)
.route("/{owner}/{repo}/info/refs", get(repos::git_info_refs));

// git-upload-pack (clone/fetch) — same raised body limit as receive-pack so
Expand Down
Loading
Loading