diff --git a/Cargo.lock b/Cargo.lock index b1bef640f..591896e50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,12 +1297,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "http" version = "1.4.0" @@ -1551,7 +1545,6 @@ dependencies = [ "serde", "serde_json", "serial_test", - "sha256", "signal-hook-registry", "tempfile", "termcolor", @@ -3236,19 +3229,6 @@ dependencies = [ "digest", ] -[[package]] -name = "sha256" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f880fc8562bdeb709793f00eb42a2ad0e672c4f883bbe59122b926eca935c8f6" -dependencies = [ - "async-trait", - "bytes", - "hex", - "sha2", - "tokio", -] - [[package]] name = "sharded-slab" version = "0.1.7" diff --git a/docs/hyperlight-surrogate-development-notes.md b/docs/hyperlight-surrogate-development-notes.md index 827093cce..431cc85da 100644 --- a/docs/hyperlight-surrogate-development-notes.md +++ b/docs/hyperlight-surrogate-development-notes.md @@ -4,6 +4,8 @@ > Note: The use of surrogates is a temporary workaround on Windows until WHP allows us to create more than one partition per running process. -These surrogate processes are managed by the host via the [surrogate_process_manager](./src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs) which will launch several of these surrogates (up to the 512), assign memory to them, then launch partitions from there, and reuse them as necessary. 
+These surrogate processes are managed by the host via the [surrogate_process_manager](./src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs) which pre-creates an initial pool of surrogates at startup (512 by default, configurable via the `HYPERLIGHT_INITIAL_SURROGATES` environment variable). If the pool is exhausted, additional processes are created on demand up to a configurable maximum (`HYPERLIGHT_MAX_SURROGATES`, also defaulting to 512). Once the maximum is reached, callers block until a process is returned to the pool. -`hyperlight_surrogate.exe` gets built during `hyperlight-host`'s build script, gets embedded into the `hyperlight-host` Rust library via [rust-embed](https://crates.io/crates/rust-embed), and is extracted at runtime next to the executable when the surrogate process manager is initialized. \ No newline at end of file +> **Note:** `HYPERLIGHT_MAX_SURROGATES` is authoritative — if `HYPERLIGHT_INITIAL_SURROGATES` exceeds it, the initial count is silently clamped down to the maximum. For example, setting only `HYPERLIGHT_MAX_SURROGATES=256` limits both the initial pool and the ceiling to 256. + +`hyperlight_surrogate.exe` gets built during `hyperlight-host`'s build script, gets embedded into the `hyperlight-host` Rust library via [rust-embed](https://crates.io/crates/rust-embed), and is extracted at runtime next to the executable when the surrogate process manager is initialized. The extracted filename includes a short BLAKE3 hash of the binary content (e.g., `hyperlight_surrogate_a1b2c3d4.exe`) so that multiple hyperlight versions can coexist without file-deletion races. 
diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index c183037fc..a30764d0b 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -70,7 +70,6 @@ windows = { version = "0.62", features = [ windows-sys = { version = "0.61", features = ["Win32"] } windows-result = "0.4" rust-embed = { version = "8.11.0", features = ["debug-embed", "include-exclude", "interpolate-folder-path"] } -sha256 = "1.6.0" windows-version = "0.1" lazy_static = "1.4.0" diff --git a/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs b/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs index 08c60e370..de15fde0b 100644 --- a/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs +++ b/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs @@ -19,10 +19,11 @@ use std::fs::File; use std::io::Write; use std::mem::size_of; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; -use crossbeam_channel::{Receiver, Sender, unbounded}; +use crossbeam_channel::{Receiver, Sender, TryRecvError, unbounded}; use rust_embed::RustEmbed; -use tracing::{Span, error, info, instrument}; +use tracing::{Span, error, info, instrument, warn}; use windows::Win32::Foundation::HANDLE; use windows::Win32::Security::SECURITY_ATTRIBUTES; use windows::Win32::System::JobObjects::{ @@ -49,14 +50,87 @@ use crate::{Result, log_then_return, new_error}; #[include = "hyperlight_surrogate.exe"] struct Asset; -/// This is the name of the surrogate process binary that will be used to create surrogate processes. -/// The process does nothing , it just sleeps forever. Its only purpose is to provide a host for memory that will be mapped -/// into the guest using the `WHvMapGpaRange2` API. -pub(crate) const SURROGATE_PROCESS_BINARY_NAME: &str = "hyperlight_surrogate.exe"; +/// The name of the embedded surrogate asset (used as the key for `Asset::get`). 
+const EMBEDDED_SURROGATE_NAME: &str = "hyperlight_surrogate.exe"; + +/// The absolute hard limit on surrogate processes imposed by the +/// `WHvMapGpaRange2` API (512 process handles per calling process). +const HARD_MAX_SURROGATE_PROCESSES: usize = 512; + +/// Environment variable controlling how many surrogate processes are +/// pre-created when the manager starts. Must be between 1 and +/// `HARD_MAX_SURROGATE_PROCESSES` (512). Defaults to 512 if unset. +const INITIAL_SURROGATES_ENV_VAR: &str = "HYPERLIGHT_INITIAL_SURROGATES"; + +/// Environment variable controlling the maximum number of surrogate processes +/// that can exist (including those created on demand). Must be >= +/// `HYPERLIGHT_INITIAL_SURROGATES` and <= `HARD_MAX_SURROGATE_PROCESSES` +/// (512). Defaults to 512 if unset. +const MAX_SURROGATES_ENV_VAR: &str = "HYPERLIGHT_MAX_SURROGATES"; + +/// Returns the on-disk filename for the surrogate binary, incorporating the +/// first 8 hex characters of the BLAKE3 hash of the embedded binary so that +/// different hyperlight versions produce different filenames and can coexist +/// without file-deletion races. +fn surrogate_binary_name() -> Result<String> { + let exe = Asset::get(EMBEDDED_SURROGATE_NAME) + .ok_or_else(|| new_error!("could not find embedded surrogate binary"))?; + let hash = blake3::hash(exe.data.as_ref()); + let short_hash = &hash.to_hex()[..8]; + Ok(format!("hyperlight_surrogate_{short_hash}.exe")) +} + +/// Pure validation/clamping logic for surrogate process counts. +/// +/// `raw_initial` and `raw_max` are the parsed values from the environment +/// (or `None` when the variable is unset or unparsable). +/// +/// Resolution order: +/// 1. `max` is clamped to `1..=HARD_MAX_SURROGATE_PROCESSES`, defaulting +/// to `HARD_MAX_SURROGATE_PROCESSES` when `None`. +/// 2. `initial` is clamped to `1..=max`, defaulting to `max` when `None`. +/// This guarantees `initial <= max` without an extra conditional. 
+fn compute_surrogate_counts(raw_initial: Option<usize>, raw_max: Option<usize>) -> (usize, usize) { + let max = raw_max + .map(|n| n.clamp(1, HARD_MAX_SURROGATE_PROCESSES)) + .unwrap_or(HARD_MAX_SURROGATE_PROCESSES); + + // Clamp initial to 1..=max so it can never exceed the authoritative limit. + let initial = raw_initial.map(|n| n.clamp(1, max)).unwrap_or(max); + + (initial, max) +} -/// The maximum number of surrogate processes that can be created. -/// (This is a factor of limitations in the `WHvMapGpaRange2` API which only allows 512 different process handles). -const NUMBER_OF_SURROGATE_PROCESSES: usize = 512; +/// Returns the (initial, max) surrogate process counts from environment +/// variables, applying validation and clamping. +/// +/// - `HYPERLIGHT_INITIAL_SURROGATES`: clamped to `1..=max`, default `max`. +/// - `HYPERLIGHT_MAX_SURROGATES`: clamped to `1..=512`, default 512. +fn surrogate_process_counts() -> (usize, usize) { + let raw_initial = std::env::var(INITIAL_SURROGATES_ENV_VAR) + .ok() + .and_then(|v| v.parse::<usize>().ok()); + let raw_max = std::env::var(MAX_SURROGATES_ENV_VAR) + .ok() + .and_then(|v| v.parse::<usize>().ok()); + + let (initial, max) = compute_surrogate_counts(raw_initial, raw_max); + + // Log clamping warnings here (not in the pure function) so the + // messages can reference the env var names that provide context. + if let Some(n) = raw_initial + && n != initial + { + warn!("{INITIAL_SURROGATES_ENV_VAR}={n} was clamped to {initial}"); + } + if let Some(n) = raw_max + && n != max + { + warn!("{MAX_SURROGATES_ENV_VAR}={n} was clamped to {max}"); + } + + (initial, max) +} /// `SurrogateProcessManager` manages hyperlight_surrogate processes. These /// processes are required to allow multiple WHP Partitions to be created in a @@ -86,11 +160,14 @@ const NUMBER_OF_SURROGATE_PROCESSES: usize = 512; /// on allocation/return to/from a Sandbox instance. /// It is intended to be used as a singleton and is thread safe. 
/// -/// There is a limit of 512 partitions per process therefore this class will -/// create a maximum of 512 processes, and if the pool is empty when a Sandbox -/// is created it will wait for a free process. +/// There is a limit of 512 partitions per process. By default 512 processes +/// are pre-created at startup, but this can be reduced via +/// `HYPERLIGHT_INITIAL_SURROGATES`. Additional processes are created on +/// demand up to the limit set by `HYPERLIGHT_MAX_SURROGATES` (also +/// defaulting to 512). If the pool is empty and the max has been reached, +/// callers will block until a process is returned. /// -/// This class is `Send + Sync`, and internally manages the pool of 512 +/// This class is `Send + Sync`, and internally manages the pool of /// surrogate processes in a concurrency-safe way. pub(crate) struct SurrogateProcessManager { job_handle: HandleWrapper, @@ -111,14 +188,23 @@ pub(crate) struct SurrogateProcessManager { /// - ... and the `Hypervisor` trait requires `Send + Sync` process_receiver: Receiver<HandleWrapper>, process_sender: Sender<HandleWrapper>, + /// Path to the on-disk surrogate binary (hash-stamped). + surrogate_process_path: PathBuf, + /// Maximum number of surrogate processes allowed to exist. + max_processes: usize, + /// Number of surrogate processes that have been created so far. + /// Used to decide whether we can spawn more on demand. 
+ created_count: AtomicUsize, } impl SurrogateProcessManager { #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] fn new() -> Result<Self> { - ensure_surrogate_process_exe()?; - let surrogate_process_path = - get_surrogate_process_dir()?.join(SURROGATE_PROCESS_BINARY_NAME); + let binary_name = surrogate_binary_name()?; + ensure_surrogate_process_exe(&binary_name)?; + let surrogate_process_path = get_surrogate_process_dir()?.join(&binary_name); + + let (initial, max) = surrogate_process_counts(); let (sender, receiver) = unbounded(); let job_handle = create_job_object()?; @@ -126,17 +212,71 @@ impl SurrogateProcessManager { job_handle, process_receiver: receiver, process_sender: sender, + surrogate_process_path, + max_processes: max, + created_count: AtomicUsize::new(0), }; - surrogate_process_manager - .create_surrogate_processes(&surrogate_process_path, job_handle)?; + surrogate_process_manager.create_initial_surrogate_processes(initial)?; + Ok(surrogate_process_manager) } - /// Gets a surrogate process from the pool of surrogate processes and - /// allocates memory in the process. This should be called when a new - /// HyperV on Windows Driver is created. + /// Gets a surrogate process from the pool. If the pool is empty and + /// fewer than `max_processes` have been created, a new process is + /// spawned on demand. If the pool is empty and the maximum has been + /// reached, this call blocks until a process is returned. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn get_surrogate_process(&self) -> Result<SurrogateProcess> { + // Fast path: try to grab an already-pooled process. + match self.process_receiver.try_recv() { + Ok(handle) => { + let surrogate_process_handle: HANDLE = handle.into(); + return Ok(SurrogateProcess::new(surrogate_process_handle)); + } + Err(TryRecvError::Empty) => { + // Pool is empty — try to grow on demand below. 
+ } + Err(TryRecvError::Disconnected) => { + return Err(new_error!("surrogate process channel disconnected")); + } + } + + // On-demand growth: atomically claim a slot if one is available. + // We use a CAS loop so that concurrent callers don't overshoot + // the maximum. + loop { + let current = self.created_count.load(Ordering::Acquire); + if current >= self.max_processes { + // At the limit — fall through to the blocking recv below. + break; + } + if self + .created_count + .compare_exchange(current, current + 1, Ordering::AcqRel, Ordering::Acquire) + .is_ok() + { + info!( + "on-demand surrogate process creation ({}/{})", + current + 1, + self.max_processes + ); + let handle = + match create_surrogate_process(&self.surrogate_process_path, self.job_handle) { + Ok(h) => h, + Err(e) => { + // Rollback the slot claim so capacity isn't + // permanently lost on transient failures. + self.created_count.fetch_sub(1, Ordering::AcqRel); + return Err(e); + } + }; + let surrogate_process_handle: HANDLE = handle.into(); + return Ok(SurrogateProcess::new(surrogate_process_handle)); + } + // CAS failed — another thread beat us; retry. + } + + // Maximum reached — block until a process is returned to the pool. let surrogate_process_handle: HANDLE = self.process_receiver.recv()?.into(); Ok(SurrogateProcess::new(surrogate_process_handle)) } @@ -146,19 +286,25 @@ impl SurrogateProcessManager { /// implementation, after process resources have been freed. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn return_surrogate_process(&self, proc_handle: HandleWrapper) -> Result<()> { - Ok(self.process_sender.clone().send(proc_handle)?) + Ok(self.process_sender.send(proc_handle)?) } - /// Creates all the surrogate process when the struct is first created. + /// Pre-creates the initial batch of surrogate processes at startup. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - fn create_surrogate_processes( - &self, - surrogate_process_path: &Path, - job_handle: HandleWrapper, - ) -> Result<()> { - for _ in 0..NUMBER_OF_SURROGATE_PROCESSES { - let surrogate_process = create_surrogate_process(surrogate_process_path, job_handle)?; - self.process_sender.clone().send(surrogate_process)?; + fn create_initial_surrogate_processes(&self, initial_count: usize) -> Result<()> { + info!( + "pre-creating {} surrogate processes ({}={:?}, {}={:?})", + initial_count, + INITIAL_SURROGATES_ENV_VAR, + std::env::var(INITIAL_SURROGATES_ENV_VAR).ok(), + MAX_SURROGATES_ENV_VAR, + std::env::var(MAX_SURROGATES_ENV_VAR).ok(), + ); + for _ in 0..initial_count { + let surrogate_process = + create_surrogate_process(&self.surrogate_process_path, self.job_handle)?; + self.process_sender.send(surrogate_process)?; + self.created_count.fetch_add(1, Ordering::AcqRel); } Ok(()) @@ -247,40 +393,53 @@ fn get_surrogate_process_dir() -> Result { Ok(path.to_path_buf()) } +/// Ensures the surrogate binary exists on disk at the hash-stamped path. +/// +/// Because the filename embeds the content hash, two different hyperlight +/// versions will write to *different* files. This avoids the +/// delete-while-running race that occurred when an older version's running +/// processes held a lock on the same filename. +/// +/// Uses `File::create_new` for atomic create-or-fail semantics, avoiding +/// a TOCTOU race with `exists()` + `create()`. If the write fails after +/// creating the file, the partial file is deleted so future runs do not +/// mistake it for a valid binary. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] -fn ensure_surrogate_process_exe() -> Result<()> { - let surrogate_process_path = get_surrogate_process_dir()?.join(SURROGATE_PROCESS_BINARY_NAME); - let p = Path::new(&surrogate_process_path); +fn ensure_surrogate_process_exe(binary_name: &str) -> Result<()> { + let dir = get_surrogate_process_dir()?; + let surrogate_process_path = dir.join(binary_name); - let exe = Asset::get(SURROGATE_PROCESS_BINARY_NAME) + // Resolve the embedded asset before touching the filesystem so a + // missing asset can't leave a zero-byte ghost file on disk. + let exe = Asset::get(EMBEDDED_SURROGATE_NAME) .ok_or_else(|| new_error!("could not find embedded surrogate binary"))?; - if p.exists() { - // check to see if sha's match and if not delete the file so we'll extract - // the embedded file below. - let embedded_file_sha = sha256::digest(exe.data.as_ref()); - let file_on_disk_sha = sha256::try_digest(&p)?; - - if embedded_file_sha != file_on_disk_sha { - println!( - "sha of embedded surrogate '{}' does not match sha of file on disk '{}' - deleting surrogate binary at {}", - embedded_file_sha, - file_on_disk_sha, + // Atomic create-or-fail: if the file already exists, `create_new` + // returns `AlreadyExists` and we skip extraction. The filename + // embeds the content hash, so an existing file is guaranteed to + // have the correct content. 
+ match File::create_new(&surrogate_process_path) { + Ok(mut f) => { + info!( + "{} does not exist, extracting to {}", + binary_name, &surrogate_process_path.display() ); - std::fs::remove_file(p)?; - } - } - - if !p.exists() { - info!( - "{} does not exist, copying to {}", - SURROGATE_PROCESS_BINARY_NAME, - &surrogate_process_path.display() - ); - let mut f = File::create(&surrogate_process_path)?; - f.write_all(exe.data.as_ref())?; + if let Err(e) = f.write_all(exe.data.as_ref()) { + // Clean up the partial file so future runs don't skip + // extraction thinking a valid binary is already present. + // Drop the file handle first — on Windows, open handles + // prevent deletion. + drop(f); + let _ = std::fs::remove_file(&surrogate_process_path); + return Err(e.into()); + } + } + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + // Another thread/process already extracted the file — nothing to do. + } + Err(e) => return Err(e.into()), } Ok(()) @@ -358,7 +517,9 @@ mod tests { // create more threads than surrogate processes as we want to test that // the manager can handle multiple threads requesting processes at the // same time when there are not enough processes available. 
- for t in 0..NUMBER_OF_SURROGATE_PROCESSES * 2 { + let surrogate_process_manager = get_surrogate_process_manager().unwrap(); + let max_processes = surrogate_process_manager.max_processes; + for t in 0..max_processes * 2 { let thread_handle = thread::spawn(move || -> Result<()> { let surrogate_process_manager_res = get_surrogate_process_manager(); let mut rng = rng(); @@ -368,7 +529,7 @@ mod tests { // for each of the parent loop iterations, try to get a // surrogate process, make sure we actually got one, // then put it back - for p in 0..NUMBER_OF_SURROGATE_PROCESSES { + for p in 0..max_processes { let timer = Instant::now(); let surrogate_process = { let res = surrogate_process_manager.get_surrogate_process()?; @@ -406,12 +567,13 @@ mod tests { assert!(thread_handle.join().is_ok()); } - assert_number_of_surrogate_processes(NUMBER_OF_SURROGATE_PROCESSES); + assert_number_of_surrogate_processes(max_processes); } #[track_caller] fn assert_number_of_surrogate_processes(expected_count: usize) { - let sleep_count = 10; + const MAX_RETRIES: u32 = 30; + let mut attempt = 0; loop { let snapshot_handle = unsafe { CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0) }; assert!(snapshot_handle.is_ok()); @@ -425,7 +587,8 @@ mod tests { while result { if let Ok(process_name) = unsafe { CStr::from_ptr(process_entry.szExeFile.as_ptr()).to_str() } - && process_name == SURROGATE_PROCESS_BINARY_NAME + && process_name.starts_with("hyperlight_surrogate_") + && process_name.ends_with(".exe") { count += 1; } @@ -435,9 +598,10 @@ mod tests { } } - // if the expected count is 0, we are waiting for the processes to exit, this doesnt happen immediately, so we wait for a bit - - if (expected_count == 0) && (count > 0) && (sleep_count < 30) { + // When waiting for processes to exit (expected_count == 0), + // retry with a delay since termination isn't instantaneous. 
+ attempt += 1; + if expected_count == 0 && count > 0 && attempt < MAX_RETRIES { thread::sleep(Duration::from_secs(1)); } else { assert_eq!(count, expected_count); @@ -622,4 +786,174 @@ mod tests { "Mapping should be removed after ref count reaches 0" ); } + + /// Tests `ensure_surrogate_process_exe` for: + /// 1. Correct extraction — file content matches the embedded binary. + /// 2. Re-extraction after deletion — a missing file is recreated. + /// 3. Idempotent when already present — second call succeeds without + /// error (exercises the `AlreadyExists` fast path). + /// 4. Deterministic naming — same embedded binary always produces + /// the same hash-stamped filename. + /// + /// Uses a test-specific filename to avoid conflicting with the + /// singleton's surrogate processes, which hold a PE loader lock + /// on the real hash-stamped exe. + #[test] + fn test_ensure_surrogate_exe() { + let test_binary_name = "hyperlight_surrogate_test_extraction.exe"; + let dir = get_surrogate_process_dir().expect("should get surrogate dir"); + let path = dir.join(test_binary_name); + + // Ensure a clean slate. 
+ let _ = std::fs::remove_file(&path); + + // --- First call: extracts the binary --- + ensure_surrogate_process_exe(test_binary_name).expect("first call should succeed"); + assert!(path.exists(), "binary should exist after extraction"); + + // --- Verify extracted content matches embedded binary --- + let on_disk = std::fs::read(&path).expect("should read extracted file"); + let embedded = Asset::get(EMBEDDED_SURROGATE_NAME).expect("embedded asset should exist"); + assert_eq!( + on_disk, + embedded.data.as_ref(), + "extracted file content should match embedded binary" + ); + + // --- Second call: file exists, should skip (AlreadyExists path) --- + ensure_surrogate_process_exe(test_binary_name) + .expect("second call should succeed when file already exists"); + + // --- Delete and re-extract --- + std::fs::remove_file(&path).expect("should be able to delete test binary"); + assert!(!path.exists(), "binary should be gone after deletion"); + + ensure_surrogate_process_exe(test_binary_name) + .expect("should succeed re-extracting after deletion"); + assert!(path.exists(), "binary should be re-created after deletion"); + + // Clean up test artifact. + let _ = std::fs::remove_file(&path); + + // --- Verify deterministic naming --- + let binary_name = surrogate_binary_name().expect("should succeed"); + let binary_name_2 = surrogate_binary_name().expect("second call should also succeed"); + assert_eq!( + binary_name, binary_name_2, + "surrogate_binary_name should be deterministic" + ); + } + + /// Verifies `compute_surrogate_counts()` returns sensible defaults + /// when inputs are `None`, and correct clamped values otherwise. + /// + /// This exercises the pure validation/clamping function directly, + /// avoiding process-global env var mutation which previously caused + /// a race with the `lazy_static` singleton initialisation in + /// parallel test runs. 
+ #[test] + fn test_compute_surrogate_counts() { + // --- Both unset (or unparsable) → defaults --- + let (initial, max) = compute_surrogate_counts(None, None); + assert_eq!( + initial, HARD_MAX_SURROGATE_PROCESSES, + "default initial should be {HARD_MAX_SURROGATE_PROCESSES}" + ); + assert_eq!( + max, HARD_MAX_SURROGATE_PROCESSES, + "default max should be {HARD_MAX_SURROGATE_PROCESSES}" + ); + + // --- Only initial set --- + let (initial, max) = compute_surrogate_counts(Some(32), None); + assert_eq!(initial, 32, "initial should honour provided value"); + assert_eq!( + max, HARD_MAX_SURROGATE_PROCESSES, + "max should default when unset" + ); + + // --- Both set, max > initial --- + let (initial, max) = compute_surrogate_counts(Some(8), Some(64)); + assert_eq!(initial, 8); + assert_eq!(max, 64); + + // --- Both set, max < initial → initial clamped DOWN to max --- + let (initial, max) = compute_surrogate_counts(Some(100), Some(10)); + assert_eq!(max, 10, "max is authoritative and should not be inflated"); + assert_eq!( + initial, 10, + "initial should be clamped down to max when it exceeds it" + ); + + // --- initial below minimum → clamped to 1 --- + let (initial, max) = compute_surrogate_counts(Some(0), None); + assert_eq!(initial, 1, "initial should be clamped to minimum of 1"); + assert_eq!( + max, HARD_MAX_SURROGATE_PROCESSES, + "max should default when unset" + ); + + // --- initial above hard limit → clamped to 512 --- + let (initial, max) = compute_surrogate_counts(Some(9999), None); + assert_eq!( + initial, HARD_MAX_SURROGATE_PROCESSES, + "initial should be clamped to {HARD_MAX_SURROGATE_PROCESSES}" + ); + assert_eq!(max, HARD_MAX_SURROGATE_PROCESSES); + + // --- Only max set → initial defaults then clamped down to max --- + let (initial, max) = compute_surrogate_counts(None, Some(256)); + assert_eq!(max, 256, "max should honour provided value"); + assert_eq!( + initial, 256, + "initial should be clamped down to max when it defaults above it" + ); + + // --- 
max below minimum → clamped to 1, initial follows --- + let (initial, max) = compute_surrogate_counts(None, Some(0)); + assert_eq!(max, 1, "max should be clamped to minimum of 1"); + assert_eq!(initial, 1, "initial should be clamped down to max"); + + // --- max above hard limit → clamped to 512 --- + let (initial, max) = compute_surrogate_counts(None, Some(9999)); + assert_eq!( + max, HARD_MAX_SURROGATE_PROCESSES, + "max should be clamped to {HARD_MAX_SURROGATE_PROCESSES}" + ); + assert_eq!(initial, HARD_MAX_SURROGATE_PROCESSES); + + // --- Both at boundary values --- + let (initial, max) = compute_surrogate_counts(Some(1), Some(1)); + assert_eq!(initial, 1); + assert_eq!(max, 1); + + let (initial, max) = compute_surrogate_counts( + Some(HARD_MAX_SURROGATE_PROCESSES), + Some(HARD_MAX_SURROGATE_PROCESSES), + ); + assert_eq!(initial, HARD_MAX_SURROGATE_PROCESSES); + assert_eq!(max, HARD_MAX_SURROGATE_PROCESSES); + } + + /// Smoke-tests `surrogate_process_counts()` with the default test + /// environment (neither env var set). This does NOT mutate env vars — + /// it just verifies the env-reading wrapper returns the expected + /// defaults under normal conditions. + #[test] + fn test_surrogate_process_counts_defaults() { + // In the standard CI / dev environment, neither HYPERLIGHT_INITIAL_SURROGATES + // nor HYPERLIGHT_MAX_SURROGATES is set, so we expect the hard-max defaults. + // If the env vars ARE set externally (e.g. a developer's shell), this test + // gracefully adapts: it only asserts the invariant initial <= max <= 512. + let (initial, max) = surrogate_process_counts(); + assert!( + (1..=HARD_MAX_SURROGATE_PROCESSES).contains(&initial), + "initial {initial} should be in 1..={HARD_MAX_SURROGATE_PROCESSES}" + ); + assert!( + (1..=HARD_MAX_SURROGATE_PROCESSES).contains(&max), + "max {max} should be in 1..={HARD_MAX_SURROGATE_PROCESSES}" + ); + assert!(initial <= max, "initial ({initial}) must be <= max ({max})"); + } }