diff --git a/bindings/java/src/lib.rs b/bindings/java/src/lib.rs index 0498961d7..836bb9afd 100644 --- a/bindings/java/src/lib.rs +++ b/bindings/java/src/lib.rs @@ -108,7 +108,7 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_init<'local>( LocalSrcError::ImpossibleRelativePath(_) => { env.throw_exception(ExceptionKind::PathError, suberror.to_string()) } - LocalSrcError::MissingMeta => { + LocalSrcError::MissingMeta | LocalSrcError::MissingInfoMeta => { env.throw_exception(ExceptionKind::SysandException, suberror.to_string()) } }, @@ -172,12 +172,12 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_env<'local>( LocalWriteError::ImpossibleRelativePath(_) => { env.throw_exception(ExceptionKind::PathError, suberror.to_string()) } - LocalWriteError::MissingMeta => { - env.throw_exception(ExceptionKind::SysandException, suberror.to_string()) - } LocalWriteError::AddProject(subsuberror) => { env.throw_exception(ExceptionKind::IOError, subsuberror.to_string()) } + LocalWriteError::MissingMeta | LocalWriteError::MissingInfoMeta => { + env.throw_exception(ExceptionKind::SysandException, suberror.to_string()) + } }, }, } @@ -195,6 +195,7 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_infoPath<'local>( let project = LocalSrcProject { nominal_path: None, project_path: Utf8PathBuf::from(&path), + expected_checksum: None, }; let command_result = commands::info::do_info_project(&project); @@ -348,6 +349,7 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_setProjectIndex<'local>( let mut project = LocalSrcProject { nominal_path: None, project_path: Utf8PathBuf::from(project_path), + expected_checksum: None, }; let _ = project .set_index(rust_index) @@ -370,6 +372,7 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_setProjectInfo<'local>( let mut project = LocalSrcProject { nominal_path: None, project_path: Utf8PathBuf::from(project_path), + expected_checksum: None, }; let _ = project .put_info(&info_raw, true) @@ -392,6 +395,7 @@ 
pub extern "system" fn Java_com_sensmetry_sysand_Sysand_setProjectMetadata<'loca let mut project = LocalSrcProject { nominal_path: None, project_path: Utf8PathBuf::from(project_path), + expected_checksum: None, }; let _ = project .put_meta(&metadata_raw, true) @@ -514,6 +518,7 @@ pub extern "system" fn Java_com_sensmetry_sysand_Sysand_buildProject<'local>( let project = LocalSrcProject { nominal_path: None, project_path: Utf8PathBuf::from(project_path), + expected_checksum: None, }; let Some(compression) = env.get_str(&compression, "compression") else { return; diff --git a/bindings/js/src/env/local_storage.rs b/bindings/js/src/env/local_storage.rs index 64c8f43af..f191c299d 100644 --- a/bindings/js/src/env/local_storage.rs +++ b/bindings/js/src/env/local_storage.rs @@ -2,7 +2,8 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use sysand_core::{ - env::{PutProjectError, ReadEnvironment, WriteEnvironment}, + env::{ProjectChecksumResult, PutProjectError, ReadEnvironment, WriteEnvironment}, + project::ProjectChecksum, utils::sha256_lowercase_hex, }; use thiserror::Error; @@ -117,8 +118,19 @@ impl ReadEnvironment for LocalBrowserStorageEnvironment { Ok(ProjectLocalBrowserStorage { vfs: self.vfs.clone(), root_path: self.project_path(&uri, &version), + expected_checksum: None, }) } + + // TODO: fix this when this env contains sufficient info + fn has_version_verified, V: AsRef>( + &self, + _uri: S, + _version: V, + _checksum: &sysand_core::project::ProjectChecksum, + ) -> Result { + Ok(ProjectChecksumResult::ChecksumNotPresent) + } } impl WriteEnvironment for LocalBrowserStorageEnvironment { @@ -130,6 +142,7 @@ impl WriteEnvironment for LocalBrowserStorageEnvironment { &mut self, uri: S, version: T, + _checksum: Option, write_project: F, ) -> Result> where @@ -138,6 +151,7 @@ impl WriteEnvironment for LocalBrowserStorageEnvironment { let mut project = ProjectLocalBrowserStorage { vfs: self.vfs.clone(), root_path: self.project_path(&uri, &version), + 
expected_checksum: None, }; // TODO: For production JS-version this should be made more robust diff --git a/bindings/js/src/io/local_storage.rs b/bindings/js/src/io/local_storage.rs index f0dbe25b4..28fccc0b6 100644 --- a/bindings/js/src/io/local_storage.rs +++ b/bindings/js/src/io/local_storage.rs @@ -5,6 +5,7 @@ use std::io::{Cursor, Read}; use sysand_core::context::ProjectContext; use sysand_core::lock::Source; +use sysand_core::project::{CanonicalizationError, ProjectChecksum}; use sysand_core::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ProjectMut, ProjectRead, utils::FsIoError}, @@ -19,6 +20,9 @@ use crate::local_storage_utils::{LocalStorageError, LocalStorageVFS, get_local_b pub struct ProjectLocalBrowserStorage { pub root_path: Utf8UnixPathBuf, pub vfs: LocalStorageVFS, + // TODO: enforce that the project matches the checksum if provided + // before reading; see LocalKparProject for example + pub expected_checksum: Option, } pub fn open_project_local_storage, P: AsRef>( @@ -28,6 +32,7 @@ pub fn open_project_local_storage, P: AsRef>( Ok(ProjectLocalBrowserStorage { root_path: root_path.as_ref().to_path_buf(), vfs: get_local_browser_storage(prefix)?, + expected_checksum: None, }) } @@ -50,6 +55,8 @@ pub enum Error { SerializeHandle(#[from] serde_json::Error), #[error("key `{0}` not found in local storage")] KeyNotFound(String), + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, } impl From for Error { @@ -128,10 +135,38 @@ impl ProjectRead for ProjectLocalBrowserStorage { } fn sources(&self, _ctx: &ProjectContext) -> Result, Self::Error> { + let checksum = match &self.expected_checksum { + Some(c) => c.clone(), + None => self + .checksum_canonical_hex() + .map_err(|e| match e { + CanonicalizationError::ProjectRead(e) => e, + CanonicalizationError::FileRead(path, error) => { + Error::Io(FsIoError::ReadFile(String::from(path).into(), error).into()) + } + })? 
+ .ok_or(Error::MissingInfoMeta)?, + }; Ok(vec![sysand_core::lock::Source::LocalSrc { src_path: self.root_path.as_str().into(), + checksum, }]) } + + fn checksum_canonical_variant(&self) -> Result { + match self.checksum_canonical_hex() { + Ok(c) => match c { + Some(c) => Ok(ProjectChecksum::Project(c)), + None => Err(Error::MissingInfoMeta), + }, + Err(e) => match e { + CanonicalizationError::ProjectRead(e) => Err(e), + CanonicalizationError::FileRead(path, error) => Err(Error::Io( + FsIoError::ReadFile(String::from(path).into(), error).into(), + )), + }, + } + } } impl ProjectMut for ProjectLocalBrowserStorage { diff --git a/bindings/js/src/lib.rs b/bindings/js/src/lib.rs index 3584e98e8..146ae2cdc 100644 --- a/bindings/js/src/lib.rs +++ b/bindings/js/src/lib.rs @@ -51,6 +51,7 @@ pub fn do_init_js_local_storage( vfs: local_storage_utils::get_local_browser_storage(prefix) .map_err(|e| JsValue::from_str(&e.to_string()))?, root_path: Utf8UnixPath::new(root_path).to_path_buf(), + expected_checksum: None, }, ) .map_err(|e| JsValue::from_str(&e.to_string())) diff --git a/bindings/py/src/lib.rs b/bindings/py/src/lib.rs index 12744a491..3480416ab 100644 --- a/bindings/py/src/lib.rs +++ b/bindings/py/src/lib.rs @@ -31,7 +31,7 @@ use sysand_core::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw, InterchangeProjectUsageRaw}, project::{ ProjectRead as _, - local_kpar::LocalKParProject, + local_kpar::{KparInnerPath, LocalKParProject}, local_src::{LocalSrcError, LocalSrcProject}, utils::wrapfs, }, @@ -79,6 +79,7 @@ fn do_init_py_local_file( PyValueError::new_err(error.to_string()) } LocalSrcError::MissingMeta => PyFileNotFoundError::new_err(err.to_string()), + LocalSrcError::MissingInfoMeta => PyFileNotFoundError::new_err(err.to_string()), }, }, )?; @@ -106,8 +107,10 @@ fn do_env_py_local_dir(path: String) -> PyResult<()> { LocalWriteError::ImpossibleRelativePath(error) => { PyValueError::new_err(error.to_string()) } - LocalWriteError::MissingMeta => 
PyFileNotFoundError::new_err(werr.to_string()), LocalWriteError::AddProject(error) => PyIOError::new_err(error.to_string()), + LocalWriteError::MissingMeta | LocalWriteError::MissingInfoMeta => { + PyFileNotFoundError::new_err(werr.to_string()) + } }, })?; @@ -126,6 +129,7 @@ fn do_info_py_path( let project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; match do_info_project(&project) { @@ -210,6 +214,7 @@ fn do_build_py( let project = LocalSrcProject { nominal_path: None, project_path: current_project_path.into(), + expected_checksum: None, }; let compression = match compression { @@ -370,6 +375,7 @@ pub fn do_sources_project_py( let current_project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; for src_path in do_sources_local_src_project_no_deps(¤t_project, true) @@ -433,6 +439,7 @@ fn do_add_py(path: String, iri: String, version: Option) -> PyResult<()> let mut project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; // TODO: do dependency resolution and locking? 
@@ -458,6 +465,7 @@ fn do_remove_py(path: String, iri: String) -> PyResult<()> { let mut project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; do_remove(&mut project, iri).map_err(|e| PyRuntimeError::new_err(e.to_string()))?; @@ -481,6 +489,7 @@ fn do_include_py( let mut project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; let force_format = match force_format { @@ -516,6 +525,7 @@ fn do_exclude_py(path: String, src_path: String) -> PyResult<()> { let mut project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; do_exclude(&mut project, src_path).map_err(|e| PyRuntimeError::new_err(e.to_string()))?; @@ -537,8 +547,7 @@ fn do_env_install_path_py(env_path: String, iri: String, location: String) -> Py let metadata = wrapfs::metadata(&location).map_err(|e| PyErr::new::(e.to_string()))?; if metadata.is_file() { - let project = LocalKParProject::new_guess_root(&location) - .map_err(|e| PyErr::new::(e.to_string()))?; + let project = LocalKParProject::new(&location, KparInnerPath::Guess, None, None); let Some(version) = project .version() @@ -550,7 +559,10 @@ fn do_env_install_path_py(env_path: String, iri: String, location: String) -> Py ))); }; - env.put_project(iri, version, |to| { + let checksum = project + .checksum_canonical_variant() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + env.put_project(iri, version, Some(checksum), |to| { clone_project(&project, to, true).map(|_| ()) }) .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; @@ -558,6 +570,7 @@ fn do_env_install_path_py(env_path: String, iri: String, location: String) -> Py let project = LocalSrcProject { nominal_path: None, project_path: location, + expected_checksum: None, }; let Some(version) = project @@ -569,8 +582,11 @@ fn do_env_install_path_py(env_path: String, iri: String, location: String) -> Py project.project_path ))); }; + let checksum 
= project + .checksum_canonical_variant() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; - env.put_project(iri, version, |to| { + env.put_project(iri, version, Some(checksum), |to| { clone_project(&project, to, true).map(|_| ()) }) .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; diff --git a/core/Cargo.toml b/core/Cargo.toml index 8e70415ae..4b51676d9 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -20,7 +20,7 @@ lenient_checks = [] # Binding support (but not binding libraries themselves) python = ["dep:pyo3"] js = ["dep:wasm-bindgen"] -filesystem = ["dep:camino-tempfile", "dep:dirs", "dep:zip", "dep:idna"] +filesystem = ["dep:camino-tempfile", "dep:dirs", "dep:zip"] networking = ["dep:reqwest", "dep:gix"] # "dep:reqwest-middleware", "dep:partialzip" # Different compression methods for creating KPARs kpar-bzip2 = ["zip?/bzip2"] @@ -41,7 +41,7 @@ sha2.workspace = true hex.workspace = true dirs = { version = "6.0.0", optional = true} fluent-uri = { version = "0.4.1", features = ["serde", "net"] } -idna = { version = "1.1.0", default-features = false, features = ["compiled_data"], optional = true } +idna = { version = "1.1.0", default-features = false, features = ["compiled_data"] } indexmap = { version = "2.13.0", default-features = false, features = ["serde"] } log = { version = "0.4.29", default-features = false } pubgrub = { version = "0.4.0", default-features = false } diff --git a/core/src/commands/build.rs b/core/src/commands/build.rs index 820cc7d02..611dd64d9 100644 --- a/core/src/commands/build.rs +++ b/core/src/commands/build.rs @@ -10,10 +10,11 @@ use crate::{ model::InterchangeProjectValidationError, project::{ ProjectRead, - local_kpar::{IntoKparError, LocalKParProject}, + local_kpar::{IntoKparError, LocalKParProjectRaw}, local_src::{LocalSrcError, LocalSrcProject}, utils::{FsIoError, ZipArchiveError}, }, + utils::license_file_stems, workspace::{Workspace, WorkspaceReadError}, }; @@ -248,7 +249,7 @@ pub fn do_build_kpar, Pr: 
ProjectRead>( compression: KparCompressionMethod, canonicalise: bool, allow_path_usage: bool, -) -> Result> { +) -> Result> { do_build_kpar_inner( project, path, @@ -266,7 +267,7 @@ fn do_build_kpar_inner, Pr: ProjectRead>( canonicalise: bool, allow_path_usage: bool, workspace_metamodel: Option<&str>, -) -> Result> { +) -> Result> { use crate::project::local_src::LocalSrcProject; let building = "Building"; @@ -362,7 +363,7 @@ fn do_build_kpar_inner, Pr: ProjectRead>( } } - Ok(LocalKParProject::from_project( + Ok(LocalKParProjectRaw::from_project( &local_project, path, compression.into(), @@ -390,37 +391,13 @@ fn read_optional_project_file( } } -/// Return the deduplicated, in-order list of SPDX identifiers (licenses plus -/// any `WITH` exceptions) named in `expression`. Each identifier maps to a -/// `LICENSES/.txt` file under REUSE conventions; the `+` "or later" -/// modifier does not affect the filename. -pub(crate) fn license_file_stems(expression: &spdx::Expression) -> Vec { - let mut stems: indexmap::IndexSet = indexmap::IndexSet::new(); - for req in expression.requirements() { - let license_name = match &req.req.license { - spdx::LicenseItem::Spdx { id, .. 
} => id.name.to_string(), - spdx::LicenseItem::Other(license_ref) => license_ref.to_string(), - }; - stems.insert(license_name); - - if let Some(addition) = &req.req.addition { - let addition_name = match addition { - spdx::AdditionItem::Spdx(id) => id.name.to_string(), - spdx::AdditionItem::Other(add_ref) => add_ref.to_string(), - }; - stems.insert(addition_name); - } - } - stems.into_iter().collect() -} - pub fn do_build_workspace_kpars>( workspace: &Workspace, path: P, compression: KparCompressionMethod, canonicalise: bool, allow_path_usage: bool, -) -> Result, KParBuildError> { +) -> Result, KParBuildError> { let ws_metamodel = workspace.metamodel().map(|iri| iri.as_str()); let mut result = Vec::new(); @@ -428,6 +405,7 @@ pub fn do_build_workspace_kpars>( let project = LocalSrcProject { nominal_path: None, project_path: workspace.root_path().join(&project_root.path), + expected_checksum: None, }; let file_name = default_kpar_file_name(&project)?; diff --git a/core/src/commands/env/install.rs b/core/src/commands/env/install.rs index 41f0a280e..2e3639d49 100644 --- a/core/src/commands/env/install.rs +++ b/core/src/commands/env/install.rs @@ -8,7 +8,7 @@ use crate::{ PutProjectError, ReadEnvironment, WriteEnvironment, utils::{CloneError, clone_project}, }, - project::ProjectRead, + project::{ProjectChecksum, ProjectRead}, }; #[derive(Error, Debug)] @@ -110,6 +110,7 @@ pub fn do_env_install_project< uri: S, version: &str, storage: &P, + checksum: Option, env: &mut E, allow_overwrite: bool, allow_multiple: bool, @@ -134,7 +135,7 @@ pub fn do_env_install_project< uri.as_ref(), ); - env.put_project(uri, version, |p| { + env.put_project(uri, version, checksum, |p| { clone_project(storage, p, true).map(|_| ()) }) .map_err(EnvInstallError::Installation)?; diff --git a/core/src/commands/index/add.rs b/core/src/commands/index/add.rs index 714ea6709..4ba25c1b2 100644 --- a/core/src/commands/index/add.rs +++ b/core/src/commands/index/add.rs @@ -22,7 +22,7 @@ use crate::{ 
model::InterchangeProjectValidationError, project::{ CanonicalizationError, ProjectRead as _, - local_kpar::{LocalKParError, LocalKParProject}, + local_kpar::{LocalKParError, LocalKParProjectRaw}, utils::{FsIoError, wrapfs}, }, purl::{is_valid_unnormalized_name, is_valid_unnormalized_publisher, normalize_field}, @@ -157,17 +157,13 @@ pub fn do_index_add, P: AsRef, R: AsRef>( let kpar_path = kpar_path.as_ref(); let kpar_path_abs = wrapfs::absolute(kpar_path)?; - let local_project = LocalKParProject::new(&kpar_path_abs, "").map_err(LocalKParError::Io)?; + let local_project = LocalKParProjectRaw::new_project_at_root(&kpar_path_abs)?; let Some(info) = local_project.get_info()? else { return Err(IndexAddError::MissingInfo(kpar_path_abs.clone())); }; let Some(meta) = local_project.get_meta()? else { return Err(IndexAddError::MissingMeta(kpar_path_abs)); }; - let project_digest = - to_explicit_digest(local_project.checksum_canonical_hex()?.unwrap_or_else(|| { - panic!("This should only be None when {INFO_FILE_NAME} or {META_FILE_NAME} is missing") - })); let map_invalid_project_err = |e| IndexAddError::InvalidProject { kpar_path: kpar_path.into(), @@ -316,7 +312,6 @@ pub fn do_index_add, P: AsRef, R: AsRef>( VersionEntry { version: version.to_string(), usage: info.usage, - project_digest, // The zip file does contain .project.json and .meta.json at this point // so it cannot be empty kpar_size: NonZero::new(local_project.file_size()?).unwrap(), diff --git a/core/src/commands/init.rs b/core/src/commands/init.rs index 33b976d6f..54464e9ff 100644 --- a/core/src/commands/init.rs +++ b/core/src/commands/init.rs @@ -123,6 +123,7 @@ pub fn do_init_local_file( let mut storage = LocalSrcProject { nominal_path: None, project_path: path, + expected_checksum: None, }; do_init(name, publisher, version, license, &mut storage)?; diff --git a/core/src/commands/lock.rs b/core/src/commands/lock.rs index 35f6a52a2..ae88126ed 100644 --- a/core/src/commands/lock.rs +++ 
b/core/src/commands/lock.rs @@ -6,6 +6,8 @@ use std::{ collections::{HashMap, HashSet, hash_map::Entry}, fmt::{self, Debug}, }; +#[cfg(feature = "filesystem")] +use typed_path::Utf8UnixPath; #[cfg(feature = "filesystem")] use camino::Utf8Path; @@ -14,7 +16,7 @@ use thiserror::Error; pub const DEFAULT_LOCKFILE_NAME: &str = "sysand-lock.toml"; #[cfg(feature = "filesystem")] -use crate::project::{editable::EditableProject, local_src::LocalSrcProject, utils::ToPathBuf}; +use crate::project::{editable::EditableProject, local_src::LocalSrcProject}; use crate::{ context::ProjectContext, lock::{Lock, Project, Usage, hash_str}, @@ -161,15 +163,15 @@ pub fn do_lock_projects< field: IncompleteField::Meta, }) })?; - let canonical_digest = project - .checksum_canonical_hex() - .map_err(LockProjectError::InputProjectCanonicalizationError)? - .ok_or_else(|| { - LockProjectError::LockError(LockError::IncompleteProject { - project_label: named_project_label, - field: IncompleteField::CanonicalDigest, - }) - })?; + // let canonical_digest = project + // .checksum_canonical_hex() + // .map_err(LockProjectError::InputProjectCanonicalizationError)? 
+ // .ok_or_else(|| { + // LockProjectError::LockError(LockError::IncompleteProject { + // project_label: named_project_label, + // field: IncompleteField::CanonicalDigest, + // }) + // })?; let sources = project .sources(ctx) @@ -177,14 +179,14 @@ pub fn do_lock_projects< debug_assert!(!sources.is_empty()); lock.projects.push(Project { - name: Some(info.name), + name: info.name, publisher: info.publisher, version: info.version, exports: meta.index.into_keys().collect(), identifiers: identifiers .map(|ids| ids.into_iter().map(|id| id.into_string()).collect()) .unwrap_or_default(), - checksum: canonical_digest, + // checksum: canonical_digest, sources, usages: info .usage @@ -249,6 +251,7 @@ pub fn do_lock_extend< for (iri, project) in solution { let iri_str = iri.as_str().to_owned(); + // TODO: use get_info, that can be more efficient let info = project .get_info() .map_err(LockError::DependencyProject)? @@ -263,13 +266,6 @@ pub fn do_lock_extend< project_label: iri_str.clone(), field: IncompleteField::Meta, })?; - let canonical_digest = project - .checksum_canonical_hex() - .map_err(LockError::DependencyProjectCanonicalization)? - .ok_or_else(|| LockError::IncompleteProject { - project_label: iri_str.clone(), - field: IncompleteField::CanonicalDigest, - })?; let sources = if !provided_iris.contains_key(iri.as_str()) { let sources = project.sources(ctx).map_err(LockError::DependencyProject)?; @@ -280,12 +276,11 @@ pub fn do_lock_extend< }; let lock_project = Project { - name: Some(info.name), + name: info.name, publisher: info.publisher, version: info.version.to_string(), exports: meta.index.into_keys().collect(), identifiers: vec![iri.to_string()], - checksum: canonical_digest, sources, usages: info .usage @@ -344,7 +339,7 @@ pub type EditableLocalSrcProject = EditableProject; /// Treats a project at `path` as an editable project and solves for its dependencies. 
#[cfg(feature = "filesystem")] pub fn do_lock_local_editable< - P: AsRef, + P: AsRef, PR: AsRef, PD: ProjectRead + Debug, R: ResolveRead + Debug, @@ -356,13 +351,17 @@ pub fn do_lock_local_editable< resolver: R, ctx: &ProjectContext, ) -> Result, LockProjectError> { + let path = path.as_ref(); let project = EditableProject::new( - path.to_path_buf(), + path.to_owned(), LocalSrcProject { - nominal_path: Some(path.to_path_buf()), + nominal_path: None, project_path: project_root.as_ref().canonicalize_utf8().map_err(|e| { - LockError::Io(FsIoError::Canonicalize(project_root.as_ref().join(path), e).into()) + LockError::Io( + FsIoError::Canonicalize(project_root.as_ref().join(path.as_str()), e).into(), + ) })?, + expected_checksum: None, }, ); diff --git a/core/src/commands/lock_tests.rs b/core/src/commands/lock_tests.rs index b9b368747..428b0e33b 100644 --- a/core/src/commands/lock_tests.rs +++ b/core/src/commands/lock_tests.rs @@ -17,22 +17,20 @@ fn lock_export_conflict() { lock_version: String::new(), projects: vec![ Project { - name: Some("test1".into()), + name: "test1".into(), publisher: None, version: String::new(), exports: exports.clone(), identifiers: vec!["test1".into()], - checksum: String::new(), sources: vec![], usages: vec![], }, Project { - name: Some("test2".into()), + name: "test2".into(), publisher: None, version: String::new(), exports, identifiers: vec!["test2".into()], - checksum: String::new(), sources: vec![], usages: vec![], }, diff --git a/core/src/commands/publish.rs b/core/src/commands/publish.rs index 5490ccbd3..c0ff0577b 100644 --- a/core/src/commands/publish.rs +++ b/core/src/commands/publish.rs @@ -12,7 +12,10 @@ use url::Url; use crate::{ auth::{ForceBearerAuth, HTTPAuthentication}, env::discovery::{HttpBaseUrlShapeError, validate_http_base_url_shape}, - project::{ProjectRead, local_kpar::LocalKParProject}, + project::{ + ProjectRead, + local_kpar::{KparInnerPath, LocalKParProject}, + }, purl::{ PKG_SYSAND_PREFIX, 
is_valid_unnormalized_name, is_valid_unnormalized_publisher, normalize_field, @@ -265,8 +268,7 @@ pub struct PublishPreparation { /// activity. pub fn prepare_publish_payload(path: &Utf8Path) -> Result { // Open and validate kpar. - let kpar_project = LocalKParProject::new_guess_root(path) - .map_err(|e| PublishError::KparOpen(path.as_str().into(), e.to_string()))?; + let kpar_project = LocalKParProject::new(path, KparInnerPath::Guess, None, None); let (info, meta) = kpar_project .get_project() diff --git a/core/src/commands/sources.rs b/core/src/commands/sources.rs index c60bab8f8..196ef9b3b 100644 --- a/core/src/commands/sources.rs +++ b/core/src/commands/sources.rs @@ -12,7 +12,6 @@ use typed_path::Utf8UnixPathBuf; use crate::project::local_src::{LocalSrcError, LocalSrcProject, PathError}; use crate::{ env::ReadEnvironment, - lock::{Lock, ResolutionError}, model::{InterchangeProjectUsage, InterchangeProjectValidationError}, project::{ProjectRead, memory::InMemoryProject}, resolve::{ @@ -126,20 +125,3 @@ pub fn find_project_dependencies( }) .collect()) } - -/// Finds all (locked) projects from a `Lock` (typically loaded from lockfile) -/// in an provided environment. -pub fn enumerate_projects_lock( - lock: &Lock, - env: &Env, -) -> Result< - Vec<::InterchangeProjectRead>, - ResolutionError<::ReadError>, -> { - let projects = lock - .resolve_projects(env)? 
- .into_iter() - .filter_map(|(_, project_read)| project_read) - .collect(); - Ok(projects) -} diff --git a/core/src/commands/sync.rs b/core/src/commands/sync.rs index 9e5dcb078..187f9978a 100644 --- a/core/src/commands/sync.rs +++ b/core/src/commands/sync.rs @@ -3,14 +3,15 @@ use std::{collections::HashMap, num::NonZeroU64}; -use camino::Utf8Path; use thiserror::Error; +use typed_path::Utf8UnixPathBuf; use crate::{ commands::env::do_env_install_project, - env::{ReadEnvironment, WriteEnvironment, utils::ErrorBound}, + env::{ProjectChecksumResult, ReadEnvironment, WriteEnvironment, utils::ErrorBound}, + iri_normalize::canonicalize_iri_tolerant, lock::{Lock, Source}, - project::{ProjectRead, memory::InMemoryProject}, + project::{ProjectChecksum, ProjectRead, memory::InMemoryProject}, }; #[derive(Error, Debug)] @@ -32,7 +33,7 @@ pub enum SyncError { #[error("no IRI given for project with index_kpar = `{0}` in lockfile")] MissingIriIndexKparUrl(Box), #[error("no IRI given for project with remote_git = `{0}` in lockfile")] - MissingIriRemoteGitPath(Box), + MissingIriRemoteGitUrl(Box), #[error( "cannot handle project with IRI `{0}` residing in local file (type `local_src`) storage" )] @@ -64,17 +65,21 @@ pub enum SyncError { #[error("failed to install project `{uri}`:\n{cause}")] InstallFail { uri: Box, cause: String }, #[error( - "tried to install a non-provided version (checksum {hash}) of `{iri}`, which is an IRI marked as being provided by your tooling" + "tried to install a non-provided version {version} of `{iri}`, which is\n\ + an IRI marked as being provided by your tooling; provided versions are:\n\ + {provided_versions:?}" )] InvalidProvidedVersion { iri: Box, - hash: Box, - provided: Vec, + version: Box, + provided_versions: Vec, }, + // TODO: preserve error type #[error("project read error: {0}")] ProjectRead(String), } +// TODO: take `lock` by value // TODO: Use AnyProject::try_from_source to avoid having so many arguments 
#[allow(clippy::too_many_arguments)] pub fn do_sync< @@ -106,13 +111,14 @@ pub fn do_sync< ) -> Result<(), SyncError> where Environment: ReadEnvironment + WriteEnvironment, - CreateSrcPathStorage: Fn(&Utf8Path) -> SrcPathStorage, + CreateSrcPathStorage: Fn(Utf8UnixPathBuf, String) -> SrcPathStorage, SrcPathStorage: ProjectRead, - CreateRemoteSrcStorage: Fn(String) -> Result, + CreateRemoteSrcStorage: Fn(String, String) -> Result, RemoteSrcStorage: ProjectRead, - CreateKParPathStorage: Fn(&Utf8Path) -> KParPathStorage, + CreateKParPathStorage: Fn(String, NonZeroU64, String) -> KParPathStorage, KParPathStorage: ProjectRead, - CreateRemoteKParStorage: Fn(String) -> Result, + CreateRemoteKParStorage: + Fn(String, NonZeroU64, String) -> Result, RemoteKParStorage: ProjectRead, CreateIndexKParStorage: Fn(String, NonZeroU64, String) -> Result, @@ -131,37 +137,30 @@ where for iri in &project.identifiers { let excluded_versions = if let Ok(parsed_iri) = fluent_uri::Iri::parse(iri.clone()) { - provided_iris.get(parsed_iri.normalize().as_str()) + // TODO: maybe canonicalize on lock read, or don't canonicalize at all? 
+ provided_iris.get(canonicalize_iri_tolerant(parsed_iri.borrow()).as_str()) } else { provided_iris.get(iri.as_str()) }; - let checksum = &project.checksum; if let Some(versions) = excluded_versions { - let mut provided = vec![]; + let mut provided_versions = vec![]; for project_version in versions { - if let Some(provided_checksum) = - // TODO: don't eat potential errors - project_version.checksum_canonical_hex().ok().flatten() - { - if checksum == &provided_checksum { - log::debug!("`{iri}` is marked as provided, skipping installation"); - continue 'main_loop; - } - - provided.push(provided_checksum); - } else { - log::debug!( - "failed to get checksum for provided project: {project_version:?}" - ); + // Provided projects must have complete metadata + let version = project_version.version().unwrap().unwrap(); + if project.version == version { + log::debug!("`{iri}` is marked as provided, skipping installation"); + continue 'main_loop; } + + provided_versions.push(version); } return Err(SyncError::InvalidProvidedVersion { iri: iri.as_str().into(), - hash: project.checksum.as_str().into(), - provided, + version: project.version.as_str().into(), + provided_versions, }); } } @@ -173,93 +172,151 @@ where } for uri in &project.identifiers { - if is_installed(uri, &project.checksum, env)? { - log::debug!("`{uri}` found in .sysand"); - continue 'main_loop; + // TODO: move functionality to check if any of a set of IRIs is installed to env trait + for source in &project.sources { + if let Some(checksum) = source.to_checksum() + && env + .has_version_verified(uri, &project.version, &checksum) + .map_err(|e| SyncError::ProjectRead(e.to_string()))? + == ProjectChecksumResult::Match + { + log::debug!("`{uri}` found in .sysand"); + continue 'main_loop; + } } } let mut no_supported = true; for source in project.sources.iter() { - let mut supported = true; + let supported = true; match source { Source::Editable { .. 
} => { // Nothing to install for editable } - Source::LocalSrc { src_path } => { + Source::LocalSrc { src_path, checksum } => { let uri = main_uri - .as_ref() .ok_or_else(|| SyncError::MissingIriSrcPath(src_path.as_str().into()))?; let src_path_storage = src_path_storage .as_ref() .ok_or_else(|| SyncError::MissingSrcPathStorage(uri.as_str().into()))?; - let storage = src_path_storage(src_path.as_str().into()); + let storage = src_path_storage(src_path.clone(), checksum.clone()); log::debug!("trying to install `{uri}` from src_path `{src_path}`"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + try_install( + uri, + &project.version, + // TODO: avoid clone + &ProjectChecksum::Project(checksum.to_owned()), + storage, + env, + )?; } - Source::RemoteSrc { remote_src } => { + Source::RemoteSrc { + remote_src, + checksum, + } => { let uri = main_uri.as_ref().ok_or_else(|| { SyncError::MissingIriRemoteSrc(remote_src.as_str().into()) })?; let remote_src_storage = remote_src_storage .as_ref() .ok_or_else(|| SyncError::MissingRemoteSrcStorage(uri.as_str().into()))?; - let storage = remote_src_storage(remote_src.clone()).map_err(|e| { - SyncError::InvalidRemoteSource(remote_src.as_str().into(), e) - })?; + let storage = remote_src_storage(remote_src.clone(), checksum.clone()) + .map_err(|e| { + SyncError::InvalidRemoteSource(remote_src.as_str().into(), e) + })?; log::debug!("trying to install `{uri}` from remote_src: {remote_src}"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + try_install( + uri, + &project.version, + &ProjectChecksum::Project(checksum.to_owned()), + storage, + env, + )?; } - Source::LocalKpar { kpar_path } => { - let uri = main_uri.as_ref().ok_or_else(|| { + Source::LocalKpar { + kpar_path, + kpar_size, + kpar_digest, + } => { + let uri = main_uri.ok_or_else(|| { SyncError::MissingIriLocalKparPath(kpar_path.as_str().into()) })?; let kpar_path_storage = kpar_path_storage.as_ref().ok_or_else(|| { 
SyncError::MissingLocalKparStorage(kpar_path.as_str().into()) })?; - let storage = kpar_path_storage(kpar_path.as_str().into()); + let storage = kpar_path_storage( + kpar_path.as_str().to_owned(), + *kpar_size, + kpar_digest.to_owned(), + ); log::debug!("trying to install `{uri}` from kpar_path: {kpar_path}"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + try_install( + uri, + &project.version, + &ProjectChecksum::Kpar(kpar_digest.to_owned()), + storage, + env, + )?; } Source::RemoteKpar { remote_kpar, - remote_kpar_size: _, + kpar_size, + kpar_digest, } => { - let uri = main_uri.as_ref().ok_or_else(|| { + let uri = main_uri.ok_or_else(|| { SyncError::MissingIriRemoteKparPath(remote_kpar.as_str().into()) })?; let remote_kpar_storage = remote_kpar_storage.as_ref().ok_or_else(|| { SyncError::MissingRemoteKparStorage(remote_kpar.as_str().into()) })?; - let storage = remote_kpar_storage(remote_kpar.clone()).map_err(|e| { - SyncError::InvalidRemoteSource(remote_kpar.as_str().into(), e) - })?; + let storage = remote_kpar_storage( + remote_kpar.to_owned(), + *kpar_size, + kpar_digest.to_owned(), + ) + .map_err(|e| SyncError::InvalidRemoteSource(remote_kpar.as_str().into(), e))?; log::debug!("trying to install `{uri}` from remote_kpar: {remote_kpar}"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + try_install( + uri, + &project.version, + &ProjectChecksum::Kpar(kpar_digest.to_owned()), + storage, + env, + )?; } Source::IndexKpar { index_kpar, - index_kpar_size, - index_kpar_digest, + kpar_size, + kpar_digest, } => { - let uri = main_uri.as_ref().ok_or_else(|| { + let uri = main_uri.ok_or_else(|| { SyncError::MissingIriIndexKparUrl(index_kpar.as_str().into()) })?; let index_kpar_storage = index_kpar_storage .as_ref() .ok_or_else(|| SyncError::MissingIndexKparStorage(uri.as_str().into()))?; - let storage = index_kpar_storage( - index_kpar.clone(), - *index_kpar_size, - index_kpar_digest.clone(), - ) - .map_err(|e| 
SyncError::InvalidRemoteSource(index_kpar.as_str().into(), e))?; + let storage = + index_kpar_storage(index_kpar.to_owned(), *kpar_size, kpar_digest.clone()) + .map_err(|e| { + SyncError::InvalidRemoteSource(index_kpar.as_str().into(), e) + })?; log::debug!("trying to install `{uri}` from index_kpar: {index_kpar}"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + try_install( + uri, + &project.version, + &ProjectChecksum::Kpar(kpar_digest.to_owned()), + storage, + env, + )?; } + // TODO: git is for now assumed to be editable; in particular we should probably set + // editable=true in lockfile/env.toml or some other indicator that the project is expected to change and no + // integrity checking will be done + // To avoid having remote URLs for editable projects in env.toml, for now on sync unconditionally + // install the project Source::RemoteGit { remote_git } => { - let uri = main_uri.as_ref().ok_or_else(|| { - SyncError::MissingIriRemoteGitPath(remote_git.as_str().into()) + let uri = main_uri.ok_or_else(|| { + SyncError::MissingIriRemoteGitUrl(remote_git.as_str().into()) })?; let remote_git_storage = remote_git_storage.as_ref().ok_or_else(|| { SyncError::MissingRemoteGitStorage(remote_git.as_str().into()) @@ -267,9 +324,12 @@ where let storage = remote_git_storage(remote_git.clone()) .map_err(|e| SyncError::GitDownload(remote_git.as_str().into(), e))?; log::debug!("trying to install `{uri}` from remote_git: {remote_git}"); - try_install(uri, &project.version, &project.checksum, storage, env)?; + do_env_install_project(uri, &project.version, &storage, None, env, true, true) + .map_err(|e| SyncError::InstallFail { + uri: uri.as_str().into(), + cause: e.to_string(), + })?; } - _ => supported = false, } if supported { no_supported = false; @@ -290,68 +350,43 @@ where Ok(()) } -// TODO: move functionality to check if any of a set of IRIs is installed to env trait -fn is_installed, P: AsRef>( - uri: S, - checksum: P, - env: &E, -) -> Result> 
{ - if !env - .has(&uri) - .map_err(|e| SyncError::ProjectRead(e.to_string()))? - { - return Ok(false); - } - for version in env - .versions(&uri) - .map_err(|e| SyncError::ProjectRead(e.to_string()))? - { - let version: String = version.map_err(|e| SyncError::ProjectRead(e.to_string()))?; - let project_checksum = env - .get_project(&uri, version) - .map_err(|e| SyncError::ProjectRead(e.to_string()))? - .checksum_non_canonical_hex() - .map_err(|e| SyncError::ProjectRead(e.to_string()))? - .ok_or_else(|| SyncError::BadProject(uri.as_ref().to_owned()))?; - if checksum.as_ref() == project_checksum { - return Ok(true); - } - } - Ok(false) -} - fn try_install< E: ReadEnvironment + WriteEnvironment, P: ProjectRead, U: ErrorBound, G: ErrorBound, - S1: AsRef, - S2: AsRef, + S: AsRef, >( - uri: S1, + uri: S, version: &str, - checksum: S2, + expected_checksum: &ProjectChecksum, storage: P, env: &mut E, ) -> Result<(), SyncError> { let uri = uri.as_ref(); - let checksum = checksum.as_ref(); - let project_checksum = storage - .checksum_canonical_hex() - .map_err(|e| SyncError::ProjectRead(e.to_string()))? 
- .ok_or_else(|| SyncError::BadProject(uri.to_owned()))?; - if checksum == project_checksum { + let actual_checksum = storage + .checksum_canonical_variant() + .map_err(|e| SyncError::ProjectRead(e.to_string()))?; + if expected_checksum == &actual_checksum { // TODO: Need to decide how to handle existing installations and possible flags to modify behavior - do_env_install_project(uri, version, &storage, env, true, true).map_err(|e| { - SyncError::InstallFail { - uri: uri.into(), - cause: e.to_string(), - } + do_env_install_project( + uri, + version, + &storage, + Some(actual_checksum), + env, + true, + true, + ) + .map_err(|e| SyncError::InstallFail { + uri: uri.into(), + cause: e.to_string(), })?; } else { log::debug!("incorrect checksum for `{uri}` in lockfile"); - log::debug!("lockfile checksum = `{checksum}`"); - log::debug!("project checksum = `{project_checksum}`"); + // TODO: reenable once proper Display is implemented + // log::debug!("lockfile checksum = `{expected_checksum}`"); + // log::debug!("project checksum = `{project_checksum}`"); return Err(SyncError::BadChecksum(uri.into())); } Ok(()) diff --git a/core/src/commands/sync_tests.rs b/core/src/commands/sync_tests.rs index 8fd4d091b..9eade728f 100644 --- a/core/src/commands/sync_tests.rs +++ b/core/src/commands/sync_tests.rs @@ -9,13 +9,18 @@ use semver::Version; use crate::{ env::{ - ReadEnvironment, WriteEnvironment, memory::MemoryStorageEnvironment, utils::clone_project, + ProjectChecksumResult, ReadEnvironment, WriteEnvironment, memory::MemoryStorageEnvironment, + utils::clone_project, }, model::{InterchangeProjectInfo, InterchangeProjectMetadata}, - project::{ProjectMut, ProjectRead, memory::InMemoryProject}, - sync::{SyncError, is_installed, try_install}, + project::{ProjectChecksum, ProjectMut, ProjectRead, memory::InMemoryProject}, + sync::{SyncError, try_install}, }; +fn new_env() -> MemoryStorageEnvironment { + MemoryStorageEnvironment::::new() +} + fn storage_example() -> InMemoryProject { 
let mut storage = InMemoryProject::new(); @@ -52,15 +57,9 @@ fn storage_example() -> InMemoryProject { #[test] fn not_installed_project_not_found() { let uri = "urn:kpar:install_test"; - let checksum = "00"; - let env = MemoryStorageEnvironment::new(); + let env = new_env(); - assert!( - !is_installed::, Infallible, Infallible, _, _>( - uri, checksum, &env - ) - .unwrap() - ); + assert!(!env.has(uri).unwrap()); } #[test] @@ -68,32 +67,33 @@ fn installed_projects_are_found() { let storage = storage_example(); let uri = "urn:kpar:install_test"; - let checksum = storage.checksum_non_canonical_hex().unwrap().unwrap(); - let mut env = MemoryStorageEnvironment::new(); - env.put_project(uri, "1,2,3", |p| { + let version = "1.2.3"; + let checksum = storage.checksum_canonical_variant().unwrap(); + let mut env = new_env(); + env.put_project(uri, version, Some(checksum.clone()), |p| { clone_project(&storage, p, true).map(|_| ()) }) .unwrap(); - assert!( - is_installed::, Infallible, Infallible, _, _>( - uri, &checksum, &env - ) - .unwrap() + assert_eq!( + env.has_version_verified(uri, version, &checksum).unwrap(), + ProjectChecksumResult::Match ); - assert!( - !is_installed::, Infallible, Infallible, _, _>( - uri, "00", &env + assert_eq!( + env.has_version_verified( + uri, + version, + &crate::project::ProjectChecksum::Project(String::from("00")) ) - .unwrap() + .unwrap(), + ProjectChecksumResult::Mismatch ); - assert!( - !is_installed::, Infallible, Infallible, _, _>( - "not_uri", &checksum, &env - ) - .unwrap() + assert_eq!( + env.has_version_verified("not_uri", version, &checksum) + .unwrap(), + ProjectChecksumResult::VersionNotFound ); } @@ -102,17 +102,12 @@ fn try_install_installs_project() { let storage = storage_example(); let uri = "urn:kpar:install_test"; - let checksum = storage.checksum_non_canonical_hex().unwrap().unwrap(); - let mut env = MemoryStorageEnvironment::new(); - - try_install::< - MemoryStorageEnvironment, - InMemoryProject, - Infallible, - 
Infallible, - _, - _, - >(uri, "1.2.3", &checksum, storage, &mut env) + let checksum = storage.checksum_canonical_variant().unwrap(); + let mut env = new_env(); + + try_install::<_, InMemoryProject, Infallible, Infallible, _>( + uri, "1.2.3", &checksum, storage, &mut env, + ) .unwrap(); let uris = env.uris().unwrap(); @@ -131,17 +126,12 @@ fn try_install_fails_to_install_wrong_checksum() { let storage = storage_example(); let uri = "urn:kpar:install_test"; - let checksum = "00"; - let mut env = MemoryStorageEnvironment::new(); - - let SyncError::BadChecksum(msg) = try_install::< - MemoryStorageEnvironment, - InMemoryProject, - Infallible, - Infallible, - _, - _, - >(&uri, "1.2.3", &checksum, storage, &mut env) + let checksum = crate::project::ProjectChecksum::Project("00".to_owned()); + let mut env = new_env(); + + let SyncError::BadChecksum(msg) = try_install::<_, InMemoryProject, Infallible, Infallible, _>( + &uri, "1.2.3", &checksum, storage, &mut env, + ) .unwrap_err() else { panic!() }; @@ -152,3 +142,45 @@ fn try_install_fails_to_install_wrong_checksum() { assert_eq!(uris.len(), 0); } + +#[test] +fn has_version_verified_different_checksum_kinds() { + let storage = storage_example(); + let uri = "urn:kpar:install_test"; + let version = "1.2.3"; + let project_checksum = storage.checksum_canonical_variant().unwrap(); + // InMemoryProject always produces ProjectChecksum::Project; flip to Kpar to trigger mismatch + let kpar_checksum = match &project_checksum { + ProjectChecksum::Project(c) => ProjectChecksum::Kpar(c.clone()), + ProjectChecksum::Kpar(c) => ProjectChecksum::Project(c.clone()), + }; + let mut env = new_env(); + env.put_project(uri, version, Some(project_checksum), |p| { + clone_project(&storage, p, true).map(|_| ()) + }) + .unwrap(); + + assert_eq!( + env.has_version_verified(uri, version, &kpar_checksum) + .unwrap(), + ProjectChecksumResult::DifferentChecksumKinds + ); +} + +#[test] +fn has_version_verified_version_not_found_for_known_uri() { + let 
storage = storage_example(); + let uri = "urn:kpar:install_test"; + let version = "1.2.3"; + let checksum = storage.checksum_canonical_variant().unwrap(); + let mut env = new_env(); + env.put_project(uri, version, Some(checksum.clone()), |p| { + clone_project(&storage, p, true).map(|_| ()) + }) + .unwrap(); + + assert_eq!( + env.has_version_verified(uri, "9.9.9", &checksum).unwrap(), + ProjectChecksumResult::VersionNotFound + ); +} diff --git a/core/src/config/local_fs.rs b/core/src/config/local_fs.rs index ecc924979..e2925b1da 100644 --- a/core/src/config/local_fs.rs +++ b/core/src/config/local_fs.rs @@ -9,7 +9,7 @@ use toml_edit::{ArrayOfTables, DocumentMut, Item, Table, Value}; use super::Config; use crate::{ - lock::Source, + config::OverrideSource, project::utils::{FsIoError, wrapfs}, utils::multiline_array, }; @@ -73,7 +73,7 @@ pub enum ConfigProjectSourceError { pub fn add_project_source_to_config, S: AsRef>( config_path: P, iri: S, - source: &Source, + source: &OverrideSource, ) -> Result<(), ConfigProjectSourceError> { let config_path = config_path.as_ref(); let sources = multiline_array(std::iter::once(source.to_toml())); diff --git a/core/src/config/local_fs_tests.rs b/core/src/config/local_fs_tests.rs index 2109c9edf..eb05886ef 100644 --- a/core/src/config/local_fs_tests.rs +++ b/core/src/config/local_fs_tests.rs @@ -6,8 +6,7 @@ use std::{error::Error, io::Write}; use camino_tempfile::tempdir; use crate::{ - config::{Config, ConfigProject, Index, local_fs}, - lock::Source, + config::{Config, ConfigProject, Index, OverrideSource, local_fs}, project::utils::wrapfs, }; @@ -38,7 +37,7 @@ fn add_project_source_to_config() -> Result<(), Box> { let dir = tempdir()?; let config_path = dir.path().join(local_fs::CONFIG_FILE); let iri = "urn:kpar:test"; - let source = Source::LocalSrc { + let source = OverrideSource::LocalSrc { src_path: "local/test".into(), }; @@ -66,7 +65,7 @@ fn remove_project_source_from_config() -> Result<(), Box> { let config_path = 
dir.path().join(local_fs::CONFIG_FILE); let mut config_file = wrapfs::File::create(&config_path)?; let iri = "urn:kpar:test"; - let source = Source::LocalSrc { + let source = OverrideSource::LocalSrc { src_path: "local/test".into(), }; let config = Config { diff --git a/core/src/config/mod.rs b/core/src/config/mod.rs index 5316fda50..bdc0d37d8 100644 --- a/core/src/config/mod.rs +++ b/core/src/config/mod.rs @@ -2,9 +2,11 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use serde::{Deserialize, Serialize}; +use toml_edit::{InlineTable, Value}; +use typed_path::Utf8UnixPathBuf; use url::Url; -use crate::lock::Source; +use crate::project::utils::{deserialize_unix_path, serialize_unix_path}; #[cfg(feature = "filesystem")] pub mod local_fs; @@ -23,7 +25,79 @@ pub struct ConfigProject { #[serde(skip_serializing_if = "Vec::is_empty", default)] pub identifiers: Vec, #[serde(skip_serializing_if = "Vec::is_empty", default)] - pub sources: Vec, + pub sources: Vec, +} + +#[derive(Clone, Eq, Debug, Deserialize, Ord, PartialEq, PartialOrd, Serialize)] +#[serde(untagged)] +pub enum OverrideSource { + // Path must be a Unix path relative to workspace root + Editable { + #[serde( + deserialize_with = "deserialize_unix_path", + serialize_with = "serialize_unix_path" + )] + editable: Utf8UnixPathBuf, + }, + LocalSrc { + #[serde( + deserialize_with = "deserialize_unix_path", + serialize_with = "serialize_unix_path" + )] + src_path: Utf8UnixPathBuf, + }, + LocalKpar { + #[serde( + deserialize_with = "deserialize_unix_path", + serialize_with = "serialize_unix_path" + )] + kpar_path: Utf8UnixPathBuf, + }, + RemoteKpar { + remote_kpar: String, + }, + // TODO: it doesn't make sense to have this in url shape; it should be a + // publisher/name/IRI + // IndexKpar { + // index_kpar: String, + // }, + RemoteSrc { + remote_src: String, + }, + RemoteGit { + remote_git: String, + }, +} + +impl OverrideSource { + pub fn to_toml(&self) -> InlineTable { + let mut table = InlineTable::new(); 
+ match self { + Self::Editable { editable } => { + debug_assert!( + editable.is_relative(), + "editable project path is absolute: `{editable}`" + ); + table.insert("editable", Value::from(editable.as_str())); + } + Self::LocalKpar { kpar_path } => { + table.insert("kpar_path", Value::from(kpar_path.as_str())); + } + Self::LocalSrc { src_path } => { + table.insert("src_path", Value::from(src_path.as_str())); + } + Self::RemoteGit { remote_git } => { + table.insert("remote_git", Value::from(remote_git)); + } + Self::RemoteKpar { remote_kpar } => { + table.insert("remote_kpar", Value::from(remote_kpar)); + } + Self::RemoteSrc { remote_src } => { + table.insert("remote_src", Value::from(remote_src)); + } + } + table + } } impl Config { diff --git a/core/src/config/mod_tests.rs b/core/src/config/mod_tests.rs index 20cc9bf1e..5218cd1c0 100644 --- a/core/src/config/mod_tests.rs +++ b/core/src/config/mod_tests.rs @@ -3,10 +3,7 @@ use url::Url; -use crate::{ - config::{Config, ConfigProject, Index}, - lock::Source, -}; +use crate::config::{Config, ConfigProject, Index, OverrideSource}; #[test] fn default_config() { @@ -36,7 +33,7 @@ fn merge() { }], projects: vec![ConfigProject { identifiers: vec!["urn:kpar:test".to_string()], - sources: vec![Source::LocalSrc { + sources: vec![OverrideSource::LocalSrc { src_path: "./path/to project".into(), }], }], diff --git a/core/src/discover.rs b/core/src/discover.rs index d50ec3b53..b37ad1e37 100644 --- a/core/src/discover.rs +++ b/core/src/discover.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use camino::{Utf8Path, Utf8PathBuf}; +use typed_path::Utf8UnixPathBuf; use crate::{ project::{ @@ -23,8 +24,9 @@ pub fn discover_project>( working_directory.as_ref() ); let project = discover(working_directory, is_project_file)?.map(|path| LocalSrcProject { - nominal_path: Some(Utf8PathBuf::from(".")), + nominal_path: Some(Utf8UnixPathBuf::from(".")), project_path: path, + expected_checksum: None, }); Ok(project) } 
diff --git a/core/src/env/index.rs b/core/src/env/index.rs index 17aea9d0e..fba13a4e0 100644 --- a/core/src/env/index.rs +++ b/core/src/env/index.rs @@ -44,6 +44,8 @@ use crate::{ resolve::net_utils::json_get_request, }; +use super::ProjectChecksumResult; + /// Async HTTP client for the sysand index protocol. /// /// `index_root` is resolved lazily via `sysand-index-config.json` on @@ -180,7 +182,6 @@ impl TryFrom<&str> for Sha256HexDigest { pub(crate) struct AdvertisedVersion { pub(crate) version: semver::Version, pub(crate) usage: Vec, - pub(crate) project_digest: Sha256HexDigest, pub(crate) kpar_size: NonZeroU64, pub(crate) kpar_digest: Sha256HexDigest, pub(crate) status: VersionStatus, @@ -427,51 +428,43 @@ fn validate_versions( url: &url::Url, vs: VersionsJson, ) -> Result, IndexEnvironmentError> { - let validated: Rc<[AdvertisedVersion]> = - vs.versions - .into_iter() - .map(|entry| { - let version = Version::parse(&entry.version).map_err(|source| { - IndexEnvironmentError::InvalidSemverVersion { - url: url.as_str().into(), - value: entry.version.clone(), - source, - } - })?; - // `semver::Version::parse` is lenient on the `+build…` - // suffix; the wire-format rejection lives here. - if !version.build.is_empty() { - return Err(IndexEnvironmentError::VersionHasBuildMetadata { - url: url.as_str().into(), - value: entry.version.clone(), - }); + let validated: Rc<[AdvertisedVersion]> = vs + .versions + .into_iter() + .map(|entry| { + let version = Version::parse(&entry.version).map_err(|source| { + IndexEnvironmentError::InvalidSemverVersion { + url: url.as_str().into(), + value: entry.version.clone(), + source, } - let project_digest = Sha256HexDigest::try_from(entry.project_digest.as_str()) - .map_err(|_| IndexEnvironmentError::InvalidVersionEntry { + })?; + // `semver::Version::parse` is lenient on the `+build…` + // suffix; the wire-format rejection lives here. 
+ if !version.build.is_empty() { + return Err(IndexEnvironmentError::VersionHasBuildMetadata { + url: url.as_str().into(), + value: entry.version.clone(), + }); + } + let kpar_digest = + Sha256HexDigest::try_from(entry.kpar_digest.as_str()).map_err(|_| { + IndexEnvironmentError::InvalidVersionEntry { url: url.as_str().into(), version: entry.version.clone(), - field: "project_digest", - value: entry.project_digest.clone(), - })?; - let kpar_digest = - Sha256HexDigest::try_from(entry.kpar_digest.as_str()).map_err(|_| { - IndexEnvironmentError::InvalidVersionEntry { - url: url.as_str().into(), - version: entry.version.clone(), - field: "kpar_digest", - value: entry.kpar_digest.clone(), - } - })?; - Ok::<_, IndexEnvironmentError>(AdvertisedVersion { - version, - usage: entry.usage, - project_digest, - kpar_size: entry.kpar_size, - kpar_digest, - status: entry.status, - }) + field: "kpar_digest", + value: entry.kpar_digest.clone(), + } + })?; + Ok::<_, IndexEnvironmentError>(AdvertisedVersion { + version, + usage: entry.usage, + kpar_size: entry.kpar_size, + kpar_digest, + status: entry.status, }) - .collect::>()?; + }) + .collect::>()?; // "Pick the best duplicate" has no principled answer — two entries // with the same semver may carry different digests. 
Letting them // reach `resolve_candidates` would leak non-determinism into @@ -501,6 +494,8 @@ fn validate_versions( type ResultStream = futures::stream::Iter>>; +/// TODO: move index away from ReadEnvironment traits; most new methods will not be used +/// for index impl ReadEnvironmentAsync for IndexEnvironmentAsync { type ReadError = IndexEnvironmentError; @@ -635,6 +630,16 @@ impl ReadEnvironmentAsync for IndexEnvironmentAsync< Ok(project) } + + /// Not useful for an index + async fn has_version_verified_async, V: AsRef>( + &self, + _uri: S, + _version: V, + _checksum: &crate::project::ProjectChecksum, + ) -> Result { + panic!() + } } #[cfg(test)] diff --git a/core/src/env/index_tests.rs b/core/src/env/index_tests.rs index 3e8268759..4f0b25b86 100644 --- a/core/src/env/index_tests.rs +++ b/core/src/env/index_tests.rs @@ -42,14 +42,13 @@ use crate::{ context::ProjectContext, env::{ReadEnvironment, ReadEnvironmentAsync, discovery::ResolvedEndpoints}, lock::Source, - model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw, project_hash_hex}, project::{ - InlineProjectDigest, ProjectRead, canonical_project_digest_inline, - index_entry::IndexEntryProjectError, reqwest_kpar_download::ReqwestKparDownloadedError, + ProjectRead, index_entry::IndexEntryProjectError, + reqwest_kpar_download::ReqwestKparDownloadedError, }, purl::PKG_SYSAND_PREFIX, resolve::net_utils::create_reqwest_client, - utils::{lowercase_hex, sha256_lowercase_hex}, + utils::sha256_lowercase_hex, }; // Re-exports so that `super::X` paths inside sub-modules (which refer to this @@ -78,21 +77,19 @@ fn versions_json_body(entries: [(&str, &str); N]) -> String { .iter() .map(|(version, usage)| { format!( - r#"{{"version":"{version}","usage":{usage},"project_digest":"{FILLER_DIGEST}","kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}"# + r#"{{"version":"{version}","usage":{usage},"kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}"# ) }) .collect(); format!(r#"{{"versions":[{}]}}"#, parts.join(",")) } -fn 
versions_json_body_with_project_digest( - entries: [(&str, &str, &str); N], -) -> String { +fn versions_json_body_with_project_digest(entries: [(&str, &str); N]) -> String { let parts: Vec = entries .iter() - .map(|(version, usage, project_digest)| { + .map(|(version, usage )| { format!( - r#"{{"version":"{version}","usage":{usage},"project_digest":"{project_digest}","kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}"# + r#"{{"version":"{version}","usage":{usage},"kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}"# ) }) .collect(); @@ -117,33 +114,6 @@ fn meta_json_body() -> &'static str { r#"{"index":{},"created":"2026-01-01T00:00:00.000000000Z"}"# } -fn project_digest(info_json: &str, meta_json: &str) -> Result> { - let info: InterchangeProjectInfoRaw = serde_json::from_str(info_json)?; - let meta: InterchangeProjectMetadataRaw = serde_json::from_str(meta_json)?; - let mut hash = project_hash_hex(&info, &meta); - hash.insert_str(0, "sha256:"); - Ok(hash) -} - -/// Compute the canonical project digest — matches what the server would -/// advertise in `versions.json`'s `project_digest`. Equivalent to -/// `project_digest` when `meta` has no checksum entries or only lowercase -/// SHA256 entries, but differs when entries require canonicalization -/// (mixed-case SHA256 hex values). 
-fn canonical_project_digest( - info_json: &str, - meta_json: &str, -) -> Result> { - let info: InterchangeProjectInfoRaw = serde_json::from_str(info_json)?; - let meta: InterchangeProjectMetadataRaw = serde_json::from_str(meta_json)?; - let InlineProjectDigest::Computed(hash) = canonical_project_digest_inline(&info, &meta) else { - panic!("canonical digest should be computable inline for this fixture"); - }; - let mut hash = lowercase_hex(hash); - hash.insert_str(0, "sha256:"); - Ok(hash) -} - fn make_runtime() -> Result, Box> { Ok(Arc::new( tokio::runtime::Builder::new_current_thread() @@ -262,38 +232,38 @@ fn mock_json_get_count( .create() } -/// Build a minimal kpar (ZIP) archive carrying `.project.json`, -/// `.meta.json`, and a single source file at the archive root, returning -/// the archive bytes alongside the exact info/meta JSON strings written -/// into it. Tests that also mock the per-version `.project.json` / -/// `.meta.json` endpoints reuse those strings so the index-served content -/// matches the in-archive content — the only deliberate drift remains in -/// the advertised `project_digest`. 
-fn build_minimal_kpar( - name: &str, - version: &str, - src_path: &str, - src_body: &str, -) -> (Vec, String, &'static str) { - use std::io::Write as _; - let info_json = format!(r#"{{"name":"{name}","version":"{version}","usage":[]}}"#); - let meta_json: &'static str = r#"{"index":{},"created":"0000-00-00T00:00:00.123456789Z"}"#; - let mut buf: Vec = Vec::new(); - { - let mut zip = zip::ZipWriter::new(std::io::Cursor::new(&mut buf)); - let options = zip::write::SimpleFileOptions::default() - .compression_method(zip::CompressionMethod::Stored) - .unix_permissions(0o755); - zip.start_file(".project.json", options).unwrap(); - zip.write_all(info_json.as_bytes()).unwrap(); - zip.start_file(".meta.json", options).unwrap(); - zip.write_all(meta_json.as_bytes()).unwrap(); - zip.start_file(src_path, options).unwrap(); - zip.write_all(src_body.as_bytes()).unwrap(); - zip.finish().unwrap(); - } - (buf, info_json, meta_json) -} +// /// Build a minimal kpar (ZIP) archive carrying `.project.json`, +// /// `.meta.json`, and a single source file at the archive root, returning +// /// the archive bytes alongside the exact info/meta JSON strings written +// /// into it. Tests that also mock the per-version `.project.json` / +// /// `.meta.json` endpoints reuse those strings so the index-served content +// /// matches the in-archive content — the only deliberate drift remains in +// /// the advertised `project_digest`. 
+// fn build_minimal_kpar( +// name: &str, +// version: &str, +// src_path: &str, +// src_body: &str, +// ) -> (Vec, String, &'static str) { +// use std::io::Write as _; +// let info_json = format!(r#"{{"name":"{name}","version":"{version}","usage":[]}}"#); +// let meta_json: &'static str = r#"{"index":{},"created":"0000-00-00T00:00:00.123456789Z"}"#; +// let mut buf: Vec = Vec::new(); +// { +// let mut zip = zip::ZipWriter::new(std::io::Cursor::new(&mut buf)); +// let options = zip::write::SimpleFileOptions::default() +// .compression_method(zip::CompressionMethod::Stored) +// .unix_permissions(0o755); +// zip.start_file(".project.json", options).unwrap(); +// zip.write_all(info_json.as_bytes()).unwrap(); +// zip.start_file(".meta.json", options).unwrap(); +// zip.write_all(meta_json.as_bytes()).unwrap(); +// zip.start_file(src_path, options).unwrap(); +// zip.write_all(src_body.as_bytes()).unwrap(); +// zip.finish().unwrap(); +// } +// (buf, info_json, meta_json) +// } mod uris { use crate::{index::iri::ParseIriError, utils::format_sources}; @@ -1254,11 +1224,10 @@ mod get_project { let info_json = project_json_body("proj0", Some("admin"), "0.3.0", "[]"); let meta_json = meta_json_body(); - let project_digest = project_digest(&info_json, meta_json)?; let body = format!( r#"{{"versions":[ - {{"version":"0.3.0","usage":[],"project_digest":"{project_digest}","kpar_size":42,"kpar_digest":"{FILLER_DIGEST}","status":"yanked"}} + {{"version":"0.3.0","usage":[],"kpar_size":42,"kpar_digest":"{FILLER_DIGEST}","status":"yanked"}} ]}}"# ); @@ -1288,12 +1257,11 @@ mod get_project { let info_json = project_json_body("proj0", Some("admin"), "0.3.0", "[]"); let meta_json = meta_json_body(); - let project_digest = project_digest(&info_json, meta_json)?; let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", "[]", &project_digest)]), + versions_json_body_with_project_digest([("0.3.0", "[]")]), ); let 
project_json_mock = @@ -1305,7 +1273,7 @@ mod get_project { let inner = &project.inner; assert_eq!( - inner.archive.url.as_str(), + inner.archive.url().as_str(), format!("{}/admin/proj0/0.3.0/project.kpar", server.url()) ); @@ -1334,12 +1302,11 @@ mod get_project { let info_json = project_json_body("b", None, "1.0.0", "[]"); let meta_json = meta_json_body(); - let project_digest = project_digest(&info_json, meta_json)?; let versions_mock = mock_json_get( &mut server, "/_iri/621a5fdf587a3ecc878a98c8be2240dd5bbe561860d11f4da1ece4a4fe2fb8b5/versions.json", - versions_json_body_with_project_digest([("1.0.0", "[]", &project_digest)]), + versions_json_body_with_project_digest([("1.0.0", "[]")]), ); let project_json_mock = mock_json_get( @@ -1358,7 +1325,7 @@ mod get_project { let inner = &project.inner; assert_eq!( - inner.archive.url.as_str(), + inner.archive.url().as_str(), format!( "{}/_iri/621a5fdf587a3ecc878a98c8be2240dd5bbe561860d11f4da1ece4a4fe2fb8b5/1.0.0/project.kpar", server.url() @@ -1391,7 +1358,6 @@ mod get_project { ); let info_json = project_json_body("proj0", Some("admin"), "0.3.0", &usage_json); let meta_json = meta_json_body(); - let project_digest = project_digest(&info_json, meta_json)?; // `usage` shown to the caller comes from `.project.json`; carry // the same payload in `versions.json` to check there's no @@ -1399,7 +1365,7 @@ mod get_project { let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", &usage_json, &project_digest)]), + versions_json_body_with_project_digest([("0.3.0", &usage_json)]), ); let project_json_mock = @@ -1448,16 +1414,11 @@ mod get_project { // verification passes; advertised and fetched `usage` differ. 
let fetched_info_json = project_json_body("proj0", Some("admin"), "0.3.0", &fetched_usage); let meta_json = meta_json_body(); - let advertised_digest = project_digest(&fetched_info_json, meta_json)?; let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([( - "0.3.0", - &advertised_usage, - &advertised_digest, - )]), + versions_json_body_with_project_digest([("0.3.0", &advertised_usage)]), ); let project_json_mock = mock_json_get( @@ -1500,12 +1461,11 @@ mod get_project { &usage_json, ); let meta_json = r#"{"index":{},"created":"2026-04-17T00:00:00.000000000Z","metamodel":"https://www.omg.org/spec/KerML/20250201"}"#; - let project_digest = project_digest(&info_json, meta_json)?; let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", &usage_json, &project_digest)]), + versions_json_body_with_project_digest([("0.3.0", &usage_json)]), ); let project_json_mock = @@ -1772,79 +1732,13 @@ mod iri { /// that `get_project_async` will later reject. mod digest { - use super::*; - - #[test] - fn advertised_project_digest_mismatch_rejected_before_expose() - -> Result<(), Box> { - // Syntactically-valid-but-wrong advertised digest: must refuse - // to expose info AND meta, even though the JSON pair itself - // parses cleanly. 
- let mut server = mockito::Server::new(); - - let env = index_env_sync(&server)?; - - let info_json = project_json_body("proj0", Some("admin"), "0.3.0", "[]"); - let meta_json = meta_json_body(); - let bogus_advertised_digest = format!("sha256:{}", "c".repeat(64)); - - let versions_mock = mock_json_get( - &mut server, - "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", "[]", &bogus_advertised_digest)]), - ); - - let project_json_mock = mock_json_get_count( - &mut server, - "/admin/proj0/0.3.0/.project.json", - info_json, - 2, - ); - - let meta_json_mock = - mock_json_get_count(&mut server, "/admin/proj0/0.3.0/.meta.json", meta_json, 2); - - // Verification runs from JSON only. - let kpar_mock = expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/project.kpar"); - - let project = env.get_project(purl("admin/proj0"), "0.3.0")?; - let err = project - .get_info() - .expect_err("digest drift must reject before exposing info"); - match err { - IndexEntryProjectError::AdvertisedDigestDrift { expected, .. } => { - assert_eq!(expected, "c".repeat(64)); - } - other => panic!("expected AdvertisedDigestDrift, got {other:?}"), - } - - // get_meta() must also refuse — both documents must be unavailable. - let err_meta = project - .get_meta() - .expect_err("digest drift must reject before exposing meta too"); - assert!( - matches!( - err_meta, - IndexEntryProjectError::AdvertisedDigestDrift { .. } - ), - "get_meta must also surface drift" - ); + use crate::project::ProjectChecksum; - versions_mock.assert(); - project_json_mock.assert(); - meta_json_mock.assert(); - kpar_mock.assert(); - - Ok(()) - } + use super::*; #[test] - fn checksum_uses_inline_project_digest_and_skips_kpar_download() + fn checksum_variant_uses_advertised_kpar_digest_and_skips_kpar_download() -> Result<(), Box> { - // The pre-download shortcut: `checksum_canonical_hex` returns - // the advertised digest without touching any leaf endpoint. 
- // A regression that re-introduced a materialization step here - // would silently start downloading archives during resolution. let mut server = mockito::Server::new(); let env = index_env_sync(&server)?; @@ -1856,7 +1750,7 @@ mod digest { &mut server, "/admin/proj0/versions.json", format!( - r#"{{"versions":[{{"version":"0.3.0","usage":[],"project_digest":"{advertised_digest}","kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}]}}"#, + r#"{{"versions":[{{"version":"0.3.0","usage":[],"kpar_size":42,"kpar_digest":"{advertised_digest}"}}]}}"#, ), ); @@ -1868,10 +1762,8 @@ mod digest { let kpar_mock = expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/project.kpar"); let project = env.get_project(purl("admin/proj0"), "0.3.0")?; - let digest = project - .checksum_canonical_hex()? - .expect("prefetched digest should propagate"); - assert_eq!(digest, expected_hex); + let digest = project.checksum_canonical_variant()?; + assert_eq!(digest, ProjectChecksum::Kpar(expected_hex)); versions_mock.assert(); project_json_mock.assert(); @@ -1882,7 +1774,7 @@ mod digest { } #[test] - fn malformed_project_digest_errors() -> Result<(), Box> { + fn malformed_kpar_digest_errors() -> Result<(), Box> { // Non-`sha256:` advertised value: surface as a protocol // error rather than silently recomputing (which would break // lock/sync cross-checks downstream). @@ -1893,9 +1785,7 @@ mod digest { let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - format!( - r#"{{"versions":[{{"version":"0.3.0","usage":[],"project_digest":"md5:abc","kpar_size":42,"kpar_digest":"{FILLER_DIGEST}"}}]}}"#, - ), + r#"{"versions":[{"version":"0.3.0","usage":[],"kpar_digest":"md5:abc","kpar_size":42}]}"#, ); let kpar_mock = expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/project.kpar"); @@ -1911,7 +1801,7 @@ mod digest { .. 
} => { assert_eq!(version, "0.3.0"); - assert_eq!(field, "project_digest"); + assert_eq!(field, "kpar_digest"); assert_eq!(value, "md5:abc"); } other => panic!("expected InvalidVersionEntry, got {other:?}"), @@ -1923,93 +1813,6 @@ mod digest { Ok(()) } - #[test] - fn project_digest_drift_after_download_errors() -> Result<(), Box> { - // Post-download authoritative check: correct `kpar_digest` but - // deliberately-wrong `project_digest` lets the download succeed - // and forces `checksum_canonical_hex` into the reconciliation - // branch, where the mismatch must surface as - // `AdvertisedDigestDrift` rather than silently corrupt the - // lockfile. - - let mut server = mockito::Server::new(); - - let env = index_env_sync(&server)?; - - // `.project.json` / `.meta.json` are only consumed from inside the - // kpar for this test; the destructured JSON strings are unused. - let (kpar_bytes, _info_json, _meta_json) = - build_minimal_kpar("proj0", "0.3.0", "foo.sysml", "// hi"); - let kpar_digest_hex = sha256_lowercase_hex(&kpar_bytes); - let advertised_kpar = format!("sha256:{kpar_digest_hex}"); - - // `bbb…b` is not the canonical project digest of the archive above, - // which is what forces the drift branch post-download. - let wrong_project_digest_hex = "b".repeat(64); - let advertised_project = format!("sha256:{wrong_project_digest_hex}"); - - let versions_mock = mock_json_get( - &mut server, - "/admin/proj0/versions.json", - format!( - r#"{{"versions":[{{"version":"0.3.0","usage":[],"project_digest":"{advertised_project}","kpar_size":{kpar_size},"kpar_digest":"{advertised_kpar}"}}]}}"#, - kpar_size = kpar_bytes.len(), - ), - ); - - // Reconciliation runs against the in-archive copies; neither - // `read_source` nor the post-download branch of - // `checksum_canonical_hex` touches the per-version JSON - // endpoints. `expect(0)` catches a regression that would fall - // back to those during drift checks. 
- let project_json_mock = - expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/.project.json"); - - let meta_json_mock = expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/.meta.json"); - - let kpar_mock = server - .mock("GET", "/admin/proj0/0.3.0/project.kpar") - .with_status(200) - .with_header("content-type", "application/zip") - .with_body(&kpar_bytes) - .expect(1) - .create(); - - let project = env.get_project(purl("admin/proj0"), "0.3.0")?; - - // Force a download so `checksum_canonical_hex` reaches the - // post-download (authoritative local) branch rather than the - // pre-download shortcut. - let mut reader = project.read_source("foo.sysml").unwrap(); - let mut buf = String::new(); - std::io::Read::read_to_string(&mut reader, &mut buf)?; - drop(reader); - - let err = project - .checksum_canonical_hex() - .expect_err("drift between advertised and locally-computed digest must error"); - - // Surface path: CanonicalizationError::ProjectRead -> - // IndexEntryProjectError::AdvertisedDigestDrift. Traverse whatever - // wrappers the display-side adds by matching on the debug text. - let text = format!("{err:?}"); - assert!( - text.contains("AdvertisedDigestDrift"), - "expected AdvertisedDigestDrift, got: {text}" - ); - assert!( - text.contains(&wrong_project_digest_hex), - "advertised digest should appear in error: {text}" - ); - - versions_mock.assert(); - project_json_mock.assert(); - meta_json_mock.assert(); - kpar_mock.assert(); - - Ok(()) - } - #[test] fn versions_async_rejects_document_with_malformed_digest() -> Result<(), Box> { @@ -2069,19 +1872,11 @@ mod digest { // 64-char hex with uppercase digits — a legal SHA256 value that // canonicalization lowercases before hashing. 
let meta_json = r#"{"index":{"Sym":"foo.sysml"},"created":"2026-01-01T00:00:00.000000000Z","checksum":{"foo.sysml":{"value":"ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789","algorithm":"SHA256"}}}"#; - let advertised_digest = canonical_project_digest(&info_json, meta_json)?; - // Sanity: canonical and raw-hash digests must differ, else the - // test would pass even with the old buggy code. - assert_ne!( - advertised_digest, - project_digest(&info_json, meta_json)?, - "fixture must exercise canonicalization — raw and canonical digests should differ" - ); let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", "[]", &advertised_digest)]), + versions_json_body_with_project_digest([("0.3.0", "[]")]), ); let project_json_mock = @@ -2105,54 +1900,6 @@ mod digest { Ok(()) } - - #[test] - fn get_project_rejects_non_sha256_meta_checksums() -> Result<(), Box> { - // A non-SHA256 `meta.checksum` entry makes the canonical digest - // require source reads. The index protocol requires verification - // from (info, meta) alone, so the client refuses to expose either - // document rather than silently skipping verification. - let mut server = mockito::Server::new(); - - let env = index_env_sync(&server)?; - - let info_json = project_json_body("proj0", Some("admin"), "0.3.0", "[]"); - // SHA1 — canonicalizing this entry would force reading source - // bytes from the kpar, which the protocol forbids. 
- let meta_json = r#"{"index":{"Sym":"foo.sysml"},"created":"2026-01-01T00:00:00.000000000Z","checksum":{"foo.sysml":{"value":"dabe95d26be5d1c68a80fae65d12ae056e8fc8ab","algorithm":"SHA1"}}}"#; - - let advertised_digest = format!("sha256:{}", "a".repeat(64)); - - let versions_mock = mock_json_get( - &mut server, - "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", "[]", &advertised_digest)]), - ); - - let project_json_mock = - mock_json_get(&mut server, "/admin/proj0/0.3.0/.project.json", info_json); - - let meta_json_mock = mock_json_get(&mut server, "/admin/proj0/0.3.0/.meta.json", meta_json); - - // Error is triggered purely on the JSON pair. - let kpar_mock = expect_untouched(&mut server, "GET", "/admin/proj0/0.3.0/project.kpar"); - - let project = env.get_project(purl("admin/proj0"), "0.3.0")?; - let err = project - .get_info() - .expect_err("non-SHA256 meta.checksum must refuse to expose info/meta"); - match err { - IndexEntryProjectError::ProjectDigestRequiresSourceReads { .. 
} => {} - other => panic!("expected ProjectDigestRequiresSourceReads, got {other:?}"), - } - - versions_mock.assert(); - project_json_mock.assert(); - meta_json_mock.assert(); - kpar_mock.assert(); - - Ok(()) - } } /// Tests for the two caching layers: `fetched_info_meta`'s per-project @@ -2174,12 +1921,11 @@ mod caching { let info_json = project_json_body("proj0", Some("admin"), "0.3.0", "[]"); let meta_json = meta_json_body(); - let advertised_digest = project_digest(&info_json, meta_json)?; let versions_mock = mock_json_get( &mut server, "/admin/proj0/versions.json", - versions_json_body_with_project_digest([("0.3.0", "[]", &advertised_digest)]), + versions_json_body_with_project_digest([("0.3.0", "[]")]), ); let project_json_mock = server @@ -2433,7 +2179,7 @@ mod sources { &mut server, "/admin/proj0/versions.json", format!( - r#"{{"versions":[{{"version":"0.3.0","usage":[],"project_digest":"{FILLER_DIGEST}","kpar_size":42,"kpar_digest":"{advertised}"}}]}}"#, + r#"{{"versions":[{{"version":"0.3.0","usage":[],"project_digest":"{FILLER_DIGEST}","kpar_size":24,"kpar_digest":"{advertised}"}}]}}"#, ), ); @@ -2500,7 +2246,8 @@ mod sources { assert_eq!(sources.len(), 1); match &sources[0] { Source::IndexKpar { - index_kpar_size, .. + kpar_size: index_kpar_size, + .. 
} => assert_eq!(index_kpar_size.get(), 42), other => panic!("expected Source::IndexKpar, got {:?}", other), } diff --git a/core/src/env/local_directory/metadata.rs b/core/src/env/local_directory/metadata.rs index 63d3def41..8cfb274bb 100644 --- a/core/src/env/local_directory/metadata.rs +++ b/core/src/env/local_directory/metadata.rs @@ -10,9 +10,10 @@ use toml_edit::{ArrayOfTables, DocumentMut, Item, Table, Value, value}; use typed_path::{Utf8UnixComponent, Utf8UnixPathBuf}; use crate::{ + env::ProjectChecksum, project::{ local_src::{LocalSrcError, LocalSrcProject}, - utils::{FsIoError, ToUnixPathBuf, deserialize_unix_path, wrapfs}, + utils::{FsIoError, deserialize_unix_path, wrapfs}, }, utils::multiline_array, }; @@ -141,6 +142,18 @@ impl EnvMetadata { .find(|p| p.version == version && p.identifiers.iter().any(|iri| iri == identifier)) } + pub(super) fn find_project_version_mut, V: AsRef>( + &mut self, + identifier: S, + version: V, + ) -> Option<&mut EnvProject> { + let identifier = identifier.as_ref(); + let version = version.as_ref(); + self.projects + .iter_mut() + .find(|p| p.version == version && p.identifiers.iter().any(|iri| iri == identifier)) + } + pub(super) fn find_project_version_any_mut, V: AsRef>( &mut self, identifiers: &[S], @@ -166,6 +179,9 @@ impl EnvMetadata { .filter(move |p| p.identifiers.iter().any(|iri| iri == identifier)) } + /// Find all projects that have `identifier`, and return + /// `(project_index, project)` pairs, sorted smallest to largest according + /// to `project_index` pub(super) fn find_project_versions_idxs( &self, identifier: &str, @@ -193,34 +209,30 @@ impl EnvMetadata { project: &LocalSrcProject, editable: bool, workspace: bool, + checksum: Option, ) -> Result<(), AddProjectError> { let info = project .get_info()? 
.ok_or(AddProjectError::MissingInfo(project.project_path.clone()))?; let project = EnvProject { publisher: info.publisher, - name: Some(info.name), + name: info.name, version: info.version, path: project .nominal_path .as_ref() .expect("BUG: no nominal path for project") - .to_unix_path_buf(), + .to_owned(), identifiers, usages: info.usage.into_iter().map(|u| u.resource).collect(), editable, workspace, + checksum: checksum.map(Into::into), }; self.add_project(project); Ok(()) } - - pub(super) fn project_dir_exists(&self, dir_name: &str) -> bool { - self.projects - .iter() - .any(|p| p.path.file_name().unwrap() == dir_name) - } } /// Metadata describing a project belonging to an environment. @@ -229,7 +241,7 @@ pub struct EnvProject { /// Publisher of the project. Intended for display purposes. pub publisher: Option, /// Name of the project. Intended for display purposes. - pub name: Option, + pub name: String, /// Version of the project. pub version: String, /// Path to the root directory of the project. @@ -249,13 +261,36 @@ pub struct EnvProject { /// track the interdependence of project in the environment. #[serde(default)] pub usages: Vec, - /// Indicator of wether the project is fully installed in + /// Indicator of whether the project is fully installed in /// the environment or located elsewhere. #[serde(default)] pub editable: bool, - /// Indicator of wether the project is part of a workspace. + /// Indicator of whether the project is part of a workspace. #[serde(default)] pub workspace: bool, + #[serde(flatten, skip_serializing_if = "Option::is_none")] + pub checksum: Option, +} + +/// Checksum of the source this project was installed from: +/// - kpar_cksum for local/remote/index KPAR +/// - src_cksum for local/remote src +// Serde by default will allow both variants to coexist in the file, this +// is desirable to have forward compat. 
+#[derive(Clone, Debug, Deserialize)] +#[serde(untagged)] +pub enum EnvProjectChecksum { + Kpar { kpar_cksum: String }, + Project { src_cksum: String }, +} + +impl From for EnvProjectChecksum { + fn from(value: ProjectChecksum) -> Self { + match value { + ProjectChecksum::Project(c) => Self::Project { src_cksum: c }, + ProjectChecksum::Kpar(c) => Self::Kpar { kpar_cksum: c }, + } + } } impl EnvProject { @@ -264,9 +299,7 @@ impl EnvProject { if let Some(publisher) = &self.publisher { table.insert("publisher", value(publisher)); } - if let Some(name) = &self.name { - table.insert("name", value(name)); - } + table.insert("name", value(&self.name)); table.insert("version", value(&self.version)); table.insert("path", value(self.path.as_str())); if !self.identifiers.is_empty() { @@ -284,6 +317,16 @@ impl EnvProject { if self.workspace { table.insert("workspace", value(true)); } + if let Some(cksum) = &self.checksum { + match cksum { + EnvProjectChecksum::Kpar { kpar_cksum } => { + table.insert("kpar_cksum", value(kpar_cksum)); + } + EnvProjectChecksum::Project { src_cksum } => { + table.insert("src_cksum", value(src_cksum)); + } + } + } table } diff --git a/core/src/env/local_directory/metadata_tests.rs b/core/src/env/local_directory/metadata_tests.rs index c1b556488..fbea5f34a 100644 --- a/core/src/env/local_directory/metadata_tests.rs +++ b/core/src/env/local_directory/metadata_tests.rs @@ -8,6 +8,7 @@ fn minimal_toml(path: &str, editable: bool) -> String { r#"version = "0.1" [[project]] +name = "Example" version = "1.0.0" path = "{path}" editable = {editable} @@ -48,13 +49,13 @@ fn non_editable_absolute_path_is_rejected() { #[test] fn non_editable_normal_relative_path_is_accepted() { let toml = minimal_toml("subdir/project", false); - assert!(EnvMetadata::from_str(&toml).is_ok()); + EnvMetadata::from_str(&toml).unwrap(); } #[test] fn editable_project_with_parent_dir_is_accepted() { let toml = minimal_toml("../workspace-project", true); - 
assert!(EnvMetadata::from_str(&toml).is_ok()); + EnvMetadata::from_str(&toml).unwrap(); } #[test] diff --git a/core/src/env/local_directory/mod.rs b/core/src/env/local_directory/mod.rs index 8e35ec840..c22734bb3 100644 --- a/core/src/env/local_directory/mod.rs +++ b/core/src/env/local_directory/mod.rs @@ -10,14 +10,15 @@ use std::{ use camino::{Utf8Component, Utf8Path, Utf8PathBuf}; use fluent_uri::Iri; use thiserror::Error; +use typed_path::Utf8UnixPathBuf; use crate::{ env::{ - PutProjectError, ReadEnvironment, WriteEnvironment, + ProjectChecksum, ProjectChecksumResult, PutProjectError, ReadEnvironment, WriteEnvironment, local_directory::{ metadata::{ - AddProjectError, EnvMetadata, EnvMetadataError, EnvProject, load_env_metadata, - parse_env_metadata, + AddProjectError, EnvMetadata, EnvMetadataError, EnvProject, EnvProjectChecksum, + load_env_metadata, parse_env_metadata, }, utils::clean_dir, }, @@ -107,6 +108,10 @@ impl LocalDirectoryEnvironment { /// by `self.put_project()`. /// Call is idempotent. /// Does not update metadata file. + // TODO: what to do if lock does not contain projects that are present in env: + // - workspace: env also has to remove it, the project was deleted/renamed + // - editable: unclear; it could be (re)moved/renamed, but also it could be that + // workspace just no longer depends on it, so it's absent from lock. 
pub fn merge_lock(&mut self, lock: &Lock, ws: Option<&Workspace>) { for project in &lock.projects { // Projects that are installed in the environment are ignored, so only @@ -146,6 +151,7 @@ impl LocalDirectoryEnvironment { usages, editable: true, workspace: workspace_member, + checksum: None, }); } } @@ -168,15 +174,6 @@ impl LocalDirectoryEnvironment { &self.metadata.projects } - /// Determine absolute path of `project` - fn absolute_project_path(&self, project: &EnvProject) -> Utf8PathBuf { - if project.editable { - self.parent_dir().join(project.path.as_str()) - } else { - self.root_dir.join(project.path.as_str()) - } - } - /// Parent directory of the env, i.e. the directory in which `.sysand` resides. /// It is assumed to be the workspace (if present) or project root, which in turn is /// the root of relative paths of `editable`/`workspace` projects @@ -226,6 +223,7 @@ impl LocalDirectoryEnvironment { LocalSrcProject { nominal_path: Some(relative.into()), project_path: absolute, + expected_checksum: None, } } else { let absolute = self.root_dir.join(relative); @@ -233,15 +231,27 @@ impl LocalDirectoryEnvironment { LocalSrcProject { nominal_path: Some(relative.into()), project_path: absolute, + expected_checksum: None, } } } + /// Determine whether `p` is installed in the environment. This is currently false + /// only for editable projects. + fn is_installed(p: &EnvProject) -> bool { + p.path.starts_with(PROJECT_PATH_PREFIX) + } + /// Determine a path for a new project/version. 
Path will be relative to `self.root_path()` - fn compute_project_path(&self, iri: Iri<&str>, version: impl AsRef) -> Utf8PathBuf { + fn compute_project_path(&self, iri: Iri<&str>, version: impl AsRef) -> Utf8UnixPathBuf { let mut path_iter = IriVersionFilename::new(iri, version); let mut candidate = path_iter.next_candidate(); - while self.metadata.project_dir_exists(candidate) { + while self + .metadata + .projects + .iter() + .any(|p| Self::is_installed(p) && p.path.file_name().unwrap() == candidate) + { candidate = path_iter.next_candidate(); } let mut path = String::from(path_iter); @@ -264,34 +274,8 @@ impl LocalDirectoryEnvironment { } } - // /// Find a project `uri` version `version` and determine its absolute path - // pub fn absolute_project_path_find, T: AsRef>( - // &self, - // uri: S, - // version: T, - // ) -> Option { - // self.metadata - // .find_project_version(uri, version) - // .map(|p| self.project_to_absolute_path(p)) - // } - - // /// Project path relative to the env directory - // pub fn relative_project_path_find, T: AsRef>( - // &self, - // uri: S, - // version: T, - // ) -> Option { - // self.metadata.find_project_version(uri, version).map(|p| { - // if p.editable { - // // TODO: this assumes that parent is workspace root - // Utf8Path::new("../").join(p.path.as_str()) - // } else { - // p.path.as_str().into() - // } - // }) - // } - - // fn project_to_absolute_path(&self, project: &EnvProject) -> Utf8PathBuf { + // /// Determine absolute path of `project` + // fn absolute_project_path(&self, project: &EnvProject) -> Utf8PathBuf { // if project.editable { // self.parent_dir().join(project.path.as_str()) // } else { @@ -356,6 +340,33 @@ impl ReadEnvironment for LocalDirectoryEnvironment { Err(LocalReadError::ProjectNotFound(uri.as_ref().into())) } } + + fn has_version_verified, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> Result { + if let Some(p) = self.metadata.find_project_version(uri, version) { + let 
(expected, actual) = match (checksum, &p.checksum) { + (_, None) => return Ok(ProjectChecksumResult::ChecksumNotPresent), + (ProjectChecksum::Project(c), Some(EnvProjectChecksum::Project { src_cksum })) => { + (c, src_cksum) + } + (ProjectChecksum::Kpar(c), Some(EnvProjectChecksum::Kpar { kpar_cksum })) => { + (c, kpar_cksum) + } + _ => return Ok(ProjectChecksumResult::DifferentChecksumKinds), + }; + if expected == actual { + Ok(ProjectChecksumResult::Match) + } else { + Ok(ProjectChecksumResult::Mismatch) + } + } else { + Ok(ProjectChecksumResult::VersionNotFound) + } + } } #[derive(Error, Debug)] @@ -384,6 +395,8 @@ pub enum LocalWriteError { ImpossibleRelativePath(#[from] RelativizePathError), #[error("project is missing metadata file `.meta.json`")] MissingMeta, + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, } impl From for LocalWriteError { @@ -411,6 +424,7 @@ impl From for LocalWriteError { LocalSrcError::Serialize(error) => Self::Serialize(error), LocalSrcError::ImpossibleRelativePath(err) => Self::ImpossibleRelativePath(err), LocalSrcError::MissingMeta => LocalWriteError::MissingMeta, + LocalSrcError::MissingInfoMeta => LocalWriteError::MissingInfoMeta, } } } @@ -428,6 +442,7 @@ impl WriteEnvironment for LocalDirectoryEnvironment { &mut self, uri: S, version: T, + checksum: Option, write_project: F, ) -> Result> where @@ -441,30 +456,39 @@ impl WriteEnvironment for LocalDirectoryEnvironment { let mut tentative_project = LocalSrcProject { nominal_path: None, project_path: project_temp.path().to_path_buf(), + expected_checksum: None, }; - if let Some(existing) = self.metadata.find_project_version(identifier, version) { + if let Some(existing) = self.metadata.find_project_version_mut(identifier, version) { // Create a temp clone and change it to avoid modifying env in case of errors - // TODO: check that publisher/name match? 
- // Will assume that usages/publisher/name/etc remain unchanged // TODO: how to handle editable projects here? assert!(!existing.editable); assert!(!existing.workspace); write_project(&mut tentative_project).map_err(PutProjectError::Callback)?; + // Project is not editable, so this is always correct + let absolute_path = self.root_dir.join(existing.path.as_str()); + try_move_files(&[(project_temp.path(), &absolute_path)]) + .map_err(LocalWriteError::from)?; + tentative_project.project_path = absolute_path; - let path = self.absolute_project_path(existing); - try_move_files(&[(project_temp.path(), &path)]).map_err(LocalWriteError::from)?; + let info = match tentative_project.get_info() { + Ok(Some(info)) => info, + Ok(None) => return Err(PutProjectError::Write(LocalWriteError::MissingInfoMeta)), + Err(e) => return Err(PutProjectError::Write(LocalWriteError::from(e))), + }; - // Metadata didn't change, nothing to write + existing.publisher = info.publisher; + existing.name = info.name; + existing.version = info.version; + existing.usages = info.usage.into_iter().map(|u| u.resource).collect(); + existing.checksum = checksum.map(Into::into); - tentative_project.project_path = self.root_path().join(&path); - tentative_project.nominal_path = Some(path); + self.write().map_err(LocalWriteError::from)?; Ok(tentative_project) } else { - // TODO: be optimistic: move existing target out of the way, try writing to - // the target directly and on failure revert. + // TODO: try writing to the target directly (we manage it exclusively) and on failure revert. 
write_project(&mut tentative_project).map_err(PutProjectError::Callback)?; // Project write was successful @@ -473,7 +497,7 @@ impl WriteEnvironment for LocalDirectoryEnvironment { let iri = Iri::parse(identifier) .map_err(|e| PutProjectError::IriParse(identifier.to_owned(), e))?; let path = self.compute_project_path(iri, version); - let absolute_project_path = self.root_path().join(&path); + let absolute_project_path = self.root_path().join(path.as_str()); // Tolerate missing `lib/` self.ensure_lib_dir_exists() @@ -493,6 +517,7 @@ impl WriteEnvironment for LocalDirectoryEnvironment { &tentative_project, false, false, + checksum, ) .map_err(LocalWriteError::from)?; @@ -544,7 +569,6 @@ impl WriteEnvironment for LocalDirectoryEnvironment { } // `swap_remove()` does not affect elements before the one being removed, // so indices have to be removed from largest to smallest - indices_to_remove.sort_unstable(); for idx in indices_to_remove.iter().copied().rev() { self.metadata.projects.swap_remove(idx); } diff --git a/core/src/env/memory.rs b/core/src/env/memory.rs index 14eacd580..33ff1ea11 100644 --- a/core/src/env/memory.rs +++ b/core/src/env/memory.rs @@ -2,8 +2,8 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use crate::{ - env::{PutProjectError, ReadEnvironment, WriteEnvironment}, - project::{ProjectMut, ProjectRead}, + env::{PutProjectError, ReadEnvironment, WriteEnvironment, utils::ErrorBound}, + project::{ProjectChecksum, ProjectMut, ProjectRead}, }; use std::{ collections::{HashMap, hash_map::Entry}, @@ -12,6 +12,8 @@ use std::{ use thiserror::Error; +use super::ProjectChecksumResult; + #[derive(Debug)] pub struct MemoryStorageEnvironment { pub projects: HashMap>, @@ -256,6 +258,7 @@ impl WriteEnvironment for MemoryStorageEn &mut self, uri: S, version: T, + _checksum: Option, write_project: F, ) -> Result> where @@ -298,29 +301,31 @@ impl WriteEnvironment for MemoryStorageEn } #[derive(Error, Debug)] -pub enum MemoryReadError { +pub enum 
MemoryReadError { #[error("missing project with IRI `{0}`")] MissingProject(String), #[error("missing project with IRI `{0}` version `{1}`")] MissingVersion(String, String), + #[error("failed to get checksum for project `{0}`")] + Checksum(String, #[source] PRE), } impl ReadEnvironment for MemoryStorageEnvironment { - type ReadError = MemoryReadError; + type ReadError = MemoryReadError; - type UriIter = Vec>; + type UriIter = Vec>; fn uris(&self) -> Result { - let uri_vec: Vec> = + let uri_vec: Vec> = self.projects.keys().map(|x| Ok(x.to_owned())).collect(); Ok(uri_vec) } - type VersionIter = Vec>; + type VersionIter = Vec>; fn versions>(&self, uri: S) -> Result { - let version_vec: Vec> = self + let version_vec: Vec> = self .projects .get(uri.as_ref()) .ok_or_else(|| MemoryReadError::MissingProject(uri.as_ref().to_string()))? @@ -351,6 +356,44 @@ impl ReadEnvironment for MemoryStorageEnvi })? .clone()) } + + /// Will never return `Ok(ProjectChecksumResult::ChecksumNotPresent)` + /// because the hashing is cheap (at least for `InMemoryProject`) + fn has_version_verified, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> Result { + let version = version.as_ref(); + let versions = match self.projects.get(uri.as_ref()) { + Some(v) => v, + None => return Ok(ProjectChecksumResult::VersionNotFound), + }; + + match versions.iter().find(|p| p.0 == version) { + Some((_, p)) => { + let pc = p + .checksum_canonical_variant() + .map_err(|e| MemoryReadError::Checksum(uri.as_ref().to_owned(), e))?; + match (checksum, &pc) { + (ProjectChecksum::Project(c1), ProjectChecksum::Project(c2)) + | (ProjectChecksum::Kpar(c1), ProjectChecksum::Kpar(c2)) => { + if c1 == c2 { + Ok(ProjectChecksumResult::Match) + } else { + Ok(ProjectChecksumResult::Mismatch) + } + } + (ProjectChecksum::Project(_), ProjectChecksum::Kpar(_)) + | (ProjectChecksum::Kpar(_), ProjectChecksum::Project(_)) => { + Ok(ProjectChecksumResult::DifferentChecksumKinds) + } + } + } + None => 
Ok(ProjectChecksumResult::VersionNotFound), + } + } } #[cfg(test)] diff --git a/core/src/env/memory_tests.rs b/core/src/env/memory_tests.rs index f8e429bed..b7e6f7835 100644 --- a/core/src/env/memory_tests.rs +++ b/core/src/env/memory_tests.rs @@ -10,19 +10,28 @@ use crate::{ utils::{CloneError, clone_project}, }, init::do_init_memory, - project::memory::{InMemoryError, InMemoryProject}, + project::{ + ProjectRead, + memory::{InMemoryError, InMemoryProject}, + }, }; +fn new_env() -> MemoryStorageEnvironment { + MemoryStorageEnvironment::::new() +} + #[test] fn write_environment() { let uri1 = "urn:kpar:first".to_string(); let uri2 = "urn:kpar:second".to_string(); let version = "0.0.1".to_string(); let project1 = do_init_memory("First", Some("a"), &version, None).unwrap(); + let c1 = project1.checksum_canonical_variant().unwrap(); let project2 = do_init_memory("Second", None::<&str>, &version, None).unwrap(); - let mut env = MemoryStorageEnvironment::::new(); + let c2 = project2.checksum_canonical_variant().unwrap(); + let mut env = new_env(); - env.put_project(&uri1, &version, |p| { + env.put_project(&uri1, &version, Some(c1), |p| { clone_project(&project1, p, true)?; Ok::<(), CloneError>(()) @@ -35,7 +44,7 @@ fn write_environment() { env.projects.get(&uri1).unwrap().get(&version).unwrap() ); - env.put_project(&uri2, &version, |p| { + env.put_project(&uri2, &version, Some(c2), |p| { clone_project(&project2, p, true)?; Ok::<(), CloneError>(()) diff --git a/core/src/env/mod.rs b/core/src/env/mod.rs index 273d07e97..66dba2893 100644 --- a/core/src/env/mod.rs +++ b/core/src/env/mod.rs @@ -8,7 +8,10 @@ use thiserror::Error; use crate::{ env::utils::ErrorBound, - project::{AsAsyncProject, AsSyncProjectTokio, ProjectMut, ProjectRead, ProjectReadAsync}, + project::{ + AsAsyncProject, AsSyncProjectTokio, ProjectChecksum, ProjectMut, ProjectRead, + ProjectReadAsync, + }, }; // Implementations @@ -25,6 +28,21 @@ pub mod utils; pub const DEFAULT_ENV_NAME: &str = ".sysand"; 
+#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum ProjectChecksumResult { + /// Version is not present. This can also mean no versions + /// of the project are present, i.e. project itself was not + /// found + VersionNotFound, + /// Checksum is not present for the version + ChecksumNotPresent, + /// Checksum kinds differ between requested and actual + DifferentChecksumKinds, + /// Checksum values do not match + Mismatch, + Match, +} + pub trait ReadEnvironment { type ReadError: ErrorBound; @@ -63,6 +81,20 @@ pub trait ReadEnvironment { .any(|v: String| v == version.as_ref())) } + // TODO: decide on what should be returned here in these cases: + // - project not present: Ok(VersionNotFound) + // - project version not present: Ok(VersionNotFound) - both cases return VersionNotFound + // to give freedom to implementers; also to callers it makes no difference whether + // project or specific version was not found + // - checksum does not match: Ok(Mismatch) + // - checksum matches: Ok(Match) + fn has_version_verified, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> Result; + fn candidate_projects>( &self, uri: S, @@ -140,6 +172,13 @@ pub trait ReadEnvironmentAsync { } } + fn has_version_verified_async, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> impl Future>; + fn candidate_projects_async>( &self, uri: S, @@ -214,6 +253,15 @@ where inner: self.inner.get_project(uri, version)?, }) } + + async fn has_version_verified_async, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> Result { + self.inner.has_version_verified(uri, version, checksum) + } } /// Wrapper intended to wrap an `ReadEnvironmentAsync` as a `ReadEnvironment` @@ -282,6 +330,18 @@ where .block_on(self.inner.get_project_async(uri, version))?, }) } + + fn has_version_verified, V: AsRef>( + &self, + uri: S, + version: V, + checksum: &ProjectChecksum, + ) -> Result { + self.runtime.block_on( + self.inner
.has_version_verified_async(uri, version, checksum), + ) + } } #[derive(Error, Debug)] @@ -302,10 +362,13 @@ pub trait WriteEnvironment { type InterchangeProjectMut: ProjectMut; // TODO: Should this be replaced by a transactional interface? + /// Install a project in the environment. The project files will be copied into + /// the environment, so this is not suitable for editable projects fn put_project, T: AsRef, F, CE>( &mut self, uri: S, version: T, + checksum: Option, // Callback allows the implementation to gracefully recover // in case of an error, to just "allocate" write_project: F, diff --git a/core/src/env/null.rs b/core/src/env/null.rs index d33cfbf28..a8901c883 100644 --- a/core/src/env/null.rs +++ b/core/src/env/null.rs @@ -10,6 +10,8 @@ use thiserror::Error; use crate::{env::ReadEnvironment, project::ProjectRead}; +use super::ProjectChecksumResult; + #[derive(Debug)] pub struct NullEnvironment { phantom: std::marker::PhantomData, @@ -59,4 +61,13 @@ impl ReadEnvironment for NullEnvironment { ) -> Result { Err(EmptyEnvironmentError::NullEnvironmentIsEmpty) } + + fn has_version_verified, V: AsRef>( + &self, + _uri: S, + _version: V, + _checksum: &super::ProjectChecksum, + ) -> Result { + Ok(ProjectChecksumResult::VersionNotFound) + } } diff --git a/core/src/env/utils.rs b/core/src/env/utils.rs index fa53890da..9156a4701 100644 --- a/core/src/env/utils.rs +++ b/core/src/env/utils.rs @@ -4,9 +4,11 @@ use crate::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ProjectMut, ProjectRead, utils::FsIoError}, + utils::license_file_stems, }; use thiserror::Error; +use typed_path::Utf8UnixPath; /// Trait to use as a bound for all errors exposed through public /// crate interfaces. This makes it convenient to use anyhow::Error. 
@@ -59,7 +61,48 @@ pub fn clone_project( to.write_source(source_path, &mut source, overwrite) .map_err(CloneError::EnvWrite)?; } + copy_optional_file(from, to, overwrite, "README.md")?; + + if let Some(l) = info.license.as_deref() { + match spdx::Expression::parse(l) { + Ok(expr) => { + for stem in license_file_stems(&expr) { + let relative = format!("LICENSES/{stem}.txt"); + copy_optional_file(from, to, overwrite, relative)?; + } + } + Err(e) => { + log::debug!( + "project's license `{l}` is not a valid SPDX license expression:\n{e}" + ); + } + } + } + Ok((info, meta)) } } } + +fn copy_optional_file>( + from: &P, + to: &mut Q, + overwrite: bool, + path: S, +) -> Result<(), CloneError> { + match from.read_source(&path) { + Ok(mut f) => { + to.write_source(path, &mut f, overwrite) + .map_err(CloneError::EnvWrite)?; + } + Err(e) => { + log::debug!("failed to read `{}` from a project: {e}", path.as_ref()); + let mut error: &dyn std::error::Error = &e; + while let Some(source) = error.source() { + log::debug!(" caused by: {source}"); + error = source; + } + } + } + Ok(()) +} diff --git a/core/src/index/model.rs b/core/src/index/model.rs index 356b0f3c2..666f32fd5 100644 --- a/core/src/index/model.rs +++ b/core/src/index/model.rs @@ -44,7 +44,7 @@ pub(crate) enum VersionStatus { /// Per-project `versions.json`: enough to enumerate candidates and /// verify archives without downloading first. The publish-time artifact -/// metadata (`project_digest`, `kpar_size`, `kpar_digest`) lets the +/// metadata (`kpar_size`, `kpar_digest`) lets the /// client populate the lockfile lazily; `.project.json` / `.meta.json` /// are only fetched once a specific version is materialized, and the /// client reconciles them against these digests before exposing either. @@ -59,9 +59,6 @@ pub(crate) struct VersionEntry { /// Required so the solver can run on `versions.json` alone, without /// fetching each candidate's `.project.json`. 
pub(crate) usage: Vec, - /// Canonical project digest (sha256 over canonicalized info+meta), - /// used to populate the lockfile checksum without downloading the kpar. - pub(crate) project_digest: String, /// Byte length of the kpar archive; lets `sources_async` skip a HEAD. pub(crate) kpar_size: NonZeroU64, /// Digest of the kpar archive bytes, verified against the streamed diff --git a/core/src/iri_normalize.rs b/core/src/iri_normalize.rs index e967f15f4..4dcfbf600 100644 --- a/core/src/iri_normalize.rs +++ b/core/src/iri_normalize.rs @@ -41,7 +41,10 @@ use idna::punycode; /// serialization. /// /// Returns the canonicalized serialization as -/// a `String`, or an error if the host fails IDN conversion. +/// a `String`, or an error if the host fails IDN conversion; IDN conversion +/// failures are likely, but not necessarily, user error (they are not allowed +/// by IDNA). +#[cfg(feature = "filesystem")] pub(crate) fn canonicalize_iri(iri: Iri<&str>) -> Result { let normalized = iri.normalize(); let with_idn = punycode_host(&normalized)?; @@ -70,6 +73,43 @@ pub(crate) fn canonicalize_iri(iri: Iri<&str>) -> Result) -> String { + let normalized = iri.normalize(); + let with_idn = match punycode_host(&normalized) { + Ok(iri) => iri, + Err(e) => { + log::debug!("IRI `{iri}` failed punycode host conversion: {e}"); + normalized.to_string() + } + }; + + // For `http`/`https` with an empty path, WHATWG URL serialization + // produces a `/` before any query/fragment; `fluent_uri::normalize` + // deliberately leaves the path untouched, so apply the fixup here. + // Scheme and path are read from `normalized` because `punycode_host` + // only edits the host. 
+ let scheme = normalized.scheme(); + let needs_root_slash = + (scheme == SCHEME_HTTP || scheme == SCHEME_HTTPS) && normalized.path().as_str().is_empty(); + let final_string = if needs_root_slash { + match with_idn.find(['?', '#']) { + Some(i) => format!("{}/{}", &with_idn[..i], &with_idn[i..]), + None => format!("{with_idn}/"), + } + } else { + with_idn + }; + + debug_assert!( + Iri::parse(final_string.as_str()).is_ok(), + "canonical IRI must remain valid" + ); + final_string +} + /// Replace a non-ASCII RegName host with its `domainToASCII` (Punycode) form. /// IPv4, IPv6 literals, and already-ASCII RegNames pass through untouched. /// Returns the resulting serialization as an owned `String`; the rewrite is a @@ -101,6 +141,7 @@ fn punycode_host(iri: &Iri) -> Result { #[derive(Debug, thiserror::Error)] pub enum IriNormalizeError { + #[cfg(feature = "filesystem")] #[error("IRI is not a well-formed RFC 3987 IRI: {0}")] Parse(fluent_uri::ParseError), #[error("host `{host}` is not a valid IDN and cannot be converted to Punycode")] @@ -121,7 +162,7 @@ const MAX_IRI_LEN_BYTES: u8 = 120; /// to case- or normalization-insensitivity. /// /// A precondition for `iri` is that it must not have a fragment. Fragment presence -/// would not affect output, but would be ggod to enforce, as we currently don't +/// would not affect output, but would be good to enforce, as we currently don't /// do anything with fragments. /// The final form will be of the shape `host-path-query`. 
fn iri_to_filename_part(iri: Iri<&str>) -> String { @@ -452,6 +493,6 @@ impl From for String { } } -#[cfg(test)] +#[cfg(all(test, feature = "filesystem"))] #[path = "./iri_normalize_tests.rs"] mod tests; diff --git a/core/src/lib.rs b/core/src/lib.rs index 046775480..d5baf84f1 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -18,7 +18,6 @@ pub mod context; pub mod env; #[cfg(feature = "filesystem")] pub mod index; -#[cfg(feature = "filesystem")] mod iri_normalize; pub mod lock; pub mod project; diff --git a/core/src/lock.rs b/core/src/lock.rs index 502eef5ac..d1272dc7a 100644 --- a/core/src/lock.rs +++ b/core/src/lock.rs @@ -18,16 +18,17 @@ use toml_edit::{ArrayOfTables, DocumentMut, Formatted, InlineTable, Item, Table, use typed_path::Utf8UnixPathBuf; use crate::{ + config::OverrideSource, env::ReadEnvironment, project::{ - ProjectRead, + ProjectChecksum, utils::{deserialize_unix_path, serialize_unix_path}, }, utils::multiline_array, }; pub const LOCKFILE_PREFIX: &str = "# This file is automatically generated by Sysand and is not intended to be edited manually.\n\n"; -pub const CURRENT_LOCK_VERSION: &str = "0.4"; +pub const CURRENT_LOCK_VERSION: &str = "0.5"; pub const SUPPORTED_LOCK_VERSIONS: &[&str] = &[CURRENT_LOCK_VERSION]; pub const LOCKFILE_ENTRIES: &[&str] = &["lock_version", "project"]; @@ -166,13 +167,10 @@ pub enum ValidationError { NameCollision(String), #[error("project identifier `{0}` is\nspecified by more than one project in lockfile")] ProjectIdCollision(String), - #[error("{0} does not have an identifier")] + #[error("project `{0}` does not have an identifier")] ProjectWithoutId(String), - #[error("unsatisfied usage `{usage}` for {project_with_name} in lockfile")] - UnsatisfiedUsage { - usage: String, - project_with_name: String, - }, + #[error("unsatisfied usage `{usage}` for project `{name}` in lockfile")] + UnsatisfiedUsage { usage: String, name: String }, // #[error( // "unsatisfied usage `{usage}` for {project_with_name} (found version 
`{version}`) in lockfile" // )] @@ -182,12 +180,9 @@ pub enum ValidationError { // project_with_name: String, // }, #[error( - "invalid format of canonical project-digest `{digest}` for {project_with_name} in lockfile" + "invalid format of canonical project-digest `{digest}` for project `{name}` in lockfile" )] - InvalidProjectDigestFormat { - digest: String, - project_with_name: String, - }, + InvalidProjectDigestFormat { digest: String, name: String }, #[error("invalid format of `index_kpar_digest` `{digest}` for {project_with_name} in lockfile")] InvalidIndexKparDigestFormat { digest: String, @@ -201,61 +196,6 @@ pub type ProjectResolution = ( ); impl Lock { - /// Find all projects that are both in lockfile and in `env`. Additionally - /// includes all projects where `editable = true` - pub fn resolve_projects( - &self, - env: &Env, - ) -> Result>, ResolutionError> { - let mut missing = vec![]; - let mut found = vec![]; - - for project in &self.projects { - // Projects without sources (default for standard libraries) and - // projects with editable sources won't be installed in environment. - match project.sources.as_slice() { - [] => { - continue; - } - [Source::Editable { editable: _ }, ..] => { - found.push((project.clone(), None)); - continue; - } - _ => {} - } - - let checksum = &project.checksum; - - let mut resolved_project = None; - - 'outer: for iri in &project.identifiers { - for candidate_project in env - .candidate_projects(iri) - .map_err(ResolutionError::CandidateProjects)? 
- { - if let Ok(Some(candidate_checksum)) = candidate_project.checksum_canonical_hex() - && candidate_checksum == *checksum - { - resolved_project = Some(candidate_project); - break 'outer; - } - } - } - - if resolved_project.is_some() { - found.push((project.clone(), resolved_project)); - } else { - missing.push(project.clone()); - } - } - - if !missing.is_empty() { - return Err(ResolutionError::MissingProjects(missing)); - } - - Ok(found) - } - pub fn validate(&self) -> Result<(), ValidationError> { self.validate_lock_version()?; self.check_name_collision()?; @@ -297,9 +237,7 @@ impl Lock { let id = match project.identifiers.first() { Some(id) => id, None => { - return Err(ValidationError::ProjectWithoutId(project_with( - project.name.as_ref(), - ))); + return Err(ValidationError::ProjectWithoutId(project.name.clone())); } }; return Err(ValidationError::ProjectIdCollision(id.to_string())); @@ -318,10 +256,10 @@ impl Lock { for project in &self.projects { let _ = Version::parse(&project.version).inspect_err(|err| { log::warn!( - "invalid semantic version `{}` for project {}\n\ + "invalid semantic version `{}` for project `{}`\n\ {:>8} {}", project.version, - project_with(project.name.as_ref()), + project.name, ' ', err ); @@ -335,7 +273,7 @@ impl Lock { if !iri_versions.contains(&usage.resource) { return Err(ValidationError::UnsatisfiedUsage { usage: usage.resource.clone(), - project_with_name: project_with(project.name.clone()), + name: project.name.clone(), }); } } @@ -345,12 +283,23 @@ impl Lock { fn validate_project_digest_format(&self) -> Result<(), ValidationError> { for project in &self.projects { - let digest = &project.checksum; - if digest.len() != 64 || !digest.bytes().all(|c| c.is_ascii_hexdigit()) { - return Err(ValidationError::InvalidProjectDigestFormat { - digest: digest.clone(), - project_with_name: project_with(project.name.clone()), - }); + for source in &project.sources { + match source { + Source::LocalSrc { checksum: c, .. 
} + | Source::LocalKpar { kpar_digest: c, .. } + | Source::RemoteKpar { kpar_digest: c, .. } + | Source::IndexKpar { kpar_digest: c, .. } + | Source::RemoteSrc { checksum: c, .. } => { + if c.len() != 64 || !c.bytes().all(|c| c.is_ascii_hexdigit()) { + // TODO: improve error message + return Err(ValidationError::InvalidProjectDigestFormat { + digest: c.clone(), + name: project.name.clone(), + }); + } + } + Source::RemoteGit { remote_git: _ } | Source::Editable { editable: _ } => (), + } } } Ok(()) @@ -360,7 +309,8 @@ impl Lock { for project in &self.projects { for source in &project.sources { let Source::IndexKpar { - index_kpar_digest, .. + kpar_digest: index_kpar_digest, + .. } = source else { continue; @@ -378,7 +328,7 @@ impl Lock { .identifiers .first() .cloned() - .unwrap_or_else(|| project_with(project.name.clone())), + .unwrap_or_else(|| project.name.clone()), }); } } @@ -412,13 +362,15 @@ impl Lock { fn canonicalize_checksums(&mut self) { for project in &mut self.projects { - project.checksum.make_ascii_lowercase(); + // project.checksum.make_ascii_lowercase(); for source in &mut project.sources { - if let Source::IndexKpar { - index_kpar_digest, .. - } = source - { - index_kpar_digest.make_ascii_lowercase(); + match source { + Source::LocalSrc { checksum: c, .. } + | Source::LocalKpar { kpar_digest: c, .. } + | Source::RemoteKpar { kpar_digest: c, .. } + | Source::IndexKpar { kpar_digest: c, .. } + | Source::RemoteSrc { checksum: c, .. 
} => c.make_ascii_lowercase(), + Source::RemoteGit { remote_git: _ } | Source::Editable { editable: _ } => (), } } } @@ -433,16 +385,15 @@ pub const PROJECT_ENTRIES: &[&str] = &[ "identifiers", "usages", "sources", - "checksum", + // "checksum", ]; /// Fields that are not critical for using the lockfile use `Option` #[derive(Clone, Eq, Debug, Deserialize, PartialEq)] pub struct Project { - #[serde(skip_serializing_if = "Option::is_none", default)] - pub name: Option, #[serde(skip_serializing_if = "Option::is_none", default)] pub publisher: Option, + pub name: String, pub version: String, #[serde(skip_serializing_if = "Vec::is_empty", default)] pub exports: Vec, @@ -452,16 +403,19 @@ pub struct Project { pub usages: Vec, #[serde(skip_serializing_if = "Vec::is_empty", default)] pub sources: Vec, - pub checksum: String, + // pub checksum: String, } impl std::hash::Hash for Project { - /// Project hash is its canonical checksum. - /// IRIs could also be used for this, but they may differ in amount - /// (including having no known IRIs) and/or ordering between different - /// project instances + /// This will be unique for each project, but a project can + /// have multiple hashes due to non-canonical metadata + /// The exact calculation may change at any time, so it must + /// not be used in any files fn hash(&self, state: &mut H) { - self.checksum.hash(state); + self.publisher.hash(state); + self.name.hash(state); + self.version.hash(state); + self.exports.hash(state); } } @@ -497,9 +451,7 @@ pub fn hash_str(val: &str) -> StrHash { impl Project { pub fn to_toml(&self) -> Table { let mut table = Table::new(); - if let Some(name) = &self.name { - table.insert("name", value(name)); - } + table.insert("name", value(&self.name)); table.insert("version", value(&self.version)); let exports = multiline_array(self.exports.iter().map(Value::from)); if !exports.is_empty() { @@ -517,7 +469,7 @@ impl Project { if !sources.is_empty() { table.insert("sources", value(sources)); } - 
table.insert("checksum", value(&self.checksum)); + // table.insert("checksum", value(&self.checksum)); table } @@ -530,17 +482,17 @@ impl Project { } const SOURCE_ENTRIES: &[&str] = &[ + "checksum", "editable", "src_path", "kpar_path", "remote_src", "remote_kpar", - "remote_kpar_size", + "kpar_size", "index_kpar", - "index_kpar_size", - "index_kpar_digest", + "kpar_size", + "kpar_digest", "remote_git", - "remote_api", ]; #[derive(Clone, Eq, Debug, Deserialize, Ord, PartialEq, PartialOrd, Serialize)] @@ -560,6 +512,7 @@ pub enum Source { serialize_with = "serialize_unix_path" )] src_path: Utf8UnixPathBuf, + checksum: String, }, LocalKpar { #[serde( @@ -567,25 +520,26 @@ pub enum Source { serialize_with = "serialize_unix_path" )] kpar_path: Utf8UnixPathBuf, + kpar_size: NonZeroU64, + kpar_digest: String, }, RemoteKpar { remote_kpar: String, - remote_kpar_size: Option, + kpar_size: NonZeroU64, + kpar_digest: String, }, IndexKpar { index_kpar: String, - index_kpar_size: NonZeroU64, - index_kpar_digest: String, + kpar_size: NonZeroU64, + kpar_digest: String, }, RemoteSrc { remote_src: String, + checksum: String, }, RemoteGit { remote_git: String, }, - RemoteApi { - remote_api: String, - }, } impl Source { @@ -599,44 +553,130 @@ impl Source { ); table.insert("editable", Value::from(editable.as_str())); } - Source::LocalKpar { kpar_path } => { + Source::LocalKpar { + kpar_path, + kpar_size, + kpar_digest, + } => { table.insert("kpar_path", Value::from(kpar_path.as_str())); + let size = i64::try_from(kpar_size.get()).unwrap(); + table.insert("kpar_size", Value::Integer(Formatted::new(size))); + table.insert("kpar_digest", Value::from(kpar_digest)); } - Source::LocalSrc { src_path } => { + Source::LocalSrc { src_path, checksum } => { table.insert("src_path", Value::from(src_path.as_str())); - } - Source::RemoteApi { remote_api } => { - table.insert("remote_api", Value::from(remote_api)); + table.insert("checksum", Value::from(checksum)); } Source::RemoteGit { remote_git } 
=> { table.insert("remote_git", Value::from(remote_git)); } Source::RemoteKpar { remote_kpar, - remote_kpar_size, + kpar_size, + kpar_digest, } => { table.insert("remote_kpar", Value::from(remote_kpar)); - if let Some(remote_kpar_size) = remote_kpar_size { - let size = i64::try_from(*remote_kpar_size).unwrap(); - table.insert("remote_kpar_size", Value::Integer(Formatted::new(size))); - } + let size = i64::try_from(kpar_size.get()).unwrap(); + table.insert("kpar_size", Value::Integer(Formatted::new(size))); + table.insert("kpar_digest", Value::from(kpar_digest)); } Source::IndexKpar { index_kpar, - index_kpar_size, - index_kpar_digest, + kpar_size, + kpar_digest, } => { table.insert("index_kpar", Value::from(index_kpar)); - let size = i64::try_from(index_kpar_size.get()).unwrap(); - table.insert("index_kpar_size", Value::Integer(Formatted::new(size))); - table.insert("index_kpar_digest", Value::from(index_kpar_digest)); + let size = i64::try_from(kpar_size.get()).unwrap(); + table.insert("kpar_size", Value::Integer(Formatted::new(size))); + table.insert("kpar_digest", Value::from(kpar_digest)); } - Source::RemoteSrc { remote_src } => { + Source::RemoteSrc { + remote_src, + checksum, + } => { table.insert("remote_src", Value::from(remote_src)); + table.insert("checksum", Value::from(checksum)); } } table } + + pub fn to_override(&self) -> OverrideSource { + match self { + Source::Editable { editable } => OverrideSource::Editable { + editable: editable.to_owned(), + }, + Source::LocalSrc { + src_path, + checksum: _, + } => OverrideSource::LocalSrc { + src_path: src_path.to_owned(), + }, + Source::LocalKpar { + kpar_path, + kpar_size: _, + kpar_digest: _, + } => OverrideSource::LocalKpar { + kpar_path: kpar_path.to_owned(), + }, + Source::RemoteKpar { + remote_kpar, + kpar_size: _, + kpar_digest: _, + } => OverrideSource::RemoteKpar { + remote_kpar: remote_kpar.to_owned(), + }, + Source::IndexKpar { + index_kpar, + kpar_size: _, + kpar_digest: _, + } => { + // TODO: 
currently this makes the most sense; no point in having "index" + // source override that just points directly to the kpar + OverrideSource::RemoteKpar { + remote_kpar: index_kpar.to_owned(), + } + } + Source::RemoteSrc { + remote_src, + checksum: _, + } => OverrideSource::RemoteSrc { + remote_src: remote_src.to_owned(), + }, + Source::RemoteGit { remote_git } => OverrideSource::RemoteGit { + remote_git: remote_git.to_owned(), + }, + } + } + + pub fn to_checksum(&self) -> Option { + match self { + Source::Editable { editable: _ } | Source::RemoteGit { remote_git: _ } => None, + Source::RemoteSrc { + remote_src: _, + checksum, + } + | Source::LocalSrc { + src_path: _, + checksum, + } => Some(ProjectChecksum::Project(checksum.clone())), + Source::LocalKpar { + kpar_path: _, + kpar_size: _, + kpar_digest, + } + | Source::RemoteKpar { + remote_kpar: _, + kpar_size: _, + kpar_digest, + } + | Source::IndexKpar { + index_kpar: _, + kpar_size: _, + kpar_digest, + } => Some(ProjectChecksum::Kpar(kpar_digest.clone())), + } + } } #[derive(Clone, Eq, Debug, Ord, PartialEq, PartialOrd)] diff --git a/core/src/lock_tests.rs b/core/src/lock_tests.rs index be930ee57..0c50da88b 100644 --- a/core/src/lock_tests.rs +++ b/core/src/lock_tests.rs @@ -1,18 +1,16 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 // SPDX-FileCopyrightText: © 2026 Sysand contributors -use std::{convert::Infallible, fmt::Display, slice, str::FromStr}; +use std::{fmt::Display, num::NonZeroU64, slice, str::FromStr}; use toml_edit::DocumentMut; use typed_path::Utf8UnixPathBuf; use crate::lock::{ CURRENT_LOCK_VERSION, LOCKFILE_PREFIX, Lock, Project, Source, Usage, ValidationError, - VersionError, check_lock_version, project_with, + VersionError, check_lock_version, }; -const CHECKSUM: &str = "0000000000000000000000000000000000000000000000000000000000000000"; - #[test] fn check_current_lock_version() { let version = CURRENT_LOCK_VERSION.to_string(); @@ -47,7 +45,6 @@ fn 
old_registry_lockfile_is_rejected_by_version_gate() { [[project]] name = "Old registry source" version = "1.0.0" -checksum = "{CHECKSUM}" sources = [{{ registry = "https://example.org" }}] "# ); @@ -73,7 +70,6 @@ fn zero_index_kpar_size_is_rejected_by_lockfile_parse() { [[project]] name = "Indexed" version = "1.0.0" -checksum = "{CHECKSUM}" sources = [{{ index_kpar = "https://example.org/project.kpar", index_kpar_size = 0, index_kpar_digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }}] "# ); @@ -111,22 +107,19 @@ fn to_toml_matches_expected(projects: Vec, toml: D) { fn minimal_to_toml() { to_toml_matches_expected( vec![Project { - name: None, + name: "a".to_owned(), publisher: None, version: "0.0.1".to_string(), exports: vec![], identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] +name = "a" version = "0.0.1" -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -135,54 +128,46 @@ fn many_projects_to_toml() { to_toml_matches_expected( vec![ Project { - name: Some("One".to_string()), + name: "One".to_string(), publisher: None, version: "0.0.1".to_string(), exports: vec![], identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }, Project { - name: Some("Two".to_string()), + name: "Two".to_string(), publisher: None, version: "0.0.2".to_string(), exports: vec![], identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }, Project { - name: Some("Three".to_string()), + name: "Three".to_string(), publisher: None, version: "0.0.3".to_string(), exports: vec![], identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }, ], - format!( - r#" + r#" [[project]] name = "One" version = "0.0.1" -checksum = "{CHECKSUM}" [[project]] name = "Two" version = "0.0.2" -checksum = "{CHECKSUM}" [[project]] name = "Three" version = "0.0.3" -checksum = "{CHECKSUM}" "#, - ), ); } @@ -190,26 +175,22 @@ 
checksum = "{CHECKSUM}" fn one_export_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("One Package".to_string()), + name: "One Package".to_string(), publisher: None, version: "0.1.1".to_string(), exports: vec!["PackageName".to_string()], identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "One Package" version = "0.1.1" exports = [ "PackageName", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -217,7 +198,7 @@ checksum = "{CHECKSUM}" fn many_exports_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("Three Packages".to_string()), + name: "Three Packages".to_string(), publisher: None, version: "0.1.3".to_string(), exports: vec![ @@ -228,10 +209,8 @@ fn many_exports_to_toml() { identifiers: vec![], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "Three Packages" version = "0.1.3" @@ -240,9 +219,7 @@ exports = [ "Package2", "Package3", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -250,26 +227,22 @@ checksum = "{CHECKSUM}" fn one_iri_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("One IRI".to_string()), + name: "One IRI".to_string(), publisher: None, version: "0.2.1".to_string(), exports: vec![], identifiers: vec!["urn:kpar:example".to_string()], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "One IRI" version = "0.2.1" identifiers = [ "urn:kpar:example", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -277,7 +250,7 @@ checksum = "{CHECKSUM}" fn many_identifiers_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("Three IRI:s".to_string()), + name: "Three IRI:s".to_string(), publisher: None, version: "0.2.3".to_string(), exports: vec![], @@ -288,10 +261,8 @@ fn many_identifiers_to_toml() { ], usages: vec![], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" 
[[project]] name = "Three IRI:s" version = "0.2.3" @@ -300,9 +271,7 @@ identifiers = [ "ftp://www.example.com", "http://www.example.com", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -310,7 +279,7 @@ checksum = "{CHECKSUM}" fn one_source_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("One source".to_string()), + name: "One source".to_string(), publisher: None, version: "0.4.1".to_string(), exports: vec![], @@ -319,19 +288,15 @@ fn one_source_to_toml() { sources: vec![Source::Editable { editable: Utf8UnixPathBuf::from("."), }], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "One source" version = "0.4.1" sources = [ - {{ editable = "." }}, + { editable = "." }, ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -339,59 +304,63 @@ checksum = "{CHECKSUM}" fn many_sources_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("Eight sources".to_string()), + name: "Seven sources".to_string(), publisher: None, version: "0.4.7".to_string(), exports: vec![], identifiers: vec![], usages: vec![], sources: vec![ + Source::Editable { + editable: Utf8UnixPathBuf::from("example/path"), + }, Source::LocalKpar { kpar_path: Utf8UnixPathBuf::from("example.kpar"), + kpar_size: NonZeroU64::new(64).unwrap(), + kpar_digest: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + .to_string(), }, Source::LocalSrc { src_path: Utf8UnixPathBuf::from("example/path"), + checksum: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + .to_string(), }, Source::RemoteKpar { remote_kpar: "www.example.com/remote.kpar".to_string(), - remote_kpar_size: Some(64), + kpar_size: NonZeroU64::new(64).unwrap(), + kpar_digest: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + .to_string(), }, Source::IndexKpar { index_kpar: "www.example.com/index.kpar".to_string(), - index_kpar_size: std::num::NonZeroU64::new(128).unwrap(), - index_kpar_digest: - 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - .to_string(), + kpar_size: NonZeroU64::new(128).unwrap(), + kpar_digest: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + .to_string(), }, Source::RemoteSrc { remote_src: "www.example.com/remote".to_string(), + checksum: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + .to_string(), }, Source::RemoteGit { remote_git: "github.com/example/remote.git".to_string(), }, - Source::RemoteApi { - remote_api: "www.example.com/api".to_string(), - }, ], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] -name = "Eight sources" +name = "Seven sources" version = "0.4.7" sources = [ - {{ kpar_path = "example.kpar" }}, - {{ src_path = "example/path" }}, - {{ remote_kpar = "www.example.com/remote.kpar", remote_kpar_size = 64 }}, - {{ index_kpar = "www.example.com/index.kpar", index_kpar_size = 128, index_kpar_digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }}, - {{ remote_src = "www.example.com/remote" }}, - {{ remote_git = "github.com/example/remote.git" }}, - {{ remote_api = "www.example.com/api" }}, + { editable = "example/path" }, + { kpar_path = "example.kpar", kpar_size = 64, kpar_digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + { src_path = "example/path", checksum = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + { remote_kpar = "www.example.com/remote.kpar", kpar_size = 64, kpar_digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + { index_kpar = "www.example.com/index.kpar", kpar_size = 128, kpar_digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + { remote_src = "www.example.com/remote", checksum = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + { remote_git = "github.com/example/remote.git" }, ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -399,7 +368,7 @@ checksum = "{CHECKSUM}" 
fn one_usage_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("One usage".to_string()), + name: "One usage".to_string(), publisher: None, version: "0.5.1".to_string(), exports: vec![], @@ -408,19 +377,15 @@ fn one_usage_to_toml() { resource: "urn:kpar:usage".to_string(), }], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "One usage" version = "0.5.1" usages = [ "urn:kpar:usage", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -428,7 +393,7 @@ checksum = "{CHECKSUM}" fn many_usage_to_toml() { to_toml_matches_expected( vec![Project { - name: Some("Three usages".to_string()), + name: "Three usages".to_string(), publisher: None, version: "0.5.3".to_string(), exports: vec![], @@ -445,10 +410,8 @@ fn many_usage_to_toml() { }, ], sources: vec![], - checksum: CHECKSUM.to_string(), }], - format!( - r#" + r#" [[project]] name = "Three usages" version = "0.5.3" @@ -457,9 +420,7 @@ usages = [ "urn:kpar:second", "urn:kpar:third", ] -checksum = "{CHECKSUM}" -"# - ), +"#, ); } @@ -474,19 +435,18 @@ fn roundtrip_makes_no_changes(toml: D) { #[test] fn simple_roundtrip() { - roundtrip_makes_no_changes(format!( + roundtrip_makes_no_changes( r#" [[project]] name = "Simple" version = "0.0.1" -checksum = "{CHECKSUM}" -"# - )); +"#, + ); } #[test] fn complex_roundtrip() { - roundtrip_makes_no_changes(format!( + roundtrip_makes_no_changes( r#" [[project]] name = "One" @@ -499,7 +459,6 @@ exports = [ usages = [ "urn:kpar:usage", ] -checksum = "{CHECKSUM}" [[project]] name = "Two" @@ -512,7 +471,6 @@ identifiers = [ "ftp://www.example.com", "http://www.example.com", ] -checksum = "{CHECKSUM}" [[project]] name = "Three" @@ -525,13 +483,12 @@ usages = [ "urn:kpar:second", "urn:kpar:third", ] -checksum = "{CHECKSUM}" -"# - )); +"#, + ); } -fn make_project>( - name: Option, +fn make_project, S: AsRef>( + name: N, publisher: Option, version: S, exports: &[&'static str], @@ -539,14 +496,13 @@ fn make_project>( usages: &[Usage], ) 
-> Project { Project { - name, + name: name.as_ref().into(), publisher, version: version.as_ref().to_string(), exports: exports.iter().map(|s| String::from(*s)).collect(), identifiers: identifiers.iter().map(|s| String::from(*s)).collect(), usages: usages.to_vec(), sources: vec![], - checksum: CHECKSUM.to_string(), } } @@ -564,7 +520,7 @@ fn validate_empty() { fn validate_minimal() { Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), - projects: vec![make_project(None, None, "0.0.1", &[], &[], &[])], + projects: vec![make_project("a", None, "0.0.1", &[], &[], &[])], } .validate() .unwrap(); @@ -577,7 +533,7 @@ fn validate_single_usage() { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![ make_project( - None, + "a", None, "0.0.1", &[], @@ -586,7 +542,7 @@ fn validate_single_usage() { resource: iri.to_string(), }], ), - make_project(None, None, "0.0.1", &[], &[iri], &[]), + make_project("b", None, "0.0.1", &[], &[iri], &[]), ], } .validate() @@ -601,7 +557,7 @@ fn validate_multiple_usage() { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![ make_project( - None, + "a", None, "0.0.1", &[], @@ -615,8 +571,8 @@ fn validate_multiple_usage() { }, ], ), - make_project(None, None, "0.0.1", &[], &[iri1], &[]), - make_project(None, None, "0.0.1", &[], &[iri2], &[]), + make_project("b", None, "0.0.1", &[], &[iri1], &[]), + make_project("c", None, "0.0.1", &[], &[iri2], &[]), ], } .validate() @@ -631,7 +587,7 @@ fn validate_chained_usages() { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![ make_project( - None, + "a", None, "0.0.1", &[], @@ -641,7 +597,7 @@ fn validate_chained_usages() { }], ), make_project( - None, + "b", None, "0.0.1", &[], @@ -650,7 +606,7 @@ fn validate_chained_usages() { resource: iri2.to_string(), }], ), - make_project(None, None, "0.0.1", &[], &[iri2], &[]), + make_project("c", None, "0.0.1", &[], &[iri2], &[]), ], } .validate() @@ -685,7 +641,7 @@ fn validate_single_name_collision() { lock_version: 
CURRENT_LOCK_VERSION.to_string(), projects: vec![ make_project( - None, + "a", None, "0.0.1", &[name], @@ -694,7 +650,7 @@ fn validate_single_name_collision() { resource: iri.to_string(), }], ), - make_project(None, None, "0.0.1", &[name], &[iri], &[]), + make_project("b", None, "0.0.1", &[name], &[iri], &[]), ], } .validate() else { @@ -717,7 +673,7 @@ fn validate_multiple_name_collision() { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![ make_project( - None, + "a", None, "0.0.1", &[name1, name2, name3], @@ -726,7 +682,7 @@ fn validate_multiple_name_collision() { resource: iri.to_string(), }], ), - make_project(None, None, "0.0.1", &[name2, name3, name4], &[iri], &[]), + make_project("b", None, "0.0.1", &[name2, name3, name4], &[iri], &[]), ], } .validate() else { @@ -745,7 +701,7 @@ fn validate_unsatisfied_usage() { let Err(err) = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![make_project( - None, + "a", None, "0.0.1", &[], @@ -756,15 +712,11 @@ fn validate_unsatisfied_usage() { .validate() else { panic!() }; - let ValidationError::UnsatisfiedUsage { - usage, - project_with_name, - } = err - else { + let ValidationError::UnsatisfiedUsage { usage, name } = err else { panic!() }; assert_eq!(usage, usage_in.resource); - assert_eq!(project_with_name, project_with::(None)); + assert_eq!(name, "a"); } #[test] @@ -773,37 +725,35 @@ fn validate_checksum() { let Err(err) = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![Project { - name: None, + name: "a".into(), publisher: None, version: "0.0.1".to_string(), exports: vec![], identifiers: vec![], usages: vec![], - sources: vec![], - checksum: invalid_checksum.to_owned(), + sources: vec![Source::LocalSrc { + src_path: Utf8UnixPathBuf::from("../path/to/the/project"), + checksum: invalid_checksum.to_owned(), + }], }], } .validate() else { panic!() }; - let ValidationError::InvalidProjectDigestFormat { - digest, - project_with_name, - } = err - else { + let 
ValidationError::InvalidProjectDigestFormat { digest, name } = err else { panic!() }; assert_eq!(digest, invalid_checksum); - assert_eq!(project_with_name, project_with::(None)); + assert_eq!(name, "a"); } #[test] fn validate_index_kpar_digest_rejects_uppercase() { let invalid_digest = "dA8747a6f27A32f10Ba393113bCe29f788181037a71f093f90e0ad5829d2b780"; - let Err(err) = Lock { + let err = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![Project { - name: Some("Indexed".to_string()), + name: "Indexed".to_string(), publisher: None, version: "0.0.1".to_string(), exports: vec![], @@ -811,15 +761,13 @@ fn validate_index_kpar_digest_rejects_uppercase() { usages: vec![], sources: vec![Source::IndexKpar { index_kpar: "https://example.com/indexed.kpar".to_string(), - index_kpar_size: std::num::NonZeroU64::new(123).unwrap(), - index_kpar_digest: invalid_digest.to_string(), + kpar_size: std::num::NonZeroU64::new(123).unwrap(), + kpar_digest: invalid_digest.to_string(), }], - checksum: CHECKSUM.to_string(), }], } - .validate() else { - panic!() - }; + .validate() + .unwrap_err(); let ValidationError::InvalidIndexKparDigestFormat { digest, project_with_name, @@ -844,7 +792,7 @@ fn sort_empty() { #[test] fn sort_single_trivial() { - let project = make_project(None, None, "0.0.1", &[], &[], &[]); + let project = make_project("a", None, "0.0.1", &[], &[], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project.clone()], @@ -856,8 +804,8 @@ fn sort_single_trivial() { #[test] fn sort_exports() { - let project1 = make_project(None, None, "0.0.1", &["B", "A"], &[], &[]); - let project2 = make_project(None, None, "0.0.1", &["A", "B"], &[], &[]); + let project1 = make_project("a", None, "0.0.1", &["B", "A"], &[], &[]); + let project2 = make_project("a", None, "0.0.1", &["A", "B"], &[], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project1], @@ -869,8 +817,8 @@ fn sort_exports() { 
#[test] fn sort_identifiers() { - let project1 = make_project(None, None, "0.0.1", &[], &["urn:kpar:b", "urn:kpar:a"], &[]); - let project2 = make_project(None, None, "0.0.1", &[], &["urn:kpar:a", "urn:kpar:b"], &[]); + let project1 = make_project("a", None, "0.0.1", &[], &["urn:kpar:b", "urn:kpar:a"], &[]); + let project2 = make_project("a", None, "0.0.1", &[], &["urn:kpar:a", "urn:kpar:b"], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project1], @@ -889,14 +837,14 @@ fn sort_sources() { resource: "urn:kpar:b".to_string(), }; let project1 = make_project( - None, + "a", None, "0.0.1", &[], &[], &[usage2.clone(), usage1.clone()], ); - let project2 = make_project(None, None, "0.0.1", &[], &[], &[usage1, usage2]); + let project2 = make_project("a", None, "0.0.1", &[], &[], &[usage1, usage2]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project1], @@ -915,14 +863,14 @@ fn sort_sources_with_constraints() { resource: "urn:kpar:a".to_string(), }; let project1 = make_project( - None, + "a", None, "0.0.1", &[], &[], &[usage2.clone(), usage1.clone()], ); - let project2 = make_project(None, None, "0.0.1", &[], &[], &[usage1, usage2]); + let project2 = make_project("a", None, "0.0.1", &[], &[], &[usage1, usage2]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project1], @@ -934,22 +882,8 @@ fn sort_sources_with_constraints() { #[test] fn sort_projects_by_name() { - let project1 = make_project( - Some("A".to_string()), - None, - "0.0.2", - &["B"], - &["urn:kpar:b"], - &[], - ); - let project2 = make_project( - Some("B".to_string()), - None, - "0.0.1", - &["A"], - &["urn:kpar:a"], - &[], - ); + let project1 = make_project("A", None, "0.0.2", &["B"], &["urn:kpar:b"], &[]); + let project2 = make_project("B", None, "0.0.1", &["A"], &["urn:kpar:a"], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project2.clone(), 
project1.clone()], @@ -961,22 +895,8 @@ fn sort_projects_by_name() { #[test] fn sort_projects_by_exports() { - let project1 = make_project( - Some("A".to_string()), - None, - "0.0.2", - &["A"], - &["urn:kpar:b"], - &[], - ); - let project2 = make_project( - Some("A".to_string()), - None, - "0.0.1", - &["B"], - &["urn:kpar:a"], - &[], - ); + let project1 = make_project("A", None, "0.0.2", &["A"], &["urn:kpar:b"], &[]); + let project2 = make_project("B", None, "0.0.1", &["B"], &["urn:kpar:a"], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project2.clone(), project1.clone()], @@ -988,22 +908,8 @@ fn sort_projects_by_exports() { #[test] fn sort_projects_by_identifiers() { - let project1 = make_project( - Some("A".to_string()), - None, - "0.0.2", - &["A"], - &["urn:kpar:a"], - &[], - ); - let project2 = make_project( - Some("A".to_string()), - None, - "0.0.1", - &["A"], - &["urn:kpar:b"], - &[], - ); + let project1 = make_project("A", None, "0.0.2", &["A"], &["urn:kpar:a"], &[]); + let project2 = make_project("B", None, "0.0.1", &["A"], &["urn:kpar:b"], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project2.clone(), project1.clone()], @@ -1015,22 +921,8 @@ fn sort_projects_by_identifiers() { #[test] fn sort_projects_by_version() { - let project1 = make_project( - Some("A".to_string()), - None, - "0.0.1", - &["A"], - &["urn:kpar:a"], - &[], - ); - let project2 = make_project( - Some("A".to_string()), - None, - "0.0.2", - &["A"], - &["urn:kpar:a"], - &[], - ); + let project1 = make_project("A", None, "0.0.1", &["A"], &["urn:kpar:a"], &[]); + let project2 = make_project("B", None, "0.0.2", &["A"], &["urn:kpar:a"], &[]); let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![project2.clone(), project1.clone()], @@ -1045,15 +937,17 @@ fn canonicalize_checksums() { let mut lock = Lock { lock_version: CURRENT_LOCK_VERSION.to_string(), projects: vec![Project { 
- name: None, + name: "a".into(), publisher: None, version: "0.0.1".to_string(), exports: vec![], identifiers: vec![], usages: vec![], - sources: vec![], - checksum: "dA8747a6f27A32f10Ba393113bCE29f788181037a71f093f90e0ad5829D2b780" - .to_string(), + sources: vec![Source::LocalSrc { + src_path: Utf8UnixPathBuf::from("../path/to/the/project"), + checksum: "dA8747a6f27A32f10Ba393113bCE29f788181037a71f093f90e0ad5829D2b780" + .to_owned(), + }], }], }; lock.canonicalize_checksums(); @@ -1061,8 +955,15 @@ fn canonicalize_checksums() { let [project] = projects.as_slice() else { panic!() }; + let Source::LocalSrc { + src_path: _, + checksum, + } = &project.sources[0] + else { + panic!() + }; assert_eq!( - project.checksum, + checksum, "da8747a6f27a32f10ba393113bce29f788181037a71f093f90e0ad5829d2b780" ); } diff --git a/core/src/model.rs b/core/src/model.rs index af9492e51..d74f9e05c 100644 --- a/core/src/model.rs +++ b/core/src/model.rs @@ -649,10 +649,6 @@ impl InterchangeProjectMetadataRaw { .map(|x| x.0) .collect() } - - // pub fn remove_index_from>(&mut self, path: &P) { - // todo!() - // } } impl TryFrom for InterchangeProjectMetadata { diff --git a/core/src/project/any.rs b/core/src/project/any.rs index fcecdbc03..ab1ce286d 100644 --- a/core/src/project/any.rs +++ b/core/src/project/any.rs @@ -10,6 +10,7 @@ use typed_path::Utf8UnixPath; use crate::{ auth::HTTPAuthentication, + config::OverrideSource, context::ProjectContext, env::memory::MemoryStorageEnvironment, lock::Source, @@ -21,20 +22,26 @@ use crate::{ local_kpar::LocalKParProject, local_src::LocalSrcProject, reference::ProjectReference, - reqwest_kpar_download::{ReqwestKparDownloadedError, ReqwestKparDownloadedProject}, + reqwest_kpar_download::{ + ReqwestIndexKparDownloadedProject, ReqwestKparDownloadedError, + ReqwestRemoteKparDownloadedProject, + }, reqwest_src::ReqwestSrcProjectAsync, utils::FsIoError, }, resolve::memory::{AcceptAll, MemoryResolver}, }; +use super::local_kpar::KparInnerPath; + 
#[derive(Debug, ProjectRead)] pub enum AnyProject { Editable(EditableProject), LocalSrc(LocalSrcProject), LocalKpar(LocalKParProject), RemoteSrc(AsSyncProjectTokio>), - RemoteKpar(AsSyncProjectTokio>), + RemoteKpar(AsSyncProjectTokio>), + IndexKpar(AsSyncProjectTokio>), RemoteGit(GixDownloadedProject), } @@ -47,84 +54,73 @@ pub enum TryFromSourceError { #[error(transparent)] RemoteKpar(ReqwestKparDownloadedError), #[error(transparent)] - RemoteSrc(url::ParseError), + IndexKpar(ReqwestKparDownloadedError), + #[error("failed to parse project url `{0}`")] + UrlParse(String, #[source] url::ParseError), #[error(transparent)] RemoteGit(GixDownloadedError), } // TODO: Find a better solution going from source to project. impl AnyProject { - pub fn try_from_source>( - source: Source, + pub fn try_from_override_source>( + source: OverrideSource, project_root: P, auth_policy: Arc, client: ClientWithMiddleware, runtime: Arc, ) -> Result { match source { - Source::Editable { editable } => { + OverrideSource::Editable { editable } => { let project = LocalSrcProject { nominal_path: Some(editable.to_string().into()), project_path: project_root.as_ref().join(editable.as_str()), + expected_checksum: None, }; Ok(AnyProject::Editable( EditableProject::::new(editable.as_str().into(), project), )) } - Source::LocalKpar { kpar_path } => Ok(AnyProject::LocalKpar( - LocalKParProject::new_guess_root_nominal( + OverrideSource::LocalKpar { kpar_path } => { + Ok(AnyProject::LocalKpar(LocalKParProject::new( project_root.as_ref().join(kpar_path.as_str()), - kpar_path.as_str(), - ) - .map_err(TryFromSourceError::LocalKpar)?, - )), - Source::LocalSrc { src_path } => { - let nominal_path = src_path.into_string().into(); - let project_path = project_root.as_ref().join(&nominal_path); + KparInnerPath::Guess, + Some(kpar_path), + None, + ))) + } + OverrideSource::LocalSrc { src_path } => { + let project_path = project_root.as_ref().join(src_path.as_str()); Ok(AnyProject::LocalSrc(LocalSrcProject { - 
nominal_path: Some(nominal_path), + nominal_path: Some(src_path), project_path, + expected_checksum: None, })) } // TODO: use expected size - Source::RemoteKpar { remote_kpar, .. } => Ok(AnyProject::RemoteKpar( - ReqwestKparDownloadedProject::::new_guess_root( + OverrideSource::RemoteKpar { remote_kpar } => Ok(AnyProject::RemoteKpar( + ReqwestRemoteKparDownloadedProject::::new_guess_root( remote_kpar, client, auth_policy, None, - None, - ) - .map_err(TryFromSourceError::RemoteKpar)? - .to_tokio_sync(runtime), - )), - Source::IndexKpar { - index_kpar, - index_kpar_size, - index_kpar_digest, - } => Ok(AnyProject::RemoteKpar( - ReqwestKparDownloadedProject::::new_guess_root( - index_kpar, - client, - auth_policy, - Some(index_kpar_digest), - Some(index_kpar_size), ) .map_err(TryFromSourceError::RemoteKpar)? .to_tokio_sync(runtime), )), - Source::RemoteSrc { remote_src } => Ok(AnyProject::RemoteSrc( + OverrideSource::RemoteSrc { remote_src } => Ok(AnyProject::RemoteSrc( ReqwestSrcProjectAsync:: { client, - url: reqwest::Url::parse(&remote_src).map_err(TryFromSourceError::RemoteSrc)?, + url: reqwest::Url::parse(&remote_src) + .map_err(|e| TryFromSourceError::UrlParse(remote_src, e))?, auth_policy, + expected_checksum: None, } .to_tokio_sync(runtime), )), - Source::RemoteGit { remote_git } => Ok(AnyProject::RemoteGit( + OverrideSource::RemoteGit { remote_git } => Ok(AnyProject::RemoteGit( GixDownloadedProject::new(remote_git).map_err(TryFromSourceError::RemoteGit)?, )), - _ => Err(TryFromSourceError::UnsupportedSource(format!("{source:?}"))), } } } diff --git a/core/src/project/cached.rs b/core/src/project/cached.rs index 6444342ea..2c9490047 100644 --- a/core/src/project/cached.rs +++ b/core/src/project/cached.rs @@ -88,4 +88,8 @@ impl ProjectRead for CachedProject Result { + self.local.checksum_canonical_variant() + } } diff --git a/core/src/project/editable.rs b/core/src/project/editable.rs index b77e909be..75172ee3d 100644 --- a/core/src/project/editable.rs +++ 
b/core/src/project/editable.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 // SPDX-FileCopyrightText: © 2025 Sysand contributors -use camino::Utf8PathBuf; +use typed_path::Utf8UnixPathBuf; use crate::{ context::ProjectContext, @@ -20,12 +20,12 @@ use crate::{ pub struct EditableProject { inner: P, /// Must be relative to workspace root - nominal_path: Utf8PathBuf, + nominal_path: Utf8UnixPathBuf, include_original_sources: bool, } impl EditableProject

{ - pub fn new(nominal_path: Utf8PathBuf, project: P) -> EditableProject

{ + pub fn new(nominal_path: Utf8UnixPathBuf, project: P) -> EditableProject

{ debug_assert!(nominal_path.is_relative()); EditableProject { inner: project, @@ -82,4 +82,9 @@ impl ProjectRead for EditableProject

{ Ok(inner_sources) } + + /// This will always panic, as it's irrelevant for editable projects + fn checksum_canonical_variant(&self) -> Result { + panic!() + } } diff --git a/core/src/project/gix_git_download.rs b/core/src/project/gix_git_download.rs index 4b5570ddf..9e80a4dd3 100644 --- a/core/src/project/gix_git_download.rs +++ b/core/src/project/gix_git_download.rs @@ -51,6 +51,10 @@ pub enum GixDownloadedError { {0}" )] ImpossibleRelativePath(#[from] RelativizePathError), + #[error("project is missing metadata file `.meta.json`")] + MissingMeta, + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, #[error("{0}")] Other(String), } @@ -67,14 +71,13 @@ impl From for GixDownloadedError { LocalSrcError::Deserialize(error) => Self::Deserialize(error), LocalSrcError::Path(error) => Self::Path(error), LocalSrcError::AlreadyExists(msg) => { - GixDownloadedError::Other(format!("unexpected internal error: {}", msg)) + Self::Other(format!("unexpected internal error: {}", msg)) } LocalSrcError::Io(e) => Self::Io(e), LocalSrcError::Serialize(error) => Self::Serialize(error), LocalSrcError::ImpossibleRelativePath(err) => Self::ImpossibleRelativePath(err), - LocalSrcError::MissingMeta => GixDownloadedError::Other( - "project is missing metadata file `.meta.json`".to_string(), - ), + LocalSrcError::MissingMeta => Self::MissingMeta, + LocalSrcError::MissingInfoMeta => Self::MissingInfoMeta, } } } @@ -89,6 +92,7 @@ impl GixDownloadedProject { inner: LocalSrcProject { nominal_path: None, project_path: wrapfs::canonicalize(tmp_dir.path())?, + expected_checksum: None, }, tmp_dir, }) @@ -150,6 +154,12 @@ impl ProjectRead for GixDownloadedProject { remote_git: self.url.to_string(), }]) } + + fn checksum_canonical_variant(&self) -> Result { + self.ensure_downloaded()?; + + Ok(self.inner.checksum_canonical_variant()?) 
+ } } #[cfg(test)] diff --git a/core/src/project/index_entry.rs b/core/src/project/index_entry.rs index 10074ae3b..c61f08868 100644 --- a/core/src/project/index_entry.rs +++ b/core/src/project/index_entry.rs @@ -41,18 +41,21 @@ use crate::{ lock::Source, model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw, InterchangeProjectUsageRaw}, project::{ - CanonicalizationError, InlineProjectDigest, ProjectReadAsync, - canonical_project_digest_inline, - reqwest_kpar_download::{ReqwestKparDownloadedError, ReqwestKparDownloadedProject}, + ProjectReadAsync, + reqwest_kpar_download::{ + ReqwestIndexKparDownloadedProject, ReqwestKparDownloadedError, + ReqwestRemoteKparDownloadedProject, + }, }, - utils::lowercase_hex, }; +use super::ProjectChecksum; + #[derive(Debug)] pub struct IndexEntryProject { /// The kpar archive backend — field name tracks its role in this struct, /// type name tracks the transport. - pub(crate) archive: ReqwestKparDownloadedProject, + pub(crate) archive: ReqwestIndexKparDownloadedProject, /// Single source of truth for protocol-advertised per-version metadata. 
/// `version_async` and `usage_async` return these fields without I/O; /// `checksum_canonical_hex_async` does the same until the archive has been @@ -99,12 +102,12 @@ impl IndexEntryProject { auth_policy: Arc, ) -> Result { Ok(Self { - archive: ReqwestKparDownloadedProject::new( + archive: ReqwestIndexKparDownloadedProject::new( kpar_url, client, auth_policy, - Some(advertised.kpar_digest.as_hex().to_owned()), - Some(advertised.kpar_size), + advertised.kpar_size, + advertised.kpar_digest.as_hex().to_owned(), )?, advertised, project_json_url, @@ -131,7 +134,7 @@ impl IndexEntryProject { .expect("RequirePresent never returns Ok(None)")) } - async fn fetched_project_async( + async fn ensure_downloaded_verified( &self, ) -> Result<&(InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw), IndexEntryProjectError> { @@ -145,28 +148,6 @@ impl IndexEntryProject { self.fetch_required_json(self.meta_json_url.clone()), )?; - // Pre-expose digest check (see module doc). The index - // protocol requires `project_digest` to be verifiable from - // (info, meta) alone; source-reading canonicalization is not - // a valid fallback here because callers receive info/meta - // before any kpar bytes are exposed. 
- let hash = match canonical_project_digest_inline(&info, &meta) { - InlineProjectDigest::Computed(hash) => hash, - InlineProjectDigest::RequiresSourceReads => { - return Err(IndexEntryProjectError::ProjectDigestRequiresSourceReads { - url: self.project_json_url.as_str().into(), - }); - } - }; - let computed = lowercase_hex(hash); - if computed != self.advertised.project_digest.as_hex() { - return Err(IndexEntryProjectError::AdvertisedDigestDrift { - url: self.project_json_url.as_str().into(), - expected: self.advertised.project_digest.as_hex().to_string(), - computed, - }); - } - Ok((info, meta)) }) .await @@ -185,12 +166,12 @@ impl ProjectReadAsync for IndexEntryProject ), Self::Error, > { - let (info, meta) = self.fetched_project_async().await?; + let (info, meta) = self.ensure_downloaded_verified().await?; Ok((Some(info.clone()), Some(meta.clone()))) } type SourceReader<'a> - = as ProjectReadAsync>::SourceReader<'a> + = as ProjectReadAsync>::SourceReader<'a> where Self: 'a; @@ -206,18 +187,18 @@ impl ProjectReadAsync for IndexEntryProject async fn sources_async(&self, _ctx: &ProjectContext) -> Result, Self::Error> { Ok(vec![Source::IndexKpar { - index_kpar: self.archive.url.to_string(), - index_kpar_size: self.advertised.kpar_size, - index_kpar_digest: self.advertised.kpar_digest.as_hex().to_string(), + index_kpar: self.archive.url().to_string(), + kpar_size: self.advertised.kpar_size, + kpar_digest: self.advertised.kpar_digest.as_hex().to_string(), }]) } async fn get_info_async(&self) -> Result, Self::Error> { - Ok(Some(self.fetched_project_async().await?.0.clone())) + Ok(Some(self.ensure_downloaded_verified().await?.0.clone())) } async fn get_meta_async(&self) -> Result, Self::Error> { - Ok(Some(self.fetched_project_async().await?.1.clone())) + Ok(Some(self.ensure_downloaded_verified().await?.1.clone())) } async fn version_async(&self) -> Result, Self::Error> { @@ -228,36 +209,14 @@ impl ProjectReadAsync for IndexEntryProject 
Ok(Some(self.advertised.usage.clone())) } - async fn checksum_canonical_hex_async( - &self, - ) -> Result, CanonicalizationError> { - // Gate on verified archive bytes, not merely on the final path - // existing. The project digest check below validates info/meta - // consistency; archive-byte authenticity belongs to the kpar digest - // verification performed by `ensure_downloaded_verified`. - if !self.archive.is_downloaded_and_verified() { - return Ok(Some(self.advertised.project_digest.as_hex().to_string())); - } - - let computed = self - .archive - .checksum_canonical_hex_async() - .await - .map_err(|e| e.map_project_read(IndexEntryProjectError::Downloaded))?; - - if let Some(computed_hex) = computed.as_ref() - && computed_hex.as_str() != self.advertised.project_digest.as_hex() - { - return Err(CanonicalizationError::ProjectRead( - IndexEntryProjectError::AdvertisedDigestDrift { - url: self.project_json_url.as_str().into(), - expected: self.advertised.project_digest.as_hex().to_string(), - computed: computed_hex.clone(), - }, - )); - } - - Ok(computed) + // TODO: decide the security requirements here and maybe have separate methods + // used for e.g. generating a lockfile, where advertised checksum is fine, + // as it will be verified on sync, and the actual verification of e.g. projects being downloaded + // against a lockfile, or verifying that the correct ones are installed in env + async fn checksum_canonical_variant_async(&self) -> Result { + Ok(ProjectChecksum::Kpar( + self.advertised.kpar_digest.as_hex().to_string(), + )) } } diff --git a/core/src/project/index_entry_tests.rs b/core/src/project/index_entry_tests.rs index 84f6b803d..6641e2e8d 100644 --- a/core/src/project/index_entry_tests.rs +++ b/core/src/project/index_entry_tests.rs @@ -3,7 +3,7 @@ //! Sibling unit tests for `index_entry.rs`. //! -//! Network-dependent paths (pre-expose `project_digest` check, +//! Network-dependent paths (pre-expose `kpar_digest` check, //! 
post-download digest drift, `.project.json`/`.meta.json` fetch error //! mapping) live in `core/src/env/index_tests.rs` because they need //! mockito to drive the HTTP side. These tests pin the network-free @@ -21,7 +21,7 @@ use crate::{ env::index::{AdvertisedVersion, Sha256HexDigest}, index::model::VersionStatus, model::InterchangeProjectUsageRaw, - project::{ProjectReadAsync, index_entry::IndexEntryProject}, + project::{ProjectChecksum, ProjectReadAsync, index_entry::IndexEntryProject}, purl::PKG_SYSAND_PREFIX, resolve::net_utils::create_reqwest_client, }; @@ -35,10 +35,6 @@ use crate::{ fn make_fixture() -> IndexEntryProject { // Two distinct 64-hex digests so a test that confuses them fails // loudly rather than passing on equality. - let project_digest = Sha256HexDigest::try_from( - "sha256:1111111111111111111111111111111111111111111111111111111111111111", - ) - .expect("fixture project_digest must be valid sha256 hex"); let kpar_digest = Sha256HexDigest::try_from( "sha256:2222222222222222222222222222222222222222222222222222222222222222", ) @@ -50,7 +46,6 @@ fn make_fixture() -> IndexEntryProject { resource: format!("{PKG_SYSAND_PREFIX}acme/widget"), version_constraint: Some("^1.0".to_string()), }], - project_digest, kpar_size: std::num::NonZeroU64::new(42).unwrap(), kpar_digest, status: VersionStatus::Available, @@ -104,17 +99,19 @@ fn usage_async_returns_advertised_without_fetch() { } #[test] -fn checksum_canonical_hex_async_returns_advertised_before_download() { +fn checksum_canonical_variant_async_returns_advertised_before_download() { let project = make_fixture(); - let digest = block_on(project.checksum_canonical_hex_async()).unwrap(); + let digest = block_on(project.checksum_canonical_variant_async()).unwrap(); assert_eq!( - digest.as_deref(), - Some("1111111111111111111111111111111111111111111111111111111111111111"), - "pre-download, checksum_canonical_hex_async must return the advertised digest verbatim \ + digest, + ProjectChecksum::Kpar( + 
"2222222222222222222222222222222222222222222222222222222222222222".to_owned() + ), + "pre-download, checksum_canonical_variant_async must return the advertised kpar digest verbatim \ (no archive download, no kpar-side computation)" ); assert!( !project.archive.is_downloaded_and_verified(), - "checksum_canonical_hex_async must not trigger a download before the archive is present" + "checksum_canonical_variant_async must not trigger a download before the archive is present" ); } diff --git a/core/src/project/local_kpar.rs b/core/src/project/local_kpar.rs index 350191f38..18f8e7e40 100644 --- a/core/src/project/local_kpar.rs +++ b/core/src/project/local_kpar.rs @@ -2,23 +2,26 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use std::{ - fs, - io::{Read, Write as _}, + cell::OnceCell, + fs::{self, File}, + io::{Read, Seek, Write as _}, + num::NonZeroU64, }; use camino::{Utf8Path, Utf8PathBuf}; use camino_tempfile::{Utf8TempDir, tempdir}; +use serde::de::DeserializeOwned; use sha2::{Digest as _, Sha256}; use thiserror::Error; -use typed_path::{Utf8Component, Utf8UnixPath}; -use zip::ZipArchive; +use typed_path::{Utf8UnixPath, Utf8UnixPathBuf}; +use zip::{ZipArchive, read::ZipFile, result::ZipError}; use crate::{ context::ProjectContext, lock::Source, model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ - self, ProjectRead, + self, KparMeta, ProjectChecksum, ProjectRead, hash_reader, utils::{RelativizePathError, ZipArchiveError, relativize_path}, }, utils::{lowercase_hex, sha256_lowercase_hex}, @@ -26,6 +29,17 @@ use crate::{ use super::utils::{FsIoError, ProjectDeserializationError, ToPathBuf, wrapfs}; +#[derive(Debug, Clone)] +pub enum KparInnerPath { + /// Project is at the root of the archive, i.e. 
accessed directly as + /// `.project.json` and `.meta.json` + Root, + /// Project is at a given path within archive + Known(Utf8UnixPathBuf), + /// Project path within archive is unknown and has to be guessed + Guess, +} + /// Project stored in as a KPar (Zip) archive in the local filesystem. /// Source file paths are interpreted relative to `root`. Both `.project.json` /// and `.meta.json` are searched for in `root`. If `root` is not given, it is @@ -39,21 +53,38 @@ use super::utils::{FsIoError, ProjectDeserializationError, ToPathBuf, wrapfs}; /// presence of a (presumed unique) `.project.json`. /// /// The archive is read directly without extracting it. +// TODO: add a way to indicate whether to guess root at construction time +// and use it to indicate that no guessing needed for index kpars #[derive(Debug)] pub struct LocalKParProject { - /// Temporary directory for unpacking files in archive. - tmp_dir: Utf8TempDir, - /// Path used in `Source::LocalSrc` returned by `.sources()`. + /// Path used in `Source::LocalKpar` returned by `.sources()`. /// If `None` no source will be given. /// E.g. if used in lockfile would be the path relative to the lockfile. // TODO: Consider removing this and replacing it with some way of // relativizing `archive_path` at the call site of .sources(). - pub nominal_path: Option, + pub nominal_path: Option, + /// Path used when locating the project archive internally. + /// Should be absolute. + archive_path: Utf8PathBuf, + expected: Option, + /// Optionally specify name of project directory inside archive. + /// If none, currently always tries to guess before reading + /// any project files. + pub root: KparInnerPath, + init: OnceCell<(LocalKParProjectRaw, KparMeta)>, +} + +/// Assumes that the kpar is already at `archive_path` +#[derive(Debug)] +pub struct LocalKParProjectRaw { + /// Temporary directory for unpacking files in archive. + tmp_dir: Utf8TempDir, /// Path used when locating the project archive internally. 
/// Should be absolute. - pub archive_path: Utf8PathBuf, - /// Optionally specify name of root directory inside archive. - pub root: Option, + archive_path: Utf8PathBuf, + /// Path of project directory inside archive. If `None`, project is + /// at archive root. + root: Option, } #[derive(Error, Debug)] @@ -72,6 +103,20 @@ pub enum LocalKParError { {0}" )] ImpossibleRelativePath(#[from] RelativizePathError), + #[error("kpar at `{path}` has sha256 `{computed}` but the expected digest was `{expected}`")] + DigestMismatch { + path: Box, + expected: String, + computed: String, + }, + #[error("kpar at `{path}` has size {actual} bytes but the expected size was {expected} bytes")] + SizeMismatch { + path: Box, + expected: u64, + actual: u64, + }, + #[error("kpar at `{path}` is an empty file")] + EmptyKpar { path: Box }, } impl From for LocalKParError { @@ -80,58 +125,6 @@ impl From for LocalKParError { } } -fn guess_root(archive: &mut ZipArchive) -> Result { - let mut maybe_root = None; - for i in 0..archive.len() { - let file = archive.by_index(i).map_err(ZipArchiveError::FileMeta)?; - - if let Some(p) = file.enclosed_name() { - // `enclosed_name()` creates path from `String` - let p = Utf8PathBuf::from_path_buf(p).unwrap(); - if p.file_name() == Some(".project.json") { - maybe_root = Some( - p.parent() - .ok_or_else(|| ZipArchiveError::InvalidPath(p.as_path().into()))? 
- .to_path_buf(), - ); - break; - } - } - } - - if let Some(root) = maybe_root { - Ok(root) - } else { - Err(LocalKParError::NotFound(".project.json".into())) - } -} - -// Wrapping this in case we want to add more normalisation logic -fn path_index, Q: AsRef>( - set_root: Option, - archive: &mut ZipArchive, - path: P, -) -> Result { - // NOTE: - let mut native_path = match set_root { - Some(root) => root.to_path_buf(), - None => guess_root(archive)?, - }; - - // TODO: Extract this somewhere and clarify behaviour, see - // sysand-core/src/io/local_file.rs#L57-78 - // @ 04c7d46fe2e188602df620407d6cedfef3440eb8 - for component in path.as_ref().components() { - native_path.push(component.as_str()); - } - - let idx = archive - .index_for_path(&native_path) - .ok_or_else(|| LocalKParError::NotFound(native_path.as_path().into()))?; - - Ok(idx) -} - #[derive(Debug, Error)] pub enum IntoKparError { #[error("missing project information file `.project.json`")] @@ -155,66 +148,251 @@ impl From for IntoKparError { } impl LocalKParProject { - /// path should be absolute - pub fn new, Q: AsRef>( + pub fn new>( path: P, - root: Q, - ) -> Result> { - Ok(LocalKParProject { - tmp_dir: tempdir().map_err(FsIoError::MkTempDir)?, - nominal_path: None, + root: KparInnerPath, + nominal: Option, + expected: Option, + ) -> Self { + LocalKParProject { + nominal_path: nominal, + root, + init: OnceCell::new(), archive_path: path.to_path_buf(), - root: Some(root.to_path_buf()), - }) + expected, + } + } + + pub fn archive_path(&self) -> &Utf8Path { + &self.archive_path + } + + fn ensure_initialized(&self) -> Result<&(LocalKParProjectRaw, KparMeta), LocalKParError> { + // TODO: use `OnceCell::get_or_try_init()` once it's stable; + // using `get_or_init()` directly requires us to always put the error into an `Arc` to + // allow returning it repeatedly from functions (`io::Error` is not cloneable) + match self.init.get() { + Some(val) => Ok(val), + None => { + let (inner, meta) = + 
LocalKParProjectRaw::new_hash(&self.archive_path, self.root.to_owned())?; + if let Some(expected) = &self.expected { + if meta.size_bytes != expected.size_bytes { + return Err(LocalKParError::SizeMismatch { + path: self.archive_path.as_str().into(), + expected: expected.size_bytes.get(), + actual: meta.size_bytes.get(), + }); + } else if meta.sha256_hex != expected.sha256_hex { + return Err(LocalKParError::DigestMismatch { + path: self.archive_path.as_str().into(), + expected: expected.sha256_hex.to_owned(), + computed: meta.sha256_hex, + }); + } + } + Ok(self.init.get_or_init(|| (inner, meta))) + } + } + } +} + +type KParFile<'a> = super::utils::FileWithLifetime<'a>; + +// NOTE: Current implementation keeps re-opening the archive file. This appears to +// be unavoidable with the current design of this trait. +impl ProjectRead for LocalKParProject { + type Error = LocalKParError; + + fn get_project( + &self, + ) -> Result< + ( + Option, + Option, + ), + Self::Error, + > { + match self.ensure_initialized() { + Ok((inner, _)) => inner.get_project(), + Err(e) => Err(e), + } + } + + fn get_info(&self) -> Result, Self::Error> { + match self.ensure_initialized() { + Ok((inner, _)) => inner.get_info(), + Err(e) => Err(e), + } + } + + fn get_meta(&self) -> Result, Self::Error> { + match self.ensure_initialized() { + Ok((inner, _)) => inner.get_meta(), + Err(e) => Err(e), + } + } + + type SourceReader<'a> + = KParFile<'a> + where + Self: 'a; + + fn read_source>( + &self, + path: P, + ) -> Result, Self::Error> { + match self.ensure_initialized() { + Ok((inner, _)) => inner.read_source(path), + Err(e) => Err(e), + } + } + + fn sources(&self, ctx: &ProjectContext) -> Result, Self::Error> { + match self.ensure_initialized() { + Ok((inner, meta)) => { + let kpar_path = if let Some(np) = self.nominal_path.as_ref() { + np.as_str().into() + } else if let Some(w) = &ctx.current_workspace { + relativize_path(&inner.archive_path, w.root_path())? 
+ .into_string() + .into() + } else if let Some(cp) = &ctx.current_project { + relativize_path(&inner.archive_path, cp.root_path())? + .into_string() + .into() + } else { + panic!( + "`LocalKparProject` without `nominal_path` does not have any project sources" + ); + }; + Ok(vec![Source::LocalKpar { + kpar_path, + kpar_size: meta.size_bytes, + kpar_digest: meta.sha256_hex.to_owned(), + }]) + } + Err(e) => Err(e), + } } - /// path should be absolute - pub fn new_nominal, Q: AsRef, N: AsRef>( + fn checksum_canonical_variant(&self) -> Result { + match self.ensure_initialized() { + Ok((_, meta)) => Ok(ProjectChecksum::Kpar(meta.sha256_hex.clone())), + Err(e) => Err(e), + } + } +} + +impl LocalKParProjectRaw { + pub fn new_hash>( path: P, - root: Q, - nominal: N, - ) -> Result> { - Ok(LocalKParProject { + root: KparInnerPath, + ) -> Result<(Self, KparMeta), LocalKParError> { + let path = path.as_ref(); + let size_bytes = match NonZeroU64::new(wrapfs::metadata(path)?.len()) { + Some(n) => n, + None => { + return Err(LocalKParError::EmptyKpar { + path: path.as_str().into(), + }); + } + }; + let mut archive = wrapfs::File::open(path)?; + let sha256_hex = match hash_reader(&mut archive) { + Ok(digest) => lowercase_hex(digest), + Err(e) => { + return Err(LocalKParError::Io( + FsIoError::ReadFile(path.to_owned(), e).into(), + )); + } + }; + + let meta = KparMeta { + size_bytes, + sha256_hex, + }; + let root = match root { + KparInnerPath::Root => None, + KparInnerPath::Known(path) => Some(path), + KparInnerPath::Guess => { + archive + .rewind() + .map_err(|e| FsIoError::Seek(path.to_owned(), 0, e))?; + let mut zip = zip::ZipArchive::new(archive) + .map_err(|e| ZipArchiveError::ReadArchive(path.into(), e))?; + Some(guess_root(&mut zip)?) 
+ } + }; + + let project = LocalKParProjectRaw { tmp_dir: tempdir().map_err(FsIoError::MkTempDir)?, - nominal_path: Some(nominal.to_path_buf()), archive_path: path.to_path_buf(), - root: Some(root.to_path_buf()), - }) + root, + }; + Ok((project, meta)) } - /// path should be absolute - pub fn new_guess_root>(path: P) -> Result> { - Ok(LocalKParProject { + pub fn new_project_at_root>(path: P) -> Result> { + Ok(LocalKParProjectRaw { tmp_dir: tempdir().map_err(FsIoError::MkTempDir)?, - nominal_path: None, archive_path: path.to_path_buf(), root: None, }) } - /// path should be absolute - pub fn new_guess_root_nominal, N: AsRef>( - path: P, - nominal: N, - ) -> Result> { - Ok(LocalKParProject { + pub fn new_guess_root>(path: P) -> Result { + let path = path.as_ref(); + let archive = wrapfs::File::open(path)?; + + let mut zip = zip::ZipArchive::new(archive) + .map_err(|e| ZipArchiveError::ReadArchive(path.into(), e))?; + let root = Some(guess_root(&mut zip)?); + + Ok(LocalKParProjectRaw { tmp_dir: tempdir().map_err(FsIoError::MkTempDir)?, - nominal_path: Some(nominal.to_path_buf()), archive_path: path.to_path_buf(), - root: None, + root, }) } pub fn new_temporary() -> Result> { let tmp_dir = tempdir().map_err(FsIoError::MkTempDir)?; - Ok(LocalKParProject { - nominal_path: None, + Ok(LocalKParProjectRaw { archive_path: tmp_dir.path().join("project.kpar"), tmp_dir, root: None, }) } + pub fn new_tempdir>( + tmp_dir: Utf8TempDir, + path: P, + root: KparInnerPath, + ) -> Result { + let path = path.as_ref(); + let root = match root { + KparInnerPath::Root => None, + KparInnerPath::Known(path) => Some(path), + KparInnerPath::Guess => { + let archive = wrapfs::File::open(path)?; + + let mut zip = zip::ZipArchive::new(archive) + .map_err(|e| ZipArchiveError::ReadArchive(path.into(), e))?; + Some(guess_root(&mut zip)?) 
+ } + }; + Ok(LocalKParProjectRaw { + archive_path: path.to_owned(), + tmp_dir, + root, + }) + } + + pub fn archive_path(&self) -> &Utf8Path { + &self.archive_path + } + /// Build a KPAR archive from `from`. /// /// `extra_files` are added to the archive alongside the project's source @@ -273,26 +451,57 @@ impl LocalKParProject { zip.finish() .map_err(|e| ZipArchiveError::Finish(path.as_ref().into(), e))?; - LocalKParProject::new(&path, ".").map_err(IntoKparError::Io) + Self::new_project_at_root(&path).map_err(IntoKparError::Io) } - fn new_file(&self) -> Result { + pub fn file_size(&self) -> Result { + Ok(wrapfs::metadata(&self.archive_path)?.len()) + } + + fn open_archive_file(&self) -> Result { Ok(wrapfs::File::open(&self.archive_path)?) } - fn new_archive(&self) -> Result, LocalKParError> { - Ok(zip::ZipArchive::new(self.new_file()?) + fn open_archive(&self) -> Result, LocalKParError> { + Ok(zip::ZipArchive::new(self.open_archive_file()?) .map_err(|e| ZipArchiveError::ReadArchive(self.archive_path.as_path().into(), e))?) } - pub fn file_size(&self) -> Result { - Ok(fs::metadata(&self.archive_path) - .map_err(FsIoError::MetadataHandle)? 
- .len()) + /// `path` must be relative and use Unix separators + fn get_relative<'a, P: AsRef>( + &self, + zip: &'a mut ZipArchive, + path: P, + ) -> Result, (Utf8UnixPathBuf, ZipError)> { + let path_in_zip = match self.root.as_ref() { + Some(p) => p.join(path.as_ref()), + None => path.as_ref().into(), + }; + match zip.by_path(path_in_zip.as_str()) { + Ok(f) => Ok(f), + Err(e) => Err((path_in_zip, e)), + } + } + + fn get_parsed>( + &self, + zip: &mut ZipArchive, + path: P, + ) -> Result, LocalKParError> { + match self.get_relative(zip, path) { + Ok(f) => Ok(Some(serde_json::from_reader(f).map_err(|e| { + ProjectDeserializationError::new("failed to deserialize `.project.json`", e) + })?)), + Err((_, ZipError::FileNotFound)) => Ok(None), + Err((path, err)) => Err(LocalKParError::Zip(ZipArchiveError::NamedFileMeta( + path.into_string().into(), + err, + ))), + } } pub fn digest_sha256(&self) -> Result { - let mut file = self.new_file()?; + let mut file = self.open_archive_file()?; let mut buf = [0; 1024]; let mut hasher = Sha256::new(); loop { @@ -309,11 +518,9 @@ impl LocalKParProject { } } -type KParFile<'a> = super::utils::FileWithLifetime<'a>; - // NOTE: Current implementation keeps re-opening the archive file. This appears to // be unavoidable with the current design of this trait. 
-impl ProjectRead for LocalKParProject { +impl ProjectRead for LocalKParProjectRaw { type Error = LocalKParError; fn get_project( @@ -325,35 +532,22 @@ impl ProjectRead for LocalKParProject { ), Self::Error, > { - let mut archive = self.new_archive()?; - - let info = match path_index(self.root.as_deref(), &mut archive, ".project.json") { - Ok(idx) => serde_json::from_reader( - archive - .by_index(idx) - .map_err(|e| ZipArchiveError::NamedFileMeta(".project.json".into(), e))?, - ) - .map_err(|e| { - ProjectDeserializationError::new("failed to deserialize `.project.json`", e) - })?, - Err(LocalKParError::NotFound(_)) => None, - Err(err) => return Err(err), - }; + let mut archive = self.open_archive()?; + let info = self.get_parsed(&mut archive, ".project.json")?; + let meta = self.get_parsed(&mut archive, ".meta.json")?; + Ok((info, meta)) + } - let meta = match path_index(self.root.as_deref(), &mut archive, ".meta.json") { - Ok(idx) => serde_json::from_reader( - archive - .by_index(idx) - .map_err(|e| ZipArchiveError::NamedFileMeta(".meta.json".into(), e))?, - ) - .map_err(|e| { - ProjectDeserializationError::new("failed to deserialize `.meta.json`", e) - })?, - Err(LocalKParError::NotFound(_)) => None, - Err(err) => return Err(err), - }; + fn get_info(&self) -> Result, Self::Error> { + let mut archive = self.open_archive()?; + let info = self.get_parsed(&mut archive, ".project.json")?; + Ok(info) + } - Ok((info, meta)) + fn get_meta(&self) -> Result, Self::Error> { + let mut archive = self.open_archive()?; + let meta = self.get_parsed(&mut archive, ".meta.json")?; + Ok(meta) } type SourceReader<'a> @@ -361,6 +555,8 @@ impl ProjectRead for LocalKParProject { where Self: 'a; + // FIXME: this may garble the file if two calls interleave (which can + // happen via async wrappers). 
fn read_source>( &self, path: P, @@ -371,12 +567,16 @@ impl ProjectRead for LocalKParProject { if !tmp_file_path.is_file() { let mut tmp_file = wrapfs::File::create(&tmp_file_path)?; - let mut archive = self.new_archive()?; - let idx = path_index(self.root.as_deref(), &mut archive, &path)?; + let mut archive = self.open_archive()?; + let mut zip_file = self + .get_relative(&mut archive, path) + .map_err(|(p, e)| ZipArchiveError::NamedFileMeta(p.into_string().into(), e))?; + + // let idx = path_index(self.root.as_deref(), &mut archive, &path)?; - let mut zip_file = archive - .by_index(idx) - .map_err(|e| ZipArchiveError::NamedFileMeta(path.as_ref().as_str().into(), e))?; + // let mut zip_file = archive + // .by_index(idx) + // .map_err(|e| ZipArchiveError::NamedFileMeta(path.as_ref().as_str().into(), e))?; std::io::copy(&mut zip_file, &mut tmp_file) .map_err(|e| FsIoError::WriteFile(tmp_file_path.clone(), e))?; @@ -390,27 +590,42 @@ impl ProjectRead for LocalKParProject { // Ok(KparFile { archive: archive, file: &mut archive.by_index(idx)? }) } - fn sources(&self, ctx: &ProjectContext) -> Result, Self::Error> { - if let Some(np) = self.nominal_path.as_ref() { - Ok(vec![Source::LocalKpar { - kpar_path: np.as_str().into(), - }]) - } else if let Some(w) = &ctx.current_workspace { - Ok(vec![Source::LocalKpar { - kpar_path: relativize_path(&self.archive_path, w.root_path())? - .into_string() - .into(), - }]) - } else if let Some(cp) = &ctx.current_project { - Ok(vec![Source::LocalKpar { - kpar_path: relativize_path(&self.archive_path, cp.root_path())? - .into_string() - .into(), - }]) - } else { - panic!("`LocalKparProject` without `nominal_path` does not have any project sources"); + /// This always panics. Wrapper is responsible for providing an appropriate source + fn sources(&self, _ctx: &ProjectContext) -> Result, Self::Error> { + panic!() + } + + /// This always panics. 
Wrapper is responsible for providing the checksum + fn checksum_canonical_variant(&self) -> Result { + panic!() + } +} + +fn guess_root(archive: &mut ZipArchive) -> Result { + let mut maybe_root = None; + for i in 0..archive.len() { + let file = archive.by_index(i).map_err(ZipArchiveError::FileMeta)?; + + if let Some(p) = file.enclosed_name() { + // `enclosed_name()` creates path from `String` + let p = Utf8UnixPathBuf::from(p.into_os_string().into_string().unwrap()); + dbg!(&p, p.file_name()); + if p.file_name() == Some(".project.json") { + maybe_root = Some( + p.parent() + .ok_or_else(|| ZipArchiveError::InvalidPath(p.as_path().into()))? + .to_path_buf(), + ); + break; + } } } + + if let Some(root) = maybe_root { + Ok(root) + } else { + Err(LocalKParError::NotFound(".project.json".into())) + } } #[cfg(test)] diff --git a/core/src/project/local_kpar_tests.rs b/core/src/project/local_kpar_tests.rs index b11798e19..5fccbb1eb 100644 --- a/core/src/project/local_kpar_tests.rs +++ b/core/src/project/local_kpar_tests.rs @@ -6,6 +6,8 @@ use std::io::{Read as _, Write}; use camino_tempfile::tempdir; use zip::write::SimpleFileOptions; +use crate::project::local_kpar::KparInnerPath; + use super::ProjectRead; #[test] @@ -31,7 +33,7 @@ fn basic_kpar_archive() -> Result<(), Box> { zip.finish().unwrap(); } - let project = super::LocalKParProject::new_guess_root(zip_path)?; + let project = super::LocalKParProject::new(zip_path, KparInnerPath::Guess, None, None); let (Some(info), Some(meta)) = project.get_project()? else { panic!(); @@ -74,7 +76,7 @@ fn nested_kpar_archive() -> Result<(), Box> { zip.finish().unwrap(); } - let project = super::LocalKParProject::new_guess_root(zip_path)?; + let project = super::LocalKParProject::new(zip_path, KparInnerPath::Guess, None, None); let (Some(info), Some(meta)) = project.get_project()? 
else { panic!(); diff --git a/core/src/project/local_src.rs b/core/src/project/local_src.rs index 8808b098d..fc2e3fafe 100644 --- a/core/src/project/local_src.rs +++ b/core/src/project/local_src.rs @@ -24,7 +24,10 @@ use crate::{ }, }; -use super::utils::{FsIoError, ProjectDeserializationError, ProjectSerializationError}; +use super::{ + CanonicalizationError, ProjectChecksum, + utils::{FsIoError, ProjectDeserializationError, ProjectSerializationError}, +}; /// Project stored in a local directory as an extracted KPAR archive. /// Source file paths with (unix) segments `segment1/.../segmentN` are @@ -36,10 +39,13 @@ pub struct LocalSrcProject { /// E.g. if used in lockfile would be the path relative to the lockfile. // TODO: Consider removing this and replacing it with some way of // relativizing `project_path` at the call site of .sources(). - pub nominal_path: Option, + pub nominal_path: Option, /// Path used when locating the project internally. /// Should be absolute. pub project_path: Utf8PathBuf, + // TODO: enforce that the project matches the checksum if provided + // before reading; see LocalKparProject for example + pub expected_checksum: Option, } /// Tries to canonicalize the (longest possible) prefix of a path. @@ -212,6 +218,7 @@ impl LocalSrcProject { let mut tmp_project = Self { nominal_path: None, project_path: wrapfs::canonicalize(tmp.path())?, + expected_checksum: None, }; let (info, meta) = clone_project(project, &mut tmp_project, true)?; @@ -320,6 +327,8 @@ pub enum LocalSrcError { AlreadyExists(String), #[error("project is missing metadata file `.meta.json`")] MissingMeta, + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, #[error(transparent)] Deserialize(#[from] ProjectDeserializationError), #[error(transparent)] @@ -414,25 +423,52 @@ impl ProjectRead for LocalSrcProject { Ok(f) } + // TODO: should `sources` be decoupled from `ProjectRead`? 
It requires knowing additional + // integrity details, such as kpar/project checksums, which therefore have to be stored + // inside the ProjectRead implementer, as there is no other wrapper used. This needs a + // consistent design for all project types. E.g. should they contain any checksums? when + // should they be verified to match the project? fn sources(&self, ctx: &ProjectContext) -> Result, Self::Error> { - if let Some(np) = self.nominal_path.as_ref() { - Ok(vec![Source::LocalSrc { - src_path: np.as_str().into(), - }]) + let checksum = match &self.expected_checksum { + Some(c) => c.clone(), + None => self + .checksum_canonical_hex() + .map_err(|e| match e { + CanonicalizationError::ProjectRead(e) => e, + CanonicalizationError::FileRead(path, error) => LocalSrcError::Io( + FsIoError::ReadFile(String::from(path).into(), error).into(), + ), + })? + .ok_or(LocalSrcError::MissingInfoMeta)?, + }; + let src_path = if let Some(np) = self.nominal_path.as_ref() { + np.as_str().into() } else if let Some(w) = &ctx.current_workspace { - Ok(vec![Source::LocalSrc { - src_path: relativize_path(&self.project_path, w.root_path())? - .into_string() - .into(), - }]) + relativize_path(&self.project_path, w.root_path())? + .into_string() + .into() } else if let Some(cp) = &ctx.current_project { - Ok(vec![Source::LocalSrc { - src_path: relativize_path(&self.project_path, cp.root_path())? - .into_string() - .into(), - }]) + relativize_path(&self.project_path, cp.root_path())? 
+ .into_string() + .into() } else { panic!("`LocalSrcProject` without `nominal_path` does not have any project sources"); + }; + Ok(vec![Source::LocalSrc { src_path, checksum }]) + } + + fn checksum_canonical_variant(&self) -> Result { + match self.checksum_canonical_hex() { + Ok(c) => match c { + Some(c) => Ok(ProjectChecksum::Project(c)), + None => Err(LocalSrcError::MissingInfoMeta), + }, + Err(e) => match e { + CanonicalizationError::ProjectRead(e) => Err(e), + CanonicalizationError::FileRead(path, error) => Err(LocalSrcError::Io( + FsIoError::ReadFile(String::from(path).into(), error).into(), + )), + }, } } } diff --git a/core/src/project/memory.rs b/core/src/project/memory.rs index fe61d2b0f..bd3b87c5a 100644 --- a/core/src/project/memory.rs +++ b/core/src/project/memory.rs @@ -14,9 +14,11 @@ use crate::{ env::utils::{CloneError, clone_project}, lock::Source, model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, - project::{ProjectMut, ProjectRead}, + project::{CanonicalizationError, ProjectMut, ProjectRead}, }; +use super::ProjectChecksum; + /// Project stored in a local directory #[derive(Clone, Eq, Default, Debug, PartialEq)] pub struct InMemoryProject { @@ -123,6 +125,8 @@ pub enum InMemoryError { FileNotFound(Utf8UnixPathBuf), #[error("failed to read from reader: {0}")] IoRead(#[from] std::io::Error), + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, } impl ProjectRead for InMemoryProject { @@ -159,4 +163,15 @@ impl ProjectRead for InMemoryProject { debug_assert!(!self.nominal_sources.is_empty()); Ok(self.nominal_sources.clone()) } + + fn checksum_canonical_variant(&self) -> Result { + let checksum = self + .checksum_canonical_hex() + .map_err(|e| match e { + CanonicalizationError::ProjectRead(e) => e, + CanonicalizationError::FileRead(_, _) => unreachable!(), + })? 
+ .ok_or(InMemoryError::MissingInfoMeta)?; + Ok(ProjectChecksum::Project(checksum)) + } } diff --git a/core/src/project/mod.rs b/core/src/project/mod.rs index e1a7f0db7..b4b05edcc 100644 --- a/core/src/project/mod.rs +++ b/core/src/project/mod.rs @@ -9,6 +9,7 @@ use std::{ fmt::Debug, io::{self, BufRead as _, BufReader, Read}, marker::Unpin, + num::NonZeroU64, sync::Arc, }; use thiserror::Error; @@ -57,69 +58,14 @@ pub mod reference; pub mod utils; -/// Result of trying to compute the canonical project digest from -/// `.project.json` and `.meta.json` alone. -#[cfg(all(feature = "filesystem", feature = "networking"))] -pub(crate) enum InlineProjectDigest { - /// The digest is available without reading source files. - Computed(ProjectHash), - /// At least one metadata checksum uses a non-SHA256 algorithm, so - /// canonicalization would require reading source bytes. - RequiresSourceReads, -} - -/// Attempts to compute the canonical project digest without reading any -/// source files. -/// -/// Canonicalization (see [`ProjectRead::canonical_meta`]) lowercases SHA256 -/// checksum values and rewrites non-SHA256 checksum entries to SHA256 — -/// the latter requires reading the corresponding source file. This helper -/// handles the first case inline and returns -/// [`InlineProjectDigest::RequiresSourceReads`] in the second case. -/// -/// Returns [`InlineProjectDigest::Computed`] when every `meta.checksum` entry -/// uses the `SHA256` algorithm (mixed-case hex values are lowercased inline -/// before hashing). -/// -/// Callers verifying an advertised `project_digest` against a locally -/// reconstructed (info, meta) pair should use this to perform the check -/// without materializing the project's archive. 
Callers that can read sources -/// may fall back to [`ProjectRead::checksum_canonical_hex`] / the async -/// equivalent when this returns [`InlineProjectDigest::RequiresSourceReads`]; -/// protocol surfaces that require `(info, meta)` to be self-verifying can -/// reject that outcome explicitly. -#[cfg(all(feature = "filesystem", feature = "networking"))] -pub(crate) fn canonical_project_digest_inline( - info: &InterchangeProjectInfoRaw, - meta: &InterchangeProjectMetadataRaw, -) -> InlineProjectDigest { - use crate::model::project_hash_raw; - - let sha256_alg: &str = KerMlChecksumAlg::Sha256.into(); - - // Fast path: no checksums at all means canonical == raw. - let needs_canonicalization = meta - .checksum - .as_ref() - .is_some_and(|entries| entries.values().any(|entry| entry.algorithm != sha256_alg)); - - if needs_canonicalization { - return InlineProjectDigest::RequiresSourceReads; - } - - let mut canonical = meta.clone(); - if let Some(entries) = canonical.checksum.as_mut() { - for (_path, entry) in entries.iter_mut() { - // All entries are SHA256 here (checked above); lowercase the hex - // value to match `canonical_meta`'s output. 
- entry.value = entry.value.to_lowercase(); - } - } - - InlineProjectDigest::Computed(project_hash_raw(info, &canonical)) +#[derive(Debug)] +pub struct KparMeta { + pub size_bytes: NonZeroU64, + pub sha256_hex: String, } -fn hash_reader(reader: &mut R) -> Result { +/// Produce a SHA-256 digest by hashing all the contents of `reader` +pub(crate) fn hash_reader(reader: &mut R) -> Result { let mut hasher = Sha256::new(); let mut buffered = BufReader::new(reader); @@ -195,6 +141,16 @@ pub enum IntoProjectError { MissingMeta, } +// TODO: serialize this as "kpar:" or "meta:" to avoid having two fields, +// which can't be both populated +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ProjectChecksum { + /// Same as `.checksum_canonical_hex()` + Project(String), + /// SHA256 hex digest of the original KPAR + Kpar(String), +} + /// Anything implementing `ProjectRead` can be treated as a method for accessing (one /// particular) interchange project. pub trait ProjectRead { @@ -233,7 +189,7 @@ pub trait ProjectRead { /// In case no sources are included, they should be derived /// from the known info, including `ctx` if possible. /// - /// Should panic if no sources are available. + /// Must not return an empty list; should panic if no sources are available. fn sources(&self, ctx: &ProjectContext) -> Result, Self::Error>; // Optional and helpers @@ -352,6 +308,8 @@ pub trait ProjectRead { .map(|(info, meta)| project_hash_hex(&info, &meta))) } + fn checksum_canonical_variant(&self) -> Result; + // TODO: Make this return an associated type instead? 
/// Treat this `ProjectRead` as a (trivial) `ProjectReadAsync` fn to_async(self) -> AsAsyncProject @@ -436,6 +394,10 @@ impl ProjectRead for &T { fn checksum_canonical_hex(&self) -> Result, CanonicalizationError> { (*self).checksum_canonical_hex() } + + fn checksum_canonical_variant(&self) -> Result { + (*self).checksum_canonical_variant() + } } impl ProjectRead for &mut T { @@ -512,6 +474,10 @@ impl ProjectRead for &mut T { fn checksum_canonical_hex(&self) -> Result, CanonicalizationError> { (**self).checksum_canonical_hex() } + + fn checksum_canonical_variant(&self) -> Result { + (**self).checksum_canonical_variant() + } } pub trait ProjectReadAsync { @@ -549,6 +515,8 @@ pub trait ProjectReadAsync { /// some typical order of preference. /// /// May be empty if no valid sources are known. + // TODO: should we require the checksum to be verified (i.e. actual, instead of + // expected) fn sources_async( &self, ctx: &ProjectContext, @@ -689,6 +657,10 @@ pub trait ProjectReadAsync { } } + fn checksum_canonical_variant_async( + &self, + ) -> impl Future>; + /// Treat this `ProjectReadAsync` as a `ProjectRead` using the provided tokio runtime. fn to_tokio_sync(self, runtime: Arc) -> AsSyncProjectTokio where @@ -793,6 +765,12 @@ impl ProjectReadAsync for &T { ) -> impl Future, CanonicalizationError>> { (**self).checksum_canonical_hex_async() } + + fn checksum_canonical_variant_async( + &self, + ) -> impl Future> { + (**self).checksum_canonical_variant_async() + } } impl ProjectReadAsync for &mut T { @@ -887,6 +865,12 @@ impl ProjectReadAsync for &mut T { ) -> impl Future, CanonicalizationError>> { (**self).checksum_canonical_hex_async() } + + fn checksum_canonical_variant_async( + &self, + ) -> impl Future> { + (**self).checksum_canonical_variant_async() + } } // TODO: Eliminate the need for this? 
@@ -1191,6 +1175,10 @@ where ) -> Result, CanonicalizationError> { self.inner.checksum_canonical_hex() } + + async fn checksum_canonical_variant_async(&self) -> Result { + self.inner.checksum_canonical_variant() + } } /// Wrapper intended to wrap a `ProjectReadAsync`, indicating that it be treated as @@ -1277,6 +1265,11 @@ impl ProjectRead for AsSyncProjectTokio { self.runtime .block_on(self.inner.checksum_canonical_hex_async()) } + + fn checksum_canonical_variant(&self) -> Result { + self.runtime + .block_on(self.inner.checksum_canonical_variant_async()) + } } #[cfg(test)] diff --git a/core/src/project/null.rs b/core/src/project/null.rs index e417e14c9..97bf37685 100644 --- a/core/src/project/null.rs +++ b/core/src/project/null.rs @@ -17,6 +17,8 @@ use crate::{ project::{ProjectRead, ProjectReadAsync}, }; +use super::ProjectChecksum; + #[derive(Debug)] pub struct NullProject { nothing: Infallible, @@ -78,6 +80,10 @@ impl ProjectRead for NullProject { fn sources(&self, _ctx: &ProjectContext) -> Result, Self::Error> { match self.nothing {} } + + fn checksum_canonical_variant(&self) -> Result { + match self.nothing {} + } } impl ProjectReadAsync for NullProject { @@ -110,4 +116,8 @@ impl ProjectReadAsync for NullProject { async fn sources_async(&self, _ctx: &ProjectContext) -> Result, Self::Error> { match self.nothing {} } + + async fn checksum_canonical_variant_async(&self) -> Result { + match self.nothing {} + } } diff --git a/core/src/project/reference.rs b/core/src/project/reference.rs index 7c5687fa0..d15007b21 100644 --- a/core/src/project/reference.rs +++ b/core/src/project/reference.rs @@ -63,6 +63,10 @@ impl ProjectRead for ProjectReference { fn sources(&self, ctx: &ProjectContext) -> Result, Self::Error> { self.project.sources(ctx) } + + fn checksum_canonical_variant(&self) -> Result { + self.project.checksum_canonical_variant() + } } #[cfg(feature = "filesystem")] diff --git a/core/src/project/reference_tests.rs b/core/src/project/reference_tests.rs index 
f5e1a6e64..b803003bf 100644 --- a/core/src/project/reference_tests.rs +++ b/core/src/project/reference_tests.rs @@ -1,9 +1,17 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 // SPDX-FileCopyrightText: © 2026 Sysand contributors -use crate::project::{local_kpar::LocalKParProject, reference::ProjectReference}; +use crate::project::{ + local_kpar::{KparInnerPath, LocalKParProject}, + reference::ProjectReference, +}; #[test] fn project_reference_is_cloneable() { - let kpar = ProjectReference::new(LocalKParProject::new("path", "root").unwrap()); + let kpar = ProjectReference::new(LocalKParProject::new( + "path", + KparInnerPath::Known("root".into()), + None, + None, + )); let _clone = kpar.clone(); } diff --git a/core/src/project/reqwest_kpar_download.rs b/core/src/project/reqwest_kpar_download.rs index d0d5b1efa..cda62528f 100644 --- a/core/src/project/reqwest_kpar_download.rs +++ b/core/src/project/reqwest_kpar_download.rs @@ -2,12 +2,14 @@ // SPDX-FileCopyrightText: © 2025 Sysand contributors use std::{ + error::Error, io::{self, Write as _}, num::NonZeroU64, pin::Pin, sync::Arc, }; +use camino_tempfile::tempdir; use futures::AsyncRead; use sha2::{Digest as _, Sha256}; use thiserror::Error; @@ -18,14 +20,18 @@ use crate::{ lock::Source, model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ - ProjectRead, ProjectReadAsync, - local_kpar::{LocalKParError, LocalKParProject}, + KparMeta, ProjectRead, ProjectReadAsync, + local_kpar::{LocalKParError, LocalKParProject, LocalKParProjectRaw}, }, resolve::net_utils::kpar_get_request, utils::lowercase_hex, }; -use super::utils::{FsIoError, wrapfs}; +use super::{ + ProjectChecksum, + local_kpar::KparInnerPath, + utils::{FsIoError, wrapfs}, +}; /// Project stored at a remote URL such as https://www.example.com/project.kpar. 
/// The URL is expected to resolve to a kpar-archive (ZIP-file) (at least) if @@ -36,19 +42,50 @@ use super::utils::{FsIoError, wrapfs}; /// Downloads the full archive to a temporary directory and then accesses it using /// `LocalKParProject`. #[derive(Debug)] -pub struct ReqwestKparDownloadedProject { - pub url: reqwest::Url, +pub struct ReqwestRemoteKparDownloadedProject { + // inner: LocalKParProjectRaw, + // tmp_dir: Utf8TempDir, + // archive_path: Utf8PathBuf, + url: reqwest::Url, + pub client: reqwest_middleware::ClientWithMiddleware, + pub auth_policy: Arc, + /// Optionally contains: + /// + /// - expected sha256 hex digest for callers that need this + /// instance to enforce archive verification before exposing project + /// contents (e.g. lockfile-driven `sync`). + /// - expected archive byte length. Index-backed kpars carry this + /// in the lockfile / versions index; enforce it while streaming so a + /// malicious server cannot exhaust disk before the digest check fails. + expected: Option, + /// Fans concurrent `ensure_downloaded*` calls on the same instance + /// into a single download — without this, racing tasks would both + /// truncate the destination archive and interleave writes. + /// + /// The kpar is downloaded directly to the destination path, so + /// `is_downloaded_and_verified` must be checked before reading it. + /// Errors aren't cached, so a transient failure is retryable. + /// + /// If this is initialized, the archive has been downloaded and verified + /// against `expected` if present (then this will be empty), and + /// otherwise contains actual downloaded KPAR metadata. 
+ downloaded_verified: tokio::sync::OnceCell<(LocalKParProjectRaw, Option)>, +} + +#[derive(Debug)] +pub struct ReqwestIndexKparDownloadedProject { + // inner: LocalKParProjectRaw, + url: reqwest::Url, pub client: reqwest_middleware::ClientWithMiddleware, - inner: LocalKParProject, pub auth_policy: Arc, - /// Optional expected sha256 hex digest for callers that need this + /// Expected sha256 hex digest for callers that need this /// instance to enforce archive verification before exposing project /// contents (e.g. lockfile-driven `sync`). - expected_sha256_hex: Option, - /// Optional expected archive byte length. Index-backed kpars carry this + expected_kpar_sha256: String, + /// Expected archive byte length. Index-backed kpars carry this /// in the lockfile / versions index; enforce it while streaming so a /// malicious server cannot exhaust disk before the digest check fails. - expected_size: Option, + expected_size: NonZeroU64, /// Fans concurrent `ensure_downloaded*` calls on the same instance /// into a single download — without this, racing tasks would both /// truncate the destination archive and interleave writes. @@ -58,7 +95,7 @@ pub struct ReqwestKparDownloadedProject { /// Errors aren't cached, so a transient failure is retryable. /// If this is initialized, the archive has been downloaded and verified /// against `expected_sha256_hex`/`expected_size` if present. - downloaded_verified: tokio::sync::OnceCell<()>, + downloaded_verified: tokio::sync::OnceCell, } // TODO: reduce size of errors here and elsewhere @@ -71,10 +108,10 @@ pub enum ReqwestKparDownloadedError { }, #[error("failed to parse URL `{0}`: {1}")] ParseUrl(Box, url::ParseError), - // TODO: nicer formatting. 
Debug formatting is used here to include - // all the details, since they are not given in the Display impl + // TODO: ensure that error chain is printed, then remove cause + // details from this message #[error("error making an HTTP request:\n{0:#?}")] - Reqwest(reqwest::Error), + Reqwest(#[from] reqwest::Error), #[error("error making an HTTP request:\n{0:#?}")] ReqwestMiddleware(#[from] reqwest_middleware::Error), #[error(transparent)] @@ -93,6 +130,8 @@ pub enum ReqwestKparDownloadedError { expected: u64, actual: u64, }, + #[error("kpar at `{url}` is an empty file")] + EmptyKpar { url: Box }, } impl From for ReqwestKparDownloadedError { @@ -101,54 +140,45 @@ impl From for ReqwestKparDownloadedError { } } -impl ReqwestKparDownloadedProject { - pub fn new( - url: reqwest::Url, +impl ReqwestRemoteKparDownloadedProject { + // TODO: indicate to `inner` that it should not guess root + // Also decide whether to take URL or str in all constructors here, and + // do so consistently for all project types + pub fn new_guess_root>( + url: S, client: reqwest_middleware::ClientWithMiddleware, auth_policy: Arc, - expected_sha256_hex: Option, - expected_size: Option, + expected: Option, ) -> Result { + let url = reqwest::Url::parse(url.as_ref()) + .map_err(|e| ReqwestKparDownloadedError::ParseUrl(url.as_ref().into(), e))?; Ok(Self { url, - inner: LocalKParProject::new_temporary()?, client, auth_policy, - expected_sha256_hex, - expected_size, + expected, downloaded_verified: tokio::sync::OnceCell::new(), }) } - pub fn new_guess_root>( - url: S, - client: reqwest_middleware::ClientWithMiddleware, - auth_policy: Arc, - expected_sha256_hex: Option, - expected_size: Option, - ) -> Result { - Self::new( - reqwest::Url::parse(url.as_ref()) - .map_err(|e| ReqwestKparDownloadedError::ParseUrl(url.as_ref().into(), e))?, - client, - auth_policy, - expected_sha256_hex, - expected_size, - ) - } - /// True iff the archive is on disk and has been successfully /// verified against expected hex 
and length (if present) pub fn is_downloaded_and_verified(&self) -> bool { self.downloaded_verified.initialized() } - /// Ensure the archive is on disk and verify the digest if known - pub async fn ensure_downloaded_verified(&self) -> Result<(), ReqwestKparDownloadedError> { + /// Ensure the archive is on disk and verify the digest if known. + /// THe digest is returned iff it's not known. + pub async fn ensure_downloaded_verified( + &self, + ) -> Result<&(LocalKParProjectRaw, Option), ReqwestKparDownloadedError> { self.downloaded_verified .get_or_try_init(|| self.perform_download()) - .await?; - Ok(()) + .await + } + + pub fn url(&self) -> &reqwest::Url { + &self.url } /// Download the archive. Invoked through @@ -159,7 +189,9 @@ impl ReqwestKparDownloadedProject { /// Downloads directly to the final path. Callers must go through /// `ensure_downloaded_verified` before reading so the `OnceCell` has /// observed a successful download and any configured verification. - async fn perform_download(&self) -> Result<(), ReqwestKparDownloadedError> { + async fn perform_download( + &self, + ) -> Result<(LocalKParProjectRaw, Option), ReqwestKparDownloadedError> { use futures::StreamExt as _; let resp = self @@ -174,71 +206,87 @@ impl ReqwestKparDownloadedProject { }); } - if let (Some(actual), Some(expected)) = (resp.content_length(), self.expected_size) - && actual != expected.get() + if let (Some(actual), Some(expected)) = (resp.content_length(), &self.expected) + && actual != expected.size_bytes.get() { return Err(ReqwestKparDownloadedError::SizeMismatch { url: self.url.as_str().into(), - expected: expected.get(), + expected: expected.size_bytes.get(), actual, }); } - let mut file = wrapfs::File::create(&self.inner.archive_path)?; + let tmp_dir = tempdir().map_err(FsIoError::MkTempDir)?; + let archive_path = tmp_dir.path().join("project.kpar"); + let mut file = wrapfs::File::create(&archive_path)?; let mut bytes_stream = resp.bytes_stream(); - let mut hasher = 
self.expected_sha256_hex.as_deref().map(|_| Sha256::new()); + let mut hasher = Sha256::new(); let mut written = 0_u64; while let Some(bytes) = bytes_stream.next().await { let bytes = bytes.map_err(ReqwestKparDownloadedError::Reqwest)?; written += bytes.len() as u64; - if let Some(expected) = self.expected_size - && written > expected.get() + if let Some(expected) = &self.expected + && written > expected.size_bytes.get() { return Err(ReqwestKparDownloadedError::SizeMismatch { url: self.url.as_str().into(), - expected: expected.get(), + expected: expected.size_bytes.get(), actual: written, }); } - if let Some(h) = hasher.as_mut() { - h.update(&bytes); - } + hasher.update(&bytes); file.write_all(&bytes) - .map_err(|e| FsIoError::WriteFile(self.inner.archive_path.clone(), e))?; + .map_err(|e| FsIoError::WriteFile(archive_path.clone(), e))?; } + let written = if let Some(w) = NonZeroU64::new(written) { + w + } else { + return Err(ReqwestKparDownloadedError::EmptyKpar { + url: self.url.as_str().into(), + }); + }; - if let Some(expected) = self.expected_size - && written != expected.get() + if let Some(expected) = &self.expected + && written != expected.size_bytes { return Err(ReqwestKparDownloadedError::SizeMismatch { url: self.url.as_str().into(), - expected: expected.get(), - actual: written, + expected: expected.size_bytes.get(), + actual: written.get(), }); } file.sync_all() - .map_err(|e| FsIoError::WriteFile(self.inner.archive_path.clone(), e))?; - - if let Some(expected) = self.expected_size { - debug_assert_eq!(expected.get(), self.inner.file_size().unwrap()); - } - - if let (Some(h), Some(expected)) = (hasher, self.expected_sha256_hex.as_deref()) { - let computed = lowercase_hex(h.finalize()); - if computed == expected { - return Ok(()); + .map_err(|e| FsIoError::WriteFile(archive_path.clone(), e))?; + + let computed_hash = lowercase_hex(hasher.finalize()); + if let Some(expected) = &self.expected { + debug_assert_eq!( + expected.size_bytes.get(), + 
wrapfs::metadata(&archive_path).unwrap().len() + ); + + if computed_hash == expected.sha256_hex { + let inner = + LocalKParProjectRaw::new_tempdir(tmp_dir, archive_path, KparInnerPath::Guess)?; + Ok((inner, None)) } else { - return Err(ReqwestKparDownloadedError::DigestMismatch { + Err(ReqwestKparDownloadedError::DigestMismatch { url: self.url.as_str().into(), - expected: expected.to_owned(), - computed, - }); + expected: expected.sha256_hex.to_owned(), + computed: computed_hash, + }) } + } else { + let inner = + LocalKParProjectRaw::new_tempdir(tmp_dir, archive_path, KparInnerPath::Guess)?; + let meta = KparMeta { + size_bytes: written, + sha256_hex: computed_hash, + }; + Ok((inner, Some(meta))) } - - Ok(()) } } @@ -257,7 +305,7 @@ impl AsyncRead for AsAsyncRead { } } -impl ProjectReadAsync for ReqwestKparDownloadedProject { +impl ProjectReadAsync for ReqwestRemoteKparDownloadedProject { type Error = ReqwestKparDownloadedError; async fn get_project_async( @@ -269,18 +317,24 @@ impl ProjectReadAsync for ReqwestKparDownloadedProje ), Self::Error, > { - self.ensure_downloaded_verified().await?; - Ok(self.inner.get_project()?) + match self.ensure_downloaded_verified().await { + Ok((inner, _)) => Ok(inner.get_project()?), + Err(e) => Err(e), + } } async fn get_info_async(&self) -> Result, Self::Error> { - self.ensure_downloaded_verified().await?; - Ok(self.inner.get_info()?) + match self.ensure_downloaded_verified().await { + Ok((inner, _)) => Ok(inner.get_info()?), + Err(e) => Err(e), + } } async fn get_meta_async(&self) -> Result, Self::Error> { - self.ensure_downloaded_verified().await?; - Ok(self.inner.get_meta()?) 
+ match self.ensure_downloaded_verified().await { + Ok((inner, _)) => Ok(inner.get_meta()?), + Err(e) => Err(e), + } } type SourceReader<'a> @@ -292,33 +346,271 @@ impl ProjectReadAsync for ReqwestKparDownloadedProje &self, path: P, ) -> Result, Self::Error> { - self.ensure_downloaded_verified().await?; + match self.ensure_downloaded_verified().await { + Ok((inner, _)) => Ok(AsAsyncRead { + inner: inner.read_source(path)?, + }), + Err(e) => Err(e), + } + } - Ok(AsAsyncRead { - inner: self.inner.read_source(path)?, + async fn sources_async(&self, _ctx: &ProjectContext) -> Result, Self::Error> { + let (kpar_size, kpar_digest) = if let Some(expected) = &self.expected { + (expected.size_bytes, expected.sha256_hex.to_owned()) + } else { + // If expected is not present, download populates the cell with actual + let (_, maybe_meta) = self.ensure_downloaded_verified().await?; + let actual_meta = maybe_meta.as_ref().unwrap(); + (actual_meta.size_bytes, actual_meta.sha256_hex.to_owned()) + }; + Ok(vec![Source::RemoteKpar { + remote_kpar: self.url.to_string(), + kpar_size, + kpar_digest, + }]) + } + + async fn is_definitely_invalid_async(&self) -> bool { + // FIXME: error should be returned + match self.ensure_downloaded_verified().await { + Ok((inner, _)) => inner.is_definitely_invalid(), + Err(e) => { + // TODO: generalize `format_sources()` to logging + log::debug!("error downloading/reading a kpar: {e}"); + let mut error: &dyn Error = &e; + while let Some(source) = error.source() { + log::debug!(" caused by: {source}"); + error = source; + } + + false + } + } + } + + async fn checksum_canonical_variant_async(&self) -> Result { + match self.ensure_downloaded_verified().await { + Ok((_, meta)) => { + let meta = match meta { + Some(m) => m, + None => self.expected.as_ref().unwrap(), + }; + Ok(ProjectChecksum::Kpar(meta.sha256_hex.to_owned())) + } + Err(e) => Err(e), + } + } +} + +impl ReqwestIndexKparDownloadedProject { + pub fn new( + url: reqwest::Url, + client: 
reqwest_middleware::ClientWithMiddleware, + auth_policy: Arc, + expected_size: NonZeroU64, + expected_kpar_sha256: String, + ) -> Result { + let url = reqwest::Url::parse(url.as_ref()) + .map_err(|e| ReqwestKparDownloadedError::ParseUrl(url.as_ref().into(), e))?; + Ok(Self { + url, + client, + auth_policy, + expected_kpar_sha256, + expected_size, + downloaded_verified: tokio::sync::OnceCell::new(), }) } - async fn sources_async(&self, _ctx: &ProjectContext) -> Result, Self::Error> { - let src = if let (Some(index_kpar_size), Some(index_kpar_digest)) = - (self.expected_size, self.expected_sha256_hex.as_ref()) + /// True iff the archive is on disk and has been successfully + /// verified against expected hex and length (if present) + pub fn is_downloaded_and_verified(&self) -> bool { + self.downloaded_verified.initialized() + } + + /// Ensure the archive is on disk and verify the digest if known + pub async fn ensure_downloaded_verified( + &self, + ) -> Result<&LocalKParProjectRaw, ReqwestKparDownloadedError> { + self.downloaded_verified + .get_or_try_init(|| self.perform_download()) + .await + } + + pub fn url(&self) -> &reqwest::Url { + &self.url + } + + /// Download the archive. Invoked through + /// [`tokio::sync::OnceCell::get_or_try_init`], so concurrent callers + /// share the single in-flight attempt and a returned `Err` leaves + /// the cell uninitialized (retries succeed). + /// + /// Downloads directly to the final path. Callers must go through + /// `ensure_downloaded_verified` before reading so the `OnceCell` has + /// observed a successful download and verification. 
+ async fn perform_download(&self) -> Result { + use futures::StreamExt as _; + + let resp = self + .auth_policy + .with_authentication(&self.client, &kpar_get_request(self.url.clone())) + .await?; + + if !resp.status().is_success() { + return Err(ReqwestKparDownloadedError::BadHttpStatus { + url: self.url.as_str().into(), + status: resp.status(), + }); + } + + if let Some(actual) = resp.content_length() + && actual != self.expected_size.get() { - Source::IndexKpar { - index_kpar: self.url.to_string(), - index_kpar_size, - index_kpar_digest: index_kpar_digest.clone(), + return Err(ReqwestKparDownloadedError::SizeMismatch { + url: self.url.as_str().into(), + expected: self.expected_size.get(), + actual, + }); + } + + let tmp_dir = tempdir().map_err(FsIoError::MkTempDir)?; + let archive_path = tmp_dir.path().join("project.kpar"); + let mut file = wrapfs::File::create(&archive_path)?; + let mut bytes_stream = resp.bytes_stream(); + let mut hasher = Sha256::new(); + let mut written = 0_u64; + + while let Some(bytes) = bytes_stream.next().await { + let bytes = bytes.map_err(ReqwestKparDownloadedError::Reqwest)?; + written += bytes.len() as u64; + if written > self.expected_size.get() { + return Err(ReqwestKparDownloadedError::SizeMismatch { + url: self.url.as_str().into(), + expected: self.expected_size.get(), + actual: written, + }); } + hasher.update(&bytes); + file.write_all(&bytes) + .map_err(|e| FsIoError::WriteFile(archive_path.clone(), e))?; + } + let written = if let Some(w) = NonZeroU64::new(written) { + w } else { - Source::RemoteKpar { - remote_kpar: self.url.to_string(), - remote_kpar_size: self.inner.file_size().ok(), - } + return Err(ReqwestKparDownloadedError::EmptyKpar { + url: self.url.as_str().into(), + }); }; - Ok(vec![src]) + + if written != self.expected_size { + return Err(ReqwestKparDownloadedError::SizeMismatch { + url: self.url.as_str().into(), + expected: self.expected_size.get(), + actual: written.get(), + }); + } + + file.sync_all() + 
.map_err(|e| FsIoError::WriteFile(archive_path.clone(), e))?; + + let computed_hash = lowercase_hex(hasher.finalize()); + debug_assert_eq!( + self.expected_size.get(), + wrapfs::metadata(&archive_path).unwrap().len() + ); + + debug_assert_eq!( + self.expected_size.get(), + wrapfs::metadata(&archive_path).unwrap().len() + ); + + if computed_hash == self.expected_kpar_sha256 { + let inner = + LocalKParProjectRaw::new_tempdir(tmp_dir, archive_path, KparInnerPath::Guess)?; + Ok(inner) + } else { + Err(ReqwestKparDownloadedError::DigestMismatch { + url: self.url.as_str().into(), + expected: self.expected_kpar_sha256.to_owned(), + computed: computed_hash, + }) + } + } +} + +impl ProjectReadAsync for ReqwestIndexKparDownloadedProject { + type Error = ReqwestKparDownloadedError; + + async fn get_project_async( + &self, + ) -> Result< + ( + Option, + Option, + ), + Self::Error, + > { + Ok(self.ensure_downloaded_verified().await?.get_project()?) + } + + async fn get_info_async(&self) -> Result, Self::Error> { + Ok(self.ensure_downloaded_verified().await?.get_info()?) + } + + async fn get_meta_async(&self) -> Result, Self::Error> { + Ok(self.ensure_downloaded_verified().await?.get_meta()?) 
+ } + + type SourceReader<'a> + = AsAsyncRead<::SourceReader<'a>> + where + Self: 'a; + + async fn read_source_async>( + &self, + path: P, + ) -> Result, Self::Error> { + match self.ensure_downloaded_verified().await { + Ok(inner) => Ok(AsAsyncRead { + inner: inner.read_source(path)?, + }), + Err(e) => Err(e), + } + } + + async fn sources_async(&self, _ctx: &ProjectContext) -> Result, Self::Error> { + Ok(vec![Source::IndexKpar { + index_kpar: self.url.to_string(), + kpar_size: self.expected_size, + kpar_digest: self.expected_kpar_sha256.clone(), + }]) } async fn is_definitely_invalid_async(&self) -> bool { - self.inner.is_definitely_invalid() + // FIXME: error should be returned; does it make sense to download the project + // here, as this is supposed to be a quick check + match self.ensure_downloaded_verified().await { + Ok(inner) => inner.is_definitely_invalid(), + Err(e) => { + // TODO: generalize `format_sources()` to logging + log::debug!("error downloading/reading a kpar: {e}"); + let mut error: &dyn Error = &e; + while let Some(source) = error.source() { + log::debug!(" caused by: {source}"); + error = source; + } + + false + } + } + } + + async fn checksum_canonical_variant_async(&self) -> Result { + match self.ensure_downloaded_verified().await { + Ok(_) => Ok(ProjectChecksum::Kpar(self.expected_kpar_sha256.clone())), + Err(e) => Err(e), + } } } diff --git a/core/src/project/reqwest_kpar_download_tests.rs b/core/src/project/reqwest_kpar_download_tests.rs index caf2c81fa..d1f120ba2 100644 --- a/core/src/project/reqwest_kpar_download_tests.rs +++ b/core/src/project/reqwest_kpar_download_tests.rs @@ -3,18 +3,26 @@ use std::{ io::{Read, Write as _}, + num::NonZeroU64, sync::Arc, }; +use url::Url; + use crate::{ auth::Unauthenticated, context::ProjectContext, lock::Source, - project::{ProjectRead, ProjectReadAsync, reqwest_kpar_download::ReqwestKparDownloadedError}, + project::{ + KparMeta, ProjectRead, ProjectReadAsync, + 
reqwest_kpar_download::{ReqwestIndexKparDownloadedProject, ReqwestKparDownloadedError}, + }, resolve::net_utils::create_reqwest_client, utils::sha256_lowercase_hex, }; +use super::ReqwestRemoteKparDownloadedProject; + #[test] fn basic_download_request() -> Result<(), Box> { let buf = { @@ -51,12 +59,11 @@ fn basic_download_request() -> Result<(), Box> { .expect(1) .create(); - let project = super::ReqwestKparDownloadedProject::new_guess_root( + let project = ReqwestRemoteKparDownloadedProject::new_guess_root( format!("{}basic_download_request.kpar", url,), create_reqwest_client()?, Arc::new(Unauthenticated {}), None, - None, )? .to_tokio_sync(Arc::new( tokio::runtime::Builder::new_current_thread() @@ -129,12 +136,14 @@ fn concurrent_downloads_fan_in_to_single_fetch() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box Result<(), Box Result<(), Box { - assert_eq!(expected, wrong_size); + assert_eq!(expected, wrong_size.get()); assert_eq!(actual, kpar_bytes.len() as u64); } other => panic!("expected SizeMismatch for wrong kpar size, got {other:?}"), @@ -226,10 +234,6 @@ fn expected_size_mismatch_rejects_download() -> Result<(), Box Result<(), Box Result<(), Box> { let index_kpar = "https://example.com/project.kpar"; - let index_kpar_size = std::num::NonZeroU64::new(1234).unwrap(); + let index_kpar_size = NonZeroU64::new(1234).unwrap(); let index_kpar_digest = "a".repeat(64); - let project = super::ReqwestKparDownloadedProject::new_guess_root( - index_kpar, + let project = ReqwestIndexKparDownloadedProject::new( + Url::parse(index_kpar).unwrap(), create_reqwest_client()?, Arc::new(Unauthenticated {}), - Some(index_kpar_digest.clone()), - Some(index_kpar_size), + index_kpar_size, + index_kpar_digest.clone(), )?; let runtime = tokio::runtime::Builder::new_current_thread() @@ -258,8 +262,8 @@ fn index_kpar_source_roundtrips_digest_and_size() -> Result<(), Box { /// Base-url of the project pub url: reqwest::Url, pub auth_policy: Arc, + pub expected_checksum: 
Option, } impl ReqwestSrcProjectAsync { @@ -51,37 +54,6 @@ impl ReqwestSrcProjectAsync { .join(path.as_ref().as_str()) .expect("internal URL error") } - - // pub fn head_info(&self) -> reqwest_middleware::RequestBuilder { - // self.client - // .head(self.info_url()) - // .header(reqwest::header::ACCEPT, "application/json") - // } - - // pub fn head_meta(&self) -> reqwest_middleware::RequestBuilder { - // self.client - // .head(self.meta_url()) - // .header(reqwest::header::ACCEPT, "application/json") - // } - - // pub fn get_info(&self) -> reqwest_middleware::RequestBuilder { - // self.client - // .get(self.info_url()) - // .header(reqwest::header::ACCEPT, "application/json") - // } - - // pub fn get_meta(&self) -> reqwest_middleware::RequestBuilder { - // self.client - // .get(self.meta_url()) - // .header(reqwest::header::ACCEPT, "application/json") - // } - - // pub fn reqwest_src>( - // &self, - // path: P, - // ) -> reqwest_middleware::RequestBuilder { - // self.client.get(self.src_url(path)) - // } } #[derive(Error, Debug)] @@ -94,6 +66,12 @@ pub enum ReqwestSrcError { Deserialize(String, serde_json::Error), #[error("HTTP request to `{0}` returned unexpected status code {1}")] BadStatus(Box, reqwest::StatusCode), + #[error("project is missing `.project.json` and/or `.meta.json` files")] + MissingInfoMeta, + // This is only needed because `checksum_canonical_hex` can return + // `io:Error`, which in this case is actually a network/HTTP error + #[error(transparent)] + Io(#[from] Box), } impl ProjectReadAsync for ReqwestSrcProjectAsync { @@ -196,10 +174,38 @@ impl ProjectReadAsync for ReqwestSrcProjectAsync Result, Self::Error> { + let checksum = match &self.expected_checksum { + Some(c) => c.clone(), + None => self + .checksum_canonical_hex_async() + .await + .map_err(|e| match e { + CanonicalizationError::ProjectRead(e) => e, + CanonicalizationError::FileRead(path, error) => ReqwestSrcError::Io( + FsIoError::ReadFile(String::from(path).into(), error).into(), + 
), + })? + .ok_or(ReqwestSrcError::MissingInfoMeta)?, + }; Ok(vec![Source::RemoteSrc { remote_src: self.url.to_string(), + checksum, }]) } + + async fn checksum_canonical_variant_async(&self) -> Result { + let checksum = self + .checksum_canonical_hex_async() + .await + .map_err(|e| match e { + CanonicalizationError::ProjectRead(e) => e, + CanonicalizationError::FileRead(path, error) => ReqwestSrcError::Io( + FsIoError::ReadFile(String::from(path).into(), error).into(), + ), + })? + .ok_or(ReqwestSrcError::MissingInfoMeta)?; + Ok(ProjectChecksum::Project(checksum)) + } } #[cfg(test)] diff --git a/core/src/project/reqwest_src_tests.rs b/core/src/project/reqwest_src_tests.rs index 0b500abf5..1e1ce7612 100644 --- a/core/src/project/reqwest_src_tests.rs +++ b/core/src/project/reqwest_src_tests.rs @@ -24,6 +24,7 @@ fn empty_remote_definitely_invalid_http_src() -> Result<(), Box Result<(), Box> { client, url, auth_policy: Arc::new(Unauthenticated {}), + expected_checksum: None, } .to_tokio_sync(Arc::new( tokio::runtime::Builder::new_current_thread() diff --git a/core/src/project/utils.rs b/core/src/project/utils.rs index 221e69429..cbbc679ef 100644 --- a/core/src/project/utils.rs +++ b/core/src/project/utils.rs @@ -125,6 +125,8 @@ pub enum FsIoError { IsFile(Utf8PathBuf, io::Error), #[error("failed to get metadata to determine if\n `{0}` is a directory:\n {1}")] IsDir(Utf8PathBuf, io::Error), + #[error("failed to seek file `{0}` to {1}:\n {2}")] + Seek(Utf8PathBuf, u64, io::Error), } /// Wrappers for filesystem I/O functions to return `FsIoError`. 
@@ -351,7 +353,7 @@ pub enum ZipArchiveError { Finish(Box, ZipError), } -#[derive(Debug, Error)] +#[derive(Debug, Error, Clone)] pub enum RelativizePathError { #[error("unable to relativize path `{path}` with respect to `{root}`")] NoCommonPrefix { @@ -432,7 +434,7 @@ fn contains_non_canonical_components(path: &Utf8Path) -> bool { pub fn relativize_path, R: AsRef>( path: P, root: R, -) -> Result { +) -> Result { let path = path.as_ref(); let root = root.as_ref(); @@ -471,7 +473,7 @@ pub fn relativize_path, R: AsRef>( } } - let mut result = Utf8PathBuf::new(); + let mut result = Utf8UnixPathBuf::new(); for r in root_iter { if let Utf8Component::Normal(_) = r { @@ -479,6 +481,7 @@ pub fn relativize_path, R: AsRef>( } } + // TODO: it's probably always expected that we will use Unix paths for p in path_iter { result.push(p.as_str()); } @@ -487,7 +490,7 @@ pub fn relativize_path, R: AsRef>( result.push("."); } - Ok(result) + Ok(result.into_string().into()) } #[cfg(test)] diff --git a/core/src/resolve/file.rs b/core/src/resolve/file.rs index 6096c3429..6afbd4b50 100644 --- a/core/src/resolve/file.rs +++ b/core/src/resolve/file.rs @@ -17,7 +17,7 @@ use crate::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ self, ProjectRead, - local_kpar::{LocalKParError, LocalKParProject}, + local_kpar::{KparInnerPath, LocalKParError, LocalKParProject}, local_src::{LocalSrcError, LocalSrcProject}, utils::{FsIoError, ProjectDeserializationError, RelativizePathError, wrapfs}, }, @@ -160,6 +160,20 @@ pub enum FileResolverProjectError { {0}" )] ImpossibleRelativePath(#[from] RelativizePathError), + #[error("kpar at `{path}` has sha256 `{computed}` but the expected digest was `{expected}`")] + DigestMismatch { + path: Box, + expected: String, + computed: String, + }, + #[error("kpar at `{path}` has size {actual} bytes but the expected size was {expected} bytes")] + SizeMismatch { + path: Box, + expected: u64, + actual: u64, + }, + #[error("kpar at `{path}` is 
an empty file")] + EmptyKpar { path: Box }, #[error("{0}")] Other(String), } @@ -171,8 +185,8 @@ impl From for FileResolverProjectError { } pub enum FileResolverProjectReader<'a> { - File(::SourceReader<'a>), - Archive(::SourceReader<'a>), + File(::SourceReader<'a>), + Archive(::SourceReader<'a>), } impl Read for FileResolverProjectReader<'_> { @@ -192,6 +206,25 @@ impl From for FileResolverProjectError { LocalKParError::Io(error) => FileResolverProjectError::Io(error), LocalKParError::Zip(err) => FileResolverProjectError::Zip(err), LocalKParError::ImpossibleRelativePath(err) => Self::ImpossibleRelativePath(err), + LocalKParError::DigestMismatch { + path, + expected, + computed, + } => Self::DigestMismatch { + path, + expected, + computed, + }, + LocalKParError::SizeMismatch { + path, + expected, + actual, + } => Self::SizeMismatch { + path, + expected, + actual, + }, + LocalKParError::EmptyKpar { path } => Self::EmptyKpar { path }, } } } @@ -265,6 +298,15 @@ impl ProjectRead for FileResolverProject { FileResolverProject::LocalKParProject(proj) => Ok(proj.sources(ctx)?), } } + + fn checksum_canonical_variant(&self) -> Result { + match self { + FileResolverProject::LocalSrcProject(proj) => proj + .checksum_canonical_variant() + .map_err(FileResolverProjectError::LocalSrc), + FileResolverProject::LocalKParProject(proj) => Ok(proj.checksum_canonical_variant()?), + } + } } impl ResolveRead for FileResolver { @@ -283,9 +325,10 @@ impl ResolveRead for FileResolver { Ok(FileResolverProject::LocalSrcProject(LocalSrcProject { nominal_path: None, project_path: path.clone(), + expected_checksum: None, })), Ok(FileResolverProject::LocalKParProject( - LocalKParProject::new_guess_root(path)?, + LocalKParProject::new(path, KparInnerPath::Guess, None, None), )), ]), ResolutionOutcome::UnsupportedIRIType(msg) => { diff --git a/core/src/resolve/priority.rs b/core/src/resolve/priority.rs index 83398576e..abc0a5494 100644 --- a/core/src/resolve/priority.rs +++ 
b/core/src/resolve/priority.rs @@ -218,6 +218,17 @@ impl ProjectRead .map_err(|e| e.map_project_read(PriorityError::Lower)), } } + + fn checksum_canonical_variant(&self) -> Result { + match self { + PriorityProject::HigherProject(project) => project + .checksum_canonical_variant() + .map_err(PriorityError::Higher), + PriorityProject::LowerProject(project) => project + .checksum_canonical_variant() + .map_err(PriorityError::Lower), + } + } } impl ResolveRead for PriorityResolver { diff --git a/core/src/resolve/remote.rs b/core/src/resolve/remote.rs index 2972e9072..bacc370c3 100644 --- a/core/src/resolve/remote.rs +++ b/core/src/resolve/remote.rs @@ -184,6 +184,17 @@ impl ProjectRead .map_err(|e| e.map_project_read(RemoteProjectError::GitRead)), } } + + fn checksum_canonical_variant(&self) -> Result { + match self { + RemoteProject::HTTPProject(project) => project + .checksum_canonical_variant() + .map_err(RemoteProjectError::HTTPRead), + RemoteProject::GitProject(project) => project + .checksum_canonical_variant() + .map_err(RemoteProjectError::GitRead), + } + } } pub struct ResolvedRemote { diff --git a/core/src/resolve/reqwest_http.rs b/core/src/resolve/reqwest_http.rs index 9964a3659..d5df6f9f4 100644 --- a/core/src/resolve/reqwest_http.rs +++ b/core/src/resolve/reqwest_http.rs @@ -13,7 +13,8 @@ use crate::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, project::{ CanonicalizationError, ProjectReadAsync, - reqwest_kpar_download::ReqwestKparDownloadedProject, reqwest_src::ReqwestSrcProjectAsync, + reqwest_kpar_download::ReqwestRemoteKparDownloadedProject, + reqwest_src::ReqwestSrcProjectAsync, }, resolve::ResolveReadAsync, utils::scheme::{SCHEME_HTTP, SCHEME_HTTPS}, @@ -34,7 +35,7 @@ pub struct HTTPResolverAsync { pub enum HTTPProjectAsync { HTTPSrcProject(ReqwestSrcProjectAsync), // HTTPKParProjectRanged(ReqwestKparRangedProject), - HTTPKParProjectDownloaded(Box>), + HTTPKParProjectDownloaded(Box>), } #[derive(Error, Debug)] @@ -44,14 +45,14 
@@ pub enum HTTPProjectError { // #[error(transparent)] // KParRanged(::Error), #[error(transparent)] - KparDownloaded( as ProjectReadAsync>::Error), + KparDownloaded( as ProjectReadAsync>::Error), } pub enum HTTPProjectAsyncReader<'a, Policy: HTTPAuthentication> { SrcProjectReader( as ProjectReadAsync>::SourceReader<'a>), //KParRangedReader(::SourceReader<'a>), KparDownloadedReader( - as ProjectReadAsync>::SourceReader<'a>, + as ProjectReadAsync>::SourceReader<'a>, ), } @@ -215,6 +216,21 @@ impl ProjectReadAsync for HTTPProjectAsync { .map_err(|e| e.map_project_read(HTTPProjectError::KparDownloaded)), } } + + async fn checksum_canonical_variant_async( + &self, + ) -> Result { + match self { + HTTPProjectAsync::HTTPSrcProject(proj) => proj + .checksum_canonical_variant_async() + .await + .map_err(HTTPProjectError::SrcProject), + HTTPProjectAsync::HTTPKParProjectDownloaded(proj) => proj + .checksum_canonical_variant_async() + .await + .map_err(HTTPProjectError::KparDownloaded), + } + } } pub struct HTTPProjects { @@ -255,12 +271,11 @@ impl HTTPProjects { // } Some(HTTPProjectAsync::HTTPKParProjectDownloaded(Box::new( - ReqwestKparDownloadedProject::new_guess_root( + ReqwestRemoteKparDownloadedProject::new_guess_root( &url, self.client.clone(), self.auth_policy.clone(), None, - None, ) .expect("internal IO error"), ))) @@ -276,6 +291,7 @@ impl HTTPProjects { client: self.client.clone(), // Already internally an Rc url: self.url.clone(), auth_policy: auth_policy.clone(), + expected_checksum: None, })) // If the resolver is set to be lax, try forcing the terminal slash } else if self.lax { @@ -288,6 +304,7 @@ impl HTTPProjects { client: self.client.clone(), // Already internally an Rc url: lax_url, auth_policy, + expected_checksum: None, })) } else { None diff --git a/core/src/utils.rs b/core/src/utils.rs index 55c2d6a56..de5571c33 100644 --- a/core/src/utils.rs +++ b/core/src/utils.rs @@ -7,7 +7,6 @@ use digest::{array::Array, typenum}; use sha2::{Digest, Sha256}; 
pub(crate) mod scheme { - #[cfg(feature = "filesystem")] use fluent_uri::component::Scheme; #[cfg(feature = "filesystem")] pub const SCHEME_FILE: &Scheme = Scheme::new_or_panic("file"); @@ -21,9 +20,7 @@ pub(crate) mod scheme { pub const SCHEME_GIT_HTTP: &Scheme = Scheme::new_or_panic("git+http"); #[cfg(all(feature = "filesystem", feature = "networking"))] pub const SCHEME_GIT_HTTPS: &Scheme = Scheme::new_or_panic("git+https"); - #[cfg(feature = "filesystem")] pub const SCHEME_HTTP: &Scheme = Scheme::new_or_panic("http"); - #[cfg(feature = "filesystem")] pub const SCHEME_HTTPS: &Scheme = Scheme::new_or_panic("https"); } @@ -59,3 +56,27 @@ pub fn sha256_lowercase_hex(data: impl AsRef<[u8]>) -> String { pub fn lowercase_hex(bytes: Array) -> String { hex::encode(bytes) } + +/// Return the deduplicated, in-order list of SPDX identifiers (licenses plus +/// any `WITH` exceptions) named in `expression`. Each identifier maps to a +/// `LICENSES/.txt` file under REUSE conventions; the `+` "or later" +/// modifier does not affect the filename. +pub(crate) fn license_file_stems(expression: &spdx::Expression) -> Vec { + let mut stems: indexmap::IndexSet = indexmap::IndexSet::new(); + for req in expression.requirements() { + let license_name = match &req.req.license { + spdx::LicenseItem::Spdx { id, .. 
} => id.name.to_string(), + spdx::LicenseItem::Other(license_ref) => license_ref.to_string(), + }; + stems.insert(license_name); + + if let Some(addition) = &req.req.addition { + let addition_name = match addition { + spdx::AdditionItem::Spdx(id) => id.name.to_string(), + spdx::AdditionItem::Other(add_ref) => add_ref.to_string(), + }; + stems.insert(addition_name); + } + } + stems.into_iter().collect() +} diff --git a/core/tests/filesystem_env.rs b/core/tests/filesystem_env.rs index efa7d4cb7..44b59a2f6 100644 --- a/core/tests/filesystem_env.rs +++ b/core/tests/filesystem_env.rs @@ -127,8 +127,9 @@ version = \"0.1\" &mut Cursor::new("package Pkg;"), true, )?; + let checksum = source_project.checksum_canonical_variant()?; - env.put_project(uri, version_str, |p| { + env.put_project(uri, version_str, Some(checksum), |p| { clone_project(&source_project, p, true).map(|_| ()) })?; } @@ -213,7 +214,8 @@ version = \"0.1\" &mut Cursor::new("package Other;"), true, )?; - env.put_project(other_uri, "1.0.0", |p| { + let checksum = other_project.checksum_canonical_variant()?; + env.put_project(other_uri, "1.0.0", Some(checksum), |p| { clone_project(&other_project, p, true).map(|_| ()) })?; } @@ -283,8 +285,9 @@ version = \"0.1\" let source_code = "package SomePackage;"; source_project.write_source(source_path, &mut Cursor::new(source_code), true)?; + let checksum = source_project.checksum_canonical_variant()?; - directory_environment.put_project("urn:sysand_test:1", "1.2.3", |p| { + directory_environment.put_project("urn:sysand_test:1", "1.2.3", Some(checksum), |p| { clone_project(&source_project, p, true).map(|_| ()) })?; diff --git a/core/tests/index_management.rs b/core/tests/index_management.rs index f238afbf7..e9d0988b8 100644 --- a/core/tests/index_management.rs +++ b/core/tests/index_management.rs @@ -149,14 +149,12 @@ fn file_state_test() { { "version": "0.2.0", "usage": [], - "project_digest": "sha256:6420d5d3170a11b6f6a811dfa71940317e69cef249ce664c1e4499124676fdd6", 
"kpar_size": 348, "kpar_digest": "sha256:873476ac47fe239c60d7ed6a51d752ae716d782872292ee7c7820cc3ee7fc021" }, { "version": "0.1.0", "usage": [], - "project_digest": "sha256:de024b833722716ad706981bdcb809f9af28e609ccee7c6522567218ca7fe2a6", "kpar_size": 348, "kpar_digest": "sha256:b67db84b3a2168e012262bd3dd7a202b284deb4f515a1418409d9b10d0effc8f" } @@ -219,7 +217,6 @@ fn file_state_test() { "versionConstraint": "^0.1.0" } ], - "project_digest": "sha256:6606158ab6f322fe25b9c2f8d963fa30ececf5156c5e5570185f6896aa4ea452", "kpar_size": 397, "kpar_digest": "sha256:3acdae9db465a4edcf3d99c4a57bf476c9acf3045636c6b8bb091db8cf61bdbe" } diff --git a/core/tests/memory_env.rs b/core/tests/memory_env.rs index ebae99b1b..1113815d1 100644 --- a/core/tests/memory_env.rs +++ b/core/tests/memory_env.rs @@ -76,7 +76,7 @@ fn env_manual_install() -> Result<(), Box> { source_project.write_source(source_path, &mut Cursor::new(source_code), true)?; - memory_environment.put_project("urn:sysand_test:1", "1.2.3", |p| { + memory_environment.put_project("urn:sysand_test:1", "1.2.3", None, |p| { clone_project(&source_project, p, true)?; Ok::<(), CloneError>(()) diff --git a/core/tests/project_derive.rs b/core/tests/project_derive.rs index bfe81d333..4c222849c 100644 --- a/core/tests/project_derive.rs +++ b/core/tests/project_derive.rs @@ -8,7 +8,9 @@ use std::{ use sysand_core::{ model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}, - project::{CanonicalizationError, ProjectMut, ProjectRead, memory::InMemoryProject}, + project::{ + CanonicalizationError, ProjectChecksum, ProjectMut, ProjectRead, memory::InMemoryProject, + }, }; // Have to have these in scope for ProjectRead @@ -238,6 +240,10 @@ impl ProjectRead for FixedDigestProject { fn checksum_canonical_hex(&self) -> Result, CanonicalizationError> { Ok(Some(self.digest.clone())) } + + fn checksum_canonical_variant(&self) -> Result { + Ok(ProjectChecksum::Project(self.digest.clone())) + } } #[derive(ProjectRead)] diff --git 
a/core/tests/project_no_derive.rs b/core/tests/project_no_derive.rs index 1aa1d8a0a..0f497f4e5 100644 --- a/core/tests/project_no_derive.rs +++ b/core/tests/project_no_derive.rs @@ -8,7 +8,7 @@ use thiserror::Error; use sysand_core::{ context::ProjectContext, lock::Source, - project::{ProjectMut, ProjectRead, memory::InMemoryProject}, + project::{ProjectChecksum, ProjectMut, ProjectRead, memory::InMemoryProject}, }; pub enum GenericProject @@ -127,6 +127,20 @@ where } } } + + fn checksum_canonical_variant(&self) -> Result { + match self { + GenericProject::Variant1(project) => project + .checksum_canonical_variant() + .map_err(GenericProjectError::Variant1), + GenericProject::Variant2(project) => project + .checksum_canonical_variant() + .map_err(GenericProjectError::Variant2), + GenericProject::Variant3(project) => project + .checksum_canonical_variant() + .map_err(GenericProjectError::Variant3), + } + } } impl ProjectMut for GenericProject diff --git a/docs/src/publishing.md b/docs/src/publishing.md index 38565b6e8..451b4c4fc 100644 --- a/docs/src/publishing.md +++ b/docs/src/publishing.md @@ -63,7 +63,7 @@ manually. Sensmetry is currently working on automating this process. Add usage to the list with `sysand add ` - - **Metamodel**: This field is neccessary to let the library consumers know + - **Metamodel**: This field is necessary to let the library consumers know which language standard (KerML, SysML, other KerML-based languages) and version to use when parsing the library. diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 6623bc22d..113fa81ec 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -193,6 +193,12 @@ pub fn project_read_derive(input: TokenStream) -> TokenStream { .checksum_canonical_hex() .map_err(|e| e.map_project_read(#error_ident::#variant_ident)) }, + // checksum_canonical_variant match + quote! 
{ + #enum_ident::#variant_ident(project) => project + .checksum_canonical_variant() + .map_err(#error_ident::#variant_ident) + }, )) }) .collect(); @@ -220,6 +226,7 @@ pub fn project_read_derive(input: TokenStream) -> TokenStream { let mut version_match = vec![]; let mut usage_match = vec![]; let mut checksum_canonical_hex_match = vec![]; + let mut checksum_canonical_variant_match = vec![]; for ( variant_list_part, @@ -237,6 +244,7 @@ pub fn project_read_derive(input: TokenStream) -> TokenStream { version_match_part, usage_match_part, checksum_canonical_hex_match_part, + checksum_canonical_variant_match_part, ) in variant_parts.iter().cloned() { variant_list.push(variant_list_part); @@ -254,6 +262,7 @@ pub fn project_read_derive(input: TokenStream) -> TokenStream { version_match.push(version_match_part); usage_match.push(usage_match_part); checksum_canonical_hex_match.push(checksum_canonical_hex_match_part); + checksum_canonical_variant_match.push(checksum_canonical_variant_match_part); } let expanded = quote! 
{ @@ -371,6 +380,17 @@ pub fn project_read_derive(input: TokenStream) -> TokenStream { #( #checksum_canonical_hex_match ),* } } + + fn checksum_canonical_variant( + &self, + ) -> ::std::result::Result< + ::sysand_core::project::ProjectChecksum, + Self::Error, + > { + match self { + #( #checksum_canonical_variant_match ),* + } + } } }; diff --git a/sysand/src/commands/add.rs b/sysand/src/commands/add.rs index 2d4abd28e..3c20fbc31 100644 --- a/sysand/src/commands/add.rs +++ b/sysand/src/commands/add.rs @@ -12,7 +12,7 @@ use sysand_core::{ auth::HTTPAuthentication, commands::lock::{DEFAULT_LOCKFILE_NAME, LockOutcome, do_lock_local_editable}, config::{ - Config, ConfigProject, + Config, ConfigProject, OverrideSource, local_fs::{CONFIG_FILE, add_project_source_to_config}, }, context::ProjectContext, @@ -57,14 +57,14 @@ pub fn command_add( let source = if let Some(path) = source_opts.from_path { let metadata = wrapfs::metadata(&path)?; if metadata.is_dir() { - Some(sysand_core::lock::Source::LocalSrc { - src_path: get_relative(path, current_project.root_path())? + Some(OverrideSource::LocalSrc { + src_path: get_relative(path, current_project.root_path(), &ctx.current_directory)? .as_str() .into(), }) } else if metadata.is_file() { - Some(sysand_core::lock::Source::LocalKpar { - kpar_path: get_relative(path, current_project.root_path())? + Some(OverrideSource::LocalKpar { + kpar_path: get_relative(path, current_project.root_path(), &ctx.current_directory)? 
.as_str() .into(), }) @@ -93,17 +93,15 @@ pub fn command_add( runtime.clone(), auth_policy.clone(), )?; - let outcome = std_resolver.resolve_read_raw(&url)?; + let outcome = std_resolver.resolve_read(&url)?; let mut source = None; match outcome { ResolutionOutcome::Resolved(alternatives) => { for candidate in alternatives { match candidate { Ok(project) => { - source = project.sources(&ctx)?.first().cloned(); - if source.is_some() { - break; - } + source = Some(project.sources(&ctx)?[0].to_override()); + break; } Err(err) => { log::debug!("skipping candidate project: {err}"); @@ -121,34 +119,46 @@ pub fn command_add( } source } else if let Some(editable) = source_opts.as_editable { - Some(sysand_core::lock::Source::Editable { - editable: get_relative(editable, current_project.root_path())? - .as_str() - .into(), + Some(OverrideSource::Editable { + editable: get_relative( + editable, + current_project.root_path(), + &ctx.current_directory, + )? + .as_str() + .into(), }) } else if let Some(src_path) = source_opts.as_local_src { - Some(sysand_core::lock::Source::LocalSrc { - src_path: get_relative(src_path, current_project.root_path())? - .as_str() - .into(), + Some(OverrideSource::LocalSrc { + src_path: get_relative( + src_path, + current_project.root_path(), + &ctx.current_directory, + )? + .as_str() + .into(), }) } else if let Some(kpar_path) = source_opts.as_local_kpar { - Some(sysand_core::lock::Source::LocalKpar { - kpar_path: get_relative(kpar_path, current_project.root_path())? - .as_str() - .into(), + Some(OverrideSource::LocalKpar { + kpar_path: get_relative( + kpar_path, + current_project.root_path(), + &ctx.current_directory, + )? 
+ .as_str() + .into(), }) } else if let Some(remote_src) = source_opts.as_remote_src { - Some(sysand_core::lock::Source::RemoteSrc { + Some(OverrideSource::RemoteSrc { remote_src: remote_src.into_string(), }) } else if let Some(remote_kpar) = source_opts.as_remote_kpar { - Some(sysand_core::lock::Source::RemoteKpar { + // TODO: maybe also allow giving IndexKpar (does it make sense?) + Some(OverrideSource::RemoteKpar { remote_kpar: remote_kpar.into_string(), - remote_kpar_size: None, }) } else if let Some(remote_git) = source_opts.as_remote_git { - Some(sysand_core::lock::Source::RemoteGit { + Some(OverrideSource::RemoteGit { remote_git: remote_git.into_string(), }) } else { @@ -293,13 +303,14 @@ fn resolve_deps, Policy: HTTPAuthentication>( fn get_relative + AsRef>( src_path: P, project_root: &Utf8Path, + cwd: &Utf8Path, ) -> Result { - let src_path = if src_path.as_ref().is_absolute() || wrapfs::current_dir()? != project_root { + let src_path = if src_path.as_ref().is_absolute() || cwd != project_root { let path = relativize_path(wrapfs::canonicalize(src_path.as_ref())?, project_root)?; if path == "." 
{ bail!("cannot add current project as usage of itself"); } - path + path.into_string().into() } else { src_path.into() }; diff --git a/sysand/src/commands/clone.rs b/sysand/src/commands/clone.rs index 04e3c5280..c6c14479d 100644 --- a/sysand/src/commands/clone.rs +++ b/sysand/src/commands/clone.rs @@ -244,6 +244,7 @@ fn obtain_project( let mut local_project = LocalSrcProject { nominal_path: None, project_path, + expected_checksum: None, }; let std_resolver = standard_resolver( @@ -275,6 +276,7 @@ fn obtain_project( let remote_project = LocalSrcProject { nominal_path: None, project_path: path.into(), + expected_checksum: None, }; if let Some(version) = version { let project_version = remote_project diff --git a/sysand/src/commands/env.rs b/sysand/src/commands/env.rs index ad73aa51a..fb5ffea1a 100644 --- a/sysand/src/commands/env.rs +++ b/sysand/src/commands/env.rs @@ -17,7 +17,10 @@ use sysand_core::{ lock::Lock, model::InterchangeProjectUsage, project::{ - ProjectRead, local_kpar::LocalKParProject, local_src::LocalSrcProject, utils::wrapfs, + ProjectRead, + local_kpar::{KparInnerPath, LocalKParProject}, + local_src::LocalSrcProject, + utils::wrapfs, }, resolve::{ file::FileResolverProject, @@ -26,6 +29,7 @@ use sysand_core::{ standard::standard_resolver, }, }; +use typed_path::Utf8UnixPathBuf; use crate::{ DEFAULT_INDEX_URL, @@ -133,6 +137,7 @@ pub fn command_env_install( &iri, &version.to_string(), &storage, + Some(storage.checksum_canonical_variant()?), // Initialized above &mut ctx.env.unwrap(), allow_overwrite, @@ -221,13 +226,17 @@ pub fn command_env_install_path( FileResolverProject::LocalSrcProject(LocalSrcProject { // Provide an empty nominal path to satisfy lock. 
It won't be used for actual // syncing, as the project is installed manually - nominal_path: Some(Utf8PathBuf::new()), + nominal_path: Some(Utf8UnixPathBuf::new()), project_path: path.as_str().into(), + expected_checksum: None, }) } else if metadata.is_file() { - FileResolverProject::LocalKParProject(LocalKParProject::new_guess_root_nominal( - &path, &path, - )?) + FileResolverProject::LocalKParProject(LocalKParProject::new( + &path, + KparInnerPath::Guess, + Some(path.as_str().into()), + None, + )) } else { bail!("path `{path}` is neither a directory nor a file"); }; @@ -266,6 +275,7 @@ pub fn command_env_install_path( iri.as_str(), &project_version, &project, + Some(project.checksum_canonical_variant()?), ctx.env.as_mut().unwrap(), allow_overwrite, allow_multiple, diff --git a/sysand/src/commands/info.rs b/sysand/src/commands/info.rs index da44a0964..67c0cd908 100644 --- a/sysand/src/commands/info.rs +++ b/sysand/src/commands/info.rs @@ -15,7 +15,7 @@ use sysand_core::{ model::{ InterchangeProjectChecksumRaw, InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw, }, - project::{ProjectMut, ProjectRead, any::OverrideProject}, + project::{ProjectMut, ProjectRead, any::OverrideProject, local_kpar::KparInnerPath}, resolve::{ file::FileResolverProject, memory::MemoryResolver, priority::PriorityResolver, standard::standard_resolver, @@ -97,11 +97,17 @@ pub fn pprint_interchange_project( fn interpret_project_path>(path: P) -> Result { let metadata = wrapfs::metadata(&path)?; Ok(if metadata.is_file() { - FileResolverProject::LocalKParProject(LocalKParProject::new_guess_root(path)?) 
+ FileResolverProject::LocalKParProject(LocalKParProject::new( + path, + KparInnerPath::Guess, + None, + None, + )) } else if metadata.is_dir() { FileResolverProject::LocalSrcProject(LocalSrcProject { nominal_path: None, project_path: path.as_ref().as_str().into(), + expected_checksum: None, }) } else { // TODO: NoResolve is for IRIs, this is a path diff --git a/sysand/src/commands/init.rs b/sysand/src/commands/init.rs index e500c261a..1a319a4e0 100644 --- a/sysand/src/commands/init.rs +++ b/sysand/src/commands/init.rs @@ -41,6 +41,7 @@ pub fn command_init( &mut LocalSrcProject { nominal_path: None, project_path: path, + expected_checksum: None, }, )?; Ok(()) diff --git a/sysand/src/commands/lock.rs b/sysand/src/commands/lock.rs index ebb0519ed..e84d42541 100644 --- a/sysand/src/commands/lock.rs +++ b/sysand/src/commands/lock.rs @@ -20,12 +20,13 @@ use sysand_core::{ }, stdlib::known_std_libs, }; +use typed_path::Utf8UnixPath; use crate::{DEFAULT_INDEX_URL, cli::ResolutionOptions, get_overrides}; /// Generate a lockfile for `current_project`. 
#[expect(clippy::too_many_arguments)] -pub fn command_lock, Policy: HTTPAuthentication, R: AsRef>( +pub fn command_lock, Policy: HTTPAuthentication, R: AsRef>( path: P, resolution_opts: ResolutionOptions, config: &Config, @@ -57,7 +58,7 @@ pub fn command_lock, Policy: HTTPAuthentication, R: AsRef, Policy: HTTPAuthentication, R: AsRef, Policy: HTTPAuthentication>( sysand_core::commands::sync::do_sync( lock, env, - Some(|src_path: &Utf8Path| LocalSrcProject { - nominal_path: Some(src_path.to_path_buf()), - project_path: project_root.as_ref().join(src_path), - }), Some( - |remote_src: String| -> Result>, ParseError> { + |src_path: Utf8UnixPathBuf, checksum: String| -> LocalSrcProject { + LocalSrcProject { + project_path: project_root.as_ref().join(src_path.as_str()), + nominal_path: Some(src_path), + expected_checksum: Some(checksum), + } + }, + ), + Some( + |remote_src: String, + checksum: String| + -> Result>, ParseError> { Ok(ReqwestSrcProjectAsync { client: client.clone(), url: reqwest::Url::parse(&remote_src)?, - auth_policy: auth_policy.clone() + auth_policy: auth_policy.clone(), + expected_checksum: Some(checksum), } .to_tokio_sync(runtime.clone())) }, ), + Some( + |kpar_path: String, kpar_size: NonZeroU64, kpar_digest: String| -> LocalKParProject { + LocalKParProject::new( + project_root.as_ref().join(&kpar_path), + KparInnerPath::Guess, + Some(kpar_path.into()), + Some(KparMeta { + size_bytes: kpar_size, + sha256_hex: kpar_digest, + }), + ) + }, + ), // TODO: Fix error handling here - Some(|kpar_path: &Utf8Path| LocalKParProject::new_guess_root_nominal(project_root.as_ref().join(kpar_path), kpar_path).unwrap()), Some( - |remote_kpar: String| - -> Result>, ParseError> { - let project = ReqwestKparDownloadedProject::new_guess_root( - reqwest::Url::parse(&remote_kpar)?, + |index_kpar: String, + index_kpar_size: NonZeroU64, + index_kpar_digest: String| + -> Result< + AsSyncProjectTokio>, + ParseError, + > { + let project = 
ReqwestRemoteKparDownloadedProject::new_guess_root( + reqwest::Url::parse(&index_kpar)?, client.clone(), auth_policy.clone(), - None, None + Some(KparMeta { + size_bytes: index_kpar_size, + sha256_hex: index_kpar_digest, + }), ) .unwrap(); Ok(project.to_tokio_sync(runtime.clone())) @@ -70,20 +101,26 @@ pub fn command_sync, Policy: HTTPAuthentication>( |index_kpar: String, index_kpar_size: NonZeroU64, index_kpar_digest: String| - -> Result>, ParseError> { - let project = ReqwestKparDownloadedProject::new_guess_root( + -> Result< + AsSyncProjectTokio>, + ParseError, + > { + let project = ReqwestIndexKparDownloadedProject::new( reqwest::Url::parse(&index_kpar)?, client.clone(), auth_policy.clone(), - Some(index_kpar_digest), Some(index_kpar_size) + index_kpar_size, + index_kpar_digest, ) .unwrap(); Ok(project.to_tokio_sync(runtime.clone())) }, ), - Some(|remote_git: String| -> Result { - GixDownloadedProject::new(remote_git) - }), + Some( + |remote_git: String| -> Result { + GixDownloadedProject::new(remote_git) + }, + ), provided_iris, )?; diff --git a/sysand/src/lib.rs b/sysand/src/lib.rs index 2d78ef103..d73f139a9 100644 --- a/sysand/src/lib.rs +++ b/sysand/src/lib.rs @@ -814,7 +814,7 @@ pub fn get_overrides, Policy: HTTPAuthentication>( for identifier in &config_project.identifiers { let mut projects = Vec::new(); for source in &config_project.sources { - projects.push(ProjectReference::new(AnyProject::try_from_source( + projects.push(ProjectReference::new(AnyProject::try_from_override_source( source.clone(), &project_root, auth_policy.clone(), diff --git a/sysand/tests/cli_build.rs b/sysand/tests/cli_build.rs index b7509ffde..6b19e4fc5 100644 --- a/sysand/tests/cli_build.rs +++ b/sysand/tests/cli_build.rs @@ -8,7 +8,10 @@ use std::io::{Read, Write}; use sysand::cli::KparCompressionMethodCli; use sysand_core::{ model::{InterchangeProjectChecksumRaw, KerMlChecksumAlg}, - project::{ProjectRead, local_kpar::LocalKParProject}, + project::{ + ProjectRead, + 
local_kpar::{KparInnerPath, LocalKParProject}, + }, }; // pub due to https://github.com/rust-lang/rust/issues/46379 @@ -43,7 +46,12 @@ fn project_build() -> Result<(), Box> { .stdout(predicate::str::contains("Name: test_build")) .stdout(predicate::str::contains("Version: 1.2.3")); - let kpar_project = LocalKParProject::new_guess_root(cwd.join("test_build.kpar"))?; + let kpar_project = LocalKParProject::new( + cwd.join("test_build.kpar"), + KparInnerPath::Guess, + None, + None, + ); let (Some(_), Some(meta)) = kpar_project.get_project()? else { panic!("failed to get built project info/meta"); @@ -114,7 +122,12 @@ fn project_build_path_usage() -> Result<(), Box> { .stdout(predicate::str::contains("Version: 1.2.3")) .stdout(predicate::str::contains(file_url_from_path(&cwd2))); - let kpar_project = LocalKParProject::new_guess_root(cwd1.join("test_build.kpar"))?; + let kpar_project = LocalKParProject::new( + cwd1.join("test_build.kpar"), + KparInnerPath::Guess, + None, + None, + ); let (Some(_), Some(_)) = kpar_project.get_project()? else { panic!("failed to get built project info/meta"); @@ -165,7 +178,6 @@ fn workspace_build() -> Result<(), Box> { out.assert().success(); for project_name in ["project1", "project2", "project3"] { - println!("W9: {}", project_name); let kpar_path = cwd .join("output") .join(format!("{}-1.2.3.kpar", project_name)); @@ -182,7 +194,7 @@ fn workspace_build() -> Result<(), Box> { .stdout(predicate::str::contains(format!("Name: {}", project_name))) .stdout(predicate::str::contains("Version: 1.2.3")); - let kpar_project = LocalKParProject::new_guess_root(kpar_path)?; + let kpar_project = LocalKParProject::new(kpar_path, KparInnerPath::Guess, None, None); let (Some(_), Some(meta)) = kpar_project.get_project()? 
else { panic!("failed to get built project info/meta"); @@ -251,7 +263,7 @@ fn workspace_build_with_metamodel() -> Result<(), Box> { kpar_path ); - let kpar_project = LocalKParProject::new_guess_root(kpar_path)?; + let kpar_project = LocalKParProject::new(kpar_path, KparInnerPath::Guess, None, None); let (Some(_), Some(meta)) = kpar_project.get_project()? else { panic!("failed to get built project info/meta"); }; @@ -309,7 +321,7 @@ fn workspace_build_with_unknown_metamodel() -> Result<(), Box Result<(), Box Result<(), Box) -> Result<(), Box Result<(), Box> { let file_url = file_url_from_path(&test_path); // auto path from `file` iri - let (_temp_dir, cwd, out) = run_sysand(["clone", &file_url, "-v"], None)?; + let (_temp_dir, cwd, out) = run_sysand(["clone", &file_url], None)?; out.assert() .success() diff --git a/sysand/tests/cli_env.rs b/sysand/tests/cli_env.rs index ede886671..dab6899b5 100644 --- a/sysand/tests/cli_env.rs +++ b/sysand/tests/cli_env.rs @@ -80,6 +80,7 @@ path = "lib/kpar.test_0.0.1" identifiers = [ "urn:kpar:test", ] +src_cksum = "69311f2c1fcc3cc8649461ff1961e17df46237399bad1596e099e8483865e3f4" "# ); @@ -208,6 +209,7 @@ path = "lib/127.0.0.1-test_lib_0.0.1" identifiers = [ "{}/test_lib.kpar", ] +kpar_cksum = "1838ad10a9c1fa46c74a92c68212e6fdcf6f6011a94ca5f16e343ea18a8e203b" "#, server.url() ) diff --git a/sysand/tests/cli_info.rs b/sysand/tests/cli_info.rs index 08f5d2cae..2df05ff2d 100644 --- a/sysand/tests/cli_info.rs +++ b/sysand/tests/cli_info.rs @@ -15,7 +15,6 @@ use predicates::prelude::*; // pub due to https://github.com/rust-lang/rust/issues/46379 mod common; pub use common::*; -use sysand_core::model::project_hash_hex; /// Register a `sysand-index-config.json` 404 mock on `server`. 
/// Configured index URLs go through the discovery step, which fetches this @@ -681,7 +680,9 @@ fn info_basic_local_kpar() -> Result<(), Box> { zip.finish().unwrap(); } - let (_, _, out) = run_sysand(["info", "--path", &zip_path.to_string_lossy()], None)?; + let (_, _, out) = run_sysand(["info", "--path", &zip_path.to_string_lossy(), "-v"], None)?; + println!("{}", str::from_utf8(&out.stdout).unwrap()); + println!("{}", str::from_utf8(&out.stdout).unwrap()); out.assert() .success() @@ -766,18 +767,12 @@ fn project_json_for(name: &str, version: &str) -> String { /// that hashes the body reproducible. const TEST_META_JSON_BODY: &str = r#"{"index":{},"created":"2026-01-01T00:00:00.000000000Z"}"#; -/// Build a `versions.json` body advertising a single entry with the correct -/// canonical `project_digest` for the supplied `.project.json` / `.meta.json` -/// bodies. The kpar digest is a placeholder because `sysand info` reads the +/// Build a `versions.json` body advertising a single entry. The kpar digest is a +/// placeholder because `sysand info` reads the /// per-version JSON directly and never downloads the archive. 
-fn versions_json_for(version: &str, project_body: &str, meta_body: &str) -> String { - use sysand_core::model::{InterchangeProjectInfoRaw, InterchangeProjectMetadataRaw}; - let info: InterchangeProjectInfoRaw = serde_json::from_str(project_body).unwrap(); - let meta: InterchangeProjectMetadataRaw = serde_json::from_str(meta_body).unwrap(); - let digest_hex = project_hash_hex(&info, &meta); +fn versions_json_for(version: &str) -> String { versions_json_body(&[versions_json_entry_body( version, - &digest_hex, 42, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", )]) @@ -791,7 +786,7 @@ fn info_basic_index_url() -> Result<(), Box> { let iri_dir = "/_iri/e837859ce90bb1917c2698a6d62caa5786f67662fd1e35eb320f6e9da96939fe"; let project_body = project_json_for("info_basic_index_url", "1.2.3"); - let versions_body = versions_json_for("1.2.3", &project_body, TEST_META_JSON_BODY); + let versions_body = versions_json_for("1.2.3"); let versions_mock = server .mock("GET", format!("{iri_dir}/versions.json").as_str()) @@ -887,8 +882,8 @@ fn info_multi_index_url_noauth() -> Result<(), Box> { let project_body = project_json_for("info_multi_index_url", "1.2.3"); let project_alt_body = project_json_for("info_multi_index_url_alt", "1.2.3"); - let versions_body = versions_json_for("1.2.3", &project_body, TEST_META_JSON_BODY); - let versions_alt_body = versions_json_for("1.2.3", &project_alt_body, TEST_META_JSON_BODY); + let versions_body = versions_json_for("1.2.3"); + let versions_alt_body = versions_json_for("1.2.3"); let versions_mock = server .mock("GET", format!("{iri_dir}/versions.json").as_str()) @@ -1049,8 +1044,8 @@ fn info_multi_index_url_auth() -> Result<(), Box> { let project_body = project_json_for("info_multi_index_url", "1.2.3"); let project_alt_body = project_json_for("info_multi_index_url_alt", "1.2.3"); - let versions_body = versions_json_for("1.2.3", &project_body, TEST_META_JSON_BODY); - let versions_alt_body = versions_json_for("1.2.3", 
&project_alt_body, TEST_META_JSON_BODY); + let versions_body = versions_json_for("1.2.3"); + let versions_alt_body = versions_json_for("1.2.3"); let versions_mock = server .mock("GET", format!("{iri_dir}/versions.json").as_str()) @@ -1252,8 +1247,8 @@ fn info_multi_index_url_config() -> Result<(), Box> { let project_body = project_json_for("info_multi_index_url_config", "1.2.3"); let project_alt_body = project_json_for("info_multi_index_url_config_alt", "1.2.3"); - let versions_body = versions_json_for("1.2.3", &project_body, TEST_META_JSON_BODY); - let versions_alt_body = versions_json_for("1.2.3", &project_alt_body, TEST_META_JSON_BODY); + let versions_body = versions_json_for("1.2.3"); + let versions_alt_body = versions_json_for("1.2.3"); let versions_mock = server .mock("GET", format!("{iri_dir}/versions.json").as_str()) diff --git a/sysand/tests/cli_lock.rs b/sysand/tests/cli_lock.rs index 4ff47d384..ef490dd5c 100644 --- a/sysand/tests/cli_lock.rs +++ b/sysand/tests/cli_lock.rs @@ -8,9 +8,10 @@ use mockito::{Mock, Server, ServerGuard}; use predicates::{prelude::*, str::contains}; use sysand_core::{ commands::lock::DEFAULT_LOCKFILE_NAME, + config::OverrideSource, env::{DEFAULT_ENV_NAME, local_directory::LocalDirectoryEnvironment}, lock::{Lock, Source}, - model::{InterchangeProjectInfoRaw, InterchangeProjectUsageRaw, project_hash_hex}, + model::{InterchangeProjectInfoRaw, InterchangeProjectUsageRaw}, purl::PKG_SYSAND_PREFIX, utils::sha256_lowercase_hex, }; @@ -75,7 +76,7 @@ fn lock_local_source() -> Result<(), Box> { indexes: vec![], projects: vec![sysand_core::config::ConfigProject { identifiers: vec!["urn:kpar:local_dep".to_string()], - sources: vec![sysand_core::lock::Source::LocalSrc { + sources: vec![OverrideSource::LocalSrc { src_path: cwd.join("local_dep").as_str().into(), }], }], @@ -125,7 +126,7 @@ fn lock_std_lib() -> Result<(), Box> { indexes: vec![], projects: vec![sysand_core::config::ConfigProject { identifiers: 
vec!["urn:kpar:local_dep".to_string()], - sources: vec![sysand_core::lock::Source::LocalSrc { + sources: vec![OverrideSource::LocalSrc { src_path: cwd.join("local_dep").as_str().into(), }], }], @@ -314,7 +315,7 @@ fn lock_basic_http_deps() -> Result<(), Box> { let project_names: Vec<_> = projects .iter() .cloned() - .filter_map(|project| project.name) + .map(|project| project.name) .collect(); assert!(project_names.contains(&"lock_basic_http_deps".to_string())); @@ -399,7 +400,6 @@ fn lock_and_sync_against_mock_index() -> Result<(), Box> let kpar_sha256_hex = sha256_lowercase_hex(&kpar_bytes); // No `meta.checksum` entries → canonical digest == raw digest for this // fixture; see the docstring on `build_index_kpar_bytes`. - let project_digest_hex = project_hash_hex(&info, &meta); let kpar_size = kpar_bytes.len(); // `sysand lock` targets a specific IRI via `versions_async`; it must not @@ -428,7 +428,6 @@ fn lock_and_sync_against_mock_index() -> Result<(), Box> .with_header("content-type", "application/json") .with_body(versions_json_body(&[versions_json_entry_body( "0.1.0", - &project_digest_hex, kpar_size, &kpar_sha256_hex, )])) @@ -487,20 +486,16 @@ fn lock_and_sync_against_mock_index() -> Result<(), Box> let dep = projects .iter() - .find(|p| p.name.as_deref() == Some("dep")) + .find(|p| p.name == "dep") .expect("locked dep should carry name from versions.json"); - assert_eq!( - dep.checksum, project_digest_hex, - "lockfile must record the advertised digest verbatim" - ); assert!( dep.sources.iter().any(|source| { matches!( source, Source::IndexKpar { - index_kpar_digest, + kpar_digest, .. 
- } if index_kpar_digest == &kpar_sha256_hex + } if kpar_digest == &kpar_sha256_hex ) }), "lockfile must retain the advertised kpar_digest for sync-time verification" @@ -542,7 +537,6 @@ fn sync_hard_fails_on_kpar_digest_drift_from_lockfile() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box> { std::fs::write( cwd.join(DEFAULT_LOCKFILE_NAME), - r#"lock_version = "0.4" + r#"lock_version = "0.5" [[project]] name = "sync_to_local" version = "1.2.3" identifiers = ["urn:kpar:sync_to_local"] -checksum = "4b3adfb7bea950c7c598093c50323fa2ea9f816cb4b10cd299b205bfd4b47a5c" sources = [ - { src_path = "lib/sync_to_local" }, + { src_path = "lib/sync_to_local", checksum = "4b3adfb7bea950c7c598093c50323fa2ea9f816cb4b10cd299b205bfd4b47a5c" }, ] "#, )?; @@ -138,6 +137,7 @@ path = "lib/kpar.sync_to_local_1.2.3" identifiers = [ "urn:kpar:sync_to_local", ] +src_cksum = "4b3adfb7bea950c7c598093c50323fa2ea9f816cb4b10cd299b205bfd4b47a5c" "# ) ); @@ -184,15 +184,14 @@ fn sync_to_remote() -> Result<(), Box> { std::fs::write( cwd.join(DEFAULT_LOCKFILE_NAME), format!( - r#"lock_version = "0.4" + r#"lock_version = "0.5" [[project]] name = "sync_to_remote" version = "1.2.3" identifiers = ["urn:kpar:sync_to_remote"] -checksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" sources = [ - {{ remote_src = "{}" }}, + {{ remote_src = "{}", checksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" }}, ] "#, &server.url() @@ -226,6 +225,7 @@ path = "lib/kpar.sync_to_remote_1.2.3" identifiers = [ "urn:kpar:sync_to_remote", ] +src_cksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" "# ) ); @@ -299,15 +299,14 @@ fn sync_to_remote_auth() -> Result<(), Box> { std::fs::write( cwd.join(DEFAULT_LOCKFILE_NAME), format!( - r#"lock_version = "0.4" + r#"lock_version = "0.5" [[project]] name = "sync_to_remote" version = "1.2.3" identifiers = ["urn:kpar:sync_to_remote"] -checksum = 
"39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" sources = [ - {{ remote_src = "{}" }}, + {{ remote_src = "{}", checksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" }}, ] "#, &server.url() @@ -405,15 +404,14 @@ fn sync_to_remote_incorrect_auth() -> Result<(), Box> { std::fs::write( cwd.join(DEFAULT_LOCKFILE_NAME), format!( - r#"lock_version = "0.4" + r#"lock_version = "0.5" [[project]] name = "sync_to_remote" version = "1.2.3" identifiers = ["urn:kpar:sync_to_remote"] -checksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" sources = [ - {{ remote_src = "{}" }}, + {{ remote_src = "{}", checksum = "39f49107a084ab27624ee78d4d37f87a1f7606a2b5d242cdcd9374cf20ab1895" }}, ] "#, &server.url() diff --git a/sysand/tests/common/mod.rs b/sysand/tests/common/mod.rs index 540a3d101..6946a572d 100644 --- a/sysand/tests/common/mod.rs +++ b/sysand/tests/common/mod.rs @@ -60,14 +60,9 @@ pub fn fixture_path(name: &str) -> Utf8PathBuf { } /// Build a single `versions.json` entry fixture. -pub fn versions_json_entry_body( - version: &str, - project_digest_hex: &str, - kpar_size: usize, - kpar_digest_hex: &str, -) -> String { +pub fn versions_json_entry_body(version: &str, kpar_size: usize, kpar_digest_hex: &str) -> String { format!( - r#"{{"version":"{version}","usage":[],"project_digest":"sha256:{project_digest_hex}","kpar_size":{kpar_size},"kpar_digest":"sha256:{kpar_digest_hex}"}}"# + r#"{{"version":"{version}","usage":[],"kpar_size":{kpar_size},"kpar_digest":"sha256:{kpar_digest_hex}"}}"# ) }