From 296296b54de8c1d7b4b1d456bbe9778542004ffb Mon Sep 17 00:00:00 2001
From: Soim Kim
Date: Wed, 22 Apr 2026 08:22:19 +0900
Subject: [PATCH] fix(download): parse crates.io version from API/web URL for oss_version

---
Notes (review):
    What this patch changes, as visible in the hunks below:

    - download.py gains two module-level patterns.
      _CRATES_IO_API_VERSION captures the path segment that precedes a
      trailing "/download" under "/api/v1/crates/NAME/".
      _CRATES_IO_WEB_VERSION captures the final segment of a path that
      ends in "/crates/NAME/SEGMENT".
    - _oss_version_hint_from_wget_link() now searches both patterns in
      urllib.parse.urlparse(link).path and returns the captured group
      before it reaches the existing loop over (link, downloaded_file).
    - Inside that loop, a stem that is empty or equals "download"
      (case-insensitive) is skipped with `continue`, so the next source
      is tried instead of a hint being derived from that stem.
    - tests/test_download_version_hint.py is a new file with parametrized
      cases for the hint function and for
      clarified_version_from_oss_version().

    NOTE(review): both patterns are matched against the URL path only;
    the host is never inspected, and the web pattern accepts any
    non-empty final segment. A link such as
    https://crates.io/crates/serde/versions would therefore return
    "versions" as the hint. Confirm whether that is acceptable or whether
    the capture should be limited to version-like strings.

    NOTE(review): original lines 372-376 of download.py lie between the
    second and third hunks and are not shown, so how `base` is derived
    from `src` cannot be checked from this patch alone.

 src/fosslight_util/download.py      | 24 +++++++-
 tests/test_download_version_hint.py | 95 +++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_download_version_hint.py

diff --git a/src/fosslight_util/download.py b/src/fosslight_util/download.py
index db3a0a2..3baf2ba 100755
--- a/src/fosslight_util/download.py
+++ b/src/fosslight_util/download.py
@@ -311,6 +311,16 @@ def get_remote_refs(git_url: str):
     r'-((?:\d+\.)+\d+(?:-[0-9A-Za-z][0-9A-Za-z.-]*)?)$',
     re.IGNORECASE,
 )
+# crates.io: API download URL ends with .../name/VERSION/download (basename must not be used as version)
+_CRATES_IO_API_VERSION = re.compile(
+    r'/api/v1/crates/[^/]+/([^/]+)/download/?$',
+    re.IGNORECASE,
+)
+# crates.io: .../crates/CRATE_NAME/VERSION
+_CRATES_IO_WEB_VERSION = re.compile(
+    r'/crates/[^/]+/([^/?#]+)/?(?:$|[?#])',
+    re.IGNORECASE,
+)
 
 
 def clarified_version_from_oss_version(oss_version: str) -> str:
@@ -366,6 +376,15 @@ def _version_string_from_archive_stem(stem: str) -> str:
 
 def _oss_version_hint_from_wget_link(link: str, downloaded_file: str) -> str:
     """Version string from last URL path segment or saved filename for clarified_version."""
+    if link:
+        path = urllib.parse.urlparse(link).path or ""
+        m = _CRATES_IO_API_VERSION.search(path)
+        if m:
+            return m.group(1)
+        m = _CRATES_IO_WEB_VERSION.search(path)
+        if m:
+            return m.group(1)
+
     for src in (link, downloaded_file):
         if not src:
             continue
@@ -377,8 +396,9 @@ def _oss_version_hint_from_wget_link(link: str, downloaded_file: str) -> str:
         if not base:
             continue
         stem = _strip_known_archive_suffixes(base)
-        if stem:
-            return _version_string_from_archive_stem(stem)
+        if not stem or stem.lower() == "download":
+            continue
+        return _version_string_from_archive_stem(stem)
     return ""
 
 
diff --git a/tests/test_download_version_hint.py b/tests/test_download_version_hint.py
new file mode 100644
index 0000000..81e2cee
--- /dev/null
+++ b/tests/test_download_version_hint.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2026 LG Electronics Inc.
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for wget-path oss_version / clarified_version hints from URL and filename."""
+
+import pytest
+
+from fosslight_util.download import (
+    clarified_version_from_oss_version,
+    _oss_version_hint_from_wget_link,
+)
+
+
+@pytest.mark.parametrize(
+    "link,downloaded_file,expected_hint",
+    [
+        # crates.io (API URL must not use path segment "download" as version)
+        (
+            "https://crates.io/api/v1/crates/transpose/0.2.3/download",
+            "/tmp/transpose-0.2.3.crate",
+            "0.2.3",
+        ),
+        (
+            "https://crates.io/crates/transpose/0.2.3",
+            "",
+            "0.2.3",
+        ),
+        # GNU mirror–style tarball URL
+        (
+            "https://mirrors.ustc.edu.cn/gnu/bison/bison-3.8.2.tar.xz",
+            "/dl/bison-3.8.2.tar.xz",
+            "3.8.2",
+        ),
+        # GitHub release archive (basename is vX.Y.Z.tar.gz; hint keeps leading v)
+        (
+            "https://github.com/Kitware/CMake/archive/refs/tags/v3.28.3.tar.gz",
+            "/t/v3.28.3.tar.gz",
+            "v3.28.3",
+        ),
+        # PyPI file URL (basename package-version.tar.gz)
+        (
+            "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz",
+            "/t/requests-2.31.0.tar.gz",
+            "2.31.0",
+        ),
+        # X.Org individual lib
+        (
+            "https://www.x.org/releases/individual/lib/libXdmcp-1.1.4.tar.xz",
+            "/t/libXdmcp-1.1.4.tar.xz",
+            "1.1.4",
+        ),
+        # npm registry tarball (often ends with package-version.tgz)
+        (
+            "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
+            "/t/lodash-4.17.21.tgz",
+            "4.17.21",
+        ),
+        # Generic: path ends with /download but real version only in filename
+        (
+            "https://example.com/releases/download",
+            "/build/myproject-2.0.0.tar.xz",
+            "2.0.0",
+        ),
+        # crates.io API takes precedence over misleading local path
+        (
+            "https://crates.io/api/v1/crates/serde/1.0.190/download",
+            "/wrong/path.txt",
+            "1.0.190",
+        ),
+    ],
+)
+def test_oss_version_hint_from_wget_link(link, downloaded_file, expected_hint):
+    got = _oss_version_hint_from_wget_link(link, downloaded_file)
+    assert got == expected_hint, f"hint got {got!r} expected {expected_hint!r}"
+
+
+@pytest.mark.parametrize(
+    "hint,expected_clarified",
+    [
+        ("0.2.3", "0.2.3"),
+        ("3.8.2", "3.8.2"),
+        ("2.31.0", "2.31.0"),
+        ("4.17.21", "4.17.21"),
+        ("1.1.4", "1.1.4"),
+        ("v3.28.3", "3.28.3"),
+    ],
+)
+def test_clarified_follows_hint_for_semver(hint, expected_clarified):
+    assert clarified_version_from_oss_version(hint) == expected_clarified
+
+
+def test_github_archive_hint_then_clarified():
+    link = "https://github.com/Kitware/CMake/archive/refs/tags/v3.28.3.tar.gz"
+    hint = _oss_version_hint_from_wget_link(link, "/t/v3.28.3.tar.gz")
+    assert hint == "v3.28.3"
+    assert clarified_version_from_oss_version(hint) == "3.28.3"