diff --git a/pyproject.toml b/pyproject.toml index f10aa8b..6cbdccd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "normality==2.6.1", # Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get) "psycopg2-binary>=2.9.10; python_version >= '3.13'", + "tomli; python_version < '3.11'", "tqdm", ] diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 591a9b6..b0a742d 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -24,6 +24,7 @@ r'package\.json$', r'setup\.py$', r'setup\.cfg$', + r'pyproject\.toml$', r'.*\.podspec$', r'Cargo\.toml$', r'huggingface_hub_metadata\.json$', diff --git a/src/fosslight_source/run_manifest_extractor.py b/src/fosslight_source/run_manifest_extractor.py index b9efd2f..9c03fb1 100644 --- a/src/fosslight_source/run_manifest_extractor.py +++ b/src/fosslight_source/run_manifest_extractor.py @@ -125,6 +125,63 @@ def get_licenses_from_setup_py(file_path: str) -> list[str]: return _split_spdx_expression(value) +def get_licenses_from_pyproject_toml(file_path: str) -> list[str]: + try: + data = None + try: + import tomllib as toml_loader # Python 3.11+ + with open(file_path, 'rb') as f: + data = toml_loader.load(f) + except Exception: + try: + import tomli as toml_loader # Backport + with open(file_path, 'rb') as f: + data = toml_loader.load(f) + except Exception: + data = None + + if isinstance(data, dict): + project_tbl = data.get('project') or {} + license_value = project_tbl.get('license') + if isinstance(license_value, str) and license_value.strip(): + return [license_value.strip()] + if isinstance(license_value, dict): + text_value = license_value.get('text') + if isinstance(text_value, str) and text_value.strip(): + return [text_value.strip()] + if license_value.get('file'): + return [] + except Exception as ex: + logger.info(f"Failed to parse pyproject.toml via toml parser for {file_path}: {ex}") + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + project_match = re.search(r'^\s*\[project\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL) + if not project_match: + return [] + block = project_match.group(1) + m = re.search(r'^\s*license\s*=\s*(?P"""|\'\'\'|"|\')(?P.*?)(?P=q)', block, + flags=re.MULTILINE | re.DOTALL) + if m: + val = m.group('val').strip() + if val: + return [val] + m2 = re.search(r'^\s*license\s*=\s*\{[^}]*?\btext\s*=\s*(?P"""|\'\'\'|"|\')(?P.*?)(?P=q)', + block, flags=re.MULTILINE | re.DOTALL) + if m2: + val = m2.group('val').strip() + if val: + return [val] + m3 = re.search(r'^\s*license\s*=\s*\{[^}]*?\bfile\s*=', block, flags=re.MULTILINE | re.DOTALL) + if m3: + return [] + except Exception as ex: + logger.info(f"Failed to parse pyproject.toml {file_path}: {ex}") + return [] + return [] + + def get_licenses_from_podspec(file_path: str) -> list[str]: try: with open(file_path, 'r', encoding='utf-8') as f: @@ -278,6 +335,12 @@ def get_manifest_licenses(file_path: str) -> list[str]: except Exception as ex: logger.info(f"Failed to extract license from setup.py {file_path}: {ex}") return [] + elif os.path.basename(file_path).lower() == 'pyproject.toml': + try: + return get_licenses_from_pyproject_toml(file_path) + except Exception as ex: + logger.info(f"Failed to extract license from pyproject.toml {file_path}: {ex}") + return [] elif os.path.basename(file_path).lower().endswith('.podspec'): try: return get_licenses_from_podspec(file_path)