From 6ef8f6701d402b12ffffc1325d19e5956532eb07 Mon Sep 17 00:00:00 2001 From: Jonathan LEGRAND Date: Fri, 30 Jan 2026 15:07:43 +0100 Subject: [PATCH 1/6] Add environment-based FSDB authentication and CLI support for login - Updated `task.py` to handle FSDB authentication using `ROMI_DB_USER` and `ROMI_DB_PASSWORD` environment variables. - Added CLI arguments to allow passing `--db-user` and `--db-password` for FSDB login in `romi_run_task.py`. - Ensured that database credentials from CLI or environment variables are passed to the runtime environment in `run_task`. - Adjusted imports in `romi_run_task.py` for consistency and readability. --- src/romitask/cli/romi_run_task.py | 27 +++++++++++++++++++++------ src/romitask/task.py | 9 ++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/romitask/cli/romi_run_task.py b/src/romitask/cli/romi_run_task.py index 80e9d6d..bfaa726 100755 --- a/src/romitask/cli/romi_run_task.py +++ b/src/romitask/cli/romi_run_task.py @@ -64,12 +64,11 @@ from pathlib import Path import toml - from romitask import PIPE_TOML from romitask import SCAN_TOML from romitask.log import LOG_LEVELS -from romitask.log import get_logger from romitask.log import get_log_filename +from romitask.log import get_logger from romitask.log import get_logging_config from romitask.modules import DATA_CREATION_TASK from romitask.modules import MODULES @@ -111,6 +110,12 @@ def parsing(): parser.add_argument('--dry-run', dest='dry_run', action="store_true", help="Use this to test the command-line by doing everything except calling the task(s).") + # DB Authentication + parser.add_argument('--db-user', dest='db_user', type=str, default=None, + help="Username for FSDB login.") + parser.add_argument('--db-password', dest='db_password', type=str, default=None, + help="Password for FSDB login.") + # Luigi related arguments: luigi = parser.add_argument_group("luigi options") luigi.add_argument('--luigicmd', dest='luigicmd', type=str, default=LUIGI_CMD, @@ -331,9 +336,11 @@ def create_backup_cfg(path, cfgname, config): # The following parameters control Luigi scheduler behavior. # https://luigi.readthedocs.io/en/stable/configuration.html#scheduler config["scheduler"] = { - "retry_count": 1, # Number of times a task can fail within `disable_window` before the scheduler will automatically disable it. + "retry_count": 1, + # Number of times a task can fail within `disable_window` before the scheduler will automatically disable it. "retry_delay": 1, # Number of seconds to wait after a task failure to mark it pending again. - "disable_window": 3600, # Number of seconds during which `retry_count` failures must occur in order for an automatic disable by the scheduler. + "disable_window": 3600, + # Number of seconds during which `retry_count` failures must occur in order for an automatic disable by the scheduler. } # The following return codes are the recommended exit codes for Luigi. @@ -529,6 +536,12 @@ def run_task(dataset_path, task, config, **kwargs): # - Define environment variables to provide the logging TOML file path to `luigi`: env = {"LUIGI_CONFIG_PARSER": "toml", "LUIGI_CONFIG_PATH": file_path} env.update({'PYOPENCL_CTX': '0'}) # default choice + + # Add database credentials to environment if provided + if kwargs.get('db_user') and kwargs.get('db_password'): + env['ROMI_DB_USER'] = kwargs['db_user'] + env['ROMI_DB_PASSWORD'] = kwargs['db_password'] + # - Define the luigi command to run: # "--ScanConfiguration-scan args.dataset_path" set the value of `scan` for the `ScanConfiguration` Config class # https://luigi.readthedocs.io/en/stable/parameters.html#setting-parameter-value-for-other-classes @@ -624,14 +637,16 @@ def _dataset_path_error(args): try: run_task(dataset_path, args.task, args.config, log_level=args.log_level, luigicmd=args.luigicmd, module=args.module, - local_scheduler=args.ls, dry_run=args.dry_run) + local_scheduler=args.ls, dry_run=args.dry_run, + db_user=args.db_user, db_password=args.db_password) except Exception as e: print(e) else: ## For the folder: run_task(folders, args.task, args.config, log_level=args.log_level, luigicmd=args.luigicmd, module=args.module, - local_scheduler=args.ls, dry_run=args.dry_run) + local_scheduler=args.ls, dry_run=args.dry_run, + db_user=args.db_user, db_password=args.db_password) if __name__ == '__main__': diff --git a/src/romitask/task.py b/src/romitask/task.py index 839e313..8488cf1 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -180,7 +180,7 @@ def parse(self, scan_path): If the given scan dataset id does not exist, it is created. """ - from plantdb.commons.fsdb import FSDB + from plantdb.commons.fsdb.core import FSDB global db path = scan_path.rstrip('/') path = path.split('/') @@ -192,6 +192,13 @@ def parse(self, scan_path): if db is None: # TODO: cannot change DB during run... db = FSDB(db_path) db.connect() + # Attempt login if credentials are provided in environment variables + db_user = os.environ.get('ROMI_DB_USER') + db_pass = os.environ.get('ROMI_DB_PASSWORD') + if db_user and db_pass: + logger.info(f"Logging in to FSDB as user '{db_user}'...") + db.login(db_user, db_pass) + # Get the scan dataset object or create one & return it if db.scan_exists(scan_id): scan = db.get_scan(scan_id) From 7f9b463679a9a1a8965caf2347560d86df009185 Mon Sep 17 00:00:00 2001 From: Jonathan LEGRAND Date: Mon, 9 Feb 2026 10:53:22 +0100 Subject: [PATCH 2/6] Update FSDB import paths across the codebase Description: - Replaced all occurrences of `from plantdb.commons.fsdb import FSDB` with `from plantdb.commons.fsdb.core import FSDB` in multiple files. - Adjusted dummy FSDB imports in test and task files to use `from plantdb.commons.test_database import dummy_db`. - Updated Jupyter notebooks to reflect the new import paths. - Ensured compatibility of downstream modules, scripts, and tasks with updated imports. --- src/romitask/runner.py | 4 ++-- src/romitask/task.py | 10 +++++----- src/romitask/watch.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/romitask/runner.py b/src/romitask/runner.py index 09fea66..dafa6dc 100644 --- a/src/romitask/runner.py +++ b/src/romitask/runner.py @@ -39,7 +39,7 @@ Usage Examples -------------- ->>> from plantdb.commons.fsdb import FSDB +>>> from plantdb.commons.fsdb.core import FSDB >>> from romitask.runner import DBRunner >>> from romitask.task import DummyTask >>> db = FSDB('path/to/database') @@ -77,7 +77,7 @@ class DBRunner(object): Examples -------- - >>> from plantdb.commons.fsdb import FSDB + >>> from plantdb.commons.fsdb.core import FSDB >>> from romitask.runner import DBRunner >>> from romitask.task import DummyTask >>> db = FSDB('path/to/database') diff --git a/src/romitask/task.py b/src/romitask/task.py index 8488cf1..008619b 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -76,7 +76,7 @@ import luigi from tqdm import tqdm -from plantdb.commons.fsdb import FSDB +from plantdb.commons.fsdb.core import FSDB from plantdb.commons.fsdb.validation import _is_fsdb from plantdb.commons.io import read_json from plantdb.commons.io import write_json @@ -237,7 +237,7 @@ class FSDBConfiguration(luigi.Config): Examples -------- >>> from romitask.task import FSDBConfiguration - >>> from plantdb.commons.fsdb import dummy_db + >>> from plantdb.commons.test_database import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() @@ -259,7 +259,7 @@ class ScanConfiguration(luigi.Config): Examples -------- >>> from romitask.task import ScanConfiguration - >>> from plantdb.commons.fsdb import dummy_db + >>> from plantdb.commons.test_database import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() @@ -294,8 +294,8 @@ class FilesetTarget(luigi.Target): Examples -------- >>> from romitask.task import FilesetTarget - >>> from plantdb.commons.fsdb import FSDB - >>> from plantdb.commons.fsdb import dummy_db + >>> from plantdb.commons.fsdb.core import FSDB + >>> from plantdb.commons.test_database import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() diff --git a/src/romitask/watch.py b/src/romitask/watch.py index 51c68d2..03a124b 100644 --- a/src/romitask/watch.py +++ b/src/romitask/watch.py @@ -39,7 +39,7 @@ Usage Examples -------------- >>> from romitask.watch import FSDBWatch ->>> from plantdb.commons.fsdb import FSDB +>>> from plantdb.commons.fsdb.core import FSDB >>> from my_tasks import ProcessingTask >>> # Initialize database and watch @@ -91,7 +91,7 @@ class FSDBWatcher(): Examples -------- - >>> from plantdb.commons.fsdb import FSDB + >>> from plantdb.commons.fsdb.core import FSDB >>> db = FSDB("path/to/database") >>> tasks = [MyTask1(), MyTask2()] >>> config = {"param1": "value1", "param2": "value2"} @@ -185,7 +185,7 @@ class FSDBEventHandler(FileSystemEventHandler): Examples -------- - >>> from plantdb.commons.fsdb import FSDB + >>> from plantdb.commons.fsdb.core import FSDB >>> from romitask.task import DummyTask >>> >>> # Create database and handler From 4294bfc72bd4614b0b0627dc097608ac871acbd1 Mon Sep 17 00:00:00 2001 From: Jonathan LEGRAND Date: Mon, 9 Feb 2026 14:57:25 +0100 Subject: [PATCH 3/6] Handle existing fileset with `FilesetExistsError` instead of generic `ValueError` - Import `FilesetExistsError` from `plantdb.commons.fsdb.exceptions` in `romitask/src/romitask/task.py`. - Update the `try/except` block when creating a fileset to catch `FilesetExistsError` and fallback to `fs_target.get()` for existing filesets. --- src/romitask/task.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/romitask/task.py b/src/romitask/task.py index 008619b..8462e4e 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -74,6 +74,7 @@ from shutil import rmtree import luigi +from plantdb.commons.fsdb.exceptions import FilesetExistsError from tqdm import tqdm from plantdb.commons.fsdb.core import FSDB @@ -447,7 +448,7 @@ def output(self): try: fs = fs_target.create() # create the fileset - except ValueError: + except FilesetExistsError: fs = fs_target.get() # get the fileset # Export all the task parameters as a dictionary: params = dict(self.to_str_params(only_significant=False, only_public=False)) From 2e775fdba575c4fbb2cd7c2379769cd70e52f4d2 Mon Sep 17 00:00:00 2001 From: Arthur Luciani Date: Thu, 12 Mar 2026 13:40:33 +0100 Subject: [PATCH 4/6] Streamlined FSDB login logic to reflect evolutions of plantdb and updated imports Signed-off-by: Arthur Luciani --- src/romitask/task.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/romitask/task.py b/src/romitask/task.py index 8462e4e..30b6862 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -74,10 +74,10 @@ from shutil import rmtree import luigi -from plantdb.commons.fsdb.exceptions import FilesetExistsError from tqdm import tqdm from plantdb.commons.fsdb.core import FSDB +from plantdb.commons.fsdb.exceptions import FilesetExistsError from plantdb.commons.fsdb.validation import _is_fsdb from plantdb.commons.io import read_json from plantdb.commons.io import write_json @@ -193,13 +193,7 @@ def parse(self, scan_path): if db is None: # TODO: cannot change DB during run... db = FSDB(db_path) db.connect() - # Attempt login if credentials are provided in environment variables - db_user = os.environ.get('ROMI_DB_USER') - db_pass = os.environ.get('ROMI_DB_PASSWORD') - if db_user and db_pass: - logger.info(f"Logging in to FSDB as user '{db_user}'...") - db.login(db_user, db_pass) - + db.login(username=os.getenv("ROMI_DB_USER"), password=os.getenv("ROMI_DB_PASSWORD")) # Get the scan dataset object or create one & return it if db.scan_exists(scan_id): scan = db.get_scan(scan_id) @@ -238,7 +232,7 @@ class FSDBConfiguration(luigi.Config): Examples -------- >>> from romitask.task import FSDBConfiguration - >>> from plantdb.commons.test_database import dummy_db + >>> from plantdb.commons.fsdb import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() @@ -260,7 +254,7 @@ class ScanConfiguration(luigi.Config): Examples -------- >>> from romitask.task import ScanConfiguration - >>> from plantdb.commons.test_database import dummy_db + >>> from plantdb.commons.fsdb import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() @@ -295,8 +289,8 @@ class FilesetTarget(luigi.Target): Examples -------- >>> from romitask.task import FilesetTarget - >>> from plantdb.commons.fsdb.core import FSDB - >>> from plantdb.commons.test_database import dummy_db + >>> from plantdb.commons.fsdb import FSDB + >>> from plantdb.commons.fsdb import dummy_db >>> # - First, let's create a dummy FSDB database to play with: >>> db = dummy_db() >>> db.connect() From 4cfb3ef54eddb6ccaa1fcea1db93a0a31f8cf78e Mon Sep 17 00:00:00 2001 From: Jonathan LEGRAND Date: Mon, 16 Mar 2026 10:08:04 +0100 Subject: [PATCH 5/6] Refactor database connection and import paths - Replaced `db.connect(unsafe=True)` with `db.connect()` across multiple files for safer database connections. - Updated import path for `compute_fileset_matches` to use `plantdb.server.core.utils` instead of `plantdb.server.rest_api`. - Fixed incorrect use of `sparse_pcd` for dense point cloud arrays in `plant3dvision/colmap.py`. --- src/romitask/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/romitask/task.py b/src/romitask/task.py index 8462e4e..19be70a 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -119,7 +119,7 @@ def parse(self, db_path): if db_path.exists() and db_path.is_dir() and _is_fsdb(db_path): db = FSDB(db_path) - db.connect(unsafe=True) + db.connect() return db else: logger.error(f"Could not parse FSDB from string: {db_path}") From 18e24e28730e1bb3884e9b56be9e0034c07376d1 Mon Sep 17 00:00:00 2001 From: Jonathan LEGRAND Date: Wed, 18 Mar 2026 17:32:30 +0100 Subject: [PATCH 6/6] Refactor `Clean` task in `romitask/task.py` - Added `keep_pipeline_cfg` parameter to control removal of the `pipeline.toml` backup. - Introduced helper methods (`_merge_metadata_keep_list`, `_filesets_to_remove`, `_delete_filesets`, `_clean_images_metadata`, `_clean_orphan_json_files`, `_clean_orphan_directories`, `_remove_pipeline_backup`, `_confirm`) to modularize cleaning logic and simplify testing. - Replaced inline confirmation handling with `ask_confirmation` from `romitask.utils`. - Updated imports to include `Iterable` and the new `ask_confirmation` helper. - Fixed type annotation for `keep_metadata` (`luigi.listParameter`). - Streamlined metadata whitelist creation using the new helper. - Simplified fileset deletion and orphan metadata cleanup using the new static helpers. - Added optional pipeline backup removal controlled by `keep_pipeline_cfg`. - Improved logging messages and docstrings throughout the cleaning workflow. --- src/romitask/task.py | 256 +++++++++++++++++++++++++++++------------- src/romitask/utils.py | 21 ++++ 2 files changed, 197 insertions(+), 80 deletions(-) diff --git a/src/romitask/task.py b/src/romitask/task.py index f44d993..890df48 100644 --- a/src/romitask/task.py +++ b/src/romitask/task.py @@ -72,6 +72,7 @@ from json import JSONDecodeError from pathlib import Path from shutil import rmtree +from typing import Iterable import luigi from tqdm import tqdm @@ -82,6 +83,7 @@ from plantdb.commons.io import read_json from plantdb.commons.io import write_json from romitask.log import get_logger +from romitask.utils import ask_confirmation logger = get_logger(__name__) db = None @@ -892,7 +894,7 @@ def run(self): return -#: List of original image metadata (to keep in Clean task): +#: list of original image metadata (to keep in Clean task): #: * "pose" is added by the PlantImager or VirtualPlantImager #: * "approximate_pose" is added by the PlantImager #: * "channel" is added by the PlantImager or VirtualPlantImager @@ -913,8 +915,8 @@ class Clean(RomiTask): no_confirm : luigi.BoolParameter Do not ask for confirmation of the cleaning in the command prompt. Default to ``False``. - keep_metadata : luigi.ListParameter - List of metadata to keep (retain) in the `images` fileset metadata. + keep_metadata : luigi.listParameter + list of metadata to keep (retain) in the `images` fileset metadata. Default to ``IMAGES_MD``. See Also @@ -925,6 +927,7 @@ class Clean(RomiTask): upstream_task = None # override default attribute from ``RomiTask`` no_confirm = luigi.BoolParameter(default=False) keep_metadata = luigi.ListParameter(default=[]) + keep_pipeline_cfg = luigi.BoolParameter(default=True) def requires(self): """No requirements here.""" @@ -939,98 +942,191 @@ def complete(self): return False # there is no output @staticmethod - def confirm(c, default='n'): - """Handle keyboard input from user.""" - valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} - if c == '': - return valid[default] + def _merge_metadata_keep_list(user_keep: Iterable[str]) -> set[str]: + """Merge the user‑provided metadata keys with the default set required for image files. + + Parameters + ---------- + user_keep: + Iterable of metadata keys supplied via the ``keep_metadata`` task + parameter. + + Returns + ------- + set[str] + The union of ``user_keep`` and :data:`~romitask.task.IMAGES_MD`. + """ + return set(user_keep).union(IMAGES_MD) + + @staticmethod + def _filesets_to_remove(scan: "Scan", exclude: set[str]) -> list[str]: + """Return the list of fileset IDs that should be deleted. + + Parameters + ---------- + scan: + The active :class:`plantdb.commons.fsdb.Scan` instance. + exclude: + Set of fileset IDs that must be kept (e.g. ``{"images"}``). + + Returns + ------- + list[str] + Fileset IDs that are *not* in ``exclude`` and do not start with + ``"VirtualPlant"``. + """ + return [ + fs.id + for fs in scan.get_filesets() + if fs.id not in exclude and not fs.id.startswith("VirtualPlant") + ] + + @staticmethod + def _delete_filesets(scan: "Scan", fileset_ids: Iterable[str]) -> None: + """Delete the given filesets from the scan, logging each operation. + + Parameters + ---------- + scan: + The active scan. + fileset_ids: + Iterable of fileset IDs to delete. + """ + for fs_id in tqdm(fileset_ids, unit="fileset"): + logger.info("Deleting fileset %r...", fs_id) + scan.delete_fileset(fs_id) + + @staticmethod + def _clean_images_metadata(images_fs: "Fileset", keep_keys: set[str]) -> None: + """Keep only the whitelisted metadata keys for every file in the ``images`` fileset. + + Parameters + ---------- + images_fs: + The ``images`` fileset. + keep_keys: + Set of metadata keys that must be retained. + """ + logger.info("Cleaning metadata of the 'images' fileset...") + for file_ in tqdm(images_fs.get_files(), unit="file"): + original_md = file_.get_metadata() + cleaned_md = {k: v for k, v in original_md.items() if k in keep_keys} + # Clear existing metadata before writing the cleaned version + file_.metadata = {} + file_.set_metadata(cleaned_md) + + @staticmethod + def _clean_orphan_json_files(metadata_dir: Path) -> None: + """Remove JSON files that are not ``images.json`` or ``metadata.json``. + + Parameters + ---------- + metadata_dir: + Path to the ``metadata`` directory of the scan. + """ + pattern = str(metadata_dir / "*.json") + json_files = [ + f for f in glob.glob(pattern) if Path(f).name not in {"images.json", "metadata.json"} + ] + + if json_files: + logger.info("Found %d orphan JSON metadata files.", len(json_files)) + + for file_path in json_files: + try: + Path(file_path).unlink(missing_ok=False) + logger.warning("Deleted orphan JSON file: %s", file_path) + except FileNotFoundError: + logger.error("Failed to delete JSON file (not found): %s", file_path) + + @staticmethod + def _clean_orphan_directories(metadata_dir: Path) -> None: + """Remove empty metadata sub‑directories that are not the ``images`` folder. + + Parameters + ---------- + metadata_dir: + Path to the ``metadata`` directory of the scan. + """ + subdirs = { + d for d in os.listdir(metadata_dir) if (metadata_dir / d).is_dir() + } - {"images"} + + if subdirs: + logger.info("Found %d orphan metadata directories.", len(subdirs)) + + for subdir in subdirs: + dir_path = metadata_dir / subdir + try: + rmtree(dir_path, ignore_errors=True) + logger.info("Deleted orphan metadata directory: %s", subdir) + except FileNotFoundError: + logger.error("Failed to delete directory (not found): %s", subdir) + + @staticmethod + def _remove_pipeline_backup(scan: "Scan") -> None: + """Delete the optional ``pipeline.toml`` backup file if it exists. + + Parameters + ---------- + scan: + The active scan. + """ + backup_path = Path(scan.path()) / "pipeline.toml" + if backup_path.is_file(): + try: + backup_path.unlink() + logger.info("Deleted backup pipeline config: %s", backup_path) + except FileNotFoundError: + logger.warning("Backup pipeline config vanished before removal: %s", backup_path) else: - return valid[c] + logger.info("No backup pipeline config found.") + + @staticmethod + def _confirm(prompt: str = "Confirm? [y/N] ", default: str = "n") -> bool: + """Wrap the generic confirmation helper for easier testing/mocking.""" + return ask_confirmation(prompt, default) def run(self): - """Run the task.""" + """Execute the cleaning workflow.""" scan = ScanConfiguration().scan - logger.info(f"Cleaning task got a scan named '{scan.id}'...") + logger.info(f"Cleaning task started for scan '{scan.id}'...") # - Create the list of metadata to keep (retain) to a set and add the ones defined in `IMAGES_MD` - keep_metadata = list(set(self.keep_metadata) | set(IMAGES_MD)) + metadata_whitelist = self._merge_metadata_keep_list(self.keep_metadata) # - Handle the necessity to confirm prior to dataset & metadata cleaning. if not self.no_confirm: del_msg = "This will delete all filesets and metadata except for the `images` & 'VirtualPlant' filesets." - confirm_msg = "Confirm? [y/N]" logger.warning(del_msg) - logger.warning(confirm_msg) - clean = self.confirm(input().lower()) + if not self._confirm(): + logger.info("User aborted the cleaning operation.") + return + + # - Delete unwanted filesets. + filesets_to_remove = self._filesets_to_remove(scan, exclude={"images"}) + if filesets_to_remove: + self._delete_filesets(scan, filesets_to_remove) else: - clean = True - - # - If cleaning not required, abort: - if not clean: - logger.info("Did not validate dataset cleaning!") - return - - # - Perform the dataset & metadata cleaning: - # List all filesets in dataset (excluding 'images'): - fs_ids = [fs.id for fs in scan.get_filesets() if fs.id != "images"] - # Also exclude the dataset associated to VirtualPlant: - fs_ids = [fs for fs in fs_ids if not fs.startswith("VirtualPlant")] - logger.info(f"Found {len(fs_ids)} filesets to cleanup (excluding 'images' & 'VirtualPlant')...") - - # Cleanup all Filesets except 'images' & VirtualPlant*: - if len(fs_ids) != 0: - for fs in tqdm(fs_ids, unit='fileset'): - logger.info(f"Deleting '{fs}' fileset...") - scan.delete_fileset(fs) - - # Cleanup 'images' Filesets metadata: - img_fs = scan.get_fileset('images') - if img_fs is None: - logger.critical(f"Could not get the 'image' fileset for '{scan.id}'!") - else: - logger.info("Cleaning 'images' Fileset metadata...") - for f in tqdm(img_fs.get_files(), unit='file'): - md = f.get_metadata() - clean_md = {k: v for k, v in md.items() if k in keep_metadata} - f.metadata = {} # need to clear all metadata before setting the clean ones - f.set_metadata(clean_md) - - # - Cleanup metadata folder: - metadata_path = Path.resolve(Path(scan.path()) / 'metadata') - # Clean orphan metadata JSON files - fs_metadata = glob.glob(str(metadata_path) + '/*.json') - fs_metadata = [f for f in fs_metadata if f.split('/')[-1] not in ['images.json', 'metadata.json']] - if len(fs_metadata) != 0: - logger.info(f"Found {len(fs_metadata)} orphan metadata JSON files!") - for f in fs_metadata: - try: - Path(f).unlink(missing_ok=False) - logger.warning(f"Deleted file: {f}") - except FileNotFoundError: - logger.error(f"Could not delete file '{f}'!") - # Clean orphan metadata directories - fs_dir = set([d for d in os.listdir(metadata_path) if Path(d).is_dir()]) - {'images'} - if len(fs_dir) != 0: - logger.info(f"Found {len(fs_dir)} orphan metadata directories!") - for md_dir in fs_dir: - try: - rmtree(metadata_path / md_dir, ignore_errors=True) - logger.info(f"Deleted directory: {md_dir}") - except FileNotFoundError: - logger.error(f"Could not delete directory '{md_dir}'!") + logger.info("No filesets to delete (only 'images' and VirtualPlant* remain).") - # Try to remove 'pipeline.toml' backup, if any: - pipe_toml = Path.resolve(Path(scan.path()) / 'pipeline.toml') - if pipe_toml.is_file(): - try: - pipe_toml.unlink() - except FileNotFoundError: - logger.warning(f"Could not delete backup pipeline config: '{pipe_toml}'") - else: - logger.info(f"Deleted backup pipeline config: '{pipe_toml}'") + # - Clean the metadata of the ``images`` fileset. + images_fs = scan.get_fileset("images") + if images_fs is None: + logger.critical("Could not locate the 'images' fileset in scan %s.", scan.id) else: - logger.info("No backup pipeline config found!") + self._clean_images_metadata(images_fs, metadata_whitelist) + + # - Clean orphan metadata files and directories. + metadata_dir = Path(scan.path()) / "metadata" + self._clean_orphan_json_files(metadata_dir) + self._clean_orphan_directories(metadata_dir) + + # - Optionally remove the pipeline backup. + if not self.keep_pipeline_cfg: + self._remove_pipeline_backup(scan) + logger.info("Cleaning task finished for scan %r.", scan.id) return diff --git a/src/romitask/utils.py b/src/romitask/utils.py index 6c74d7b..579c67f 100644 --- a/src/romitask/utils.py +++ b/src/romitask/utils.py @@ -135,3 +135,24 @@ def parse_kbdi(kbdi, default='n'): return valid[default] else: return valid[kbdi] + +def ask_confirmation(prompt: str, default: str = "n") -> bool: + """Prompt the user for a yes/no answer. + + Parameters + ---------- + prompt: str + Message displayed to the user. + default: str + Default answer when the user just hits *Enter* (``"y"`` or ``"n"``). + + Returns + ------- + bool + ``True`` if the answer is affirmative, ``False`` otherwise. + """ + valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} + answer = input(prompt).strip().lower() + if answer == "": + answer = default + return valid.get(answer, False)