From 1cf2bdc0ca1d5e5982dc2ef97fbc484149955bb8 Mon Sep 17 00:00:00 2001 From: Ye Yuan Date: Tue, 19 May 2026 11:11:25 +0800 Subject: [PATCH 1/2] Fix critical bugs in FilePad.update_file() and related methods - Add missing self.filepad.update_one() call in _update_file_contents to persist gfs_id and compressed changes to MongoDB after update - Reorder _update_file_contents to insert new file before deleting old GridFS entry, preventing data loss on write failure - Fix file handle leak by using `with open()` context manager - Wrap gfs_id with ObjectId() in delete_file_by_id for proper GridFS deletion (string vs ObjectId mismatch) - Fix from_db_file() passing `database=` instead of `name=` to __init__, which caused TypeError on every call - Fix zlib.compress() receiving boolean as compression level instead of using the default level (6) - Update tests with round-trip assertions to verify data persistence after update_file and update_file_by_id --- fireworks/utilities/filepad.py | 13 ++++++++----- fireworks/utilities/tests/test_filepad.py | 16 +++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/fireworks/utilities/filepad.py b/fireworks/utilities/filepad.py index e92380929..fad97a2ca 100644 --- a/fireworks/utilities/filepad.py +++ b/fireworks/utilities/filepad.py @@ -254,7 +254,7 @@ def delete_file_by_id(self, gfs_id) -> None: Args: gfs_id (str): the file id. """ - self.gridfs.delete(gfs_id) + self.gridfs.delete(ObjectId(gfs_id)) self.filepad.delete_one({"gfs_id": gfs_id}) def delete_file_by_query(self, query) -> None: @@ -301,7 +301,7 @@ def _insert_to_gridfs(self, contents, compress): if compress: if self.text_mode: contents = contents.encode() - contents = zlib.compress(contents, compress) + contents = zlib.compress(contents) # insert to gridfs return str(self.gridfs.put(contents)) @@ -334,9 +334,12 @@ def _update_file_contents(self, doc, path, compress): if doc is None: return None, None old_gfs_id = doc["gfs_id"] - self.gridfs.delete(old_gfs_id) read_mode = "r" if self.text_mode else "rb" - gfs_id = self._insert_to_gridfs(open(path, read_mode).read(), compress) # noqa: SIM115 + with open(path, read_mode) as f: + contents = f.read() + gfs_id = self._insert_to_gridfs(contents, compress) + self.gridfs.delete(ObjectId(old_gfs_id)) + self.filepad.update_one({"gfs_id": old_gfs_id}, {"$set": {"gfs_id": gfs_id, "compressed": compress}}) doc["gfs_id"] = gfs_id doc["compressed"] = compress return old_gfs_id, gfs_id @@ -372,7 +375,7 @@ def from_db_file(cls, db_file: str, admin: bool = True) -> "Self": return cls( host=creds.get("host", "localhost"), port=int(creds.get("port", 27017)), - database=creds.get("name", "fireworks"), + name=creds.get("name", "fireworks"), username=user, password=password, authsource=authsource, diff --git a/fireworks/utilities/tests/test_filepad.py b/fireworks/utilities/tests/test_filepad.py index 60308981d..bc70689ca 100644 --- a/fireworks/utilities/tests/test_filepad.py +++ b/fireworks/utilities/tests/test_filepad.py @@ -1,6 +1,8 @@ import os import unittest +from bson.objectid import ObjectId + from fireworks.utilities.filepad import FilePad module_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) @@ -47,13 +49,25 @@ def test_update_file(self) -> None: old_id, new_id = self.fp.update_file("test_update_file", self.chgcar_file) assert old_id == gfs_id assert new_id != gfs_id - assert not self.fp.gridfs.exists(old_id) + assert not self.fp.gridfs.exists(ObjectId(old_id)) + # verify round-trip: updated file should be retrievable and match original + contents, doc = self.fp.get_file("test_update_file") + assert doc is not None + assert doc["gfs_id"] == new_id + with open(self.chgcar_file, "rb") as f: + assert contents == f.read() def test_update_file_by_id(self) -> None: gfs_id, _ = self.fp.add_file(self.chgcar_file, identifier="some identifier") old, new = self.fp.update_file_by_id(gfs_id, self.chgcar_file) assert old == gfs_id assert new != gfs_id + # verify round-trip: updated file should be retrievable and match original + contents, doc = self.fp.get_file_by_id(new) + assert doc is not None + assert doc["gfs_id"] == new + with open(self.chgcar_file, "rb") as f: + assert contents == f.read() def tearDown(self) -> None: self.fp.reset() From f3a3fa7550f82af7d5205da30ebf8a64d513c072 Mon Sep 17 00:00:00 2001 From: Ye Yuan Date: Tue, 19 May 2026 15:25:08 +0800 Subject: [PATCH 2/2] Add documentation for FilePad update_file() and related methods - Add summary line to _update_file_contents and delete_file_by_id docstrings (previously had only Args blocks, no description) - Add "Updating files" section to FilePad tutorial covering update_file() and update_file_by_id() - Fix tutorial intro to mention "update" alongside add/delete, consistent with the module-level docstring --- docs/_sources/filepad_tutorial.rst.txt | 17 ++++++++++++++++- docs_rst/filepad_tutorial.rst | 17 ++++++++++++++++- fireworks/utilities/filepad.py | 6 ++++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/docs/_sources/filepad_tutorial.rst.txt b/docs/_sources/filepad_tutorial.rst.txt index cf20fab18..1a6d3a188 100644 --- a/docs/_sources/filepad_tutorial.rst.txt +++ b/docs/_sources/filepad_tutorial.rst.txt @@ -3,7 +3,7 @@ Using FilePad for storing and retrieving files =============================================== -FilePad utility provides the api to add and delete arbitrary files of arbitrary sizes to MongoDB(filepad). +FilePad utility provides the api to add, update and delete arbitrary files of arbitrary sizes to MongoDB(filepad). The is achieved by inserting the entire file contents to GridFS and storing the id returned by the GridFS insertion, the user provided identifier and the metadata in a document in the filepad. In the following documentation, ``file contents`` refers to the file contents stored in GridFS and ``document`` refers to the @@ -61,6 +61,21 @@ where ```` is monogo query dict and the returned values ``all_files`` is tuples that match the query. +Updating files +================= + +To update the file contents associated with an existing identifier:: + + old_file_id, new_file_id = fp.update_file(, , compress=True/False) + +where ```` is the path to the new file. The old GridFS entry is deleted and the filepad +document is updated with the new file ID. The old and new file IDs are returned. + +To update by the file ID instead:: + + old_file_id, new_file_id = fp.update_file_by_id(, , compress=True/False) + + Deleting files ================= diff --git a/docs_rst/filepad_tutorial.rst b/docs_rst/filepad_tutorial.rst index cf20fab18..1a6d3a188 100644 --- a/docs_rst/filepad_tutorial.rst +++ b/docs_rst/filepad_tutorial.rst @@ -3,7 +3,7 @@ Using FilePad for storing and retrieving files =============================================== -FilePad utility provides the api to add and delete arbitrary files of arbitrary sizes to MongoDB(filepad). +FilePad utility provides the api to add, update and delete arbitrary files of arbitrary sizes to MongoDB(filepad). The is achieved by inserting the entire file contents to GridFS and storing the id returned by the GridFS insertion, the user provided identifier and the metadata in a document in the filepad. In the following documentation, ``file contents`` refers to the file contents stored in GridFS and ``document`` refers to the @@ -61,6 +61,21 @@ where ```` is monogo query dict and the returned values ``all_files`` is tuples that match the query. +Updating files +================= + +To update the file contents associated with an existing identifier:: + + old_file_id, new_file_id = fp.update_file(, , compress=True/False) + +where ```` is the path to the new file. The old GridFS entry is deleted and the filepad +document is updated with the new file ID. The old and new file IDs are returned. + +To update by the file ID instead:: + + old_file_id, new_file_id = fp.update_file_by_id(, , compress=True/False) + + Deleting files ================= diff --git a/fireworks/utilities/filepad.py b/fireworks/utilities/filepad.py index fad97a2ca..c385b0f7e 100644 --- a/fireworks/utilities/filepad.py +++ b/fireworks/utilities/filepad.py @@ -250,7 +250,8 @@ def update_file(self, identifier, path, compress=True): return self._update_file_contents(doc, path, compress) def delete_file_by_id(self, gfs_id) -> None: - """ + """Delete the file from GridFS and remove the associated document from filepad by gfs_id. + Args: gfs_id (str): the file id. """ @@ -322,7 +323,8 @@ def _get_file_contents(self, doc): return None, None def _update_file_contents(self, doc, path, compress): - """ + """Replace file contents in GridFS and update the filepad document with the new gfs_id. + Args: doc (dict): From the filepad collection. path (str): Path to the new file whose contents will replace the existing one.