diff --git a/docs/_sources/filepad_tutorial.rst.txt b/docs/_sources/filepad_tutorial.rst.txt index cf20fab18..1a6d3a188 100644 --- a/docs/_sources/filepad_tutorial.rst.txt +++ b/docs/_sources/filepad_tutorial.rst.txt @@ -3,7 +3,7 @@ Using FilePad for storing and retrieving files =============================================== -FilePad utility provides the api to add and delete arbitrary files of arbitrary sizes to MongoDB(filepad). +FilePad utility provides the api to add, update and delete arbitrary files of arbitrary sizes to MongoDB(filepad). The is achieved by inserting the entire file contents to GridFS and storing the id returned by the GridFS insertion, the user provided identifier and the metadata in a document in the filepad. In the following documentation, ``file contents`` refers to the file contents stored in GridFS and ``document`` refers to the @@ -61,6 +61,21 @@ where ```` is monogo query dict and the returned values ``all_files`` is tuples that match the query. +Updating files +================= + +To update the file contents associated with an existing identifier:: + + old_file_id, new_file_id = fp.update_file(<identifier>, <path>, compress=True/False) + +where ``<path>`` is the path to the new file. The old GridFS entry is deleted and the filepad +document is updated with the new file ID. The old and new file IDs are returned. + +To update by the file ID instead:: + + old_file_id, new_file_id = fp.update_file_by_id(<gfs_id>, <path>, compress=True/False) + + Deleting files ================= diff --git a/docs_rst/filepad_tutorial.rst b/docs_rst/filepad_tutorial.rst index cf20fab18..1a6d3a188 100644 --- a/docs_rst/filepad_tutorial.rst +++ b/docs_rst/filepad_tutorial.rst @@ -3,7 +3,7 @@ Using FilePad for storing and retrieving files =============================================== -FilePad utility provides the api to add and delete arbitrary files of arbitrary sizes to MongoDB(filepad). 
+FilePad utility provides the api to add, update and delete arbitrary files of arbitrary sizes to MongoDB(filepad). The is achieved by inserting the entire file contents to GridFS and storing the id returned by the GridFS insertion, the user provided identifier and the metadata in a document in the filepad. In the following documentation, ``file contents`` refers to the file contents stored in GridFS and ``document`` refers to the @@ -61,6 +61,21 @@ where ```` is monogo query dict and the returned values ``all_files`` is tuples that match the query. +Updating files +================= + +To update the file contents associated with an existing identifier:: + + old_file_id, new_file_id = fp.update_file(<identifier>, <path>, compress=True/False) + +where ``<path>`` is the path to the new file. The old GridFS entry is deleted and the filepad +document is updated with the new file ID. The old and new file IDs are returned. + +To update by the file ID instead:: + + old_file_id, new_file_id = fp.update_file_by_id(<gfs_id>, <path>, compress=True/False) + + Deleting files ================= diff --git a/fireworks/utilities/filepad.py b/fireworks/utilities/filepad.py index e92380929..c385b0f7e 100644 --- a/fireworks/utilities/filepad.py +++ b/fireworks/utilities/filepad.py @@ -250,11 +250,12 @@ def update_file(self, identifier, path, compress=True): return self._update_file_contents(doc, path, compress) def delete_file_by_id(self, gfs_id) -> None: - """ + """Delete the file from GridFS and remove the associated document from filepad by gfs_id. + Args: gfs_id (str): the file id. 
""" - self.gridfs.delete(gfs_id) + self.gridfs.delete(ObjectId(gfs_id)) self.filepad.delete_one({"gfs_id": gfs_id}) def delete_file_by_query(self, query) -> None: @@ -301,7 +302,7 @@ def _insert_to_gridfs(self, contents, compress): if compress: if self.text_mode: contents = contents.encode() - contents = zlib.compress(contents, compress) + contents = zlib.compress(contents) # insert to gridfs return str(self.gridfs.put(contents)) @@ -322,7 +323,8 @@ def _get_file_contents(self, doc): return None, None def _update_file_contents(self, doc, path, compress): - """ + """Replace file contents in GridFS and update the filepad document with the new gfs_id. + Args: doc (dict): From the filepad collection. path (str): Path to the new file whose contents will replace the existing one. @@ -334,9 +336,12 @@ def _update_file_contents(self, doc, path, compress): if doc is None: return None, None old_gfs_id = doc["gfs_id"] - self.gridfs.delete(old_gfs_id) read_mode = "r" if self.text_mode else "rb" - gfs_id = self._insert_to_gridfs(open(path, read_mode).read(), compress) # noqa: SIM115 + with open(path, read_mode) as f: + contents = f.read() + gfs_id = self._insert_to_gridfs(contents, compress) + self.gridfs.delete(ObjectId(old_gfs_id)) + self.filepad.update_one({"gfs_id": old_gfs_id}, {"$set": {"gfs_id": gfs_id, "compressed": compress}}) doc["gfs_id"] = gfs_id doc["compressed"] = compress return old_gfs_id, gfs_id @@ -372,7 +377,7 @@ def from_db_file(cls, db_file: str, admin: bool = True) -> "Self": return cls( host=creds.get("host", "localhost"), port=int(creds.get("port", 27017)), - database=creds.get("name", "fireworks"), + name=creds.get("name", "fireworks"), username=user, password=password, authsource=authsource, diff --git a/fireworks/utilities/tests/test_filepad.py b/fireworks/utilities/tests/test_filepad.py index 60308981d..bc70689ca 100644 --- a/fireworks/utilities/tests/test_filepad.py +++ b/fireworks/utilities/tests/test_filepad.py @@ -1,6 +1,8 @@ import os import 
unittest +from bson.objectid import ObjectId + from fireworks.utilities.filepad import FilePad module_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) @@ -47,13 +49,25 @@ def test_update_file(self) -> None: old_id, new_id = self.fp.update_file("test_update_file", self.chgcar_file) assert old_id == gfs_id assert new_id != gfs_id - assert not self.fp.gridfs.exists(old_id) + assert not self.fp.gridfs.exists(ObjectId(old_id)) + # verify round-trip: updated file should be retrievable and match original + contents, doc = self.fp.get_file("test_update_file") + assert doc is not None + assert doc["gfs_id"] == new_id + with open(self.chgcar_file, "rb") as f: + assert contents == f.read() def test_update_file_by_id(self) -> None: gfs_id, _ = self.fp.add_file(self.chgcar_file, identifier="some identifier") old, new = self.fp.update_file_by_id(gfs_id, self.chgcar_file) assert old == gfs_id assert new != gfs_id + # verify round-trip: updated file should be retrievable and match original + contents, doc = self.fp.get_file_by_id(new) + assert doc is not None + assert doc["gfs_id"] == new + with open(self.chgcar_file, "rb") as f: + assert contents == f.read() def tearDown(self) -> None: self.fp.reset()