diff --git a/src/nemosis/custom_tables.py b/src/nemosis/custom_tables.py
index eaec94d..429a1c9 100644
--- a/src/nemosis/custom_tables.py
+++ b/src/nemosis/custom_tables.py
@@ -200,7 +200,7 @@ def performance_at_nodal_peak(capacity_and_scada_grouped):
     index_max = capacity_and_scada_grouped["TOTALDEMAND"].idxmax()
     try:
         output_at_peak = capacity_and_scada_grouped["SCADAVALUE"][index_max]
-    except:
+    except Exception:
         x = 1
     if np.isnan(output_at_peak):
         output_at_peak = 0
diff --git a/src/nemosis/data_fetch_methods.py b/src/nemosis/data_fetch_methods.py
index 461e953..96c5d57 100644
--- a/src/nemosis/data_fetch_methods.py
+++ b/src/nemosis/data_fetch_methods.py
@@ -24,7 +24,8 @@ def dynamic_data_compiler(
     filter_cols=None,
     filter_values=None,
     fformat="feather",
-    keep_csv=True,
+    keep_csv=False,
+    keep_zip=False,
     parse_data_types=True,
     rebuild=False,
     **kwargs,
@@ -51,7 +52,14 @@ def dynamic_data_compiler(
                           Stored parquet and feather files will store columns
                           as object type (compatbile with GUI use). For
                           type inference for a cache, use cache_compiler.
-        keep_csv (bool): retains CSVs in cache.
+        keep_csv (bool): If True, raw CSVs from AEMO are retained in the
+                         cache directory after the typed feather/parquet
+                         is written. False by default — lean cache.
+        keep_zip (bool): If True, downloaded AEMO archive zips are
+                         retained in the cache directory after extraction.
+                         False by default. Set True on slow connections
+                         to avoid re-downloading on subsequent runs
+                         (see #56).
         data_merge (bool): concatenate DataFrames and return one DataFrame.
                            If False, will not return any data.
         parse_data_types (bool): infers data types of columns when reading
@@ -112,6 +120,7 @@ def dynamic_data_compiler(
         date_filter,
         fformat=fformat,
         keep_csv=keep_csv,
+        keep_zip=keep_zip,
         rebuild=rebuild,
         write_kwargs=kwargs,
     )
@@ -156,7 +165,8 @@ def cache_compiler(
     select_columns=None,
     fformat="feather",
     rebuild=False,
-    keep_csv=True,
+    keep_csv=False,
+    keep_zip=False,
     **kwargs,
 ):
     """
@@ -183,8 +193,14 @@ def cache_compiler(
                           type inference for a cache, use cache_compiler.
         rebuild (bool): If True then cache files are rebuilt
                         even if they exist already. False by default.
-        keep_csv (bool): If True raw CSVs from AEMO are not deleted after
-                         the cache is built. True by default
+        keep_csv (bool): If True, raw CSVs from AEMO are retained
+                         alongside the typed feather/parquet after the
+                         cache is built. False by default — lean cache.
+        keep_zip (bool): If True, downloaded AEMO archive zips are
+                         retained in the cache directory after
+                         extraction. False by default. Set True on
+                         slow connections to avoid re-downloading on
+                         subsequent runs (see #56).
         **kwargs: additional arguments passed to the pd.to_{fformat}() function
 
     Returns:
@@ -237,6 +253,7 @@ def cache_compiler(
         date_filter=None,
         fformat=fformat,
         keep_csv=keep_csv,
+        keep_zip=keep_zip,
         caching_mode=True,
         rebuild=rebuild,
         write_kwargs=kwargs,
@@ -298,7 +315,7 @@ def static_table(
             static_downloader_map[table_name](
                 _defaults.static_table_url[table_name], path_and_name
             )
-        except:
+        except Exception:
             raise NoDataToReturn(
                 (
                     f"Compiling data for table {table_name} failed. "
@@ -544,7 +561,8 @@ def _dynamic_data_fetch_loop(
     select_columns,
     date_filter,
     fformat="feather",
-    keep_csv=True,
+    keep_csv=False,
+    keep_zip=False,
     caching_mode=False,
     rebuild=False,
     write_kwargs={},
@@ -595,6 +613,7 @@ def _dynamic_data_fetch_loop(
                     chunk,
                     index,
                     raw_data_location,
+                    keep_zip=keep_zip,
                 )
 
             if _glob.glob(full_filename) and fformat != "csv" and not rebuild:
@@ -795,7 +814,8 @@ def _write_to_format(data, fformat, full_filename, write_kwargs):
 
 
 def _download_data(
-    table_name, table_type, filename_stub, day, month, year, chunk, index, raw_data_location
+    table_name, table_type, filename_stub, day, month, year, chunk, index, raw_data_location,
+    keep_zip=False,
 ):
     """
     Dispatch table to downloader to be downloaded.
@@ -820,7 +840,8 @@ def _download_data(
             )
 
     _processing_info_maps.downloader[table_type](
-        year, month, day, chunk, index, filename_stub, raw_data_location
+        year, month, day, chunk, index, filename_stub, raw_data_location,
+        keep_zip=keep_zip,
     )
     return
 
diff --git a/src/nemosis/downloader.py b/src/nemosis/downloader.py
index bfca69b..93f712c 100644
--- a/src/nemosis/downloader.py
+++ b/src/nemosis/downloader.py
@@ -27,7 +27,7 @@
 })
 
 
-def run(year, month, day, chunk, index, filename_stub, down_load_to):
+def run(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     """This function"""
 
     url = defaults.aemo_mms_url
@@ -36,15 +36,15 @@ def run(year, month, day, chunk, index, filename_stub, down_load_to):
 
     # Perform the download, unzipping saving of the file
     try:
-        download_unzip_csv(url_formatted, down_load_to)
-    except Exception:
+        download_unzip_csv(url_formatted, down_load_to, keep_zip=keep_zip)
+    except Exception as e:
         if chunk == 1:
-            logger.warning(f"{filename_stub} not downloaded")
+            logger.warning(f"{filename_stub} not downloaded ({e})")
 
 
-def run_bid_tables(year, month, day, chunk, index, filename_stub, down_load_to):
+def run_bid_tables(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     if day is None:
-        run(year, month, day, chunk, index, filename_stub, down_load_to)
+        run(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=keep_zip)
     else:
         try:
             filename_stub = "BIDMOVE_COMPLETE_{year}{month}{day}".format(year=year, month=month, day=day)
@@ -52,44 +52,44 @@ def run_bid_tables(year, month, day, chunk, index, filename_stub, down_load_to):
                 filename_stub,
                 defaults.current_data_page_urls["BIDDING"])
             _download_and_unpack_bid_move_complete_files(
-                download_url, down_load_to
+                download_url, down_load_to, keep_zip=keep_zip
             )
-        except Exception:
-            logger.warning(f"{filename_stub} not downloaded")
+        except Exception as e:
+            logger.warning(f"{filename_stub} not downloaded ({e})")
 
 
-def run_next_day_region_tables(year, month, day, chunk, index, filename_stub, down_load_to):
+def run_next_day_region_tables(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     try:
         filename_stub = "PUBLIC_DAILY_{year}{month}{day}".format(year=year, month=month, day=day)
         download_url = _get_current_url(
             filename_stub,
             defaults.current_data_page_urls["DAILY_REGION_SUMMARY"])
         _download_and_unpack_next_region_tables(
-            download_url, down_load_to
+            download_url, down_load_to, keep_zip=keep_zip
         )
-    except Exception:
-        logger.warning(f"{filename_stub} not downloaded")
+    except Exception as e:
+        logger.warning(f"{filename_stub} not downloaded ({e})")
 
 
-def run_next_dispatch_tables(year, month, day, chunk, index, filename_stub, down_load_to):
+def run_next_dispatch_tables(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     try:
         filename_stub = "PUBLIC_NEXT_DAY_DISPATCH_{year}{month}{day}".format(year=year, month=month, day=day)
         download_url = _get_current_url(
             filename_stub,
             defaults.current_data_page_urls["NEXT_DAY_DISPATCHLOAD"])
-        _download_and_unpack_next_dispatch_load_files_complete_files(download_url, down_load_to)
-    except Exception:
-        logger.warning(f"{filename_stub} not downloaded")
+        _download_and_unpack_next_dispatch_load_files_complete_files(download_url, down_load_to, keep_zip=keep_zip)
+    except Exception as e:
+        logger.warning(f"{filename_stub} not downloaded ({e})")
 
 
-def run_intermittent_gen_scada(year, month, day, chunk, index, filename_stub, down_load_to):
+def run_intermittent_gen_scada(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     try:
         download_url = _get_current_url(
             filename_stub,
             defaults.current_data_page_urls["INTERMITTENT_GEN_SCADA"])
-        _download_and_unpack_intermittent_gen_scada_file(download_url, down_load_to)
-    except Exception:
-        logger.warning(f"{filename_stub} not downloaded")
+        _download_and_unpack_intermittent_gen_scada_file(download_url, down_load_to, keep_zip=keep_zip)
+    except Exception as e:
+        logger.warning(f"{filename_stub} not downloaded ({e})")
 
 
 def _get_current_url(filename_stub, current_page_url):
@@ -100,122 +100,134 @@ def _get_current_url(filename_stub, current_page_url):
 
 
 def _download_and_unpack_bid_move_complete_files(
-    download_url, down_load_to
+    download_url, down_load_to, keep_zip=False
 ):
-    r = session.get(download_url)
-    r.raise_for_status()
-    zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
+    zip_local_path, downloaded = download_to_dir(download_url, down_load_to)
+    try:
+        with zipfile.ZipFile(zip_local_path) as zipped_file:
 
-    file_name = zipped_file.namelist()[
-        0
-    ]  # Just one file so we can pull it out of the list using 0
-    start_row_second_table = _find_start_row_nth_table(
-        zipped_file, file_name, 2
-    )
-    csv_file = zipped_file.open(file_name)
-    BIDDAYOFFER_D = pd.read_csv(
-        csv_file, header=1, nrows=start_row_second_table - 3, dtype=str
-    )
-    BIDDAYOFFER_D.to_csv(
-        os.path.join(
-            down_load_to,
-            "PUBLIC_DVD_BIDDAYOFFER_D_" + file_name[24:32] + ".csv",
-        ),
-        index=False,
-    )
-    csv_file = zipped_file.open(file_name)
-    BIDPEROFFER_D = pd.read_csv(
-        csv_file, header=start_row_second_table - 1, dtype=str
-    )[:-1]
-    BIDPEROFFER_D.to_csv(
-        os.path.join(
-            down_load_to,
-            "PUBLIC_DVD_BIDPEROFFER_D_" + file_name[24:32] + ".csv",
-        ),
-        index=False,
-    )
+            file_name = zipped_file.namelist()[
+                0
+            ]  # Just one file so we can pull it out of the list using 0
+            start_row_second_table = _find_start_row_nth_table(
+                zipped_file, file_name, 2
+            )
+            csv_file = zipped_file.open(file_name)
+            BIDDAYOFFER_D = pd.read_csv(
+                csv_file, header=1, nrows=start_row_second_table - 3, dtype=str
+            )
+            BIDDAYOFFER_D.to_csv(
+                os.path.join(
+                    down_load_to,
+                    "PUBLIC_DVD_BIDDAYOFFER_D_" + file_name[24:32] + ".csv",
+                ),
+                index=False,
+            )
+            csv_file = zipped_file.open(file_name)
+            BIDPEROFFER_D = pd.read_csv(
+                csv_file, header=start_row_second_table - 1, dtype=str
+            )[:-1]
+            BIDPEROFFER_D.to_csv(
+                os.path.join(
+                    down_load_to,
+                    "PUBLIC_DVD_BIDPEROFFER_D_" + file_name[24:32] + ".csv",
+                ),
+                index=False,
+            )
+    finally:
+        if downloaded and not keep_zip and os.path.isfile(zip_local_path):
+            os.unlink(zip_local_path)
 
 
 def _download_and_unpack_next_region_tables(
-    download_url, down_load_to
+    download_url, down_load_to, keep_zip=False
 ):
-    r = session.get(download_url)
-    r.raise_for_status()
-    zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
+    zip_local_path, downloaded = download_to_dir(download_url, down_load_to)
+    try:
+        with zipfile.ZipFile(zip_local_path) as zipped_file:
+
+            file_name = zipped_file.namelist()[
+                0
+            ]  # Just one file so we can pull it out of the list using 0
+            start_row_second_table = _find_start_row_nth_table(
+                zipped_file, file_name, 2
+            )
+            start_row_third_table = _find_start_row_nth_table(
+                zipped_file, file_name, 3
+            )
+            csv_file = zipped_file.open(file_name)
+            DAILY_REGION_SUMMARY = pd.read_csv(
+                csv_file, header=start_row_second_table - 1,
+                nrows=start_row_third_table - start_row_second_table - 1, dtype=str
+            )
+            DAILY_REGION_SUMMARY.to_csv(
+                os.path.join(
+                    down_load_to,
+                    "PUBLIC_DAILY_REGION_SUMMARY_" + file_name[13:21] + ".csv",
+                ),
+                index=False,
+            )
+    finally:
+        if downloaded and not keep_zip and os.path.isfile(zip_local_path):
+            os.unlink(zip_local_path)
 
-    file_name = zipped_file.namelist()[
-        0
-    ]  # Just one file so we can pull it out of the list using 0
-    start_row_second_table = _find_start_row_nth_table(
-        zipped_file, file_name, 2
-    )
-    start_row_third_table = _find_start_row_nth_table(
-        zipped_file, file_name, 3
-    )
-    csv_file = zipped_file.open(file_name)
-    DAILY_REGION_SUMMARY = pd.read_csv(
-        csv_file, header=start_row_second_table - 1,
-        nrows=start_row_third_table - start_row_second_table - 1, dtype=str
-    )
-    DAILY_REGION_SUMMARY.to_csv(
-        os.path.join(
-            down_load_to,
-            "PUBLIC_DAILY_REGION_SUMMARY_" + file_name[13:21] + ".csv",
-        ),
-        index=False,
-    )
-    
 
 def _download_and_unpack_next_dispatch_load_files_complete_files(
-    download_url, down_load_to
+    download_url, down_load_to, keep_zip=False
 ):
-    r = session.get(download_url)
-    r.raise_for_status()
-    zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
+    zip_local_path, downloaded = download_to_dir(download_url, down_load_to)
+    try:
+        with zipfile.ZipFile(zip_local_path) as zipped_file:
 
-    file_name = zipped_file.namelist()[
-        0
-    ]  # Just one file so we can pull it out of the list using 0
-    start_row_second_table = _find_start_row_nth_table(
-        zipped_file, file_name, 2
-    )
-    csv_file = zipped_file.open(file_name)
-    NEXT_DAY_DISPATCHLOAD = pd.read_csv(
-        csv_file, header=1, nrows=start_row_second_table - 3, dtype=str
-    )
-    NEXT_DAY_DISPATCHLOAD.to_csv(
-        os.path.join(
-            down_load_to,
-            "PUBLIC_NEXT_DAY_DISPATCHLOAD_" + file_name[25:33] + ".csv",
-        ),
-        index=False,
-    )
+            file_name = zipped_file.namelist()[
+                0
+            ]  # Just one file so we can pull it out of the list using 0
+            start_row_second_table = _find_start_row_nth_table(
+                zipped_file, file_name, 2
+            )
+            csv_file = zipped_file.open(file_name)
+            NEXT_DAY_DISPATCHLOAD = pd.read_csv(
+                csv_file, header=1, nrows=start_row_second_table - 3, dtype=str
+            )
+            NEXT_DAY_DISPATCHLOAD.to_csv(
+                os.path.join(
+                    down_load_to,
+                    "PUBLIC_NEXT_DAY_DISPATCHLOAD_" + file_name[25:33] + ".csv",
+                ),
+                index=False,
+            )
+    finally:
+        if downloaded and not keep_zip and os.path.isfile(zip_local_path):
+            os.unlink(zip_local_path)
 
 
 def _download_and_unpack_intermittent_gen_scada_file(
-    download_url, down_load_to
+    download_url, down_load_to, keep_zip=False
 ):
-    r = session.get(download_url)
-    r.raise_for_status()
-    zipped_file = zipfile.ZipFile(io.BytesIO(r.content))
+    zip_local_path, downloaded = download_to_dir(download_url, down_load_to)
+    try:
+        with zipfile.ZipFile(zip_local_path) as zipped_file:
 
-    file_name = zipped_file.namelist()[
-        0
-    ]  # Just one file so we can pull it out of the list using 0
-    start_row_second_table = _find_start_row_nth_table(
-        zipped_file, file_name, 1
-    )
-    csv_file = zipped_file.open(file_name)
-    data = pd.read_csv(
-        csv_file, header=1, dtype=str
-    )[:-1]
-    data.to_csv(
-        os.path.join(
-            down_load_to,
-            "PUBLIC_NEXT_DAY_INTERMITTENT_GEN_SCADA_" + file_name[39:47] + ".csv",
-        ),
-        index=False,
-    )
+            file_name = zipped_file.namelist()[
+                0
+            ]  # Just one file so we can pull it out of the list using 0
+            start_row_second_table = _find_start_row_nth_table(
+                zipped_file, file_name, 1
+            )
+            csv_file = zipped_file.open(file_name)
+            data = pd.read_csv(
+                csv_file, header=1, dtype=str
+            )[:-1]
+            data.to_csv(
+                os.path.join(
+                    down_load_to,
+                    "PUBLIC_NEXT_DAY_INTERMITTENT_GEN_SCADA_" + file_name[39:47] + ".csv",
+                ),
+                index=False,
+            )
+    finally:
+        if downloaded and not keep_zip and os.path.isfile(zip_local_path):
+            os.unlink(zip_local_path)
 
 
 def _find_start_row_nth_table(sub_folder_zipfile, file_name, n):
@@ -236,7 +248,7 @@ def _find_start_row_nth_table(sub_folder_zipfile, file_name, n):
 
 
 
-def run_fcas4s(year, month, day, chunk, index, filename_stub, down_load_to):
+def run_fcas4s(year, month, day, chunk, index, filename_stub, down_load_to, keep_zip=False):
     """This function"""
 
     # Add the year and month information to the generic AEMO data url
@@ -246,28 +258,85 @@ def run_fcas4s(year, month, day, chunk, index, filename_stub, down_load_to):
     )
     # Perform the download, unzipping saving of the file
     try:
-        download_unzip_csv(url_formatted_latest, down_load_to)
+        download_unzip_csv(url_formatted_latest, down_load_to, keep_zip=keep_zip)
     except Exception:
         try:
-            download_unzip_csv(url_formatted_hist, down_load_to)
+            download_unzip_csv(url_formatted_hist, down_load_to, keep_zip=keep_zip)
         except Exception as e:
             # FCAS csvs are bundled in 30 minute bundles
             # Check if the csv exists before warning
             file_check = os.path.join(down_load_to, filename_stub + ".csv")
             if not os.path.isfile(file_check):
-                logger.warning(f"{filename_stub} not downloaded")
+                logger.warning(f"{filename_stub} not downloaded {(e)}")
 
+def download_to_dir(url, down_load_to, force_redo=False):
+    """
+    Download a file into `down_load_to`, deriving the filename from
+    the URL. Returns a (path, downloaded) tuple: `path` is the file's
+    absolute path, `downloaded` is True if this call actually fetched
+    over the network and False if the file was already in place and
+    re-used.
 
-def download_unzip_csv(url, down_load_to):
+    Streams the response so large files don't have to fit in memory.
     """
-    This function downloads a zipped csv using a url,
-    extracts the csv and saves it a specified location
+    url = url.replace('#', '%23')
+    filename = url.split('/')[-1].split('?')[0]
+    path = os.path.join(down_load_to, filename)
+    downloaded = download_to_path(url, path, force_redo=force_redo)
+    return path, downloaded
+
+
+def download_to_path(url, path_and_name, force_redo=False):
+    """
+    Download a file from `url` to `path_and_name`. Returns True if a
+    new network fetch occurred, False if the destination already
+    existed and was re-used.
+
+    Idempotent — if the destination already exists, the function
+    short-circuits unless `force_redo=True`. On a write failure
+    mid-stream, the partial output file is removed before the
+    exception propagates.
     """
     url = url.replace('#', '%23')
-    r = session.get(url)
-    r.raise_for_status()
-    z = zipfile.ZipFile(io.BytesIO(r.content))
-    z.extractall(down_load_to)
+    if os.path.isfile(path_and_name) and not force_redo:
+        return False
+
+    with session.get(url, stream=True) as response:
+        response.raise_for_status()
+        try:
+            with open(path_and_name, 'wb') as file:
+                for chunk in response.iter_content(chunk_size=2**13):
+                    file.write(chunk)
+        except Exception as e:
+            logger.error(f"Failed to write file to {path_and_name}: {e}")
+            if os.path.isfile(path_and_name):
+                os.unlink(path_and_name)
+            raise
+    return True
+
+
+def download_unzip_csv(url, down_load_to, keep_zip=False):
+    """
+    Download a zipped csv from a URL, extract its contents into
+    `down_load_to`, and (per `keep_zip`) retain the zip on disk.
+
+    `keep_zip=False` (default) cleans up the zip after extracting,
+    leaving only the extracted CSV in the cache directory. Cleanup
+    only touches zips this call actually downloaded — pre-existing
+    zips (from a previous call, or another concurrent process) are
+    left alone.
+
+    `keep_zip=True` addresses #56: a cached zip on disk so subsequent
+    calls can re-extract without re-downloading (useful on slow
+    connections).
+    """
+    zip_local_path, downloaded = download_to_dir(url, down_load_to)
+    try:
+        with zipfile.ZipFile(zip_local_path) as z:
+            z.extractall(down_load_to)
+    finally:
+        if downloaded and not keep_zip and os.path.isfile(zip_local_path):
+            os.unlink(zip_local_path)
 
 
 def download_csv(url, path_and_name):
@@ -275,10 +344,7 @@ def download_csv(url, path_and_name):
     This function downloads a zipped csv using a url,
     extracts the csv and saves it a specified location
     """
-    r = session.get(url)
-    r.raise_for_status()
-    with open(path_and_name, "wb") as f:
-        f.write(r.content)
+    download_to_path(url, path_and_name)
 
 
 def download_elements_file(url, path_and_name):
@@ -288,21 +354,16 @@ def download_elements_file(url, path_and_name):
     links = soup.find_all("a")
     last_file_name = links[-1].text
     link = url + last_file_name
-    r = session.get(link)
-    r.raise_for_status()
-    with open(path_and_name, "wb") as f:
-        f.write(r.content)
 
+    download_to_path(link, path_and_name)
+    
 
 def download_xl(url, path_and_name):
     """
     This function downloads a zipped csv using a url, extracts the csv and
     saves it a specified location
     """
-    r = session.get(url)
-    r.raise_for_status()
-    with open(path_and_name, "wb") as f:
-        f.write(r.content)
+    download_to_path(url, path_and_name)
 
 
 def format_aemo_url(url, year, month, filename_stub):
@@ -314,7 +375,6 @@ def format_aemo_url(url, year, month, filename_stub):
     year = str(year)
     return url.format(year, year, month, filename_stub)
 
-
 def status_code_return(url):
     r = session.get(url)
     return r.status_code
@@ -328,4 +388,4 @@ def _get_matching_link(url, stub_link):
     for link in links:
         if stub_link in link:
             return link
-    logger.warning(f"{stub_link} not downloaded")
+    logger.warning(f"{stub_link} not downloaded because no match for {stub_link} was found on {url}")
diff --git a/tests/end_to_end_table_tests/test_cache_compiler.py b/tests/end_to_end_table_tests/test_cache_compiler.py
index 181ec54..2b7c1f7 100644
--- a/tests/end_to_end_table_tests/test_cache_compiler.py
+++ b/tests/end_to_end_table_tests/test_cache_compiler.py
@@ -4,13 +4,13 @@
 Verifies (a) the round-trip through cache → dynamic_data_compiler preserves
 typed columns, (b) `select_columns` narrows the cached file itself, not
 just the returned frame, (c) cache directory is auto-created when missing,
-(d) keep_csv=True is the default behaviour, and (e) partial cache files
-are cleaned up when a feather/parquet write fails mid-flight.
+(d) keep_csv / keep_zip defaults and opt-in paths, and (e) partial cache
+files are cleaned up when a feather/parquet write fails mid-flight.
 """
 import pandas as pd
 import pytest
 
-from nemosis import cache_compiler, data_fetch_methods, dynamic_data_compiler
+from nemosis import cache_compiler, data_fetch_methods, defaults, dynamic_data_compiler
 from nemosis.custom_errors import UserInputError
 
 
@@ -92,12 +92,12 @@ def test_raises_when_cache_path_is_a_file(nemosis_fixture):
         )
 
 
-def test_keep_csv_true_by_default_keeps_fetched_csv(nemosis_fixture):
+def test_keep_csv_true_retains_csv(nemosis_fixture):
     """When cache_compiler actually has to fetch (no existing feather, so
-    the code path that downloads + extracts a CSV runs), the default
-    keep_csv=True must leave the extracted CSV on disk alongside the
-    feather. rebuild=True forces the fetch path so this test isn't
-    dependent on tmp_path being empty at start.
+    the code path that downloads + extracts a CSV runs), `keep_csv=True`
+    must leave the extracted CSV on disk alongside the feather.
+    rebuild=True forces the fetch path so this test isn't dependent on
+    tmp_path being empty at start.
 
     AEMO zips contain CSV files with an uppercase .CSV extension —
     NEMOSIS handles this internally with [cC][sS][vV] globs and the
@@ -107,25 +107,25 @@ def test_keep_csv_true_by_default_keeps_fetched_csv(nemosis_fixture):
         table_name="DISPATCHPRICE",
         raw_data_location=str(nemosis_fixture),
         rebuild=True,
-        # no keep_csv kwarg — exercises the default
+        keep_csv=True,
     )
     csv_files = list(nemosis_fixture.glob("*DISPATCHPRICE*.[Cc][Ss][Vv]"))
-    assert csv_files, "default keep_csv=True should retain the fetched CSV"
+    assert csv_files, "keep_csv=True should retain the fetched CSV"
 
 
-def test_keep_csv_false_removes_fetched_csv(nemosis_fixture):
-    """Mirror of the above with the override — verifies the opt-out
-    path still works (the source-side delete in _dynamic_data_fetch_loop
-    fires only when keep_csv is False)."""
+def test_keep_csv_default_is_false(nemosis_fixture):
+    """Default behaviour — keep_csv=False removes the extracted CSV
+    after the typed feather is written, leaving only the feather in
+    the cache. Users who want raw retention opt in via keep_csv=True."""
     cache_compiler(
         start_time=START, end_time=END,
         table_name="DISPATCHPRICE",
         raw_data_location=str(nemosis_fixture),
         rebuild=True,
-        keep_csv=False,
+        # no keep_csv kwarg — exercises the default
     )
     csv_files = list(nemosis_fixture.glob("*DISPATCHPRICE*.[Cc][Ss][Vv]"))
-    assert not csv_files, "keep_csv=False should remove the fetched CSV"
+    assert not csv_files, f"default keep_csv=False should remove CSV; found: {csv_files}"
 
 
 def test_existing_feather_means_no_csv_is_fetched(nemosis_fixture):
@@ -155,6 +155,77 @@ def test_existing_feather_means_no_csv_is_fetched(nemosis_fixture):
     )
 
 
+def test_keep_zip_default_is_false(nemosis_fixture):
+    """Default behaviour — keep_zip=False removes the downloaded zip
+    after extracting the CSV. Lean cache; only the typed feather (and
+    optional CSV if keep_csv=True) remains."""
+    cache_compiler(
+        start_time=START, end_time=END,
+        table_name="DISPATCHPRICE",
+        raw_data_location=str(nemosis_fixture),
+        rebuild=True,
+        # no keep_zip kwarg — exercises the default
+    )
+    zip_files = list(nemosis_fixture.glob("*.zip"))
+    assert not zip_files, f"default keep_zip=False should remove zips; found: {zip_files}"
+
+
+def test_keep_zip_true_retains_zip(nemosis_fixture):
+    """keep_zip=True is the opt-in for #56's slow-internet use case —
+    the AEMO archive zip stays on disk after extraction so subsequent
+    runs can re-extract without re-downloading."""
+    cache_compiler(
+        start_time=START, end_time=END,
+        table_name="DISPATCHPRICE",
+        raw_data_location=str(nemosis_fixture),
+        rebuild=True,
+        keep_zip=True,
+    )
+    zip_files = list(nemosis_fixture.glob("*.zip"))
+    assert zip_files, "keep_zip=True should retain the downloaded zip"
+
+
+def test_cached_zip_extracts_without_network(nemosis_fixture, monkeypatch):
+    """The #56 benefit in action: with keep_zip=True on a first call,
+    a subsequent call that needs the same CSV but finds the feather
+    missing should re-extract from the cached zip locally without
+    hitting nemweb. Proves it by breaking the AEMO URL after the first
+    call — if the second call tries to fetch, it would 404 against the
+    bad URL."""
+    # First call — populate cache with zip retained
+    cache_compiler(
+        start_time=START, end_time=END,
+        table_name="DISPATCHPRICE",
+        raw_data_location=str(nemosis_fixture),
+        rebuild=True,
+        keep_zip=True,
+    )
+    feather_files = list(nemosis_fixture.glob("*.feather"))
+    zip_files = list(nemosis_fixture.glob("*.zip"))
+    assert feather_files and zip_files
+
+    # Delete the feather so the loop has to call _download_data again,
+    # but leave the zip in place so the lower-level download_to_path
+    # short-circuits on the cached file.
+    for f in feather_files:
+        f.unlink()
+
+    # Point the URL at a dead address — any actual network call would fail.
+    monkeypatch.setattr(defaults, "aemo_mms_url", "http://127.0.0.1:1/dead/{}/{}/{}/{}.zip")
+
+    # Should succeed using the cached zip, no network required.
+    cache_compiler(
+        start_time=START, end_time=END,
+        table_name="DISPATCHPRICE",
+        raw_data_location=str(nemosis_fixture),
+        rebuild=True,
+        keep_zip=True,
+    )
+
+    # Feather rebuilt from the cached zip
+    assert list(nemosis_fixture.glob("*.feather")), "cache should be rebuilt from cached zip"
+
+
 @pytest.mark.parametrize("fformat", ["feather", "parquet"])
 def test_write_to_format_cleans_up_partial_file_on_failure(tmp_path, monkeypatch, fformat):
     """A mid-write failure (e.g. disk full) used to leave a partial,
diff --git a/tests/end_to_end_table_tests/test_fformat_csv.py b/tests/end_to_end_table_tests/test_fformat_csv.py
index d99d21a..3f0825a 100644
--- a/tests/end_to_end_table_tests/test_fformat_csv.py
+++ b/tests/end_to_end_table_tests/test_fformat_csv.py
@@ -28,11 +28,16 @@ def test_csv_fformat_round_trip(nemosis_fixture):
     assert not all(data.dtypes == "object")
 
 
-def test_keep_csv_false_leaves_cache_empty(nemosis_fixture):
+def test_keep_csv_false_removes_csv(nemosis_fixture):
     """With `fformat="csv"` and `keep_csv=False`, the raw CSV is fetched,
-    used to produce the return frame, and then deleted — the cache dir
-    should be empty afterwards. This is the opt-out path for users who
-    don't want NEMOSIS accumulating files on disk."""
+    used to produce the return frame, and then deleted.
+
+    Note: the downloaded .zip is retained by default (see #56 — Matt's
+    use case is slow internet, so caching the compressed archive
+    avoids re-downloading on subsequent runs). Users who want a truly
+    empty cache can pass keep_zip=False — covered by
+    test_keep_zip_false_removes_zip in tests/test_downloader.py.
+    """
     dynamic_data_compiler(
         start_time="2018/05/01 00:00:00",
         end_time="2018/05/01 00:30:00",
@@ -42,4 +47,5 @@ def test_keep_csv_false_leaves_cache_empty(nemosis_fixture):
         keep_csv=False,
         select_columns=["SETTLEMENTDATE", "REGIONID", "RRP"],
     )
-    assert list(nemosis_fixture.iterdir()) == []
+    csv_files = list(nemosis_fixture.glob("*.[Cc][Ss][Vv]"))
+    assert csv_files == [], f"keep_csv=False should remove the CSV; found: {csv_files}"
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 54b4466..8692989 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -51,10 +51,84 @@ def test_download_csv_raises_on_404(aemo_mock_server):
             )
 
 
-def test_download_xl_raises_on_404(aemo_mock_server):
+def test_download_xlsx_raises_on_404(aemo_mock_server):
     with tempfile.TemporaryDirectory() as tmp:
         with pytest.raises(requests.HTTPError):
-            downloader.download_xl(
+            downloader.download_xlsx(
                 f"{aemo_mock_server}/does/not/exist.xlsx",
                 f"{tmp}/out.xlsx",
             )
+
+
+# ---------------------------------------------------------------------------
+# Streaming helpers: download_to_path / download_to_dir idempotency
+#
+# The (path, downloaded) return signal is what keeps keep_zip=False safe for
+# concurrent same-cache use: cleanup only fires for zips THIS call actually
+# wrote to disk; pre-existing zips from another process are reported as
+# `downloaded=False` and left alone.
+# ---------------------------------------------------------------------------
+
+def test_download_to_path_returns_true_on_first_call(aemo_mock_server, tmp_path):
+    target = tmp_path / "out.csv"
+    url = f"{aemo_mock_server}/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv"
+
+    downloaded = downloader.download_to_path(url, str(target))
+
+    assert downloaded is True
+    assert target.is_file()
+    assert target.stat().st_size > 0
+
+
+def test_download_to_path_returns_false_when_file_exists(aemo_mock_server, tmp_path):
+    """Idempotency signal: a second call with the destination already
+    present must report downloaded=False and skip the network. This is
+    the contract keep_zip=False relies on to avoid deleting another
+    process's pre-existing zip."""
+    target = tmp_path / "out.csv"
+    target.write_bytes(b"pre-existing contents - pretend another process wrote this")
+    original_bytes = target.read_bytes()
+
+    url = f"{aemo_mock_server}/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv"
+
+    downloaded = downloader.download_to_path(url, str(target))
+
+    assert downloaded is False
+    assert target.read_bytes() == original_bytes, "existing file must not be overwritten"
+
+
+def test_download_to_path_force_redo_overrides_existing(aemo_mock_server, tmp_path):
+    """force_redo=True bypasses the idempotency check and re-downloads
+    even if the destination already exists. Useful when the user knows
+    the cached file is stale."""
+    target = tmp_path / "out.csv"
+    target.write_bytes(b"stale contents")
+
+    url = f"{aemo_mock_server}/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv"
+
+    downloaded = downloader.download_to_path(url, str(target), force_redo=True)
+
+    assert downloaded is True
+    assert target.read_bytes() != b"stale contents", "force_redo=True should overwrite"
+
+
+def test_download_to_dir_forwards_force_redo(aemo_mock_server, tmp_path):
+    """Regression test for the PR67 bug where download_to_dir accepted
+    force_redo but never forwarded it to download_to_path. With the bug,
+    force_redo=True would silently no-op."""
+    url = f"{aemo_mock_server}/-/media/files/electricity/nem/settlements_and_payments/settlements/auction-reports/archive/ancillary-services-market-causer-pays-variables-file.csv"
+
+    # First call establishes the file
+    path, downloaded_first = downloader.download_to_dir(url, str(tmp_path))
+    assert downloaded_first is True
+
+    # Second call without force_redo: should short-circuit
+    _, downloaded_second = downloader.download_to_dir(url, str(tmp_path))
+    assert downloaded_second is False
+
+    # Third call WITH force_redo: must re-download (bug regression check)
+    _, downloaded_third = downloader.download_to_dir(url, str(tmp_path), force_redo=True)
+    assert downloaded_third is True, (
+        "force_redo=True should re-download; if this fails, download_to_dir "
+        "may have stopped forwarding force_redo to download_to_path"
+    )