diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 80ba160..e58270a 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,12 +7,33 @@ on:
     branches: [main]
 
 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.11"
+          enable-cache: true
+
+      - name: Install dev dependencies
+        run: uv sync --group dev
+
+      - name: Ruff check
+        run: uv run ruff check src/ tests/
+
+      - name: Ruff format check
+        run: uv run ruff format --check src/ tests/
+
   unit-tests:
     runs-on: ubuntu-latest
+    needs: lint
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
       - uses: actions/checkout@v4
@@ -21,6 +42,7 @@ jobs:
         uses: astral-sh/setup-uv@v5
         with:
           python-version: ${{ matrix.python-version }}
+          enable-cache: true
 
       - name: Install dependencies
         run: uv sync --group dev
@@ -66,6 +88,7 @@ jobs:
         uses: astral-sh/setup-uv@v5
         with:
           python-version: "3.11"
+          enable-cache: true
 
       - name: Install dependencies (${{ matrix.engine }})
         run: uv sync --group dev ${{ matrix.extras_flags }}
diff --git a/.gitignore b/.gitignore
index b96c6c8..6b3bc8a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,6 @@ __lakebench_cli_cache__/
 # Optional: Docs builds
 site/
 docs/_build/
+
+# Personal scratch / scratchpads (workspace-specific drivers, demo captures)
+scratch/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..b9de751
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.9
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-merge-conflict
+      - id: check-added-large-files
+        args: [--maxkb=500]
diff --git a/pyproject.toml b/pyproject.toml
index ab6992d..e4d8583 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,12 +7,11 @@ authors = [
 license = {file = "LICENSE"}
 description = "A multi-modal Python library for benchmarking Azure lakehouse engines and ELT scenarios, supporting both industry-standard and novel benchmarks."
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "License :: OSI Approved :: MIT License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -25,19 +24,19 @@ dependencies = [
     "numpy>=1.24.4",
     "sqlglot==26.30.0",
     "fsspec==2025.2.0",
-    "tenacity>=8.2.3,<9; python_version < '3.9'",
-    "tenacity==9.1.2; python_version >= '3.9'"
+    "pyarrow>=15.0.0",
+    "tenacity==9.1.2",
 ]
 
 [project.optional-dependencies]
-duckdb = ["duckdb==1.4.4; python_version >= '3.9'", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0; python_version >= '3.9'"]
-polars = ["polars==1.38.1; python_version >= '3.10'", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0; python_version >= '3.9'"]
-daft = ["daft==0.7.3; python_version >= '3.10'", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0; python_version >= '3.10'"]
-tpcds_datagen = ["duckdb==1.4.4; python_version >= '3.9'", "pyarrow>=15.0.0; python_version >= '3.9'"]
+duckdb = ["duckdb==1.4.4", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0"]
+polars = ["polars==1.38.1; python_version >= '3.10'", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0"]
+daft = ["daft==0.7.3; python_version >= '3.10'", "deltalake==1.3.3; python_version >= '3.10'", "pyarrow>=15.0.0"]
+tpcds_datagen = ["duckdb==1.4.4", "pyarrow>=15.0.0"]
 tpch_datagen = ["tpchgen-cli>=2.0.1"]
 sparkmeasure = ["sparkmeasure==0.24.0"]
-spark = ["pyspark>=3.5.0,<4.0.0; python_version >= '3.9'", "delta-spark>=3.2.0,<4.0.0; python_version >= '3.9'", "pyarrow>=15.0.0; python_version >= '3.9'"]
-sail = ["pysail>=0.5.2; python_version >= '3.10'", "pyspark[connect]>=4.0.0; python_version >= '3.9'", "deltalake>=1.2.1; python_version >= '3.9'", "pyarrow>=15.0.0; python_version >= '3.9'"]
+spark = ["pyspark>=3.5.0,<4.0.0", "delta-spark>=3.2.0,<4.0.0", "pyarrow>=15.0.0"]
+sail = ["pysail>=0.5.2; python_version >= '3.10'", "pyspark[connect]>=4.0.0", "deltalake>=1.2.1", "pyarrow>=15.0.0"]
 
 [project.urls]
 github = "https://github.com/mwc360/LakeBench"
@@ -54,8 +53,49 @@ packages = ["src/lakebench"]
 dev = [
     "pytest>=7.0.0",
     "pytest-cov>=4.0.0",
+    "ruff>=0.6.0",
+    "pre-commit>=3.5.0",
 ]
 
+[tool.ruff]
+line-length = 120
+target-version = "py39"
+src = ["src", "tests"]
+extend-exclude = [
+    ".venv",
+    "metastore_db",
+    "src/lakebench/benchmarks/*/resources",
+]
+
+[tool.ruff.lint]
+# Conservative starter set — formatting + obvious bugs only.
+# Expand later (UP, B, SIM, ANN) once the codebase is clean.
+select = [
+    "E",   # pycodestyle errors
+    "F",   # pyflakes
+    "I",   # isort
+    "W",   # pycodestyle warnings
+]
+ignore = [
+    "E501",  # line-too-long (line-length is advisory; many SQL strings are wide)
+    "E731",  # lambda assignments (used intentionally in a few places)
+    "E741",  # ambiguous variable name
+]
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]  # re-exports
+"tests/**" = ["F401", "F811", "F841", "E712"]  # fixtures + assertion patterns
+"scripts/**" = ["E402", "F401", "F841"]  # ad-hoc scripts
+# Trailing whitespace inside multi-line SQL string literals is intentional/
+# harmless and NOT touched by `ruff format` (it only formats code, not string
+# contents). Keep ignoring W291/W293 globally so the embedded-SQL engines pass.
+"*.py" = ["W291", "W293"]
+# Engine-specific DataFrame DSLs intentionally use `col == True` to build expressions,
+# and assign `result =`/`df =` to force lazy evaluation.
+"src/lakebench/benchmarks/tpcdi/engine_impl/*.py" = ["E712", "F841"]
+"src/lakebench/benchmarks/elt_bench/engine_impl/*.py" = ["F841"]
+"src/lakebench/engines/*.py" = ["F841"]
+
 [tool.uv]
 conflicts = [
     [{ extra = "spark" }, { extra = "sail" }],
diff --git a/src/lakebench/benchmarks/__init__.py b/src/lakebench/benchmarks/__init__.py
index 5642ab2..c6ceb1c 100644
--- a/src/lakebench/benchmarks/__init__.py
+++ b/src/lakebench/benchmarks/__init__.py
@@ -1,5 +1,5 @@
+from .base import BaseBenchmark
 from .clickbench import ClickBench
+from .elt_bench import ELTBench
 from .tpcds import TPCDS
 from .tpch import TPCH
-from .elt_bench import ELTBench
-from .base import BaseBenchmark
\ No newline at end of file
diff --git a/src/lakebench/benchmarks/_load_and_query/__init__.py b/src/lakebench/benchmarks/_load_and_query/__init__.py
index ec2ef93..2e03b50 100644
--- a/src/lakebench/benchmarks/_load_and_query/__init__.py
+++ b/src/lakebench/benchmarks/_load_and_query/__init__.py
@@ -1 +1 @@
-from ._load_and_query import _LoadAndQuery
\ No newline at end of file
+from ._load_and_query import _LoadAndQuery
diff --git a/src/lakebench/benchmarks/_load_and_query/_load_and_query.py b/src/lakebench/benchmarks/_load_and_query/_load_and_query.py
index 40e492e..dbc5a61 100644
--- a/src/lakebench/benchmarks/_load_and_query/_load_and_query.py
+++ b/src/lakebench/benchmarks/_load_and_query/_load_and_query.py
@@ -1,79 +1,212 @@
+import importlib.resources
+import inspect
+import logging
+import posixpath
 from typing import List, Optional
-from ..base import BaseBenchmark
-from ...utils.query_utils import transpile_and_qualify_query, get_table_name_from_ddl
 
 from ...engines.base import BaseEngine
-from ...engines.spark import Spark
-from ...engines.duckdb import DuckDB
 from ...engines.daft import Daft
+from ...engines.duckdb import DuckDB
+from ...engines.livy import Livy
 from ...engines.polars import Polars
 from ...engines.sail import Sail
+from ...engines.spark import Spark
+from ...utils.query_utils import (
+    apply_column_remap,
+    build_column_remap,
+    get_table_name_from_ddl,
+    parse_ddl_columns,
+    transpile_and_qualify_query,
+)
+from ..base import BaseBenchmark
+
+logger = logging.getLogger(__name__)
 
-import importlib.resources
-import inspect
-import posixpath
 
 class _LoadAndQuery(BaseBenchmark):
     """
-    Base class for benchmarks that only have a simple Load and Query phase (TPC-H, TPC-DS, ClickBench). 
-    PLEASE DO NOT INSTANTIATE THIS CLASS DIRECTLY. Use the subclasses instead. 
+    Base class for benchmarks that only have a simple Load and Query phase (TPC-H, TPC-DS, ClickBench).
+    PLEASE DO NOT INSTANTIATE THIS CLASS DIRECTLY. Use the subclasses instead.
     """
+
     BENCHMARK_IMPL_REGISTRY = {
         Spark: None,
         DuckDB: None,
         Daft: None,
         Polars: None,
         Sail: None,
+        Livy: None,
     }
-    MODE_REGISTRY = ['load', 'query', 'power_test', 'load_and_query']
-    BENCHMARK_NAME = ''
+    MODE_REGISTRY = ["load", "query", "power_test", "load_and_query"]
+    BENCHMARK_NAME = ""
     TABLE_REGISTRY = [
-        'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales',
-        'customer', 'customer_address', 'customer_demographics', 'date_dim',
-        'household_demographics', 'income_band', 'inventory', 'item',
-        'promotion', 'reason', 'ship_mode', 'store', 'store_returns',
-        'store_sales', 'time_dim', 'warehouse', 'web_page', 'web_returns',
-        'web_sales', 'web_site'
+        "call_center",
+        "catalog_page",
+        "catalog_returns",
+        "catalog_sales",
+        "customer",
+        "customer_address",
+        "customer_demographics",
+        "date_dim",
+        "household_demographics",
+        "income_band",
+        "inventory",
+        "item",
+        "promotion",
+        "reason",
+        "ship_mode",
+        "store",
+        "store_returns",
+        "store_sales",
+        "time_dim",
+        "warehouse",
+        "web_page",
+        "web_returns",
+        "web_sales",
+        "web_site",
     ]
     QUERY_REGISTRY = [
-        'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
-        'q11', 'q12', 'q13', 'q14a', 'q14b', 'q15', 'q16', 'q17', 'q18', 'q19', 'q20',
-        'q21', 'q22', 'q23a', 'q23b', 'q24a', 'q24b', 'q25', 'q26', 'q27', 'q28', 'q29', 'q30',
-        'q31', 'q32', 'q33', 'q34', 'q35', 'q36', 'q37', 'q38', 'q39a', 'q39b', 'q40',
-        'q41', 'q42', 'q43', 'q44', 'q45', 'q46', 'q47', 'q48', 'q49', 'q50',
-        'q51', 'q52', 'q53', 'q54', 'q55', 'q56', 'q57', 'q58', 'q59', 'q60',
-        'q61', 'q62', 'q63', 'q64', 'q65', 'q66', 'q67', 'q68', 'q69', 'q70',
-        'q71', 'q72', 'q73', 'q74', 'q75', 'q76', 'q77', 'q78', 'q79', 'q80',
-        'q81', 'q82', 'q83', 'q84', 'q85', 'q86', 'q87', 'q88', 'q89', 'q90',
-        'q91', 'q92', 'q93', 'q94', 'q95', 'q96', 'q97', 'q98', 'q99'
+        "q1",
+        "q2",
+        "q3",
+        "q4",
+        "q5",
+        "q6",
+        "q7",
+        "q8",
+        "q9",
+        "q10",
+        "q11",
+        "q12",
+        "q13",
+        "q14a",
+        "q14b",
+        "q15",
+        "q16",
+        "q17",
+        "q18",
+        "q19",
+        "q20",
+        "q21",
+        "q22",
+        "q23a",
+        "q23b",
+        "q24a",
+        "q24b",
+        "q25",
+        "q26",
+        "q27",
+        "q28",
+        "q29",
+        "q30",
+        "q31",
+        "q32",
+        "q33",
+        "q34",
+        "q35",
+        "q36",
+        "q37",
+        "q38",
+        "q39a",
+        "q39b",
+        "q40",
+        "q41",
+        "q42",
+        "q43",
+        "q44",
+        "q45",
+        "q46",
+        "q47",
+        "q48",
+        "q49",
+        "q50",
+        "q51",
+        "q52",
+        "q53",
+        "q54",
+        "q55",
+        "q56",
+        "q57",
+        "q58",
+        "q59",
+        "q60",
+        "q61",
+        "q62",
+        "q63",
+        "q64",
+        "q65",
+        "q66",
+        "q67",
+        "q68",
+        "q69",
+        "q70",
+        "q71",
+        "q72",
+        "q73",
+        "q74",
+        "q75",
+        "q76",
+        "q77",
+        "q78",
+        "q79",
+        "q80",
+        "q81",
+        "q82",
+        "q83",
+        "q84",
+        "q85",
+        "q86",
+        "q87",
+        "q88",
+        "q89",
+        "q90",
+        "q91",
+        "q92",
+        "q93",
+        "q94",
+        "q95",
+        "q96",
+        "q97",
+        "q98",
+        "q99",
     ]
-    DDL_FILE_NAME = ''
-    VERSION = ''
+    DDL_FILE_NAME = ""
+    VERSION = ""
 
     def __init__(
-            self, 
-            engine: BaseEngine, 
-            scenario_name: str,
-            scale_factor: Optional[int] = None,
-            query_list: Optional[List[str]] = None,
-            input_parquet_folder_uri: Optional[str] = None,
-            result_table_uri: Optional[str] = None,
-            save_results: bool = False,
-            run_id: Optional[str] = None
-            ):
+        self,
+        engine: BaseEngine,
+        scenario_name: str,
+        scale_factor: Optional[int] = None,
+        query_list: Optional[List[str]] = None,
+        input_parquet_folder_uri: Optional[str] = None,
+        result_table_uri: Optional[str] = None,
+        save_results: bool = False,
+        run_id: Optional[str] = None,
+        auto_remap_columns: bool = False,
+    ):
         self.scale_factor = scale_factor
+        # When True, the query phase introspects actual table columns and
+        # rewrites queries (logging a warning) to match columns that differ from the
+        # benchmark spec (e.g. spark-sql-perf's `c_last_review_date` typo).
+        # OFF by default: silently rewriting columns undermines benchmark
+        # reproducibility and can mask real data-prep bugs. Opt in only when
+        # you knowingly run against non-spec data you can't regenerate.
+        self.auto_remap_columns = auto_remap_columns
         super().__init__(engine, scenario_name, input_parquet_folder_uri, result_table_uri, save_results, run_id)
         if query_list is not None:
             expanded_query_list = []
             for query in query_list:
-                if query == '*':
+                if query == "*":
                     expanded_query_list.extend(self.QUERY_REGISTRY)  # Replace '*' with all queries
                 else:
                     expanded_query_list.append(query)
             query_set = set(expanded_query_list)
             if not query_set.issubset(self.QUERY_REGISTRY):
                 unsupported_queries = query_set - set(self.QUERY_REGISTRY)
-                raise ValueError(f"Query list contains unsupported queries: {unsupported_queries}. Supported queries: {self.QUERY_REGISTRY}.")
+                raise ValueError(
+                    f"Query list contains unsupported queries: {unsupported_queries}. Supported queries: {self.QUERY_REGISTRY}."
+                )
             self.query_list = expanded_query_list
         else:
             self.query_list = self.QUERY_REGISTRY
@@ -95,7 +228,7 @@ def __init__(
 
         self.benchmark_impl = self.benchmark_impl_class(self.engine) if self.benchmark_impl_class is not None else None
 
-    def run(self, mode: str = 'power_test'):
+    def run(self, mode: str = "power_test"):
         """
         Executes a specific test mode based on the provided mode string.
 
@@ -112,17 +245,17 @@ def run(self, mode: str = 'power_test'):
         -----
         The `MODE_REGISTRY` attribute contains the list of supported modes.
         """
-        self.mode = 'load_and_query' if mode in ('power_test', 'load_and_query') else mode
+        self.mode = "load_and_query" if mode in ("power_test", "load_and_query") else mode
 
-        if mode == 'load':
+        if mode == "load":
             self._run_load_test()
-        elif mode == 'query':
+        elif mode == "query":
             self._run_query_test()
-        elif mode in ('power_test', 'load_and_query'):
+        elif mode in ("power_test", "load_and_query"):
             self._run_power_test()
         else:
             raise ValueError(f"Unknown mode '{mode}'. Supported modes: {self.MODE_REGISTRY}.")
-    
+
     def _prepare_schema(self):
         """
         Prepares the database schema for the benchmark.
@@ -141,56 +274,26 @@ def _prepare_schema(self):
         self.engine.create_schema_if_not_exists(drop_before_create=True)
         self.engine.create_external_location(self.input_parquet_folder_uri)
 
-        engine_class_name = self.engine.__class__.__name__.lower()
-        parent_class_name = self.engine.__class__.__bases__[0].__name__.lower()
-        benchmark_name = self.__class__.__name__.lower()
-        engine_root_lib_name = self.engine.__class__.__module__.split('.')[0]
-        from_dialect = self.engine.SQLGLOT_DIALECT
-
-        try:
-            # Try to load engine-specific query first
-            with importlib.resources.path(
-                f"{engine_root_lib_name}.benchmarks.{benchmark_name}.resources.ddl.{engine_class_name}", 
-                self.DDL_FILE_NAME
-            ) as ddl_path:
-                with open(ddl_path, 'r') as ddl_file:
-                    ddl = ddl_file.read()                
-        except (ModuleNotFoundError, FileNotFoundError):
-            # Try parent engine class name if engine-specific fails
-            try:
-                with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.{parent_class_name}", 
-                    self.DDL_FILE_NAME
-                ) as ddl_path:
-                    with open(ddl_path, 'r') as ddl_file:
-                        ddl = ddl_file.read()
-            except (ModuleNotFoundError, FileNotFoundError):
-                # Fall back to canonical query
-                with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.canonical", 
-                    self.DDL_FILE_NAME
-                ) as ddl_path:
-                    with open(ddl_path, 'r') as ddl_file:
-                        ddl = ddl_file.read()
-                from_dialect = 'spark'
-            
-        statements = [s for s in ddl.split(';') if len(s) > 7]
+        ddl, used_canonical = self._load_resource_with_fallback("ddl", self.DDL_FILE_NAME)
+        from_dialect = "spark" if used_canonical else self.engine.SQLGLOT_DIALECT
+
+        statements = [s for s in ddl.split(";") if len(s) > 7]
         for statement in statements:
             prepped_ddl = transpile_and_qualify_query(
-                query=statement, 
-                from_dialect=from_dialect, 
-                to_dialect=self.engine.SQLGLOT_DIALECT, 
-                catalog=getattr(self.engine, 'catalog_name', None),
-                schema=getattr(self.engine, 'schema_name', None)
+                query=statement,
+                from_dialect=from_dialect,
+                to_dialect=self.engine.SQLGLOT_DIALECT,
+                catalog=getattr(self.engine, "catalog_name", None),
+                schema=getattr(self.engine, "schema_name", None),
             )
             table_name = get_table_name_from_ddl(prepped_ddl)
 
             self.engine._create_empty_table(table_name=table_name, ddl=prepped_ddl)
-            
+
     def _run_load_test(self):
         """
-        Executes the load test by loading data from Parquet files into Delta tables 
-        for all tables registered in the `TABLE_REGISTRY`. This method also measures 
+        Executes the load test by loading data from Parquet files into Delta tables
+        for all tables registered in the `TABLE_REGISTRY`. This method also measures
         the time taken for each table load operation and records the results.
 
         Parameters
@@ -199,15 +302,15 @@ def _run_load_test(self):
 
         Notes
         -----
-        - If the engine is an instance of `Spark`, the schema is prepared before 
+        - If the engine is an instance of `Spark`, the schema is prepared before
           loading the data.
-        - The method uses a timer to measure the duration of the load operation 
+        - The method uses a timer to measure the duration of the load operation
           for each table.
         - Results are posted after all tables have been processed.
         """
         # set the mode if the module is being called directly
-        if inspect.currentframe().f_back.f_code.co_name not in ('run', '_run_power_test'):
-            self.mode = 'load'
+        if inspect.currentframe().f_back.f_code.co_name not in ("run", "_run_power_test"):
+            self.mode = "load"
 
         if self.engine.SUPPORTS_SCHEMA_PREP:
             self._prepare_schema()
@@ -217,17 +320,17 @@ def _run_load_test(self):
                     # If a specific benchmark implementation is defined, use it to load the table
                     tc.execution_telemetry = self.benchmark_impl.load_parquet_to_delta(
                         parquet_folder_uri=self.input_parquet_folder_uri,
-                        table_name=table_name, 
+                        table_name=table_name,
                         table_is_precreated=True,
-                        context_decorator=tc.context_decorator
+                        context_decorator=tc.context_decorator,
                     )
                 else:
                     # Otherwise, use the generic load method
                     tc.execution_telemetry = self.engine.load_parquet_to_delta(
-                        parquet_folder_uri=posixpath.join(self.input_parquet_folder_uri, f"{table_name}/"), 
+                        parquet_folder_uri=posixpath.join(self.input_parquet_folder_uri, f"{table_name}/"),
                         table_name=table_name,
                         table_is_precreated=True,
-                        context_decorator=tc.context_decorator
+                        context_decorator=tc.context_decorator,
                     )
         self.post_results()
 
@@ -236,26 +339,52 @@ def _run_query_test(self):
         Executes a series of SQL queries defined in the `query_list` attribute.
         """
         # set the mode if the module is being called directly
-        if inspect.currentframe().f_back.f_code.co_name not in ('run', '_run_power_test'):
-            self.mode = 'query'
+        if inspect.currentframe().f_back.f_code.co_name not in ("run", "_run_power_test"):
+            self.mode = "query"
 
         if isinstance(self.engine, (DuckDB, Daft, Polars, Sail)):
             for table_name in self.TABLE_REGISTRY:
                 self.engine.register_table(table_name)
+
+        # Auto-detect column name mismatches between DDL spec and actual data.
+        # Disabled unless the caller explicitly opts in (auto_remap_columns):
+        # silently renaming columns at query time hurts reproducibility and can
+        # hide real data bugs (see the comment in __init__).
+        self._column_remap = {}
+        if self.auto_remap_columns:
+            try:
+                actual_schemas = {}
+                for table_name in self.TABLE_REGISTRY:
+                    cols = self.engine.get_table_columns(table_name)
+                    if cols:
+                        actual_schemas[table_name] = [c.lower() for c in cols]
+                if actual_schemas:
+                    ddl_columns = self._get_ddl_columns()
+                    self._column_remap = build_column_remap(ddl_columns, actual_schemas)
+                    if self._column_remap:
+                        logger.warning(
+                            "auto_remap_columns is ON: rewriting %d column(s) because the "
+                            "loaded data differs from the benchmark spec. This changes the "
+                            "queries actually executed and may affect comparability. "
+                            "Remap: %s",
+                            len(self._column_remap),
+                            self._column_remap,
+                        )
+            except Exception as e:
+                logger.warning("Schema introspection skipped: %s", e)
+
         for query_name in self.query_list:
             prepped_query = self._return_query_definition(query_name)
             with self.timer(phase="Query", test_item=query_name, engine=self.engine) as tc:
                 if self.benchmark_impl is not None:
                     # If a specific benchmark implementation is defined, use it to perform the query
                     tc.execution_telemetry = self.benchmark_impl.execute_sql_query(
-                        prepped_query,
-                        context_decorator=tc.context_decorator
+                        prepped_query, context_decorator=tc.context_decorator
                     )
                 else:
                     # Otherwise, use the generic query method
                     tc.execution_telemetry = self.engine.execute_sql_query(
-                        prepped_query,
-                        context_decorator=tc.context_decorator
+                        prepped_query, context_decorator=tc.context_decorator
                     )
         self.post_results()
 
@@ -267,11 +396,25 @@ def _run_power_test(self):
         1. Load phase: Loads data into the target system.
         2. Query phase: Executes configured SQL queries to evaluate performance.
         """
-        self.mode = 'load_and_query'
+        self.mode = "load_and_query"
 
         self._run_load_test()
         self._run_query_test()
 
+    def _get_ddl_columns(self) -> dict:
+        """
+        Parse the canonical DDL file and return {table_name: [col1, col2, ...]} with lowercased names.
+        Used for detecting column name mismatches between spec and actual data.
+        """
+        benchmark_name = self.__class__.__name__.lower()
+        # Always use canonical DDL as the reference spec; read it as UTF-8 rather than the locale default
+        with importlib.resources.path(
+            f"lakebench.benchmarks.{benchmark_name}.resources.ddl.canonical", self.DDL_FILE_NAME
+        ) as ddl_path:
+            with open(ddl_path, "r", encoding="utf-8") as f:
+                ddl_text = f.read()
+        return parse_ddl_columns(ddl_text)
+
     def _return_query_definition(self, query_name: str) -> str:
         """
         Returns the SQL definition for a given query name.
@@ -286,44 +429,19 @@ def _return_query_definition(self, query_name: str) -> str:
         str
             The SQL definition for the specified query.
         """
-        engine_class_name = self.engine.__class__.__name__.lower()
-        parent_class_name = self.engine.__class__.__bases__[0].__name__.lower()
-        benchmark_name = self.__class__.__name__.lower()
-        engine_root_lib_name = self.engine.__class__.__module__.split('.')[0]
-        from_dialect = self.engine.SQLGLOT_DIALECT
-
-        try:
-            # Try to load engine-specific query first
-            with importlib.resources.path(
-                f"{engine_root_lib_name}.benchmarks.{benchmark_name}.resources.queries.{engine_class_name}", 
-                f'{query_name}.sql'
-            ) as query_path:
-                with open(query_path, 'r') as query_file:
-                    query = query_file.read()                
-        except (ModuleNotFoundError, FileNotFoundError):
-            # Try parent engine class name if engine-specific fails
-            try:
-                with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.queries.{parent_class_name}", 
-                    f'{query_name}.sql'
-                ) as query_path:
-                    with open(query_path, 'r') as query_file:
-                        query = query_file.read()
-            except (ModuleNotFoundError, FileNotFoundError):
-                # Fall back to canonical query
-                with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.queries.canonical", 
-                    f'{query_name}.sql'
-                ) as query_path:
-                    with open(query_path, 'r') as query_file:
-                        query = query_file.read()
-                from_dialect = 'spark'
+        query, used_canonical = self._load_resource_with_fallback("queries", f"{query_name}.sql")
+        from_dialect = "spark" if used_canonical else self.engine.SQLGLOT_DIALECT
 
         prepped_query = transpile_and_qualify_query(
-            query=query, 
-            from_dialect=from_dialect, 
-            to_dialect=self.engine.SQLGLOT_DIALECT, 
-            catalog=getattr(self.engine, 'catalog_name', None),
-            schema=getattr(self.engine, 'schema_name', None)
+            query=query,
+            from_dialect=from_dialect,
+            to_dialect=self.engine.SQLGLOT_DIALECT,
+            catalog=getattr(self.engine, "catalog_name", None),
+            schema=getattr(self.engine, "schema_name", None),
         )
-        return prepped_query
\ No newline at end of file
+
+        # Apply column remapping if mismatches were detected
+        if getattr(self, "_column_remap", None):
+            prepped_query = apply_column_remap(prepped_query, self._column_remap, self.engine.SQLGLOT_DIALECT)
+
+        return prepped_query
diff --git a/src/lakebench/benchmarks/base.py b/src/lakebench/benchmarks/base.py
index e31c03b..7b67d34 100644
--- a/src/lakebench/benchmarks/base.py
+++ b/src/lakebench/benchmarks/base.py
@@ -1,10 +1,12 @@
-from abc import ABC, abstractmethod
-from typing import Dict, Type, Optional
 import uuid
+from abc import ABC, abstractmethod
 from datetime import datetime
-from ..utils.timer import timer
+from importlib.metadata import version
+from typing import Dict, Optional, Type
+
 from ..engines.base import BaseEngine
-from importlib.metadata import version, PackageNotFoundError
+from ..utils.timer import timer
+
 
 class BaseBenchmark(ABC):
     """
@@ -34,7 +36,7 @@ class rather than. If only shared methods are used, the dictionary value will be
         A timer object used to measure the duration of benchmark phases.
     results : list
         A list to store benchmark results.
-        
+
     Methods
     -------
     run()
@@ -43,70 +45,71 @@ class rather than. If only shared methods are used, the dictionary value will be
         Processes and saves benchmark results. If `save_results` is True, results are appended to a Delta table
         at the specified `result_table_uri`. Clears the timer results after processing.
     """
+
     BENCHMARK_IMPL_REGISTRY: Dict[Type[BaseEngine], Type] = {}
     RESULT_SCHEMA = [
-        ('run_id', 'STRING'),
-        ('run_datetime', 'TIMESTAMP'),
-        ('lakebench_version', 'STRING'),
-        ('engine', 'STRING'),
-        ('engine_version', 'STRING'),
-        ('benchmark', 'STRING'),
-        ('benchmark_version', 'STRING'),
-        ('mode', 'STRING'),
-        ('scale_factor', 'INT'),
-        ('scenario', 'STRING'),
-        ('total_cores', 'SMALLINT'),
-        ('compute_size', 'STRING'),
-        ('phase', 'STRING'),
-        ('test_item', 'STRING'),
-        ('start_datetime', 'TIMESTAMP'),
-        ('duration_ms', 'INT'),
-        ('estimated_retail_job_cost', 'DECIMAL(18,10)'),
-        ('iteration', 'TINYINT'),
-        ('success', 'BOOLEAN'),
-        ('error_message', 'STRING'),
-        ('engine_properties', 'MAP<STRING, STRING>'),      # Additional Platform configs/metadata
-        ('execution_telemetry', 'MAP<STRING, STRING>')    # Test-item execution details
+        ("run_id", "STRING"),
+        ("run_datetime", "TIMESTAMP"),
+        ("lakebench_version", "STRING"),
+        ("engine", "STRING"),
+        ("engine_version", "STRING"),
+        ("benchmark", "STRING"),
+        ("benchmark_version", "STRING"),
+        ("mode", "STRING"),
+        ("scale_factor", "INT"),
+        ("scenario", "STRING"),
+        ("total_cores", "SMALLINT"),
+        ("compute_size", "STRING"),
+        ("phase", "STRING"),
+        ("test_item", "STRING"),
+        ("start_datetime", "TIMESTAMP"),
+        ("duration_ms", "INT"),
+        ("estimated_retail_job_cost", "DECIMAL(18,10)"),
+        ("iteration", "TINYINT"),
+        ("success", "BOOLEAN"),
+        ("error_message", "STRING"),
+        ("engine_properties", "MAP<STRING, STRING>"),  # Additional Platform configs/metadata
+        ("execution_telemetry", "MAP<STRING, STRING>"),  # Test-item execution details
     ]
-    VERSION = ''
+    VERSION = ""
 
     def __init__(
-            self, 
-            engine: BaseEngine, 
-            scenario_name: str, 
-            input_parquet_folder_uri: Optional[str],
-            result_table_uri: Optional[str], 
-            save_results: bool = False, 
-            run_id: Optional[str] = None
-            ):
+        self,
+        engine: BaseEngine,
+        scenario_name: str,
+        input_parquet_folder_uri: Optional[str],
+        result_table_uri: Optional[str],
+        save_results: bool = False,
+        run_id: Optional[str] = None,
+    ):
         self.engine = engine
         self.scenario_name = scenario_name
         self.result_table_uri = result_table_uri
         self.save_results = save_results
 
-        if not engine.SUPPORTS_MOUNT_PATH and input_parquet_folder_uri[:1] == '/':
+        if not engine.SUPPORTS_MOUNT_PATH and (input_parquet_folder_uri or "").startswith("/"):  # URI may be None
             raise ValueError(
                 f"""Mount path is not supported for {type(engine).__name__} engine.
                 Please provide fully qualified uri for `input_parquet_folder_uri`."""
             )
 
         self.header_detail_dict = {
-            'run_id': run_id if run_id is not None else str(uuid.uuid1()),
-            'run_datetime': datetime.now(),
-            'lakebench_version': version('lakebench'),
-            'engine': type(engine).__name__,
-            'engine_version': self.engine.version,
-            'benchmark': self.__class__.__name__,
-            'benchmark_version': self.VERSION,
-            'scale_factor': getattr(self, 'scale_factor', None),
-            'scenario': scenario_name,
-            'total_cores': self.engine.get_total_cores(),
-            'compute_size': self.engine.get_compute_size()
+            "run_id": run_id if run_id is not None else str(uuid.uuid1()),
+            "run_datetime": datetime.now(),
+            "lakebench_version": version("lakebench"),
+            "engine": type(engine).__name__,
+            "engine_version": self.engine.version,
+            "benchmark": self.__class__.__name__,
+            "benchmark_version": self.VERSION,
+            "scale_factor": getattr(self, "scale_factor", None),
+            "scenario": scenario_name,
+            "total_cores": self.engine.get_total_cores(),
+            "compute_size": self.engine.get_compute_size(),
         }
         self.timer = timer
         self.timer.clear_results()
         self.results = []
-        self.mode : str = None
+        self.mode: Optional[str] = None
 
     @classmethod
     def register_engine(cls, engine_class: Type[BaseEngine], benchmark_impl: Optional[Type] = None):
@@ -129,20 +132,20 @@ def run(self):
     def post_results(self):
         """
         Processes and posts benchmark results, saving them to a specified location if save_results is True.
-        This method collects timing results from the benchmark execution, formats them into a 
-        structured array, and optionally saves the results to a Delta table. It also clears the timer 
+        This method collects timing results from the benchmark execution, formats them into a
+        structured array, and optionally saves the results to a Delta table. It also clears the timer
         instance after offloading results to the `self.results` attribute.
 
         Parameters
         ----------
         None
-        
+
         Notes
         -----
-        - If `save_results` is True, the results are appended to the Delta table specified by 
+        - If `save_results` is True, the results are appended to the Delta table specified by
           `result_table_uri` using the `engine.append_array_to_delta` method.
         - After processing, the results are stored in `self.results` and the timer results are cleared.
-        
+
         Examples
         --------
         >>> benchmark = Benchmark()
@@ -154,17 +157,17 @@ def post_results(self):
         result_array = [
             {
                 **self.header_detail_dict,
-                'mode': self.mode.lower() if self.mode else None,
-                'phase': phase,
-                'test_item': test_item,
-                'start_datetime': start_datetime,
-                'duration_ms': duration_ms,
-                'estimated_retail_job_cost': self.engine.get_job_cost(duration_ms), 
-                'iteration': iteration,
-                'success': success,
-                'error_message': error_message,
-                'engine_properties': self.engine.extended_engine_metadata,
-                'execution_telemetry': execution_telemetry
+                "mode": self.mode.lower() if self.mode else None,
+                "phase": phase,
+                "test_item": test_item,
+                "start_datetime": start_datetime,
+                "duration_ms": duration_ms,
+                "estimated_retail_job_cost": self.engine.get_job_cost(duration_ms),
+                "iteration": iteration,
+                "success": success,
+                "error_message": error_message,
+                "engine_properties": self.engine.extended_engine_metadata,
+                "execution_telemetry": execution_telemetry,
             }
             for phase, test_item, start_datetime, duration_ms, iteration, success, error_message, execution_telemetry in self.timer.results
         ]
diff --git a/src/lakebench/benchmarks/clickbench/__init__.py b/src/lakebench/benchmarks/clickbench/__init__.py
index bc0a31f..be09450 100644
--- a/src/lakebench/benchmarks/clickbench/__init__.py
+++ b/src/lakebench/benchmarks/clickbench/__init__.py
@@ -1 +1 @@
-from .clickbench import ClickBench
\ No newline at end of file
+from .clickbench import ClickBench
diff --git a/src/lakebench/benchmarks/clickbench/clickbench.py b/src/lakebench/benchmarks/clickbench/clickbench.py
index b2a8b01..4fc65c0 100644
--- a/src/lakebench/benchmarks/clickbench/clickbench.py
+++ b/src/lakebench/benchmarks/clickbench/clickbench.py
@@ -1,25 +1,26 @@
-from typing import Optional, List
-from .._load_and_query import _LoadAndQuery
+from typing import List, Optional
 
 from ...engines.base import BaseEngine
-from ...engines.spark import Spark
-from ...engines.duckdb import DuckDB
 from ...engines.daft import Daft
+from ...engines.duckdb import DuckDB
+from ...engines.livy import Livy
 from ...engines.polars import Polars
 from ...engines.sail import Sail
-
-from .engine_impl.spark import SparkClickBench
+from ...engines.spark import Spark
+from .._load_and_query import _LoadAndQuery
+from .engine_impl.daft import DaftClickBench
 from .engine_impl.duckdb import DuckDBClickBench
-from .engine_impl.sail import SailClickBench
 from .engine_impl.polars import PolarsClickBench
-from .engine_impl.daft import DaftClickBench
+from .engine_impl.sail import SailClickBench
+from .engine_impl.spark import SparkClickBench
+
 
 class ClickBench(_LoadAndQuery):
     """
     Class for running the ClickBench benchmark.
 
     This class provides functionality for running the ClickBench benchmark, including loading data,
-    executing queries, and performing power tests. Supported engines are listed in the 
+    executing queries, and performing power tests. Supported engines are listed in the
     `self.BENCHMARK_IMPL_REGISTRY` constant.
 
     Parameters
@@ -35,7 +36,7 @@ class ClickBench(_LoadAndQuery):
     result_table_uri : str, optional
         Table URI where results will be saved. Must be specified if `save_results` is True.
     save_results : bool
-        Whether to save the benchmark results. Results can also be accessed via the `self.results` 
+        Whether to save the benchmark results. Results can also be accessed via the `self.results`
         attribute after running the benchmark.
 
     Methods
@@ -53,42 +54,82 @@ class ClickBench(_LoadAndQuery):
     _run_power_test()
         Runs both the load and query tests.
     """
+
     BENCHMARK_IMPL_REGISTRY = {
         Spark: SparkClickBench,
         DuckDB: DuckDBClickBench,
         Sail: SailClickBench,
+        Livy: None,
         Polars: PolarsClickBench,
         Daft: DaftClickBench,
     }
-    BENCHMARK_NAME = 'ClickBench'
-    TABLE_REGISTRY = [
-        'hits'
-    ]
+    BENCHMARK_NAME = "ClickBench"
+    TABLE_REGISTRY = ["hits"]
     QUERY_REGISTRY = [
-        'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
-        'q11', 'q12', 'q13', 'q14', 'q15', 'q16', 'q17', 'q18', 'q19', 'q20',
-        'q21', 'q22', 'q23', 'q24', 'q25', 'q26', 'q27', 'q28', 'q29', 'q30',
-        'q31', 'q32', 'q33', 'q34', 'q35', 'q36', 'q37', 'q38', 'q39', 'q40',
-        'q41', 'q42', 'q43'
+        "q1",
+        "q2",
+        "q3",
+        "q4",
+        "q5",
+        "q6",
+        "q7",
+        "q8",
+        "q9",
+        "q10",
+        "q11",
+        "q12",
+        "q13",
+        "q14",
+        "q15",
+        "q16",
+        "q17",
+        "q18",
+        "q19",
+        "q20",
+        "q21",
+        "q22",
+        "q23",
+        "q24",
+        "q25",
+        "q26",
+        "q27",
+        "q28",
+        "q29",
+        "q30",
+        "q31",
+        "q32",
+        "q33",
+        "q34",
+        "q35",
+        "q36",
+        "q37",
+        "q38",
+        "q39",
+        "q40",
+        "q41",
+        "q42",
+        "q43",
     ]
-    DDL_FILE_NAME = 'ddl.sql'
-    VERSION = 'UNKNOWN'
+    DDL_FILE_NAME = "ddl.sql"
+    VERSION = "UNKNOWN"
 
     def __init__(
-            self, 
-            engine: BaseEngine, 
-            scenario_name: str,
-            query_list: Optional[List[str]] = None,
-            input_parquet_folder_uri: Optional[str] = None,
-            result_table_uri: Optional[str] = None,
-            save_results: bool = False
-        ):
+        self,
+        engine: BaseEngine,
+        scenario_name: str,
+        query_list: Optional[List[str]] = None,
+        input_parquet_folder_uri: Optional[str] = None,
+        result_table_uri: Optional[str] = None,
+        save_results: bool = False,
+        auto_remap_columns: bool = False,
+    ):
         super().__init__(
-            engine=engine, 
+            engine=engine,
             scenario_name=scenario_name,
             scale_factor=None,
             query_list=query_list,
             input_parquet_folder_uri=input_parquet_folder_uri,
             result_table_uri=result_table_uri,
-            save_results=save_results
-        )
\ No newline at end of file
+            save_results=save_results,
+            auto_remap_columns=auto_remap_columns,
+        )
diff --git a/src/lakebench/benchmarks/clickbench/engine_impl/daft.py b/src/lakebench/benchmarks/clickbench/engine_impl/daft.py
index 8c49e22..5098038 100644
--- a/src/lakebench/benchmarks/clickbench/engine_impl/daft.py
+++ b/src/lakebench/benchmarks/clickbench/engine_impl/daft.py
@@ -1,16 +1,18 @@
-from ....engines.daft import Daft
-from ....utils.path_utils import to_file_uri, _REMOTE_SCHEMES
 import pathlib
 import posixpath
 from typing import Optional
 
+from ....engines.daft import Daft
+from ....utils.path_utils import _REMOTE_SCHEMES, to_file_uri
+
 
 class DaftClickBench:
     def __init__(self, engine: Daft):
         self.engine = engine
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str,
-                              table_is_precreated: bool = False, context_decorator: str = None):
+    def load_parquet_to_delta(
+        self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None
+    ):
         daft = self.engine.daft
         df = daft.read_parquet(parquet_folder_uri)
 
@@ -27,10 +29,13 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str,
         col_names = [f.name for f in df.schema()]
         for ts_col in ("EventTime", "ClientEventTime", "LocalEventTime"):
             if ts_col in col_names:
-                df = df.with_columns({
-                    ts_col: (daft.col(ts_col).cast(daft.DataType.int64()) * 1_000_000)
-                            .cast(daft.DataType.timestamp("us"))
-                })
+                df = df.with_columns(
+                    {
+                        ts_col: (daft.col(ts_col).cast(daft.DataType.int64()) * 1_000_000).cast(
+                            daft.DataType.timestamp("us")
+                        )
+                    }
+                )
 
         # Write delta — pre-create dir + to_file_uri (same pattern as Daft.load_parquet_to_delta)
         raw_path = posixpath.join(self.engine.schema_or_working_directory_uri, table_name)
diff --git a/src/lakebench/benchmarks/clickbench/engine_impl/duckdb.py b/src/lakebench/benchmarks/clickbench/engine_impl/duckdb.py
index 2d782cd..ba41aa0 100644
--- a/src/lakebench/benchmarks/clickbench/engine_impl/duckdb.py
+++ b/src/lakebench/benchmarks/clickbench/engine_impl/duckdb.py
@@ -1,13 +1,17 @@
-from ....engines.duckdb import DuckDB
 import posixpath
 from typing import Optional
 
+from ....engines.duckdb import DuckDB
+
+
 class DuckDBClickBench:
     def __init__(self, engine: DuckDB):
-        
+
         self.engine = engine
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None):
+    def load_parquet_to_delta(
+        self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None
+    ):
         """
         Loads the ClickBench parquet data into Delta format using Spark.
 
@@ -18,15 +22,15 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_
         """
         arrow_df = self.engine.duckdb.sql(f"""
             SELECT * REPLACE (make_date(EventDate) AS EventDate) 
-            FROM parquet_scan('{posixpath.join(parquet_folder_uri, '*.parquet')}')
+            FROM parquet_scan('{posixpath.join(parquet_folder_uri, "*.parquet")}')
         """).record_batch()
-        
+
         self.engine.deltars.write_deltalake(
             table_or_uri=posixpath.join(self.engine.schema_or_working_directory_uri, table_name),
             data=arrow_df,
             mode="append",
             storage_options=self.engine.storage_options,
-        ) 
+        )
 
     def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
-        return self.engine.execute_sql_query(query)
\ No newline at end of file
+        return self.engine.execute_sql_query(query)
diff --git a/src/lakebench/benchmarks/clickbench/engine_impl/polars.py b/src/lakebench/benchmarks/clickbench/engine_impl/polars.py
index 7716a87..ec5a4f1 100644
--- a/src/lakebench/benchmarks/clickbench/engine_impl/polars.py
+++ b/src/lakebench/benchmarks/clickbench/engine_impl/polars.py
@@ -1,16 +1,18 @@
-from ....engines.polars import Polars
 import posixpath
 from typing import Optional
 
+from ....engines.polars import Polars
+
 
 class PolarsClickBench:
     def __init__(self, engine: Polars):
         self.engine = engine
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str,
-                              table_is_precreated: bool = False, context_decorator: str = None):
+    def load_parquet_to_delta(
+        self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None
+    ):
         pl = self.engine.pl
-        df = pl.read_parquet(posixpath.join(parquet_folder_uri, '*.parquet'))
+        df = pl.read_parquet(posixpath.join(parquet_folder_uri, "*.parquet"))
 
         # Binary columns → Utf8 (ClickBench parquet omits logical string type on some columns)
         binary_cols = [name for name, dtype in zip(df.columns, df.dtypes) if dtype == pl.Binary]
diff --git a/src/lakebench/benchmarks/clickbench/engine_impl/sail.py b/src/lakebench/benchmarks/clickbench/engine_impl/sail.py
index e8897e1..ba0d728 100644
--- a/src/lakebench/benchmarks/clickbench/engine_impl/sail.py
+++ b/src/lakebench/benchmarks/clickbench/engine_impl/sail.py
@@ -1,13 +1,17 @@
-from ....engines.sail import Sail
 import posixpath
 from typing import Optional
 
+from ....engines.sail import Sail
+
+
 class SailClickBench:
     def __init__(self, engine: Sail):
-        
+
         self.engine = engine
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None):
+    def load_parquet_to_delta(
+        self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None
+    ):
         """
         Loads the ClickBench parquet data into Delta format using Spark.
 
@@ -17,6 +21,7 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_
             Path to the source parquet files.
         """
         from pyspark.sql import functions as sf
+
         # Load parquet files
         df = self.engine.spark.read.parquet(parquet_folder_uri)
 
@@ -29,7 +34,9 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_
         df = df.withColumn("ClientEventTime", sf.col("ClientEventTime").cast("timestamp"))
         df = df.withColumn("LocalEventTime", sf.col("LocalEventTime").cast("timestamp"))
 
-        df.write.format("delta").mode("append").save(posixpath.join(self.engine.schema_or_working_directory_uri, table_name))
+        df.write.format("delta").mode("append").save(
+            posixpath.join(self.engine.schema_or_working_directory_uri, table_name)
+        )
 
     def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
-        return self.engine.execute_sql_query(query)
\ No newline at end of file
+        return self.engine.execute_sql_query(query)
diff --git a/src/lakebench/benchmarks/clickbench/engine_impl/spark.py b/src/lakebench/benchmarks/clickbench/engine_impl/spark.py
index e263e1a..7fe33a6 100644
--- a/src/lakebench/benchmarks/clickbench/engine_impl/spark.py
+++ b/src/lakebench/benchmarks/clickbench/engine_impl/spark.py
@@ -1,12 +1,16 @@
-from ....engines.spark import Spark
 from typing import Optional
 
+from ....engines.spark import Spark
+
+
 class SparkClickBench:
     def __init__(self, engine: Spark):
-        
+
         self.engine = engine
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None):
+    def load_parquet_to_delta(
+        self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: str = None
+    ):
         """
         Loads the ClickBench parquet data into Delta format using Spark.
 
@@ -16,6 +20,7 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_
             Path to the source parquet files.
         """
         from pyspark.sql import functions as sf
+
         # Load parquet files
         df = self.engine.spark.read.parquet(parquet_folder_uri)
 
@@ -31,4 +36,4 @@ def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_
         df.write.format("delta").mode("append").saveAsTable(table_name)
 
     def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
-        return self.engine.execute_sql_query(query)
\ No newline at end of file
+        return self.engine.execute_sql_query(query)
diff --git a/src/lakebench/benchmarks/elt_bench/__init__.py b/src/lakebench/benchmarks/elt_bench/__init__.py
index 1f2d723..5ec4863 100644
--- a/src/lakebench/benchmarks/elt_bench/__init__.py
+++ b/src/lakebench/benchmarks/elt_bench/__init__.py
@@ -1 +1 @@
-from .elt_bench import ELTBench
\ No newline at end of file
+from .elt_bench import ELTBench
diff --git a/src/lakebench/benchmarks/elt_bench/elt_bench.py b/src/lakebench/benchmarks/elt_bench/elt_bench.py
index fc49dbf..554a7e6 100644
--- a/src/lakebench/benchmarks/elt_bench/elt_bench.py
+++ b/src/lakebench/benchmarks/elt_bench/elt_bench.py
@@ -1,24 +1,23 @@
 from __future__ import annotations
-from typing import Optional
-from ..base import BaseBenchmark
-from ...utils.query_utils import transpile_and_qualify_query, get_table_name_from_ddl
 
-from .engine_impl.spark import SparkELTBench
-from .engine_impl.duckdb import DuckDBELTBench
-from .engine_impl.daft import DaftELTBench
-from .engine_impl.polars import PolarsELTBench
-from .engine_impl.sail import SailELTBench
+import importlib.resources
+import posixpath
+from typing import Optional
 
 from ...engines.base import BaseEngine
-from ...engines.spark import Spark
-from ...engines.duckdb import DuckDB
 from ...engines.daft import Daft
+from ...engines.duckdb import DuckDB
 from ...engines.polars import Polars
 from ...engines.sail import Sail
-
+from ...engines.spark import Spark
+from ...utils.query_utils import get_table_name_from_ddl, transpile_and_qualify_query
+from ..base import BaseBenchmark
 from ..tpcds.tpcds import TPCDS
-import importlib.resources
-import posixpath
+from .engine_impl.daft import DaftELTBench
+from .engine_impl.duckdb import DuckDBELTBench
+from .engine_impl.polars import PolarsELTBench
+from .engine_impl.sail import SailELTBench
+from .engine_impl.spark import SparkELTBench
 
 
 class ELTBench(BaseBenchmark):
@@ -53,29 +52,47 @@ class ELTBench(BaseBenchmark):
         DuckDB: DuckDBELTBench,
         Daft: DaftELTBench,
         Polars: PolarsELTBench,
-        Sail: SailELTBench
+        Sail: SailELTBench,
     }
-    MODE_REGISTRY = ['light']
+    MODE_REGISTRY = ["light"]
     TABLE_REGISTRY = [
-        'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales',
-        'customer', 'customer_address', 'customer_demographics', 'date_dim',
-        'household_demographics', 'income_band', 'inventory', 'item',
-        'promotion', 'reason', 'ship_mode', 'store', 'store_returns',
-        'store_sales', 'time_dim', 'warehouse', 'web_page', 'web_returns',
-        'web_sales', 'web_site'
+        "call_center",
+        "catalog_page",
+        "catalog_returns",
+        "catalog_sales",
+        "customer",
+        "customer_address",
+        "customer_demographics",
+        "date_dim",
+        "household_demographics",
+        "income_band",
+        "inventory",
+        "item",
+        "promotion",
+        "reason",
+        "ship_mode",
+        "store",
+        "store_returns",
+        "store_sales",
+        "time_dim",
+        "warehouse",
+        "web_page",
+        "web_returns",
+        "web_sales",
+        "web_site",
     ]
-    VERSION = '1.0.0'
+    VERSION = "1.0.0"
 
     def __init__(
-            self, 
-            engine: BaseEngine, 
-            scenario_name: str,
-            scale_factor: Optional[int] = None,
-            input_parquet_folder_uri: Optional[str] = None,
-            result_table_uri: Optional[str] = None,
-            save_results: bool = False,
-            run_id: Optional[str] = None
-            ):
+        self,
+        engine: BaseEngine,
+        scenario_name: str,
+        scale_factor: Optional[int] = None,
+        input_parquet_folder_uri: Optional[str] = None,
+        result_table_uri: Optional[str] = None,
+        save_results: bool = False,
+        run_id: Optional[str] = None,
+    ):
         self.scale_factor = scale_factor
         super().__init__(engine, scenario_name, input_parquet_folder_uri, result_table_uri, save_results, run_id)
         for base_engine, benchmark_impl in self.BENCHMARK_IMPL_REGISTRY.items():
@@ -95,16 +112,13 @@ def __init__(
 
         self.engine = engine
         self.scenario_name = scenario_name
-        self.benchmark_impl = self.benchmark_impl_class(
-            self.engine
-        )
+        self.benchmark_impl = self.benchmark_impl_class(self.engine)
         self.input_parquet_folder_uri = input_parquet_folder_uri
 
-
-    def run(self, mode: str = 'light'):
+    def run(self, mode: str = "light"):
         """
         Executes the benchmark in the specified mode.
-        
+
         Parameters
         ----------
         mode : str, optional
@@ -113,111 +127,106 @@ def run(self, mode: str = 'light'):
             - 'full': Placeholder for full mode, which is not implemented yet.
         """
 
-        if mode == 'light':
+        if mode == "light":
             self.run_light_mode()
-        elif mode == 'full':
+        elif mode == "full":
             raise NotImplementedError("Full mode is not implemented yet.")
         else:
             raise ValueError(f"Mode '{mode}' is not supported. Supported modes: {self.MODE_REGISTRY}.")
-        
+
     def _prepare_schema(self, tables: list[str]):
-        
 
         self.engine.create_schema_if_not_exists(drop_before_create=True)
         self.engine.create_external_location(self.input_parquet_folder_uri)
 
         engine_class_name = self.engine.__class__.__name__.lower()
         parent_class_name = self.engine.__class__.__bases__[0].__name__.lower()
-        benchmark_name = 'tpcds'
-        engine_root_lib_name = self.engine.__class__.__module__.split('.')[0]
+        benchmark_name = "tpcds"
+        engine_root_lib_name = self.engine.__class__.__module__.split(".")[0]
         from_dialect = self.engine.SQLGLOT_DIALECT
         self.DDL_FILE_NAME = TPCDS.DDL_FILE_NAME
 
         try:
             # Try to load engine-specific query first
             with importlib.resources.path(
-                f"{engine_root_lib_name}.benchmarks.{benchmark_name}.resources.ddl.{engine_class_name}", 
-                self.DDL_FILE_NAME
+                f"{engine_root_lib_name}.benchmarks.{benchmark_name}.resources.ddl.{engine_class_name}",
+                self.DDL_FILE_NAME,
             ) as ddl_path:
-                with open(ddl_path, 'r') as ddl_file:
-                    ddl = ddl_file.read()                
+                with open(ddl_path, "r") as ddl_file:
+                    ddl = ddl_file.read()
         except (ModuleNotFoundError, FileNotFoundError):
             # Try parent engine class name if engine-specific fails
             try:
                 with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.{parent_class_name}", 
-                    self.DDL_FILE_NAME
+                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.{parent_class_name}", self.DDL_FILE_NAME
                 ) as ddl_path:
-                    with open(ddl_path, 'r') as ddl_file:
+                    with open(ddl_path, "r") as ddl_file:
                         ddl = ddl_file.read()
             except (ModuleNotFoundError, FileNotFoundError):
                 # Fall back to canonical query
                 with importlib.resources.path(
-                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.canonical", 
-                    self.DDL_FILE_NAME
+                    f"lakebench.benchmarks.{benchmark_name}.resources.ddl.canonical", self.DDL_FILE_NAME
                 ) as ddl_path:
-                    with open(ddl_path, 'r') as ddl_file:
+                    with open(ddl_path, "r") as ddl_file:
                         ddl = ddl_file.read()
-                from_dialect = 'spark'
-            
-        statements = [s for s in ddl.split(';') if len(s) > 7]
+                from_dialect = "spark"
+
+        statements = [s for s in ddl.split(";") if len(s) > 7]
         for statement in statements:
             prepped_ddl = transpile_and_qualify_query(
-                query=statement, 
-                from_dialect=from_dialect, 
-                to_dialect=self.engine.SQLGLOT_DIALECT, 
-                catalog=getattr(self.engine, 'catalog_name', None),
-                schema=getattr(self.engine, 'schema_name', None)
+                query=statement,
+                from_dialect=from_dialect,
+                to_dialect=self.engine.SQLGLOT_DIALECT,
+                catalog=getattr(self.engine, "catalog_name", None),
+                schema=getattr(self.engine, "schema_name", None),
             )
             table_name = get_table_name_from_ddl(prepped_ddl)
             # only create tables that are in the specified list
             if table_name in tables:
                 self.engine._create_empty_table(table_name=table_name, ddl=prepped_ddl)
-            
 
     def run_light_mode(self):
         """
         Executes the light mode benchmark workflow for processing and querying data.
-        This method performs a series of operations on data tables, including loading data 
-        from parquet files into Delta tables, creating a fact table, merging data, optimizing 
-        the table, vacuuming the table, and running an ad-hoc query. The results are posted 
+        This method performs a series of operations on data tables, including loading data
+        from parquet files into Delta tables, creating a fact table, merging data, optimizing
+        the table, vacuuming the table, and running an ad-hoc query. The results are posted
         at the end of the workflow.
 
         Parameters
         ----------
         None
         """
-        tables = [
-            'store_sales', 'date_dim', 'store', 'item', 'customer'
-        ]
+        tables = ["store_sales", "date_dim", "store", "item", "customer"]
 
-        self.mode = 'light'
+        self.mode = "light"
         if self.engine.SUPPORTS_SCHEMA_PREP:
             self._prepare_schema(tables=tables)
 
         for table_name in tables:
             with self.timer(phase="Read parquet, write delta (x5)", test_item=table_name, engine=self.engine) as tc:
                 tc.execution_telemetry = self.engine.load_parquet_to_delta(
-                    parquet_folder_uri=posixpath.join(self.input_parquet_folder_uri, f"{table_name}/"), 
+                    parquet_folder_uri=posixpath.join(self.input_parquet_folder_uri, f"{table_name}/"),
                     table_name=table_name,
                     table_is_precreated=True,
-                    context_decorator=tc.context_decorator
+                    context_decorator=tc.context_decorator,
                 )
-        with self.timer(phase="Create fact table", test_item='total_sales_fact', engine=self.engine):
+        with self.timer(phase="Create fact table", test_item="total_sales_fact", engine=self.engine):
             self.benchmark_impl.create_total_sales_fact()
 
         for _ in range(3):
-            with self.timer(phase="Merge 0.1% into fact table (3x)", test_item='total_sales_fact', engine=self.engine):
+            with self.timer(phase="Merge 0.1% into fact table (3x)", test_item="total_sales_fact", engine=self.engine):
                 self.benchmark_impl.merge_percent_into_total_sales_fact(0.001)
 
-        with self.timer(phase="OPTIMIZE", test_item='total_sales_fact', engine=self.engine):
-            self.engine.optimize_table('total_sales_fact')
+        with self.timer(phase="OPTIMIZE", test_item="total_sales_fact", engine=self.engine):
+            self.engine.optimize_table("total_sales_fact")
 
-        with self.timer(phase="VACUUM", test_item='total_sales_fact', engine=self.engine):
-            self.engine.vacuum_table('total_sales_fact', retain_hours=0, retention_check=False)
+        with self.timer(phase="VACUUM", test_item="total_sales_fact", engine=self.engine):
+            self.engine.vacuum_table("total_sales_fact", retain_hours=0, retention_check=False)
 
-        with self.timer(phase="Ad-hoc query (small result aggregation)", test_item='total_sales_fact', engine=self.engine):
+        with self.timer(
+            phase="Ad-hoc query (small result aggregation)", test_item="total_sales_fact", engine=self.engine
+        ):
             self.benchmark_impl.query_total_sales_fact()
 
         self.post_results()
-
diff --git a/src/lakebench/benchmarks/elt_bench/engine_impl/daft.py b/src/lakebench/benchmarks/elt_bench/engine_impl/daft.py
index d8c68f2..0b6ca66 100644
--- a/src/lakebench/benchmarks/elt_bench/engine_impl/daft.py
+++ b/src/lakebench/benchmarks/elt_bench/engine_impl/daft.py
@@ -1,15 +1,17 @@
-from ....engines.daft import Daft
-from ....engines.delta_rs import DeltaRs
-from ....utils.path_utils import to_file_uri, _REMOTE_SCHEMES
 import pathlib
 import posixpath
 
+from ....engines.daft import Daft
+from ....engines.delta_rs import DeltaRs
+from ....utils.path_utils import _REMOTE_SCHEMES, to_file_uri
+
 
 class DaftELTBench:
     def __init__(self, engine: Daft):
         self.engine = engine
 
         import numpy as np
+
         self.np = np
         self.delta_rs = DeltaRs()
         self.DeltaTable = self.delta_rs.DeltaTable
@@ -37,6 +39,7 @@ def _read_delta(self, table_name: str):
         is_local = not any(path.startswith(s) for s in _REMOTE_SCHEMES)
         if is_local:
             from deltalake import DeltaTable
+
             file_uris = DeltaTable(path).file_uris()
             return self.engine.daft.read_parquet(file_uris)
         return self.engine.daft.read_deltalake(to_file_uri(path))
@@ -53,22 +56,30 @@ def _write_delta(self, df, table_name: str, mode: str = "overwrite"):
 
     def create_total_sales_fact(self):
         fact_table_df = (
-            self._read_delta('store_sales')
-            .join(self._read_delta('date_dim'),  left_on="ss_sold_date_sk", right_on="d_date_sk")
-            .join(self._read_delta('store'),     left_on="ss_store_sk",     right_on="s_store_sk")
-            .join(self._read_delta('item'),      left_on="ss_item_sk",      right_on="i_item_sk")
-            .join(self._read_delta('customer'),  left_on="ss_customer_sk",  right_on="c_customer_sk")
+            self._read_delta("store_sales")
+            .join(self._read_delta("date_dim"), left_on="ss_sold_date_sk", right_on="d_date_sk")
+            .join(self._read_delta("store"), left_on="ss_store_sk", right_on="s_store_sk")
+            .join(self._read_delta("item"), left_on="ss_item_sk", right_on="i_item_sk")
+            .join(self._read_delta("customer"), left_on="ss_customer_sk", right_on="c_customer_sk")
             .with_columns({"sale_date": self.engine.daft.col("d_date")})
             .where(self.engine.daft.col("d_year") == 2001)
             .groupby(["s_store_id", "i_item_id", "c_customer_id", "sale_date"])
-            .agg([
-                self.engine.daft.col("ss_quantity").sum().alias("total_quantity"),
-                self.engine.daft.col("ss_net_paid").sum().cast(self.engine.daft.DataType.decimal128(38, 2)).alias("total_net_paid"),
-                self.engine.daft.col("ss_net_profit").sum().cast(self.engine.daft.DataType.decimal128(38, 2)).alias("total_net_profit"),
-            ])
+            .agg(
+                [
+                    self.engine.daft.col("ss_quantity").sum().alias("total_quantity"),
+                    self.engine.daft.col("ss_net_paid")
+                    .sum()
+                    .cast(self.engine.daft.DataType.decimal128(38, 2))
+                    .alias("total_net_paid"),
+                    self.engine.daft.col("ss_net_profit")
+                    .sum()
+                    .cast(self.engine.daft.DataType.decimal128(38, 2))
+                    .alias("total_net_profit"),
+                ]
+            )
             .sort(["s_store_id", "sale_date"])
         )
-        self._write_delta(fact_table_df, 'total_sales_fact')
+        self._write_delta(fact_table_df, "total_sales_fact")
 
     def merge_percent_into_total_sales_fact(self, percent: float):
         seed = self.np.random.randint(1, high=1000, size=None, dtype=int)
@@ -77,31 +88,48 @@ def merge_percent_into_total_sales_fact(self, percent: float):
         daft = self.engine.daft
 
         sampled_fact_data = (
-            self._read_delta('store_sales')
-            .join(self._read_delta('date_dim'),  left_on="ss_sold_date_sk", right_on="d_date_sk")
-            .join(self._read_delta('store'),     left_on="ss_store_sk",     right_on="s_store_sk")
-            .join(self._read_delta('item'),      left_on="ss_item_sk",      right_on="i_item_sk")
-            .join(self._read_delta('customer'),  left_on="ss_customer_sk",  right_on="c_customer_sk")
-            .with_columns({
-                "new_uid_val": (daft.col("ss_customer_sk") + daft.col("ss_sold_date_sk") + seed),
-                "s_store_id": daft.col("s_store_id"),
-                "i_item_id":  daft.col("i_item_id"),
-                "sale_date":  daft.col("d_date"),
-            })
+            self._read_delta("store_sales")
+            .join(self._read_delta("date_dim"), left_on="ss_sold_date_sk", right_on="d_date_sk")
+            .join(self._read_delta("store"), left_on="ss_store_sk", right_on="s_store_sk")
+            .join(self._read_delta("item"), left_on="ss_item_sk", right_on="i_item_sk")
+            .join(self._read_delta("customer"), left_on="ss_customer_sk", right_on="c_customer_sk")
+            .with_columns(
+                {
+                    "new_uid_val": (daft.col("ss_customer_sk") + daft.col("ss_sold_date_sk") + seed),
+                    "s_store_id": daft.col("s_store_id"),
+                    "i_item_id": daft.col("i_item_id"),
+                    "sale_date": daft.col("d_date"),
+                }
+            )
             .filter((daft.col("new_uid_val") % modulo) == 0)
-            .with_columns({
-                "c_customer_id":   daft.functions.when(daft.col("new_uid_val") % 2 == 0, daft.col("c_customer_id")).otherwise(daft.lit("NEW_") + daft.col("new_uid_val").cast(daft.DataType.string())),
-                "total_quantity":  daft.col("ss_quantity") + (daft.col("new_uid_val") % 5 + 1),
-                "total_net_paid":  (daft.col("ss_net_paid")   + ((daft.col("new_uid_val") % 5000) / 100.0 + 5)).cast(daft.DataType.decimal128(38, 2)),
-                "total_net_profit":(daft.col("ss_net_profit") + ((daft.col("new_uid_val") % 2000) / 100.0 + 1)).cast(daft.DataType.decimal128(38, 2)),
-            })
-            .select("s_store_id", "i_item_id", "c_customer_id", "sale_date",
-                    "total_quantity", "total_net_paid", "total_net_profit")
+            .with_columns(
+                {
+                    "c_customer_id": daft.functions.when(
+                        daft.col("new_uid_val") % 2 == 0, daft.col("c_customer_id")
+                    ).otherwise(daft.lit("NEW_") + daft.col("new_uid_val").cast(daft.DataType.string())),
+                    "total_quantity": daft.col("ss_quantity") + (daft.col("new_uid_val") % 5 + 1),
+                    "total_net_paid": (daft.col("ss_net_paid") + ((daft.col("new_uid_val") % 5000) / 100.0 + 5)).cast(
+                        daft.DataType.decimal128(38, 2)
+                    ),
+                    "total_net_profit": (
+                        daft.col("ss_net_profit") + ((daft.col("new_uid_val") % 2000) / 100.0 + 1)
+                    ).cast(daft.DataType.decimal128(38, 2)),
+                }
+            )
+            .select(
+                "s_store_id",
+                "i_item_id",
+                "c_customer_id",
+                "sale_date",
+                "total_quantity",
+                "total_net_paid",
+                "total_net_profit",
+            )
             .to_arrow()
         )
 
         fact_table = self.DeltaTable(
-            table_uri=self._table_path('total_sales_fact'),
+            table_uri=self._table_path("total_sales_fact"),
             storage_options=self.engine.storage_options,
         )
         fact_table.merge(
@@ -114,24 +142,28 @@ def merge_percent_into_total_sales_fact(self, percent: float):
             """,
             source_alias="source",
             target_alias="target",
-        ).when_matched_update({
-            "total_quantity":   "target.total_quantity   + source.total_quantity",
-            "total_net_paid":   "target.total_net_paid   + source.total_net_paid",
-            "total_net_profit": "target.total_net_profit + source.total_net_profit",
-        }).when_not_matched_insert({
-            "s_store_id":       "source.s_store_id",
-            "i_item_id":        "source.i_item_id",
-            "c_customer_id":    "source.c_customer_id",
-            "sale_date":        "source.sale_date",
-            "total_quantity":   "source.total_quantity",
-            "total_net_paid":   "source.total_net_paid",
-            "total_net_profit": "source.total_net_profit",
-        }).execute()
+        ).when_matched_update(
+            {
+                "total_quantity": "target.total_quantity   + source.total_quantity",
+                "total_net_paid": "target.total_net_paid   + source.total_net_paid",
+                "total_net_profit": "target.total_net_profit + source.total_net_profit",
+            }
+        ).when_not_matched_insert(
+            {
+                "s_store_id": "source.s_store_id",
+                "i_item_id": "source.i_item_id",
+                "c_customer_id": "source.c_customer_id",
+                "sale_date": "source.sale_date",
+                "total_quantity": "source.total_quantity",
+                "total_net_paid": "source.total_net_paid",
+                "total_net_profit": "source.total_net_profit",
+            }
+        ).execute()
 
     def query_total_sales_fact(self):
         (
-            self._read_delta('total_sales_fact')
+            self._read_delta("total_sales_fact")
             .groupby(self.engine.daft.col("sale_date").year())
             .agg(self.engine.daft.col("total_net_profit").sum().alias("sum_net_profit"))
             .collect()
-        )
\ No newline at end of file
+        )
diff --git a/src/lakebench/benchmarks/elt_bench/engine_impl/duckdb.py b/src/lakebench/benchmarks/elt_bench/engine_impl/duckdb.py
index 1d25a4f..937b06b 100644
--- a/src/lakebench/benchmarks/elt_bench/engine_impl/duckdb.py
+++ b/src/lakebench/benchmarks/elt_bench/engine_impl/duckdb.py
@@ -1,13 +1,15 @@
-from ....engines.duckdb import DuckDB
+import posixpath
+
 from ....engines.delta_rs import DeltaRs
+from ....engines.duckdb import DuckDB
 
-import posixpath
 
 class DuckDBELTBench:
-    def __init__(self, engine : DuckDB):
+    def __init__(self, engine: DuckDB):
         self.engine = engine
 
         import numpy as np
+
         self.np = np
         self.delta_rs = DeltaRs()
         self.write_deltalake = self.delta_rs.write_deltalake
@@ -16,7 +18,7 @@ def __init__(self, engine : DuckDB):
     def create_total_sales_fact(self):
         self.engine.duckdb.sql("use main")
 
-        for table in ['store_sales', 'date_dim', 'store', 'item', 'customer']:
+        for table in ["store_sales", "date_dim", "store", "item", "customer"]:
             self.engine.register_table(table)
 
         arrow_df = self.engine.duckdb.sql("""
@@ -48,7 +50,7 @@ def create_total_sales_fact(self):
         """).record_batch()
 
         self.write_deltalake(
-            table_or_uri=posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'),
+            table_or_uri=posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
             data=arrow_df,
             mode="overwrite",
             storage_options=self.engine.storage_options,
@@ -57,9 +59,9 @@ def create_total_sales_fact(self):
     def merge_percent_into_total_sales_fact(self, percent: float):
         self.engine.duckdb.sql("use main")
 
-        for table in ['store_sales', 'date_dim', 'store', 'item', 'customer']:
+        for table in ["store_sales", "date_dim", "store", "item", "customer"]:
             self.engine.register_table(table)
-            
+
         seed = self.np.random.randint(1, high=1000, size=None, dtype=int)
         modulo = int(1 / percent)
 
@@ -83,7 +85,7 @@ def merge_percent_into_total_sales_fact(self, percent: float):
                     WHERE MOD(new_uid_val, {modulo}) = 0
                 ) ss            
             JOIN 
-                delta_scan('{posixpath.join(self.engine.schema_or_working_directory_uri, 'date_dim')}') d ON ss.ss_sold_date_sk = d.d_date_sk
+                delta_scan('{posixpath.join(self.engine.schema_or_working_directory_uri, "date_dim")}') d ON ss.ss_sold_date_sk = d.d_date_sk
             JOIN 
                 store s ON ss.ss_store_sk = s.s_store_sk
             JOIN 
@@ -94,43 +96,40 @@ def merge_percent_into_total_sales_fact(self, percent: float):
         """).record_batch()
 
         fact_table = self.DeltaTable(
-            table_uri=posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'),
+            table_uri=posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
             storage_options=self.engine.storage_options,
         )
 
         fact_table.merge(
-                source=synthetic_data,
-                predicate="""
+            source=synthetic_data,
+            predicate="""
                 target.s_store_id = source.s_store_id AND 
                 target.i_item_id = source.i_item_id AND 
                 target.c_customer_id = source.c_customer_id AND 
                 target.sale_date = source.sale_date
                 """,
-                source_alias="source",
-                target_alias="target"
-            ) \
-            .when_matched_update(
-                {
-                    "total_quantity": "target.total_quantity + source.total_quantity",
-                    "total_net_paid": "target.total_net_paid + source.total_net_paid",
-                    "total_net_profit": "target.total_net_profit + source.total_net_profit",
-                }
-            ) \
-            .when_not_matched_insert(
-                {
-                    "s_store_id": "source.s_store_id",
-                    "i_item_id": "source.i_item_id",
-                    "c_customer_id": "source.c_customer_id",
-                    "sale_date": "source.sale_date",
-                    "total_quantity": "source.total_quantity",
-                    "total_net_paid": "source.total_net_paid",
-                    "total_net_profit": "source.total_net_profit",
-                }
-            ) \
-            .execute()
+            source_alias="source",
+            target_alias="target",
+        ).when_matched_update(
+            {
+                "total_quantity": "target.total_quantity + source.total_quantity",
+                "total_net_paid": "target.total_net_paid + source.total_net_paid",
+                "total_net_profit": "target.total_net_profit + source.total_net_profit",
+            }
+        ).when_not_matched_insert(
+            {
+                "s_store_id": "source.s_store_id",
+                "i_item_id": "source.i_item_id",
+                "c_customer_id": "source.c_customer_id",
+                "sale_date": "source.sale_date",
+                "total_quantity": "source.total_quantity",
+                "total_net_paid": "source.total_net_paid",
+                "total_net_profit": "source.total_net_profit",
+            }
+        ).execute()
 
     def query_total_sales_fact(self):
         self.engine.duckdb.sql(f"""
             select sum(total_net_profit), year(sale_date) 
-            from delta_scan('{posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact')}') group by year(sale_date)
-        """).arrow()
\ No newline at end of file
+            from delta_scan('{posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact")}') group by year(sale_date)
+        """).arrow()
diff --git a/src/lakebench/benchmarks/elt_bench/engine_impl/polars.py b/src/lakebench/benchmarks/elt_bench/engine_impl/polars.py
index 73cc4b3..f54786e 100644
--- a/src/lakebench/benchmarks/elt_bench/engine_impl/polars.py
+++ b/src/lakebench/benchmarks/elt_bench/engine_impl/polars.py
@@ -1,12 +1,14 @@
-from ....engines.polars import Polars
+import posixpath
+
 from ....engines.delta_rs import DeltaRs
+from ....engines.polars import Polars
 
-import posixpath
 
 class PolarsELTBench:
     def __init__(self, engine: Polars):
 
         import numpy as np
+
         self.np = np
         self.delta_rs = DeltaRs()
         self.write_deltalake = self.delta_rs.write_deltalake
@@ -16,96 +18,157 @@ def __init__(self, engine: Polars):
 
     def create_total_sales_fact(self):
         fact_table_df = (
-            self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'store_sales'), storage_options=self.storage_options)
+            self.engine.pl.scan_delta(
+                posixpath.join(self.engine.schema_or_working_directory_uri, "store_sales"),
+                storage_options=self.storage_options,
+            )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'date_dim'), storage_options=self.storage_options), left_on="ss_sold_date_sk", right_on="d_date_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "date_dim"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_sold_date_sk",
+                right_on="d_date_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'store'), storage_options=self.storage_options), left_on="ss_store_sk", right_on="s_store_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "store"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_store_sk",
+                right_on="s_store_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'item'), storage_options=self.storage_options), left_on="ss_item_sk", right_on="i_item_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "item"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_item_sk",
+                right_on="i_item_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'customer'), storage_options=self.storage_options), left_on="ss_customer_sk", right_on="c_customer_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "customer"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_customer_sk",
+                right_on="c_customer_sk",
             )
-            .with_columns(
-                    self.engine.pl.col("d_date").alias("sale_date")
-                )
+            .with_columns(self.engine.pl.col("d_date").alias("sale_date"))
             .filter(self.engine.pl.col("d_year") == 2001)
             .group_by(["s_store_id", "i_item_id", "c_customer_id", "sale_date"])
-            .agg([
-                self.engine.pl.sum("ss_quantity").alias("total_quantity"),
-                self.engine.pl.sum("ss_net_paid").alias("total_net_paid"),
-                self.engine.pl.sum("ss_net_profit").alias("total_net_profit")
-            ])
+            .agg(
+                [
+                    self.engine.pl.sum("ss_quantity").alias("total_quantity"),
+                    self.engine.pl.sum("ss_net_paid").alias("total_net_paid"),
+                    self.engine.pl.sum("ss_net_profit").alias("total_net_profit"),
+                ]
+            )
             .sort(["s_store_id", "sale_date"])
         )
 
-        fact_table_df.collect(engine='streaming').write_delta(
-            posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'),
+        fact_table_df.collect(engine="streaming").write_delta(
+            posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
             mode="overwrite",
-            storage_options=self.storage_options
+            storage_options=self.storage_options,
         )
 
-
     def merge_percent_into_total_sales_fact(self, percent: float):
         seed = self.np.random.randint(1, high=1000, size=None, dtype=int)
         modulo = int(1 / percent)
         sampled_fact_data = (
-            self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'store_sales'), storage_options=self.storage_options)
+            self.engine.pl.scan_delta(
+                posixpath.join(self.engine.schema_or_working_directory_uri, "store_sales"),
+                storage_options=self.storage_options,
+            )
             .filter(
-                ((self.engine.pl.col("ss_item_sk") * 1000000 + self.engine.pl.col("ss_ticket_number") + seed).hash() % modulo) == 0
+                (
+                    (self.engine.pl.col("ss_item_sk") * 1000000 + self.engine.pl.col("ss_ticket_number") + seed).hash()
+                    % modulo
+                )
+                == 0
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'date_dim'), storage_options=self.storage_options), 
-                left_on="ss_sold_date_sk", right_on="d_date_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "date_dim"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_sold_date_sk",
+                right_on="d_date_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'store'), storage_options=self.storage_options), 
-                left_on="ss_store_sk", right_on="s_store_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "store"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_store_sk",
+                right_on="s_store_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'item'), storage_options=self.storage_options), 
-                left_on="ss_item_sk", right_on="i_item_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "item"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_item_sk",
+                right_on="i_item_sk",
             )
             .join(
-                self.engine.pl.scan_delta(posixpath.join(self.engine.schema_or_working_directory_uri, 'customer'), storage_options=self.storage_options), 
-                left_on="ss_customer_sk", right_on="c_customer_sk"
+                self.engine.pl.scan_delta(
+                    posixpath.join(self.engine.schema_or_working_directory_uri, "customer"),
+                    storage_options=self.storage_options,
+                ),
+                left_on="ss_customer_sk",
+                right_on="c_customer_sk",
             )
-            .with_columns([
-                # Create hash-based pseudo-random values for each row
-                (self.engine.pl.col("ss_customer_sk") + self.engine.pl.col("ss_sold_date_sk") + seed).alias("new_uid_val")
-            ])
-            .filter(
-                (self.engine.pl.col("new_uid_val") % modulo) == 0
+            .with_columns(
+                [
+                    # Create hash-based pseudo-random values for each row
+                    (self.engine.pl.col("ss_customer_sk") + self.engine.pl.col("ss_sold_date_sk") + seed).alias(
+                        "new_uid_val"
+                    )
+                ]
             )
-            .with_columns([
-                self.engine.pl.col("s_store_id"),
-                self.engine.pl.col("i_item_id"),
-                self.engine.pl.when(self.engine.pl.col("new_uid_val") % 2 == 0)
+            .filter((self.engine.pl.col("new_uid_val") % modulo) == 0)
+            .with_columns(
+                [
+                    self.engine.pl.col("s_store_id"),
+                    self.engine.pl.col("i_item_id"),
+                    self.engine.pl.when(self.engine.pl.col("new_uid_val") % 2 == 0)
                     .then(self.engine.pl.col("c_customer_id"))
-                    .otherwise(self.engine.pl.concat_str([self.engine.pl.lit('NEW_'), self.engine.pl.col("new_uid_val")], separator=''))
+                    .otherwise(
+                        self.engine.pl.concat_str(
+                            [self.engine.pl.lit("NEW_"), self.engine.pl.col("new_uid_val")], separator=""
+                        )
+                    )
                     .alias("c_customer_id"),
-                self.engine.pl.col("d_date").alias("sale_date"),
-                (self.engine.pl.col("ss_quantity") + (self.engine.pl.col("new_uid_val") % 5) + 1).alias("total_quantity"),
-                (self.engine.pl.col("ss_net_paid") + ((self.engine.pl.col("new_uid_val") % 5000) / 100.0) + 5).alias("total_net_paid"),
-                (self.engine.pl.col("ss_net_profit") + ((self.engine.pl.col("new_uid_val") % 2000) / 100.0) + 1).alias("total_net_profit")
-            ])
-            .select([
-                "s_store_id",
-                "i_item_id", 
-                "c_customer_id",
-                "sale_date",
-                "total_quantity",
-                "total_net_paid",
-                "total_net_profit"
-            ])
+                    self.engine.pl.col("d_date").alias("sale_date"),
+                    (self.engine.pl.col("ss_quantity") + (self.engine.pl.col("new_uid_val") % 5) + 1).alias(
+                        "total_quantity"
+                    ),
+                    (
+                        self.engine.pl.col("ss_net_paid") + ((self.engine.pl.col("new_uid_val") % 5000) / 100.0) + 5
+                    ).alias("total_net_paid"),
+                    (
+                        self.engine.pl.col("ss_net_profit") + ((self.engine.pl.col("new_uid_val") % 2000) / 100.0) + 1
+                    ).alias("total_net_profit"),
+                ]
+            )
+            .select(
+                [
+                    "s_store_id",
+                    "i_item_id",
+                    "c_customer_id",
+                    "sale_date",
+                    "total_quantity",
+                    "total_net_paid",
+                    "total_net_profit",
+                ]
+            )
         )
 
-        sampled_fact_data.collect(engine='streaming').write_delta(
-            posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'), 
-            mode="merge", 
+        sampled_fact_data.collect(engine="streaming").write_delta(
+            posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
+            mode="merge",
             delta_merge_options={
                 "predicate": """
                     target.s_store_id = source.s_store_id AND 
@@ -114,30 +177,34 @@ def merge_percent_into_total_sales_fact(self, percent: float):
                     target.sale_date = source.sale_date
                     """,
                 "source_alias": "source",
-                "target_alias": "target"
-            }, 
-            storage_options=self.storage_options
-        ) \
-        .when_matched_update({
-            "total_quantity": "target.total_quantity + source.total_quantity",
-            "total_net_paid": "target.total_net_paid + source.total_net_paid",
-            "total_net_profit": "target.total_net_profit + source.total_net_profit",
-        }) \
-        .when_not_matched_insert({
-            "s_store_id": "source.s_store_id",
-            "i_item_id": "source.i_item_id",
-            "c_customer_id": "source.c_customer_id",
-            "sale_date": "source.sale_date",
-            "total_quantity": "source.total_quantity",
-            "total_net_paid": "source.total_net_paid",
-            "total_net_profit": "source.total_net_profit",
-        }).execute()
+                "target_alias": "target",
+            },
+            storage_options=self.storage_options,
+        ).when_matched_update(
+            {
+                "total_quantity": "target.total_quantity + source.total_quantity",
+                "total_net_paid": "target.total_net_paid + source.total_net_paid",
+                "total_net_profit": "target.total_net_profit + source.total_net_profit",
+            }
+        ).when_not_matched_insert(
+            {
+                "s_store_id": "source.s_store_id",
+                "i_item_id": "source.i_item_id",
+                "c_customer_id": "source.c_customer_id",
+                "sale_date": "source.sale_date",
+                "total_quantity": "source.total_quantity",
+                "total_net_paid": "source.total_net_paid",
+                "total_net_profit": "source.total_net_profit",
+            }
+        ).execute()
 
     def query_total_sales_fact(self):
-        query_df = self.engine.pl.scan_delta(
-            posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'), storage_options=self.storage_options
-        ).group_by(
-            self.engine.pl.col("sale_date").dt.year()
-        ).agg(
-            self.engine.pl.sum("total_net_profit").alias("sum_net_profit")
-        ).collect()
\ No newline at end of file
+        query_df = (
+            self.engine.pl.scan_delta(
+                posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
+                storage_options=self.storage_options,
+            )
+            .group_by(self.engine.pl.col("sale_date").dt.year())
+            .agg(self.engine.pl.sum("total_net_profit").alias("sum_net_profit"))
+            .collect()
+        )
diff --git a/src/lakebench/benchmarks/elt_bench/engine_impl/sail.py b/src/lakebench/benchmarks/elt_bench/engine_impl/sail.py
index d1970b1..2562f5b 100644
--- a/src/lakebench/benchmarks/elt_bench/engine_impl/sail.py
+++ b/src/lakebench/benchmarks/elt_bench/engine_impl/sail.py
@@ -1,16 +1,18 @@
+import posixpath
+
 from ....engines.sail import Sail
 
-import posixpath
 
 class SailELTBench:
     def __init__(self, engine: Sail):
-        
+
         import numpy as np
+
         self.np = np
         self.engine = engine
 
     def create_total_sales_fact(self):
-        for table in ['store_sales', 'date_dim', 'store', 'item', 'customer']:
+        for table in ["store_sales", "date_dim", "store", "item", "customer"]:
             self.engine.register_table(table)
 
         df = self.engine.spark.sql("""
@@ -40,7 +42,9 @@ def create_total_sales_fact(self):
                 s.s_store_id, d.d_date;
         """)
 
-        df.write.format("delta").mode("overwrite").save(posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'))
+        df.write.format("delta").mode("overwrite").save(
+            posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact")
+        )
 
     def merge_percent_into_total_sales_fact(self, percent: float):
         seed = self.np.random.randint(1, high=1000, size=None, dtype=int)
@@ -77,45 +81,42 @@ def merge_percent_into_total_sales_fact(self, percent: float):
             """).toArrow()
 
         fact_table = self.engine.deltars.DeltaTable(
-            table_uri=posixpath.join(self.engine.schema_or_working_directory_uri, 'total_sales_fact'),
+            table_uri=posixpath.join(self.engine.schema_or_working_directory_uri, "total_sales_fact"),
             storage_options=self.engine.storage_options,
         )
 
         fact_table.merge(
-                source=sampled_fact_data,
-                predicate="""
+            source=sampled_fact_data,
+            predicate="""
                 target.s_store_id = source.s_store_id AND 
                 target.i_item_id = source.i_item_id AND 
                 target.c_customer_id = source.c_customer_id AND 
                 target.sale_date = source.sale_date
                 """,
-                source_alias="source",
-                target_alias="target"
-            ) \
-            .when_matched_update(
-                {
-                    "total_quantity": "target.total_quantity + source.total_quantity",
-                    "total_net_paid": "target.total_net_paid + source.total_net_paid",
-                    "total_net_profit": "target.total_net_profit + source.total_net_profit",
-                }
-            ) \
-            .when_not_matched_insert(
-                {
-                    "s_store_id": "source.s_store_id",
-                    "i_item_id": "source.i_item_id",
-                    "c_customer_id": "source.c_customer_id",
-                    "sale_date": "source.sale_date",
-                    "total_quantity": "source.total_quantity",
-                    "total_net_paid": "source.total_net_paid",
-                    "total_net_profit": "source.total_net_profit",
-                }
-            ) \
-            .execute()
-        
+            source_alias="source",
+            target_alias="target",
+        ).when_matched_update(
+            {
+                "total_quantity": "target.total_quantity + source.total_quantity",
+                "total_net_paid": "target.total_net_paid + source.total_net_paid",
+                "total_net_profit": "target.total_net_profit + source.total_net_profit",
+            }
+        ).when_not_matched_insert(
+            {
+                "s_store_id": "source.s_store_id",
+                "i_item_id": "source.i_item_id",
+                "c_customer_id": "source.c_customer_id",
+                "sale_date": "source.sale_date",
+                "total_quantity": "source.total_quantity",
+                "total_net_paid": "source.total_net_paid",
+                "total_net_profit": "source.total_net_profit",
+            }
+        ).execute()
+
     def query_total_sales_fact(self):
-        self.engine.register_table('total_sales_fact')
-        df = self.engine.spark.sql(f"""
+        self.engine.register_table("total_sales_fact")
+        df = self.engine.spark.sql("""
                             select sum(total_net_profit), year(sale_date) 
                             from total_sales_fact group by year(sale_date)
                             """)
-        result = df.collect()
\ No newline at end of file
+        df.collect()  # execute the query; the collected rows are not used
diff --git a/src/lakebench/benchmarks/elt_bench/engine_impl/spark.py b/src/lakebench/benchmarks/elt_bench/engine_impl/spark.py
index 0644e5c..fffa236 100644
--- a/src/lakebench/benchmarks/elt_bench/engine_impl/spark.py
+++ b/src/lakebench/benchmarks/elt_bench/engine_impl/spark.py
@@ -1,9 +1,11 @@
 from ....engines.spark import Spark
 
+
 class SparkELTBench:
     def __init__(self, engine: Spark):
-        
+
         import numpy as np
+
         self.np = np
         self.engine = engine
 
@@ -75,22 +77,25 @@ def merge_percent_into_total_sales_fact(self, percent: float):
         # fails to resolve target table attributes when source and target share column names.
         # Cloud runtimes (Databricks, Fabric, Synapse) use return this error.
         from delta.tables import DeltaTable
+
         delta_table = DeltaTable.forName(self.engine.spark, "total_sales_fact")
         delta_table.alias("target").merge(
             sampled_fact_data.alias("source"),
             "target.s_store_id = source.s_store_id AND "
             "target.i_item_id = source.i_item_id AND "
             "target.c_customer_id = source.c_customer_id AND "
-            "target.sale_date = source.sale_date"
-        ).whenMatchedUpdate(set={
-            "total_quantity":   "target.total_quantity + source.total_quantity",
-            "total_net_paid":   "target.total_net_paid + source.total_net_paid",
-            "total_net_profit": "target.total_net_profit + source.total_net_profit",
-        }).whenNotMatchedInsertAll().execute()
-        
+            "target.sale_date = source.sale_date",
+        ).whenMatchedUpdate(
+            set={
+                "total_quantity": "target.total_quantity + source.total_quantity",
+                "total_net_paid": "target.total_net_paid + source.total_net_paid",
+                "total_net_profit": "target.total_net_profit + source.total_net_profit",
+            }
+        ).whenNotMatchedInsertAll().execute()
+
     def query_total_sales_fact(self):
-        df = self.engine.spark.sql(f"""
+        df = self.engine.spark.sql("""
                             select sum(total_net_profit), year(sale_date) 
                             from total_sales_fact group by year(sale_date)
                             """)
-        result = df.collect()
\ No newline at end of file
+        df.collect()  # execute the query; the collected rows are not used
diff --git a/src/lakebench/benchmarks/tpcds/__init__.py b/src/lakebench/benchmarks/tpcds/__init__.py
index 7cdcd7f..cf17a60 100644
--- a/src/lakebench/benchmarks/tpcds/__init__.py
+++ b/src/lakebench/benchmarks/tpcds/__init__.py
@@ -1 +1 @@
-from .tpcds import TPCDS
\ No newline at end of file
+from .tpcds import TPCDS
diff --git a/src/lakebench/benchmarks/tpcds/tpcds.py b/src/lakebench/benchmarks/tpcds/tpcds.py
index 6da4da6..2e54dd5 100644
--- a/src/lakebench/benchmarks/tpcds/tpcds.py
+++ b/src/lakebench/benchmarks/tpcds/tpcds.py
@@ -1,17 +1,18 @@
-from .._load_and_query import _LoadAndQuery
-
-from ...engines.spark import Spark
-from ...engines.duckdb import DuckDB
 from ...engines.daft import Daft
+from ...engines.duckdb import DuckDB
+from ...engines.livy import Livy
 from ...engines.polars import Polars
 from ...engines.sail import Sail
+from ...engines.spark import Spark
+from .._load_and_query import _LoadAndQuery
+
 
 class TPCDS(_LoadAndQuery):
     """
     Class for running the TPC-DS benchmark.
 
     This class provides functionality for running the TPC-DS benchmark, including loading data,
-    executing queries, and performing power tests. Supported engines are listed in the 
+    executing queries, and performing power tests. Supported engines are listed in the
     `self.BENCHMARK_IMPL_REGISTRY` constant.
 
     Parameters
@@ -23,12 +24,12 @@ class TPCDS(_LoadAndQuery):
     query_list : list of str, optional
         List of queries to execute. Use '*' for all queries. If not specified, all queries will be run.
     input_parquet_folder_uri : str, optional
-        Path to the input parquet files. Must be the root directory containing a folder named after 
+        Path to the input parquet files. Must be the root directory containing a folder named after
         each table in TABLE_REGISTRY.
     result_table_uri : str, optional
         Table URI where results will be saved. Must be specified if `save_results` is True.
     save_results : bool
-        Whether to save the benchmark results. Results can also be accessed via the `self.results` 
+        Whether to save the benchmark results. Results can also be accessed via the `self.results`
         attribute after running the benchmark.
 
     Methods
@@ -46,33 +47,146 @@ class TPCDS(_LoadAndQuery):
     _run_power_test()
         Runs both the load and query tests.
     """
+
     BENCHMARK_IMPL_REGISTRY = {
         Spark: None,
         DuckDB: None,
         Daft: None,
         Polars: None,
         Sail: None,
+        Livy: None,
     }
-    BENCHMARK_NAME = 'TPCDS'
+    BENCHMARK_NAME = "TPCDS"
     TABLE_REGISTRY = [
-        'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales',
-        'customer', 'customer_address', 'customer_demographics', 'date_dim',
-        'household_demographics', 'income_band', 'inventory', 'item',
-        'promotion', 'reason', 'ship_mode', 'store', 'store_returns',
-        'store_sales', 'time_dim', 'warehouse', 'web_page', 'web_returns',
-        'web_sales', 'web_site'
+        "call_center",
+        "catalog_page",
+        "catalog_returns",
+        "catalog_sales",
+        "customer",
+        "customer_address",
+        "customer_demographics",
+        "date_dim",
+        "household_demographics",
+        "income_band",
+        "inventory",
+        "item",
+        "promotion",
+        "reason",
+        "ship_mode",
+        "store",
+        "store_returns",
+        "store_sales",
+        "time_dim",
+        "warehouse",
+        "web_page",
+        "web_returns",
+        "web_sales",
+        "web_site",
     ]
     QUERY_REGISTRY = [
-        'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
-        'q11', 'q12', 'q13', 'q14a', 'q14b', 'q15', 'q16', 'q17', 'q18', 'q19', 'q20',
-        'q21', 'q22', 'q23a', 'q23b', 'q24a', 'q24b', 'q25', 'q26', 'q27', 'q28', 'q29', 'q30',
-        'q31', 'q32', 'q33', 'q34', 'q35', 'q36', 'q37', 'q38', 'q39a', 'q39b', 'q40',
-        'q41', 'q42', 'q43', 'q44', 'q45', 'q46', 'q47', 'q48', 'q49', 'q50',
-        'q51', 'q52', 'q53', 'q54', 'q55', 'q56', 'q57', 'q58', 'q59', 'q60',
-        'q61', 'q62', 'q63', 'q64', 'q65', 'q66', 'q67', 'q68', 'q69', 'q70',
-        'q71', 'q72', 'q73', 'q74', 'q75', 'q76', 'q77', 'q78', 'q79', 'q80',
-        'q81', 'q82', 'q83', 'q84', 'q85', 'q86', 'q87', 'q88', 'q89', 'q90',
-        'q91', 'q92', 'q93', 'q94', 'q95', 'q96', 'q97', 'q98', 'q99'
+        "q1",
+        "q2",
+        "q3",
+        "q4",
+        "q5",
+        "q6",
+        "q7",
+        "q8",
+        "q9",
+        "q10",
+        "q11",
+        "q12",
+        "q13",
+        "q14a",
+        "q14b",
+        "q15",
+        "q16",
+        "q17",
+        "q18",
+        "q19",
+        "q20",
+        "q21",
+        "q22",
+        "q23a",
+        "q23b",
+        "q24a",
+        "q24b",
+        "q25",
+        "q26",
+        "q27",
+        "q28",
+        "q29",
+        "q30",
+        "q31",
+        "q32",
+        "q33",
+        "q34",
+        "q35",
+        "q36",
+        "q37",
+        "q38",
+        "q39a",
+        "q39b",
+        "q40",
+        "q41",
+        "q42",
+        "q43",
+        "q44",
+        "q45",
+        "q46",
+        "q47",
+        "q48",
+        "q49",
+        "q50",
+        "q51",
+        "q52",
+        "q53",
+        "q54",
+        "q55",
+        "q56",
+        "q57",
+        "q58",
+        "q59",
+        "q60",
+        "q61",
+        "q62",
+        "q63",
+        "q64",
+        "q65",
+        "q66",
+        "q67",
+        "q68",
+        "q69",
+        "q70",
+        "q71",
+        "q72",
+        "q73",
+        "q74",
+        "q75",
+        "q76",
+        "q77",
+        "q78",
+        "q79",
+        "q80",
+        "q81",
+        "q82",
+        "q83",
+        "q84",
+        "q85",
+        "q86",
+        "q87",
+        "q88",
+        "q89",
+        "q90",
+        "q91",
+        "q92",
+        "q93",
+        "q94",
+        "q95",
+        "q96",
+        "q97",
+        "q98",
+        "q99",
     ]
-    DDL_FILE_NAME = 'ddl_v3.2.0.sql'
-    VERSION = '3.2.0'
\ No newline at end of file
+    DDL_FILE_NAME = "ddl_v3.2.0.sql"
+    VERSION = "3.2.0"
diff --git a/src/lakebench/benchmarks/tpch/__init__.py b/src/lakebench/benchmarks/tpch/__init__.py
index 76ad1fd..4bbfece 100644
--- a/src/lakebench/benchmarks/tpch/__init__.py
+++ b/src/lakebench/benchmarks/tpch/__init__.py
@@ -1 +1 @@
-from .tpch import TPCH
\ No newline at end of file
+from .tpch import TPCH
diff --git a/src/lakebench/benchmarks/tpch/tpch.py b/src/lakebench/benchmarks/tpch/tpch.py
index e113c40..1f832b5 100644
--- a/src/lakebench/benchmarks/tpch/tpch.py
+++ b/src/lakebench/benchmarks/tpch/tpch.py
@@ -1,17 +1,18 @@
-from .._load_and_query import _LoadAndQuery
-
-from ...engines.spark import Spark
-from ...engines.duckdb import DuckDB
 from ...engines.daft import Daft
+from ...engines.duckdb import DuckDB
+from ...engines.livy import Livy
 from ...engines.polars import Polars
 from ...engines.sail import Sail
+from ...engines.spark import Spark
+from .._load_and_query import _LoadAndQuery
+
 
 class TPCH(_LoadAndQuery):
     """
     Class for running the TPC-H benchmark.
 
     This class provides functionality for running the TPC-H benchmark, including loading data,
-    executing queries, and performing power tests. Supported engines are listed in the 
+    executing queries, and performing power tests. Supported engines are listed in the
     `self.BENCHMARK_IMPL_REGISTRY` constant.
 
     Parameters
@@ -23,12 +24,12 @@ class TPCH(_LoadAndQuery):
     query_list : list of str, optional
         List of queries to execute. Use '*' for all queries. If not specified, all queries will be run.
     input_parquet_folder_uri : str, optional
-        Path to the input parquet files. Must be the root directory containing a folder named after 
+        Path to the input parquet files. Must be the root directory containing a folder named after
         each table in TABLE_REGISTRY.
     result_table_uri : str, optional
         Table URI where results will be saved. Must be specified if `save_results` is True.
     save_results : bool
-        Whether to save the benchmark results. Results can also be accessed via the `self.results` 
+        Whether to save the benchmark results. Results can also be accessed via the `self.results`
         attribute after running the benchmark.
 
     Methods
@@ -42,22 +43,40 @@ class TPCH(_LoadAndQuery):
     _run_power_test()
         Runs both the load and query tests.
     """
+
     BENCHMARK_IMPL_REGISTRY = {
         Spark: None,
         DuckDB: None,
         Daft: None,
         Polars: None,
         Sail: None,
+        Livy: None,
     }
-    BENCHMARK_NAME = 'TPCH'
-    TABLE_REGISTRY = [
-        'customer', 'lineitem', 'nation', 'orders', 'part',
-        'partsupp', 'region', 'supplier'
-    ]
+    BENCHMARK_NAME = "TPCH"
+    TABLE_REGISTRY = ["customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier"]
     QUERY_REGISTRY = [
-        'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
-        'q11', 'q12', 'q13', 'q14', 'q15', 'q16', 'q17', 'q18', 'q19', 'q20',
-        'q21', 'q22'
+        "q1",
+        "q2",
+        "q3",
+        "q4",
+        "q5",
+        "q6",
+        "q7",
+        "q8",
+        "q9",
+        "q10",
+        "q11",
+        "q12",
+        "q13",
+        "q14",
+        "q15",
+        "q16",
+        "q17",
+        "q18",
+        "q19",
+        "q20",
+        "q21",
+        "q22",
     ]
-    DDL_FILE_NAME = 'ddl_v3.0.1.sql'
-    VERSION = '3.0.1'
\ No newline at end of file
+    DDL_FILE_NAME = "ddl_v3.0.1.sql"
+    VERSION = "3.0.1"
diff --git a/src/lakebench/datagen/__init__.py b/src/lakebench/datagen/__init__.py
index 6858cf8..f2b2cf1 100644
--- a/src/lakebench/datagen/__init__.py
+++ b/src/lakebench/datagen/__init__.py
@@ -1,3 +1,3 @@
+from .clickbench import ClickBenchDataGenerator
 from .tpcds import TPCDSDataGenerator
 from .tpch import TPCHDataGenerator
-from .clickbench import ClickBenchDataGenerator
\ No newline at end of file
diff --git a/src/lakebench/datagen/_tpc.py b/src/lakebench/datagen/_tpc.py
index 8d036d6..14b41f0 100644
--- a/src/lakebench/datagen/_tpc.py
+++ b/src/lakebench/datagen/_tpc.py
@@ -1,16 +1,23 @@
-import posixpath
 import importlib.util
+import logging
+import posixpath
+
 import fsspec
 from fsspec import AbstractFileSystem
+
 from lakebench.utils.path_utils import to_unix_path
 
+logger = logging.getLogger(__name__)
+
+
 class _TPCDataGenerator:
     """
     Base class for TPC data generation. PLEASE DO NOT INSTANTIATE THIS CLASS DIRECTLY. Use the TPCHDataGenerator and TPCDSDataGenerator
     subclasses instead.
     """
-    GEN_UTIL = ''
-    GEN_TYPE = ''
+
+    GEN_UTIL = ""
+    GEN_TYPE = ""
 
     def __init__(self, scale_factor: int, target_folder_uri: str, target_row_group_size_mb: int = 128) -> None:
         """
@@ -28,7 +35,9 @@ def __init__(self, scale_factor: int, target_folder_uri: str, target_row_group_s
         """
         self.scale_factor = scale_factor
         if target_folder_uri.startswith("abfss://"):
-            raise ValueError("abfss path currently not supported. DuckDB is used for data generation and DuckDB is not able to write to Azure remote storage as of now.")
+            raise ValueError(
+                "abfss path currently not supported. DuckDB is used for data generation and DuckDB is not able to write to Azure remote storage as of now."
+            )
             # self.fs: FsspecStore = FsspecStore(protocol=urlparse(target_mount_folder_path).scheme)
         else:
             # workaround: use original fsspec until obstore bugs are fixes:
@@ -41,16 +50,15 @@ def __init__(self, scale_factor: int, target_folder_uri: str, target_row_group_s
             raise ImportError(
                 "DuckDB is used for data generation but is not installed. Install using `%pip install lakebench[duckdb]` or `%pip install lakebench[datagen]`"
             )
-        
-        
+
     def run(self) -> None:
         """
-        This method uses DuckDB to generate in-memory tables based on the specified 
-        scale factor and writes them to Parquet files. It estimates the average row 
-        size in MB using a sample of the data since DuckDB only supports specifying 
-        the number of rows per row group. The generated tables are written to the 
+        This method uses DuckDB to generate in-memory tables based on the specified
+        scale factor and writes them to Parquet files. It estimates the average row
+        size in MB using a sample of the data since DuckDB only supports specifying
+        the number of rows per row group. The generated tables are written to the
         specified target folder with optimized row group sizes.
-       
+
         Notes
         -----
         - The method creates a sample Parquet file for each table to estimate row sizes.
@@ -66,16 +74,20 @@ def run(self) -> None:
         self.fs.mkdirs(self.target_folder_uri, exist_ok=True)
 
         with duckdb.connect() as con:
-            print("Generating in-memory tables")
+            logger.info("Generating in-memory tables")
             con.execute(f"CALL {self.GEN_UTIL}(sf={self.scale_factor})")
             tables = [row[0] for row in con.execute("SHOW TABLES").fetchall()]
-            print(f"Generated in-memory tables: {tables}")
+            logger.info("Generated in-memory tables: %s", tables)
 
             for table in tables:
                 sample_file = posixpath.join(self.target_folder_uri, f"{table}_sample.parquet")
                 full_folder_uri = posixpath.join(self.target_folder_uri, table)
                 # Write a sample for row size estimation
-                print(f"\nSampling {table} to evaluate row count to target {self.target_row_group_size_mb}mb row groups...")
+                logger.info(
+                    "Sampling %s to evaluate row count to target %dmb row groups...",
+                    table,
+                    self.target_row_group_size_mb,
+                )
                 con.execute(f"""
                     COPY (SELECT * FROM {table} LIMIT 1000000)
                     TO '{sample_file}'
@@ -85,14 +97,19 @@ def run(self) -> None:
                 with pq.ParquetFile(sample_file) as pf:
                     rg = pf.metadata.row_group(0)
                 avg_row_size = rg.total_byte_size / rg.num_rows
-                #print(f"{table} sample: {rg.num_rows} rows, {rg.total_byte_size / (1024*1024):.2f} MB")
-                #print(f"Avg row size: {avg_row_size:.2f} bytes")
+                # print(f"{table} sample: {rg.num_rows} rows, {rg.total_byte_size / (1024*1024):.2f} MB")
+                # print(f"Avg row size: {avg_row_size:.2f} bytes")
                 target_size_bytes = self.target_row_group_size_mb * 1024 * 1024
                 target_rows = int(target_size_bytes / avg_row_size)
-                #print(f"Target ROW_GROUP_SIZE for ~{self.target_row_group_size_mb} MB: {target_rows} rows")
+                # print(f"Target ROW_GROUP_SIZE for ~{self.target_row_group_size_mb} MB: {target_rows} rows")
 
                 # Write full table
-                print(f"Writing {table} to {full_folder_uri} with ROW_GROUP_SIZE {target_rows}...")
+                logger.info(
+                    "Writing %s to %s with ROW_GROUP_SIZE %d...",
+                    table,
+                    full_folder_uri,
+                    target_rows,
+                )
                 con.execute(f"""
                     COPY {table} TO '{full_folder_uri}'
                     (FORMAT 'parquet', ROW_GROUP_SIZE {target_rows}, PER_THREAD_OUTPUT, OVERWRITE)
@@ -100,4 +117,4 @@ def run(self) -> None:
 
                 con.execute(f"DROP TABLE {table}")
 
-                self.fs.rm(sample_file)
\ No newline at end of file
+                self.fs.rm(sample_file)
diff --git a/src/lakebench/datagen/_tpc_rs.py b/src/lakebench/datagen/_tpc_rs.py
index a9ad71f..6e49b29 100644
--- a/src/lakebench/datagen/_tpc_rs.py
+++ b/src/lakebench/datagen/_tpc_rs.py
@@ -1,46 +1,56 @@
+import logging
 import posixpath
-import importlib.util
-import fsspec
-from fsspec import AbstractFileSystem
 import subprocess
 import threading
-import math
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from lakebench.utils.path_utils import to_unix_path
 from urllib.parse import urlparse
 
+import fsspec
+from fsspec import AbstractFileSystem
+
+from lakebench.utils.path_utils import to_unix_path
+
+logger = logging.getLogger(__name__)
+
+
 class _TPCRsDataGenerator:
     """
     Base class for TPC Rust based data generation. PLEASE DO NOT INSTANTIATE THIS CLASS DIRECTLY. Use the TPCHDataGenerator and TPCDSDataGenerator
     subclasses instead.
     """
-    GEN_UTIL = ''
-    GEN_TYPE = 'tpch'
-    GEN_TABLE_REGISTRY = [
-        'customer', 'lineitem', 'nation', 'orders', 'part',
-        'partsupp', 'region', 'supplier'
-    ]
+
+    GEN_UTIL = ""
+    GEN_TYPE = "tpch"
+    GEN_TABLE_REGISTRY = ["customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier"]
     TARGET_FILE_SIZE_MAP = [
-        (10, 128), # up to 10GB -> 128MB files
-        (1024, 256), # up to 1TB -> 256MB files
-        (5120, 512), # up to 5TB -> 512MB files
-        (10240, 1024) # up to 10TB and larger -> 1GB files
+        (10, 128),  # up to 10GB -> 128MB files
+        (1024, 256),  # up to 1TB -> 256MB files
+        (5120, 512),  # up to 5TB -> 512MB files
+        (10240, 1024),  # up to 10TB and larger -> 1GB files
     ]
     SF1000_SIZE_GB_DICT = {
-        'lineitem':  152,
-        'orders': 38,
-        'partsupp': 26.7,
-        'part': 4,
-        'customer': 7.6,
-        'supplier': 0.48,
-        'region': 0.00,
-        'nation': 0.00
+        "lineitem": 152,
+        "orders": 38,
+        "partsupp": 26.7,
+        "part": 4,
+        "customer": 7.6,
+        "supplier": 0.48,
+        "region": 0.00,
+        "nation": 0.00,
     }
-    
+
     # Class-level lock for thread-safe printing
     _print_lock = threading.Lock()
 
-    def __init__(self, scale_factor: int, target_folder_uri: str, target_row_group_size_mb: int = 128, compression: str = "ZSTD(1)", table_list: list = None, multithreading: bool = True) -> None:
+    def __init__(
+        self,
+        scale_factor: int,
+        target_folder_uri: str,
+        target_row_group_size_mb: int = 128,
+        compression: str = "ZSTD(1)",
+        table_list: list = None,
+        multithreading: bool = True,
+    ) -> None:
         """
         Initialize the TPC data generator with a scale factor.
 
@@ -58,49 +68,73 @@ def __init__(self, scale_factor: int, target_folder_uri: str, target_row_group_s
         """
         self.scale_factor = scale_factor
         uri_scheme = urlparse(target_folder_uri).scheme
-        
+
         # Allow local file systems: no scheme, file://, or Windows drive letters
-        cloud_schemes = {'s3', 'gs', 'gcs', 'abfs', 'abfss', 'adl', 'wasb', 'wasbs'}
-        
+        cloud_schemes = {"s3", "gs", "gcs", "abfs", "abfss", "adl", "wasb", "wasbs"}
+
         if uri_scheme in cloud_schemes:
-            raise ValueError(f"{uri_scheme} protocol is not currently supported for TPC-RS data generation. Please use a local file system path or mount the storage location.")
-        
-        if compression.split('(')[0] not in ["UNCOMPRESSED", "SNAPPY", "GZIP", "BROTLI", "LZ4", "LZ4_RAW", "LZO", "ZSTD"]:
+            raise ValueError(
+                f"{uri_scheme} protocol is not currently supported for TPC-RS data generation. Please use a local file system path or mount the storage location."
+            )
+
+        if compression.split("(")[0] not in [
+            "UNCOMPRESSED",
+            "SNAPPY",
+            "GZIP",
+            "BROTLI",
+            "LZ4",
+            "LZ4_RAW",
+            "LZO",
+            "ZSTD",
+        ]:
             raise ValueError(f"Unsupported compression codec: {compression}")
-        
+
         self.fs: AbstractFileSystem = fsspec.filesystem("file")
         self.target_folder_uri = to_unix_path(target_folder_uri)
-        self.target_row_group_size_mb = int(target_row_group_size_mb * 2.6) # 2.6 for uncompressed-> ZSTD(1) compression ratio
+        self.target_row_group_size_mb = int(
+            target_row_group_size_mb * 2.6
+        )  # 2.6 for uncompressed-> ZSTD(1) compression ratio
         self.compression = compression
         self.table_list = table_list
         self.multithreading = multithreading
 
         def get_tpcgen_path():
             import shutil
+
             # Try shutil.which first (most reliable)
             path = shutil.which(f"{self.GEN_TYPE}gen-cli")
             if path:
                 return path
 
             # Fallback to user Scripts directory
-            from pathlib import Path
             import sys
-            user_scripts = Path.home() / "AppData" / "Roaming" / "Python" / f"Python{sys.version_info.major}{sys.version_info.minor}" / "Scripts" / "tpchgen-cli.exe"
+            from pathlib import Path
+
+            user_scripts = (
+                Path.home()
+                / "AppData"
+                / "Roaming"
+                / "Python"
+                / f"Python{sys.version_info.major}{sys.version_info.minor}"
+                / "Scripts"
+                / f"{self.GEN_TYPE}gen-cli.exe"  # same CLI name that shutil.which() looked up above
+            )
             if user_scripts.exists():
                 return str(user_scripts)
 
-            raise ImportError(f"{self.GEN_TYPE}gen-cli is used for data generation but is not installed. Install using `%pip install {self.GEN_TYPE}gen-cli`")
+            raise ImportError(
+                f"{self.GEN_TYPE}gen-cli is used for data generation but is not installed. Install using `%pip install {self.GEN_TYPE}gen-cli`"
+            )
 
         self.tpcgen_exe = get_tpcgen_path()
-        
-    
+
     def run(self) -> None:
         """
         This method uses multithreading to generate individual tables in parallel using
         a rust-based TPC data generation utility. Each table is generated with an optimal
         number of parts (based on the GEN_SF1000_FILE_COUNT_MAP) to target having files around 1GB.
         """
-        
+
         # cleanup target directory
         def clean_dir(path: str) -> None:
             if self.fs.exists(path):
@@ -113,24 +147,23 @@ def clean_dir(path: str) -> None:
             for table_name in self.table_list:
                 table_path = posixpath.join(self.target_folder_uri, table_name)
                 clean_dir(table_path)
-        
+
         if self.table_list is None:
             tables = self.GEN_TABLE_REGISTRY
         else:
             tables = [table for table in self.GEN_TABLE_REGISTRY if table in self.table_list]
-        
-        print(f"🚀 Starting parallel generation of {len(tables)} tables with multithreading...")
-        print(f"📊 Scale Factor: {self.scale_factor}")
-        print(f"📁 Output Directory: {self.target_folder_uri}")
-        
+
+        logger.info("🚀 Starting parallel generation of %d tables with multithreading...", len(tables))
+        logger.info("📊 Scale Factor: %s", self.scale_factor)
+        logger.info("📁 Output Directory: %s", self.target_folder_uri)
+
         completed_tables = []
         failed_tables = []
-        
+
         if self.multithreading:
             with ThreadPoolExecutor() as executor:
                 future_to_table = {
-                    executor.submit(self._generate_table, table_name): table_name 
-                    for table_name in tables
+                    executor.submit(self._generate_table, table_name): table_name for table_name in tables
                 }
 
                 for future in as_completed(future_to_table):
@@ -139,49 +172,50 @@ def clean_dir(path: str) -> None:
                         result = future.result()
                         if result:
                             completed_tables.append(table_name)
-                            print(f"✅ {table_name} - Generation completed successfully")
+                            logger.info("✅ %s - Generation completed successfully", table_name)
                         else:
                             failed_tables.append(table_name)
-                            print(f"❌ {table_name} - Generation failed")
+                            logger.error("❌ %s - Generation failed", table_name)
                     except Exception as exc:
                         failed_tables.append(table_name)
-                        print(f"❌ {table_name} - Generation failed with exception: {exc}")
+                        logger.error("❌ %s - Generation failed with exception: %s", table_name, exc)
         else:
             for table_name in tables:
                 result = self._generate_table(table_name)
                 if result:
                     completed_tables.append(table_name)
-                    print(f"✅ {table_name} - Generation completed successfully")
+                    logger.info("✅ %s - Generation completed successfully", table_name)
                 else:
                     failed_tables.append(table_name)
-                    print(f"❌ {table_name} - Generation failed")
-        
-        print(f"\n📋 Generation Summary:")
-        print(f"   ✅ Successfully generated: {len(completed_tables)} tables")
+                    logger.error("❌ %s - Generation failed", table_name)
+
+        logger.info("📋 Generation Summary:")
+        logger.info("   ✅ Successfully generated: %d tables", len(completed_tables))
         if completed_tables:
-            print(f"      Tables: {', '.join(completed_tables)}")
-        
+            logger.info("      Tables: %s", ", ".join(completed_tables))
+
         if failed_tables:
-            print(f"   ❌ Failed to generate: {len(failed_tables)} tables")
-            print(f"      Tables: {', '.join(failed_tables)}")
+            logger.error("   ❌ Failed to generate: %d tables", len(failed_tables))
+            logger.error("      Tables: %s", ", ".join(failed_tables))
             raise RuntimeError(f"Failed to generate {len(failed_tables)} tables: {', '.join(failed_tables)}")
         else:
-            print(f"🎉 All {len(tables)} tables generated successfully!")
-    
+            logger.info("🎉 All %d tables generated successfully!", len(tables))
+
     def _generate_table(self, table_name: str) -> bool:
         """
         Generate a single table using the optimal number of parts.
-        
+
         Parameters
         ----------
         table_name: str
             Name of the table to generate
-            
+
         Returns
         -------
         bool
             True if generation was successful, False otherwise
         """
+
         def find_target_size(size: float) -> int:
             for threshold_gb, target_mb in self.TARGET_FILE_SIZE_MAP:
                 if size < threshold_gb:
@@ -193,42 +227,49 @@ def find_target_size(size: float) -> int:
         scale_adj_size_gb = sf1000_size_gb * (self.scale_factor / 1000.0)
         target_size_mb = find_target_size(scale_adj_size_gb)
         optimal_parts = max(round(scale_adj_size_gb * 1024 / target_size_mb), 1)
-                
-        print(f"🔧 {table_name} - Using {optimal_parts} parts (target file size: {target_size_mb}mb)")
-        
+
+        logger.info("🔧 %s - Using %d parts (target file size: %dmb)", table_name, optimal_parts, target_size_mb)
+
         # ensure that 128mb target files have a single row group
         adj_row_group_target_mb = 1024 if target_size_mb == 128 else self.target_row_group_size_mb
         # Build command for individual table generation
         cmd = [
             self.tpcgen_exe,
-            "--scale-factor", str(self.scale_factor),
-            "--output-dir", self.target_folder_uri,
-            "--parts", str(optimal_parts),
-            "--format", "parquet",
-            "--parquet-row-group-bytes", str(adj_row_group_target_mb * 1024 * 1024),
-            "--parquet-compression", self.compression,
-            "--tables", table_name 
+            "--scale-factor",
+            str(self.scale_factor),
+            "--output-dir",
+            self.target_folder_uri,
+            "--parts",
+            str(optimal_parts),
+            "--format",
+            "parquet",
+            "--parquet-row-group-bytes",
+            str(adj_row_group_target_mb * 1024 * 1024),
+            "--parquet-compression",
+            self.compression,
+            "--tables",
+            table_name,
         ]
 
         try:
             result = subprocess.run(cmd, capture_output=True, text=True, check=True)
             if result.stdout:
                 with self._print_lock:
-                    print(f"📝 {table_name} output:")
-                    for line in result.stdout.strip().split('\n'):
+                    logger.info("📝 %s output:", table_name)
+                    for line in result.stdout.strip().split("\n"):
                         if line.strip():
-                            print(f"   {line}")
+                            logger.info("   %s", line)
             return True
-            
+
         except subprocess.CalledProcessError as e:
             with self._print_lock:
-                print(f"❌ {table_name} failed:")
+                logger.error("❌ %s failed:", table_name)
                 if e.stdout:
-                    print(f"   stdout: {e.stdout}")
+                    logger.error("   stdout: %s", e.stdout)
                 if e.stderr:
-                    print(f"   stderr: {e.stderr}")
+                    logger.error("   stderr: %s", e.stderr)
             return False
         except Exception as e:
             with self._print_lock:
-                print(f"❌ {table_name} failed with exception: {e}")
-            return False
\ No newline at end of file
+                logger.error("❌ %s failed with exception: %s", table_name, e)
+            return False
diff --git a/src/lakebench/datagen/clickbench.py b/src/lakebench/datagen/clickbench.py
index ebf0aa8..dc73c58 100644
--- a/src/lakebench/datagen/clickbench.py
+++ b/src/lakebench/datagen/clickbench.py
@@ -1,19 +1,20 @@
+import logging
 import posixpath
 from typing import Optional
 
+logger = logging.getLogger(__name__)
 
-class ClickBenchDataGenerator:
 
+class ClickBenchDataGenerator:
     def __init__(self, target_mount_folder_uri: str = None, partitioned_files: bool = True):
         """
         Initialize the ClickBench data generator. Technically, this just downloads the ClickBench data from the ClickHouse datasets repository.
 
-        :param partitioned_files: If True, the downloaded data will be 100 partitioned files, otherwise it is one massive file. Use partitioned files for better download performance. 
+        :param partitioned_files: If True, the downloaded data will be 100 partitioned files, otherwise it is one massive file. Use partitioned files for better download performance.
         """
         self.target_mount_folder_path = target_mount_folder_uri
         self.partitioned_files = partitioned_files
 
-
     def run(self):
         """
         Download ClickBench Parquet files to the target folder.
@@ -32,6 +33,7 @@ def run(self):
 
         if self.partitioned_files:
             from concurrent.futures import ThreadPoolExecutor
+
             with ThreadPoolExecutor() as executor:
                 executor.map(self.__download_parquet, range(100))
         else:
@@ -39,18 +41,19 @@ def run(self):
 
     def __download_parquet(self, file_index: Optional[int] = None):
         file_name = f"hits_{file_index}.parquet" if file_index is not None else "hits.parquet"
-        source_folder = 'athena_partitioned' if file_index is not None else 'athena'
+        source_folder = "athena_partitioned" if file_index is not None else "athena"
 
         import urllib.request
+
         url = f"https://datasets.clickhouse.com/hits_compatible/{source_folder}/{file_name}"
         local_path = posixpath.join(self.target_mount_folder_path, file_name)
 
-        headers = {'User-Agent': 'Mozilla/5.0'}
+        headers = {"User-Agent": "Mozilla/5.0"}
         req = urllib.request.Request(url, headers=headers)
 
         try:
-            with urllib.request.urlopen(req) as response, open(local_path, 'wb') as out_file:
+            with urllib.request.urlopen(req) as response, open(local_path, "wb") as out_file:
                 out_file.write(response.read())
-            print(f"Downloaded {file_name}")
+            logger.info("Downloaded %s", file_name)
         except Exception as e:
-            print(f"Failed to download {file_name}: {e}")
\ No newline at end of file
+            logger.error("Failed to download %s: %s", file_name, e)
diff --git a/src/lakebench/datagen/tpcds.py b/src/lakebench/datagen/tpcds.py
index f221b21..091fbe2 100644
--- a/src/lakebench/datagen/tpcds.py
+++ b/src/lakebench/datagen/tpcds.py
@@ -1,4 +1,6 @@
 from ._tpc import _TPCDataGenerator
+
+
 class TPCDSDataGenerator(_TPCDataGenerator):
     """
     This class is a wrapper for the DuckDB TPC-DS data generation utility. It generates TPC-DS data in Parquet format
@@ -18,5 +20,6 @@ class TPCDSDataGenerator(_TPCDataGenerator):
     run()
         Generates TPC-DS data in Parquet format based on the input scale factor and writes it to the target folder.
     """
-    GEN_UTIL = 'dsdgen'
-    GEN_TYPE = 'tpds'
\ No newline at end of file
+
+    GEN_UTIL = "dsdgen"
+    GEN_TYPE = "tpds"
diff --git a/src/lakebench/datagen/tpch.py b/src/lakebench/datagen/tpch.py
index c09a037..2588af3 100644
--- a/src/lakebench/datagen/tpch.py
+++ b/src/lakebench/datagen/tpch.py
@@ -1,4 +1,6 @@
 from ._tpc_rs import _TPCRsDataGenerator
+
+
 class TPCHDataGenerator(_TPCRsDataGenerator):
     """
     This class is a multithreading wrapper of the rust-based TPC-H data generator, `tpchgen-rs`. It generates TPC-H data in Parquet format
@@ -22,26 +24,18 @@ class TPCHDataGenerator(_TPCRsDataGenerator):
     run()
         Generates TPC-H data in Parquet format based on the input scale factor and writes it to the target folder.
     """
-    GEN_UTIL = 'dbgen'
-    GEN_TYPE = 'tpch'
-    GEN_SF1000_FILE_COUNT_MAP = {
-        'lineitem': 150,
-        'orders': 40,
-        'partsupp': 26,
-        'part': 4,
-        'customer': 8
-    }
-    GEN_TABLE_REGISTRY = [
-        'customer', 'lineitem', 'nation', 'orders', 'part',
-        'partsupp', 'region', 'supplier'
-    ]
+
+    GEN_UTIL = "dbgen"
+    GEN_TYPE = "tpch"
+    GEN_SF1000_FILE_COUNT_MAP = {"lineitem": 150, "orders": 40, "partsupp": 26, "part": 4, "customer": 8}
+    GEN_TABLE_REGISTRY = ["customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier"]
     SF1000_SIZE_GB_DICT = {
-        'lineitem':  152,
-        'orders': 38,
-        'partsupp': 26.7,
-        'part': 4,
-        'customer': 7.6,
-        'supplier': 0.48,
-        'region': 0.00,
-        'nation': 0.00
-    }
\ No newline at end of file
+        "lineitem": 152,
+        "orders": 38,
+        "partsupp": 26.7,
+        "part": 4,
+        "customer": 7.6,
+        "supplier": 0.48,
+        "region": 0.00,
+        "nation": 0.00,
+    }
diff --git a/src/lakebench/engines/__init__.py b/src/lakebench/engines/__init__.py
index fc55f43..47cbba6 100644
--- a/src/lakebench/engines/__init__.py
+++ b/src/lakebench/engines/__init__.py
@@ -2,9 +2,11 @@
 from .daft import Daft
 from .delta_rs import DeltaRs
 from .duckdb import DuckDB
+from .fabric_spark import FabricSpark
+from .hdi_spark import HDISpark
+from .livy import Livy
 from .polars import Polars
+from .sail import Sail
 from .spark import Spark
-from .fabric_spark import FabricSpark
+from .spark_connect import SparkConnect
 from .synapse_spark import SynapseSpark
-from .hdi_spark import HDISpark
-from .sail import Sail
\ No newline at end of file
diff --git a/src/lakebench/engines/base.py b/src/lakebench/engines/base.py
index 6d613d4..cafdd65 100644
--- a/src/lakebench/engines/base.py
+++ b/src/lakebench/engines/base.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
-from abc import ABC
+
 import os
-from typing import Optional, Any
-from importlib.metadata import version
+from abc import ABC
 from decimal import Decimal
+from importlib.metadata import version
+from typing import Any, Optional
 from urllib.parse import urlparse
+
 import fsspec
 
+
 class BaseEngine(ABC):
     """
     Abstract base class for implementing different engine types.
@@ -32,35 +35,41 @@ class BaseEngine(ABC):
     append_array_to_delta(abfss_path: str, array: list)
         Appends a list of data to a Delta table at the specified path.
     """
+
     SQLGLOT_DIALECT = None
     SUPPORTS_SCHEMA_PREP = False
     SUPPORTS_MOUNT_PATH = True
-    TABLE_FORMAT = 'delta'
-    
-    def __init__(
-            self, 
-            schema_or_working_directory_uri: str = None,
-            storage_options: Optional[dict[str, Any]] = None
-            ):
+    TABLE_FORMAT = "delta"
+    # Default per-statement timeout (seconds). None = engine's default
+    # behavior (no Lakebench-imposed cap).
+    query_timeout_seconds: Optional[int] = None
+
+    def __init__(self, schema_or_working_directory_uri: str = None, storage_options: Optional[dict[str, Any]] = None):
         """
         Parameters
         ----------
         schema_or_working_directory_uri : str, optional
-            The base URI where tables are stored. For non-Spark engines, 
-            tables are stored directly under this path. For Spark engines, 
+            The base URI where tables are stored. For non-Spark engines,
+            tables are stored directly under this path. For Spark engines,
             this serves as the root schema path where tables are created.
         storage_options : dict, optional
             A dictionary of storage options to pass to the engine for filesystem access.
         """
-        self.version: str = ''
+        self.version: str = ""
         self.cost_per_vcore_hour: Optional[float] = None
         self.cost_per_hour: Optional[float] = None
         self.extended_engine_metadata: dict[str, str] = {}
         self.storage_options: dict[str, Any] = storage_options if storage_options is not None else {}
-        self.schema_or_working_directory_uri: str = schema_or_working_directory_uri.replace("file:///", "").replace(chr(92), '/') if schema_or_working_directory_uri else None
+        self.schema_or_working_directory_uri: str = (
+            schema_or_working_directory_uri.replace("file:///", "").replace(chr(92), "/")
+            if schema_or_working_directory_uri
+            else None
+        )
 
-        self.runtime = self._detect_runtime() if getattr(self, 'runtime', None) is None else self.runtime
-        self.operating_system = self._detect_os() if getattr(self, 'operating_system', None) is None else self.operating_system
+        self.runtime = self._detect_runtime() if getattr(self, "runtime", None) is None else self.runtime
+        self.operating_system = (
+            self._detect_os() if getattr(self, "operating_system", None) is None else self.operating_system
+        )
 
         if self.runtime == "fabric":
             import notebookutils
@@ -68,21 +77,26 @@ def __init__(
 
             self._notebookutils = notebookutils
             self._fabric_rest = fabric.FabricRestClient()
-            workspace_id = self._notebookutils.runtime.context['currentWorkspaceId']
-            self.region = self._fabric_rest.get(path_or_url=f"/v1/workspaces/{workspace_id}").json()['capacityRegion'].replace(' ', '').lower()
-            self.capacity_id = self._fabric_rest.get(path_or_url=f"/v1/workspaces/{workspace_id}").json()['capacityId']
-            self._autocalc_usd_cost_per_vcore_hour = self._get_vm_retail_rate(self.region, 'Spark Memory Optimized Capacity Usage')
-            self.extended_engine_metadata.update({'compute_region': self.region})
+            workspace_id = self._notebookutils.runtime.context["currentWorkspaceId"]
+            self.region = (
+                self._fabric_rest.get(path_or_url=f"/v1/workspaces/{workspace_id}")
+                .json()["capacityRegion"]
+                .replace(" ", "")
+                .lower()
+            )
+            self.capacity_id = self._fabric_rest.get(path_or_url=f"/v1/workspaces/{workspace_id}").json()["capacityId"]
+            self._autocalc_usd_cost_per_vcore_hour = self._get_vm_retail_rate(
+                self.region, "Spark Memory Optimized Capacity Usage"
+            )
+            self.extended_engine_metadata.update({"compute_region": self.region})
             # rust object store (used by delta-rs, polars, sail) parametrization; https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variant.Token
             os.environ["AZURE_STORAGE_TOKEN"] = self._notebookutils.credentials.getToken("storage")
         elif self.runtime == "synapse":
             import mssparkutils
+
             self._notebookutils = mssparkutils
 
-        self.extended_engine_metadata.update({
-            'runtime': self.runtime,
-            'os': self.operating_system
-        })
+        self.extended_engine_metadata.update({"runtime": self.runtime, "os": self.operating_system})
 
         if self.schema_or_working_directory_uri is None:
             self.fs = None
@@ -90,7 +104,7 @@ def __init__(
             # workaround: use notebookutils filesystem for abfs due to recursive delete issues in fsspec
             # https://github.com/developmentseed/obstore/issues/556
             self.fs = self._notebookutils.fs
-            self.fs.mkdir = self.fs.mkdirs # notebookutils users mkdirs
+            self.fs.mkdir = self.fs.mkdirs  # notebookutils uses mkdirs
             if self.storage_options == {}:
                 self._validate_and_set_azure_storage_config()
         elif urlparse(self.schema_or_working_directory_uri).scheme in ("s3", "gs"):
@@ -107,47 +121,47 @@ def _detect_runtime(self) -> str:
         Dynamically detect the runtime/environment.
         Returns: str - The detected service name
         """
-        import os    
+        import os
 
         # Check for Microsoft Fabric or Synapse
         try:
             notebookutils = None
-            utils_modules = ('notebookutils', 'mssparkutils')
+            utils_modules = ("notebookutils", "mssparkutils")
             for utils_module in utils_modules:
                 try:
                     notebookutils = __import__(utils_module)
                 except ImportError:
                     continue
-            if notebookutils and hasattr(notebookutils, 'runtime'):
-                if hasattr(notebookutils.runtime, 'context'):
+            if notebookutils and hasattr(notebookutils, "runtime"):
+                if hasattr(notebookutils.runtime, "context"):
                     context = notebookutils.runtime.context
-                    if 'productType' in context:
-                        product = context['productType'].lower()
+                    if "productType" in context:
+                        product = context["productType"].lower()
                         return product
-        except:
+        except Exception:
             pass
-        
+
         # Check for Databricks
         try:
             dbutils = None
-            if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
+            if "DATABRICKS_RUNTIME_VERSION" in os.environ:
                 return "databricks"
             try:
-                dbutils = __import__('dbutils')
+                dbutils = __import__("dbutils")
                 if dbutils is not None:
                     return "databricks"
-            except:
+            except Exception:
                 pass
-        except:
+        except Exception:
             pass
-        
+
         # Check for Google Colab
         try:
-            if 'COLAB_RELEASE_TAG' in os.environ:
+            if "COLAB_RELEASE_TAG" in os.environ:
                 return "colab"
         except ImportError:
             pass
-        
+
         # Default fallback
         return "local_unknown"
 
@@ -159,18 +173,20 @@ def _detect_os(self) -> str:
         import sys
 
         os_platform = sys.platform.lower()
-        if os_platform.startswith('win'):
-            return 'windows'
-        elif os_platform.startswith('linux'):
-            return 'linux'
-        elif os_platform.startswith('darwin'):
-            return 'mac'
+        if os_platform.startswith("win"):
+            return "windows"
+        elif os_platform.startswith("linux"):
+            return "linux"
+        elif os_platform.startswith("darwin"):
+            return "mac"
         else:
-            return 'unknown'
+            return "unknown"
 
     def _validate_and_set_azure_storage_config(self) -> None:
         if not os.getenv("AZURE_STORAGE_TOKEN"):
-            raise ValueError("""Please store bearer token as env variable `AZURE_STORAGE_TOKEN` (via `os.environ["AZURE_STORAGE_TOKEN"] = "..."`)""")
+            raise ValueError(
+                """Please store bearer token as env variable `AZURE_STORAGE_TOKEN` (via `os.environ["AZURE_STORAGE_TOKEN"] = "..."`)"""
+            )
         self.storage_options = {
             "bearer_token": os.getenv("AZURE_STORAGE_TOKEN"),
             "allow_invalid_certificates": "true",  # https://github.com/delta-io/delta-rs/issues/3243#issuecomment-2727206866
@@ -178,28 +194,29 @@ def _validate_and_set_azure_storage_config(self) -> None:
 
     def _get_vm_retail_rate(self, region: str, sku: str, spot: bool = False) -> float:
         import requests
+
         query = f"armRegionName eq '{region}' and serviceName eq 'Microsoft Fabric' and skuName eq '{sku}'"
         api_url = "https://prices.azure.com/api/retail/prices?"
-        return requests.get(api_url, params={'$filter': query}).json()['Items'][0]['retailPrice'] / 2
-    
+        return requests.get(api_url, params={"$filter": query}).json()["Items"][0]["retailPrice"] / 2
+
     def get_total_cores(self) -> int:
         """
         Returns the total number of CPU cores available on the system.
         """
         cores = os.cpu_count()
         return cores
-    
+
     def get_compute_size(self) -> str:
         """
         Returns a formatted string with the compute size.
         """
         cores = self.get_total_cores()
         return f"{cores}vCore"
-    
+
     def get_job_cost(self, duration_ms: int) -> Optional[Decimal]:
         """
         Returns the cost per hour for compute as a Decimal.
-        
+
         If `cost_per_vcore_hour` or `cost_per_hour` is provided, it calculates the job cost.
         Otherwise, it returns None.
         """
@@ -209,42 +226,68 @@ def get_job_cost(self, duration_ms: int) -> Optional[Decimal]:
             return None
 
         job_cost = Decimal(self.cost_per_hour) * (Decimal(duration_ms) / Decimal(3600000))  # Convert ms to hours
-        return job_cost.quantize(Decimal('0.0000000000'))  # Ensure precision matches DECIMAL(18,10)
-    
-    
+        return job_cost.quantize(Decimal("0.0000000000"))  # Ensure precision matches DECIMAL(18,10)
+
+    def get_table_columns(self, table_name: str) -> list:
+        """
+        Return column names for a registered/metastore table.
+
+        Override in subclasses that support schema introspection.
+        Returns an empty list by default (introspection not supported).
+        """
+        return []
+
+    def list_databases(self) -> list:
+        """
+        Return database/schema names visible to the engine's catalog.
+
+        Override in subclasses with a real catalog (Spark family, Livy, DuckDB).
+        Engines without a catalog (e.g. Polars, Daft) raise NotImplementedError.
+        """
+        raise NotImplementedError(f"{type(self).__name__} does not support catalog discovery")
+
+    def list_tables(self, database: str) -> list:
+        """
+        Return table names in `database` from the engine's catalog.
+
+        Override in subclasses with a real catalog; this base implementation raises NotImplementedError.
+        """
+        raise NotImplementedError(f"{type(self).__name__} does not support catalog discovery")
+
     def create_external_location(self, location_uri: str):
         """
         Supports engines that need to create external locations for data access.
         By default, this is a no-op and is only overridden by subclasses as needed.
         """
         pass
-    
+
     def create_schema_if_not_exists(self, drop_before_create: bool = True):
         if drop_before_create:
             if self.fs.exists(self.schema_or_working_directory_uri):
                 self.fs.rm(self.schema_or_working_directory_uri, True)
             self.fs.mkdir(self.schema_or_working_directory_uri)
-    
+
     def _convert_generic_to_specific_schema(self, generic_schema: list):
         """
         Convert a generic schema to a specific Spark schema.
         """
         import pyarrow as pa
+
         type_mapping = {
-            'STRING': pa.string(),
-            'TIMESTAMP': pa.timestamp('us', tz='UTC'),
-            'TINYINT': pa.int8(),
-            'SMALLINT': pa.int16(),
-            'INT': pa.int32(),
-            'BIGINT': pa.int64(),
-            'FLOAT': pa.float32(),
-            'DOUBLE': pa.float64(),
-            'DECIMAL(18,10)': pa.decimal128(18, 10),
-            'BOOLEAN': pa.bool_(),
-            'MAP<STRING, STRING>': pa.map_(pa.string(), pa.string())
+            "STRING": pa.string(),
+            "TIMESTAMP": pa.timestamp("us", tz="UTC"),
+            "TINYINT": pa.int8(),
+            "SMALLINT": pa.int16(),
+            "INT": pa.int32(),
+            "BIGINT": pa.int64(),
+            "FLOAT": pa.float32(),
+            "DOUBLE": pa.float64(),
+            "DECIMAL(18,10)": pa.decimal128(18, 10),
+            "BOOLEAN": pa.bool_(),
+            "MAP<STRING, STRING>": pa.map_(pa.string(), pa.string()),
         }
         return pa.schema([(name, type_mapping[data_type]) for name, data_type in generic_schema])
-    
+
     def _append_results_to_delta(self, table_uri: str, results: list, generic_schema: list):
         """
         Appends a list of result records to an existing Delta table.
@@ -269,6 +312,7 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
         - If the installed `deltalake` version is 0.x, forces the Rust engine.
         """
         import pyarrow as pa
+
         from ..engines.delta_rs import DeltaRs
 
         schema = self._convert_generic_to_specific_schema(generic_schema=generic_schema)
@@ -282,7 +326,7 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
         engine_map_data = []
         execution_map_data = []
         for result in results:
-            engine_properties = result.pop('engine_properties', {})
+            engine_properties = result.pop("engine_properties", {})
             if engine_properties:
                 map_items = [(str(k), str(v)) for k, v in engine_properties.items()]
             else:
@@ -290,7 +334,7 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
 
             engine_map_data.append(map_items)
 
-            execution_telemetry = result.pop('execution_telemetry', {})
+            execution_telemetry = result.pop("execution_telemetry", {})
             if execution_telemetry:
                 execution_map_items = [(str(k), str(v)) for k, v in execution_telemetry.items()]
             else:
@@ -301,17 +345,11 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
         table = pa.Table.from_pylist(results, schema)
         engine_map_array = pa.array(engine_map_data, type=pa.map_(pa.string(), pa.string()))
         execution_map_array = pa.array(execution_map_data, type=pa.map_(pa.string(), pa.string()))
-        table = table.append_column('engine_properties', engine_map_array)
-        table = table.append_column('execution_telemetry', execution_map_array)
+        table = table.append_column("engine_properties", engine_map_array)
+        table = table.append_column("execution_telemetry", execution_map_array)
 
-        if version('deltalake').startswith('0.'):
-            DeltaRs().write_deltalake(
-                table_uri, 
-                table, 
-                mode="append",
-                schema_mode='merge',
-                engine='rust'
-            )
+        if version("deltalake").startswith("0."):
+            DeltaRs().write_deltalake(table_uri, table, mode="append", schema_mode="merge", engine="rust")
         else:
             DeltaRs().write_deltalake(
                 table_or_uri=table_uri,
@@ -319,4 +357,4 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
                 mode="append",
                 schema_mode="merge",
                 storage_options=self.storage_options,
-            )
\ No newline at end of file
+            )
diff --git a/src/lakebench/engines/daft.py b/src/lakebench/engines/daft.py
index c33571d..2940594 100644
--- a/src/lakebench/engines/daft.py
+++ b/src/lakebench/engines/daft.py
@@ -1,27 +1,25 @@
-from .base import BaseEngine
-from .delta_rs import DeltaRs
-from ..utils.path_utils import to_file_uri, _REMOTE_SCHEMES
-
 import os
 import pathlib
 import posixpath
 from importlib.metadata import version
-from typing import Any, Optional
+from typing import Optional
+
+from ..utils.path_utils import _REMOTE_SCHEMES, to_file_uri
+from .base import BaseEngine
+from .delta_rs import DeltaRs
+
 
 class Daft(BaseEngine):
     """
     Daft Engine
     """
+
     SQLGLOT_DIALECT = "mysql"
     SUPPORTS_ONELAKE = False
     SUPPORTS_SCHEMA_PREP = False
     SUPPORTS_MOUNT_PATH = False
 
-    def __init__(
-            self, 
-            schema_or_working_directory_uri: str,
-            cost_per_vcore_hour: Optional[float] = None
-            ):
+    def __init__(self, schema_or_working_directory_uri: str, cost_per_vcore_hour: Optional[float] = None):
         """
         Parameters
         ----------
@@ -35,7 +33,8 @@ def __init__(
 
         super().__init__(schema_or_working_directory_uri)
         import daft
-        from daft.io import IOConfig, AzureConfig
+        from daft.io import AzureConfig, IOConfig
+
         self.daft = daft
         self.deltars = DeltaRs()
         self.catalog_name = None
@@ -45,18 +44,20 @@ def __init__(
             self.daft.set_planning_config(default_io_config=io_config)
 
         if not self.SUPPORTS_ONELAKE:
-            if 'onelake.' in self.schema_or_working_directory_uri:
-                raise ValueError(
-                    "Daft engine does not support OneLake paths. Provide an ADLS Gen2 path instead."
-                )
-            
+            if "onelake." in self.schema_or_working_directory_uri:
+                raise ValueError("Daft engine does not support OneLake paths. Provide an ADLS Gen2 path instead.")
+
         self.version: str = f"{version('daft')} (deltalake=={version('deltalake')})"
-        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, '_autocalc_usd_cost_per_vcore_hour', None)
-        
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: Optional[str] = None):
-        table_df = self.daft.read_parquet(
-            posixpath.join(parquet_folder_uri)
-        )
+        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, "_autocalc_usd_cost_per_vcore_hour", None)
+
+    def load_parquet_to_delta(
+        self,
+        parquet_folder_uri: str,
+        table_name: str,
+        table_is_precreated: bool = False,
+        context_decorator: Optional[str] = None,
+    ):
+        table_df = self.daft.read_parquet(posixpath.join(parquet_folder_uri))
         raw_path = posixpath.join(self.schema_or_working_directory_uri, table_name)
         is_local = not any(raw_path.startswith(s) for s in _REMOTE_SCHEMES)
         # Daft 0.7.x requires the target directory to exist for local paths
@@ -82,12 +83,11 @@ def register_table(self, table_name: str):
         is_local = not any(table_path.startswith(s) for s in _REMOTE_SCHEMES)
         if is_local:
             from deltalake import DeltaTable
+
             file_uris = DeltaTable(table_path).file_uris()
             globals()[table_name] = self.daft.read_parquet(file_uris)
         else:
-            globals()[table_name] = self.daft.read_deltalake(
-                to_file_uri(table_path)
-            )
+            globals()[table_name] = self.daft.read_deltalake(to_file_uri(table_path))
 
     def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
         """
@@ -107,4 +107,4 @@ def vacuum_table(self, table_name: str, retain_hours: int = 168, retention_check
             table_uri=posixpath.join(self.schema_or_working_directory_uri, table_name),
             storage_options=self.storage_options,
         )
-        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
\ No newline at end of file
+        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
diff --git a/src/lakebench/engines/delta_rs.py b/src/lakebench/engines/delta_rs.py
index e58c0ab..59ad0f6 100644
--- a/src/lakebench/engines/delta_rs.py
+++ b/src/lakebench/engines/delta_rs.py
@@ -1,5 +1,6 @@
 from .base import BaseEngine
 
+
 class DeltaRs(BaseEngine):
     """
     Delta-Rs Engine
@@ -9,8 +10,8 @@ def __init__(self):
         """
         Initialize the Delta-rs Engine Configs
         """
-        from deltalake.writer import write_deltalake
         from deltalake import DeltaTable
+        from deltalake.writer import write_deltalake
+
         self.write_deltalake = write_deltalake
         self.DeltaTable = DeltaTable
-        
\ No newline at end of file
diff --git a/src/lakebench/engines/duckdb.py b/src/lakebench/engines/duckdb.py
index a83baf8..125e2c6 100644
--- a/src/lakebench/engines/duckdb.py
+++ b/src/lakebench/engines/duckdb.py
@@ -1,27 +1,30 @@
 from __future__ import annotations
-from .base import BaseEngine
-from  .delta_rs import DeltaRs
 
 import os
 import posixpath
-from typing import Any, Optional
 from importlib.metadata import version
+from typing import Any, Optional
+
+from .base import BaseEngine
+from .delta_rs import DeltaRs
+
 
 class DuckDB(BaseEngine):
     """
     DuckDB Engine
     """
+
     SQLGLOT_DIALECT = "duckdb"
     SUPPORTS_ONELAKE = True
     SUPPORTS_SCHEMA_PREP = True
     SUPPORTS_MOUNT_PATH = True
 
     def __init__(
-            self, 
-            schema_or_working_directory_uri: str,
-            cost_per_vcore_hour: Optional[float] = None,
-            storage_options: Optional[dict[str, Any]] = None
-            ):
+        self,
+        schema_or_working_directory_uri: str,
+        cost_per_vcore_hour: Optional[float] = None,
+        storage_options: Optional[dict[str, Any]] = None,
+    ):
         """
         Parameters
         ----------
@@ -35,19 +38,22 @@ def __init__(
             A dictionary of storage options to pass to the engine for filesystem access. Optional as LakeBench
             will attempt to read from environment variables depeneding on the compute runtime.
         """
-        
+
         super().__init__(schema_or_working_directory_uri, storage_options)
         import duckdb
+
         self.duckdb = duckdb.connect()
         self.deltars = DeltaRs()
         self.catalog_name = None
         self.schema_name = None
         if self.schema_or_working_directory_uri.startswith("abfss://"):
-            self.duckdb.sql(f""" CREATE OR REPLACE SECRET onelake ( TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{os.getenv("AZURE_STORAGE_TOKEN")}') ;""")
+            self.duckdb.sql(
+                f""" CREATE OR REPLACE SECRET onelake ( TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{os.getenv("AZURE_STORAGE_TOKEN")}') ;"""
+            )
 
         self.version: str = f"{version('duckdb')} (deltalake=={version('deltalake')})"
-        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, '_autocalc_usd_cost_per_vcore_hour', None)
-    
+        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, "_autocalc_usd_cost_per_vcore_hour", None)
+
     def _create_empty_table(self, table_name: str, ddl: str):
         if not ddl.strip().startswith("CREATE OR REPLACE TABLE"):
             ddl = ddl.replace("CREATE TABLE", "CREATE OR REPLACE TABLE")
@@ -62,18 +68,50 @@ def _create_empty_table(self, table_name: str, ddl: str):
             data=arrow_df,
             mode="overwrite",
             storage_options=self.storage_options,
-        )  
+        )
         # Drop the in-memory table
         self.duckdb.sql(f"DROP TABLE IF EXISTS {table_name}")
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: Optional[str] = None):
-        arrow_df = self.duckdb.sql(f""" FROM parquet_scan('{posixpath.join(parquet_folder_uri, '*.parquet')}') """).record_batch()
+    def get_table_columns(self, table_name: str) -> list:
+        """Return column names for a DuckDB table/view."""
+        rows = self.duckdb.sql(f"DESCRIBE {table_name}").fetchall()
+        return [row[0] for row in rows]
+
+    def list_databases(self) -> list:
+        """List databases attached to the DuckDB connection (catalogs/schemas)."""
+        try:
+            rows = self.duckdb.sql(
+                "SELECT DISTINCT schema_name FROM information_schema.schemata "
+                "WHERE schema_name NOT IN ('information_schema', 'pg_catalog')"
+            ).fetchall()
+            return [r[0] for r in rows]
+        except Exception:
+            rows = self.duckdb.sql("SHOW DATABASES").fetchall()
+            return [r[0] for r in rows]
+
+    def list_tables(self, database: str) -> list:
+        """List table names in `database` (a DuckDB schema); the name is bound as a query parameter."""
+        rows = self.duckdb.execute(
+            "SELECT table_name FROM information_schema.tables WHERE table_schema = ?", [database]
+        ).fetchall()
+        return [r[0] for r in rows]
+
+    def load_parquet_to_delta(
+        self,
+        parquet_folder_uri: str,
+        table_name: str,
+        table_is_precreated: bool = False,
+        context_decorator: Optional[str] = None,
+    ):
+        arrow_df = self.duckdb.sql(
+            f""" FROM parquet_scan('{posixpath.join(parquet_folder_uri, "*.parquet")}') """
+        ).record_batch()
         self.deltars.write_deltalake(
             table_or_uri=posixpath.join(self.schema_or_working_directory_uri, table_name),
             data=arrow_df,
             mode="overwrite",
             storage_options=self.storage_options,
-        )  
+        )
 
     def register_table(self, table_name: str):
         """
@@ -102,4 +140,4 @@ def vacuum_table(self, table_name: str, retain_hours: int = 168, retention_check
             table_uri=posixpath.join(self.schema_or_working_directory_uri, table_name),
             storage_options=self.storage_options,
         )
-        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
\ No newline at end of file
+        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
diff --git a/src/lakebench/engines/fabric_spark.py b/src/lakebench/engines/fabric_spark.py
index 1622afa..3354563 100644
--- a/src/lakebench/engines/fabric_spark.py
+++ b/src/lakebench/engines/fabric_spark.py
@@ -1,8 +1,10 @@
-from .spark import Spark
-from typing import Optional
-from decimal import Decimal
 import re
-from urllib.parse import urlparse, parse_qs
+from decimal import Decimal
+from typing import Optional
+from urllib.parse import parse_qs, urlparse
+
+from .spark import Spark
+
 
 class FabricSpark(Spark):
     """
@@ -10,13 +12,13 @@ class FabricSpark(Spark):
     """
 
     def __init__(
-            self,
-            lakehouse_name: str, 
-            lakehouse_schema_name: str,
-            spark_measure_telemetry: bool = False,
-            cost_per_vcore_hour: Optional[float] = None,
-            compute_stats_all_cols: bool = False
-            ):
+        self,
+        lakehouse_name: str,
+        lakehouse_schema_name: str,
+        spark_measure_telemetry: bool = False,
+        cost_per_vcore_hour: Optional[float] = None,
+        compute_stats_all_cols: bool = False,
+    ):
         """
         Parameters
         ----------
@@ -34,15 +36,17 @@ def __init__(
         """
 
         super().__init__(
-            catalog_name=lakehouse_name, 
-            schema_name=lakehouse_schema_name, 
-            spark_measure_telemetry=spark_measure_telemetry, 
+            catalog_name=lakehouse_name,
+            schema_name=lakehouse_schema_name,
+            spark_measure_telemetry=spark_measure_telemetry,
             cost_per_vcore_hour=cost_per_vcore_hour,
-            compute_stats_all_cols=compute_stats_all_cols
+            compute_stats_all_cols=compute_stats_all_cols,
         )
 
-        self.version: str = f"{self.spark.sparkContext.version} (vhd_name=={self.spark.conf.get('spark.synapse.vhd.name')})"
-        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, '_autocalc_usd_cost_per_vcore_hour', None)
+        self.version: str = (
+            f"{self.spark.sparkContext.version} (vhd_name=={self.spark.conf.get('spark.synapse.vhd.name')})"
+        )
+        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, "_autocalc_usd_cost_per_vcore_hour", None)
         self.cost_per_hour = self.get_total_cores() * self.cost_per_vcore_hour
 
         url = self.spark.sparkContext.uiWebUrl
@@ -53,40 +57,47 @@ def __init__(
         # Regex for GUIDs
         guid_pattern = re.compile(r"[0-9a-fA-F-]{36}")
         guids = guid_pattern.findall(url)
-        tenant_id = guids[0]     # after /sparkui/
+        tenant_id = guids[0]  # after /sparkui/
         activity_id = guids[2]  # after /activities/
 
-        self.extended_engine_metadata.update({
-            'spark_history_url': f"https://{self.spark_configs['spark.trident.pbienv'].lower()}.powerbi.com/workloads/de-ds/sparkmonitor/{artifact_id}/{activity_id}?ctid={tenant_id}",
-            'cost_per_hour': Decimal(self.cost_per_hour).quantize(Decimal('0.0000')),
-            'capacity_id': self.capacity_id
-        })
+        self.extended_engine_metadata.update(
+            {
+                "spark_history_url": f"https://{self.spark_configs['spark.trident.pbienv'].lower()}.powerbi.com/workloads/de-ds/sparkmonitor/{artifact_id}/{activity_id}?ctid={tenant_id}",
+                "cost_per_hour": Decimal(self.cost_per_hour).quantize(Decimal("0.0000")),
+                "capacity_id": self.capacity_id,
+            }
+        )
 
-        spark_configs_to_log = {k: v for k, v in self.spark_configs.items() if k in [
-            'spark.sql.parquet.vorder.enabled',
-            'spark.sql.parquet.vorder.default',
-            'spark.microsoft.delta.optimizeWrite.enabled',
-            'spark.microsoft.delta.optimizeWrite.binSize',
-            'spark.synapse.vegas.useCache',
-            'spark.synapse.vegas.cacheSize',
-            'spark.native.enabled',
-            'spark.gluten.enabled',
-            'spark.sql.parquet.native.writer.directWriteEnabled',
-            'spark.synapse.vhd.name',
-            'spark.synapse.vhd.id',
-            'spark.microsoft.delta.stats.collect.extended',
-            'spark.microsoft.delta.stats.injection.enabled',
-            'spark.microsoft.delta.snapshot.driverMode.enabled',
-            'spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation',
-            'spark.microsoft.delta.targetFileSize.adaptive.enabled',
-            'spark.app.id',
-            'spark.cluster.name'
-        ]}
+        spark_configs_to_log = {
+            k: v
+            for k, v in self.spark_configs.items()
+            if k
+            in [
+                "spark.sql.parquet.vorder.enabled",
+                "spark.sql.parquet.vorder.default",
+                "spark.microsoft.delta.optimizeWrite.enabled",
+                "spark.microsoft.delta.optimizeWrite.binSize",
+                "spark.synapse.vegas.useCache",
+                "spark.synapse.vegas.cacheSize",
+                "spark.native.enabled",
+                "spark.gluten.enabled",
+                "spark.sql.parquet.native.writer.directWriteEnabled",
+                "spark.synapse.vhd.name",
+                "spark.synapse.vhd.id",
+                "spark.microsoft.delta.stats.collect.extended",
+                "spark.microsoft.delta.stats.injection.enabled",
+                "spark.microsoft.delta.snapshot.driverMode.enabled",
+                "spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation",
+                "spark.microsoft.delta.targetFileSize.adaptive.enabled",
+                "spark.app.id",
+                "spark.cluster.name",
+            ]
+        }
 
         self.extended_engine_metadata.update(spark_configs_to_log)
 
         self.compute_stats_all_cols = compute_stats_all_cols
-        self.run_analyze_after_load = False # Fabric Spark supports auto stats collection
+        self.run_analyze_after_load = False  # Fabric Spark supports auto stats collection
         if self.compute_stats_all_cols:
             # Enable auto stats collection
             self.spark.conf.set("spark.microsoft.delta.stats.collect.extended", "true")
diff --git a/src/lakebench/engines/hdi_spark.py b/src/lakebench/engines/hdi_spark.py
index 5dc950c..210e5c2 100644
--- a/src/lakebench/engines/hdi_spark.py
+++ b/src/lakebench/engines/hdi_spark.py
@@ -1,17 +1,16 @@
-from .spark import Spark
 from typing import Optional
 
+from .spark import Spark
+
+
 class HDISpark(Spark):
     """
     HDInsight Spark Engine
     """
 
     def __init__(
-            self,
-            schema_name: str,
-            spark_measure_telemetry: bool = False,
-            cost_per_vcore_hour: Optional[float] = None
-            ):
+        self, schema_name: str, spark_measure_telemetry: bool = False, cost_per_vcore_hour: Optional[float] = None
+    ):
         """
         Parameters
         ----------
@@ -25,9 +24,9 @@ def __init__(
         """
 
         super().__init__(
-            catalog_name=None, 
-            schema_name=schema_name, 
+            catalog_name=None,
+            schema_name=schema_name,
             spark_measure_telemetry=spark_measure_telemetry,
             cost_per_vcore_hour=cost_per_vcore_hour,
-            compute_stats_all_cols=False
-            )
+            compute_stats_all_cols=False,
+        )
diff --git a/src/lakebench/engines/livy.py b/src/lakebench/engines/livy.py
new file mode 100644
index 0000000..811333e
--- /dev/null
+++ b/src/lakebench/engines/livy.py
@@ -0,0 +1,472 @@
+import json
+import os
+import time
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from .base import BaseEngine
+
+
+class Livy(BaseEngine):
+    """
+    Livy Engine — executes Spark workloads via the Apache Livy REST API.
+
+    Submits PySpark code snippets to a remote Livy server. Unlike SparkConnect
+    and Databricks engines, there is no local SparkSession — all execution
+    happens remotely via HTTP.
+
+    Requires: requests
+
+    Parameters
+    ----------
+    url : str
+        Livy server URL (e.g., 'https://livy.example.com' or Fabric Livy endpoint).
+    schema_or_working_directory_uri : str
+        Working directory URI for Delta tables on the remote cluster.
+    auth : str, default 'none'
+        Authentication method: 'none', 'basic', 'kerberos', 'bearer', 'az'.
+        - 'bearer': Uses token from env var specified by token_env.
+        - 'az': Uses Azure CLI to get a token for the specified scope.
+    kind : str, default 'pyspark'
+        Livy session kind.
+    username : str, optional
+        Username for basic auth.
+    password_env : str, optional
+        Env var name containing password for basic auth.
+    token_env : str, optional
+        Env var name containing bearer token (for auth='bearer').
+    az_scope : str, optional
+        Azure AD scope for az CLI auth (default: 'https://api.fabric.microsoft.com/.default').
+    session_conf : dict, optional
+        Additional Spark configuration to pass when creating the Livy session.
+    cost_per_vcore_hour : float, optional
+        Cost per vCore hour for cost estimation.
+    storage_options : dict, optional
+        Storage options for remote filesystem access.
+    """
+
+    SQLGLOT_DIALECT = "spark"
+    SUPPORTS_SCHEMA_PREP = False
+
+    def __init__(
+        self,
+        url: str,
+        schema_or_working_directory_uri: str,
+        auth: str = "none",
+        kind: str = "pyspark",
+        schema_name: Optional[str] = None,
+        catalog_name: Optional[str] = None,
+        username: Optional[str] = None,
+        password_env: Optional[str] = None,
+        token_env: Optional[str] = None,
+        az_scope: Optional[str] = None,
+        session_conf: Optional[Dict[str, str]] = None,
+        cost_per_vcore_hour: Optional[float] = None,
+        storage_options: Optional[Dict[str, Any]] = None,
+        query_timeout_seconds: Optional[int] = None,
+    ):
+        super().__init__(
+            schema_or_working_directory_uri=schema_or_working_directory_uri,
+            storage_options=storage_options,
+        )
+        import requests
+
+        self._url = url.rstrip("/")
+        self._kind = kind
+        self._requests = requests
+        self._session_conf = session_conf or {}
+        self.cost_per_vcore_hour = cost_per_vcore_hour
+        self.version = f"livy ({url})"
+        self.schema_name = schema_name
+        self.catalog_name = catalog_name
+        self.query_timeout_seconds = query_timeout_seconds
+
+        # Set up auth
+        self._session = requests.Session()
+        if auth == "basic":
+            password = os.environ.get(password_env or "") if password_env else None
+            self._session.auth = (username or "", password or "")
+        elif auth == "kerberos":
+            from requests_kerberos import HTTPKerberosAuth
+
+            self._session.auth = HTTPKerberosAuth()
+        elif auth == "bearer":
+            token = os.environ.get(token_env or "")
+            if not token:
+                raise EnvironmentError(f"Environment variable '{token_env}' is not set for bearer auth.")
+            self._session.headers.update({"Authorization": f"Bearer {token}"})
+        elif auth == "az":
+            self._az_scope = az_scope or "https://api.fabric.microsoft.com/.default"
+            self._auth_method = "az"
+            self._token_expiry = 0.0
+            token = self._get_az_token(self._az_scope)
+            self._session.headers.update({"Authorization": f"Bearer {token}"})
+
+        self._session.headers.update({"Content-Type": "application/json"})
+
+        # Create Livy session
+        self._livy_session_id = self._create_session()
+        self.extended_engine_metadata.update(
+            {
+                "livy_url": url,
+                "livy_session_id": str(self._livy_session_id),
+            }
+        )
+
+    def _get_az_token(self, scope: str) -> str:
+        """Get an Azure AD token via the az CLI and record its real expiry."""
+        import subprocess
+
+        result = subprocess.run(
+            ["az", "account", "get-access-token", "--scope", scope, "-o", "json"],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"Failed to get Azure token via 'az' CLI: {result.stderr.strip()}\n"
+                f"Make sure you are logged in with 'az login'."
+            )
+        data = json.loads(result.stdout)
+        # expiresOn format: "YYYY-MM-DD HH:MM:SS.ffffff" in local time
+        try:
+            self._token_expiry = datetime.fromisoformat(data["expiresOn"]).timestamp()
+        except (KeyError, ValueError):
+            # Fallback: assume 55 minutes (azure tokens are nominally 1h)
+            self._token_expiry = time.time() + 55 * 60
+        return data["accessToken"]
+
+    def _refresh_token_if_needed(self, force: bool = False):
+        """Refresh Azure AD token before it expires (2-min safety margin)."""
+        if getattr(self, "_auth_method", None) != "az":
+            return
+        if force or time.time() > (self._token_expiry - 120):
+            token = self._get_az_token(self._az_scope)
+            self._session.headers.update({"Authorization": f"Bearer {token}"})
+
+    def _is_synapse_endpoint(self) -> bool:
+        """True if `self._url` looks like an Azure Synapse Livy endpoint.
+
+        Synapse URLs follow the pattern
+        `https://<workspace>.dev.azuresynapse.net/livyApi/...`; the
+        `azuresynapse.net` host suffix is the most reliable marker.
+        Fabric / HDInsight / OSS Livy don't share this suffix.
+        """
+        return "azuresynapse.net" in self._url.lower()
+
+    def _create_session(self):
+        """Create a new Livy interactive session and wait until it's ready."""
+        # Synapse's Livy REST API requires a non-empty session name
+        # ("Cannot be empty (Parameter 'Name')"). Fabric/standard Livy accept
+        # it harmlessly, so we always include one.
+        session_name = f"lakebench-{int(time.time())}"
+        conf = dict(self._session_conf) if self._session_conf else {}
+
+        # Synapse's pool API requires `spark.executor.instances` to be present
+        # at session-create time, even when dynamic allocation is enabled — its
+        # parameter-resolution layer rejects the request with HTTP 400 when
+        # `spark.executor.instances` is missing from inputs / conf / pool
+        # defaults. (Fabric's Livy resolves this from the lakehouse capacity.)
+        # If the user has dynamic allocation configured, default to
+        # `minExecutors`; otherwise fall back to a safe small value (2).
+        if self._is_synapse_endpoint() and "spark.executor.instances" not in conf:
+            min_execs = conf.get("spark.dynamicAllocation.minExecutors")
+            conf["spark.executor.instances"] = str(min_execs) if min_execs else "2"
+
+        payload = {"kind": self._kind, "name": session_name}
+        if conf:
+            payload["conf"] = conf
+        resp = self._session.post(
+            f"{self._url}/sessions",
+            data=json.dumps(payload),
+        )
+        if not resp.ok:
+            raise RuntimeError(f"Failed to create Livy session ({resp.status_code}): {resp.text}")
+        session_id = resp.json()["id"]
+
+        # Wait for session to be ready
+        for _ in range(120):  # 10 minute timeout
+            resp = self._session.get(f"{self._url}/sessions/{session_id}")
+            resp.raise_for_status()
+            data = resp.json()
+            # Fabric uses livyInfo.currentState; standard Livy uses state
+            state = data.get("state") or data.get("livyInfo", {}).get("currentState", "")
+            if state == "idle":
+                return session_id
+            elif state in ("error", "dead", "shutting_down", "killed"):
+                raise RuntimeError(f"Livy session {session_id} entered state '{state}'. Check Livy server logs.")
+            time.sleep(5)
+
+        raise TimeoutError(f"Livy session {session_id} did not become ready within 10 minutes.")
+
+    def _submit_statement(self, code: str, timeout_seconds: Optional[int] = None) -> Dict[str, Any]:
+        """Submit a code statement to the Livy session and wait for result.
+
+        Parameters
+        ----------
+        code : str
+            PySpark/SQL code to run.
+        timeout_seconds : int, optional
+            Per-statement wall-clock cap. None = use the engine default
+            (``self.query_timeout_seconds`` if set, else 3 hours). On
+            timeout we POST to the cancel endpoint, mark the session
+            wedged, and raise ``TimeoutError``.
+        """
+        effective_timeout = (
+            timeout_seconds if timeout_seconds is not None else (self.query_timeout_seconds or 3 * 60 * 60)
+        )
+        deadline = time.time() + effective_timeout
+        poll_interval = 5
+
+        self._refresh_token_if_needed()
+        resp = self._session.post(
+            f"{self._url}/sessions/{self._livy_session_id}/statements",
+            data=json.dumps({"code": code, "kind": self._kind}),
+        )
+        if resp.status_code == 401:
+            # Token may have been invalidated server-side despite our expiry check.
+            self._refresh_token_if_needed(force=True)
+            resp = self._session.post(
+                f"{self._url}/sessions/{self._livy_session_id}/statements",
+                data=json.dumps({"code": code, "kind": self._kind}),
+            )
+        if not resp.ok:
+            raise RuntimeError(f"Livy statement submission failed ({resp.status_code}): {resp.text}")
+        statement_id = resp.json()["id"]
+
+        # Poll for completion
+        while time.time() < deadline:
+            self._refresh_token_if_needed()
+            resp = self._session.get(f"{self._url}/sessions/{self._livy_session_id}/statements/{statement_id}")
+            if resp.status_code == 401:
+                self._refresh_token_if_needed(force=True)
+                resp = self._session.get(f"{self._url}/sessions/{self._livy_session_id}/statements/{statement_id}")
+            resp.raise_for_status()
+            result = resp.json()
+            state = result["state"]
+            if state == "available":
+                output = result.get("output", {})
+                if output.get("status") == "error":
+                    raise RuntimeError(
+                        f"Livy statement error: {output.get('evalue', 'Unknown error')}\n{output.get('traceback', '')}"
+                    )
+                return output
+            elif state in ("error", "cancelled"):
+                raise RuntimeError(f"Livy statement {statement_id} failed with state '{state}'.")
+            time.sleep(poll_interval)
+
+        # Timed out — best-effort cancel, then mark the session wedged
+        # so callers can decide whether to recreate it.
+        self._cancel_statement(statement_id)
+        self._session_wedged = True
+        raise TimeoutError(f"Livy statement {statement_id} did not complete within {effective_timeout} seconds.")
+
+    def _cancel_statement(self, statement_id: int) -> None:
+        """Best-effort POST to the Livy cancel endpoint; never raises."""
+        try:
+            self._refresh_token_if_needed()
+            self._session.post(
+                f"{self._url}/sessions/{self._livy_session_id}/statements/{statement_id}/cancel",
+                timeout=30,
+            )
+        except Exception:
+            pass
+
+    def _close_session(self) -> None:
+        """Best-effort DELETE of the Livy session."""
+        try:
+            self._refresh_token_if_needed()
+            self._session.delete(
+                f"{self._url}/sessions/{self._livy_session_id}",
+                timeout=30,
+            )
+        except Exception:
+            pass
+
+    def _recreate_session(self) -> None:
+        """Tear down the wedged session and start a fresh one."""
+        old_id = getattr(self, "_livy_session_id", None)
+        self._close_session()
+        self._livy_session_id = self._create_session()
+        self._session_wedged = False
+        self.extended_engine_metadata.update(
+            {
+                "livy_session_id": str(self._livy_session_id),
+                "livy_session_recreated_from": str(old_id),
+            }
+        )
+
+    def get_table_columns(self, table_name: str) -> list:
+        """Return column names for a Spark table/view via Livy."""
+        escaped = table_name.replace("\\", "\\\\").replace('"', '\\"')
+        code = f'print(spark.table("{escaped}").columns)'
+        output = self._submit_statement(code)
+        # output data text looks like "['col1', 'col2', ...]"
+        text = output.get("data", {}).get("text/plain", "")
+        if text:
+            import ast
+
+            try:
+                return ast.literal_eval(text.strip())
+            except (ValueError, SyntaxError):
+                return []
+        return []
+
+    def list_databases(self) -> list:
+        """List databases visible to the Livy-attached Spark session."""
+        code = (
+            'rows = spark.sql("SHOW DATABASES").collect()\n'
+            'print("\\n".join([(r.asDict().get("namespace") '
+            'or r.asDict().get("databaseName") '
+            "or list(r.asDict().values())[0]) for r in rows]))"
+        )
+        try:
+            output = self._submit_statement(code)
+        except RuntimeError as exc:
+            msg = str(exc)
+            # Hive metastore initialization HEADs the warehouse path; if the
+            # cluster identity lacks Storage Blob Data Reader on it, ADLS
+            # returns 403 and Spark wraps it as AccessDeniedException.
+            if "AccessDeniedException" in msg or ("403" in msg and "warehouse" in msg.lower()):
+                import re
+
+                m = re.search(r"https://[^\s\"']+warehouse[^\s\"']*", msg)
+                warehouse_url = m.group(0) if m else "(warehouse path)"
+                raise RuntimeError(
+                    f"SHOW DATABASES failed with HTTP 403 on the Hive warehouse path:\n"
+                    f"  {warehouse_url}\n\n"
+                    f"The cluster's identity (Synapse workspace MSI / AAD passthrough "
+                    f"user / linked-service SP) lacks read access to that ADLS Gen2 path.\n"
+                    f"Fix: grant 'Storage Blob Data Reader' (or Contributor for writes) "
+                    f"on the storage account or container to the right principal, then retry.\n\n"
+                    f"Original error:\n{msg}"
+                ) from exc
+            raise
+        text = output.get("data", {}).get("text/plain", "") or ""
+        return [s.strip() for s in text.splitlines() if s.strip()]
+
+    def list_tables(self, database: str) -> list:
+        """List tables in `database` via Livy.
+
+        Backtick each dotted segment separately so multi-part names like
+        Fabric's `workspace.lakehouse.schema` resolve as a real namespace
+        rather than a single literal identifier.
+        """
+        segments = [seg.replace("`", "") for seg in database.split(".")]
+        qualified = ".".join(f"`{seg}`" for seg in segments)
+        code = (
+            f'rows = spark.sql("SHOW TABLES IN {qualified}").collect()\n'
+            'print("\\n".join([r.asDict().get("tableName", "") for r in rows]))'
+        )
+        output = self._submit_statement(code)
+        text = output.get("data", {}).get("text/plain", "") or ""
+        return [s.strip() for s in text.splitlines() if s.strip()]
+
+    def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
+        """Execute a SQL query via Livy; rows are collected remotely and the output is discarded."""
+        self._heal_session_if_wedged()
+        # repr() emits a valid Python literal for any query, even one ending in a double quote.
+        code = f"spark.sql({query!r}).collect()"
+        try:
+            self._submit_statement(code)
+        except (TimeoutError, ConnectionError, self._requests.exceptions.ConnectionError):
+            # Session is now wedged/unreachable; mark it for recovery on
+            # the next call so subsequent queries don't all cascade-fail.
+            self._session_wedged = True
+            raise
+
+    def execute_sql_statement(self, statement: str, context_decorator: Optional[str] = None):
+        """Execute a SQL statement (DDL/DML) via Livy; a timeout or dropped connection marks the session wedged."""
+        self._heal_session_if_wedged()
+        # repr() emits a valid Python literal for any statement, even one ending in a double quote.
+        code = f"spark.sql({statement!r})"
+        try:
+            self._submit_statement(code)
+        except (TimeoutError, ConnectionError, self._requests.exceptions.ConnectionError):
+            self._session_wedged = True
+            raise
+
+    def _heal_session_if_wedged(self) -> None:
+        """If the previous statement timed out / dropped the connection,
+        recreate the Livy session before the next call.
+
+        Logged as a warning. If session recreation itself fails the
+        original error propagates so the caller knows the engine is dead.
+        """
+        if not getattr(self, "_session_wedged", False):
+            return
+        import logging
+
+        logging.getLogger("lakebench.engines.livy").warning(
+            "Livy session %s appears wedged; recreating before next call.",
+            getattr(self, "_livy_session_id", "?"),
+        )
+        try:
+            self._recreate_session()
+        except Exception as exc:
+            raise RuntimeError(f"Failed to recreate Livy session after previous timeout: {exc}") from exc
+
+    def load_parquet_to_delta(
+        self,
+        parquet_folder_uri: str,
+        table_name: str,
+        table_is_precreated: bool = False,
+        context_decorator: Optional[str] = None,
+    ):
+        """Load parquet data via Livy.
+
+        Uses createOrReplaceTempView instead of saveAsTable to avoid a
+        Fabric Spark bug where DeltaOptimizedWriterColumnarExec crashes
+        with a NoSuchMethodError in the Gluten/Velox columnar engine.
+        Temp views keep NEE (Native Execution Engine) active for queries.
+        """
+        # repr() emits valid Python literals for any URI/name, including ones containing double quotes.
+        uri_literal, name_literal = repr(parquet_folder_uri), repr(table_name)
+        code = f"""
+df = spark.read.parquet({uri_literal})
+df.createOrReplaceTempView({name_literal})
+"""
+        self._submit_statement(code)
+
+    def optimize_table(self, table_name: str):
+        """Run OPTIMIZE on a Delta table."""
+        self.execute_sql_statement(f"OPTIMIZE {table_name}")
+
+    def vacuum_table(self, table_name: str, retention_hours: int = 168):
+        """Run VACUUM on a Delta table."""
+        self.execute_sql_statement(f"VACUUM {table_name} RETAIN {retention_hours} HOURS")
+
+    def create_schema_if_not_exists(self, drop_before_create: bool = False):
+        """Create schema via remote Spark SQL."""
+        # Livy sessions on Fabric use the lakehouse's default schema
+        # No explicit schema creation needed
+        pass
+
+    def create_external_location(self, uri: str):
+        """No-op for Livy — locations are managed by the cluster."""
+        pass
+
+    def _create_empty_table(self, table_name: str, ddl: str):
+        """Create an empty table using DDL via Livy."""
+        # Use CREATE OR REPLACE to handle re-runs
+        ddl = ddl.replace("CREATE TABLE", "CREATE OR REPLACE TABLE")
+        ddl = ddl.replace("CREATE OR REPLACE OR REPLACE", "CREATE OR REPLACE")
+        self.execute_sql_statement(ddl)
+
+    def _delete_session(self):
+        """Best-effort DELETE of the Livy session; never raises, bounded so __del__/__exit__ cannot hang."""
+        try:
+            self._session.delete(f"{self._url}/sessions/{self._livy_session_id}", timeout=30)
+        except Exception:
+            pass
+
+    def __del__(self):
+        self._delete_session()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._delete_session()
+        return False
diff --git a/src/lakebench/engines/polars.py b/src/lakebench/engines/polars.py
index 0a8982a..30f64f9 100644
--- a/src/lakebench/engines/polars.py
+++ b/src/lakebench/engines/polars.py
@@ -1,26 +1,29 @@
 from __future__ import annotations
-from .base import BaseEngine
-from .delta_rs import DeltaRs
 
 import posixpath
-from typing import Any, Optional
 from importlib.metadata import version
+from typing import Any, Optional
+
+from .base import BaseEngine
+from .delta_rs import DeltaRs
+
 
 class Polars(BaseEngine):
     """
     Polars Engine
     """
+
     SQLGLOT_DIALECT = "duckdb"
     SUPPORTS_ONELAKE = True
     SUPPORTS_SCHEMA_PREP = False
     SUPPORTS_MOUNT_PATH = True
 
     def __init__(
-            self, 
-            schema_or_working_directory_uri: str,
-            cost_per_vcore_hour: Optional[float] = None,
-            storage_options: Optional[dict[str, Any]] = None
-            ):
+        self,
+        schema_or_working_directory_uri: str,
+        cost_per_vcore_hour: Optional[float] = None,
+        storage_options: Optional[dict[str, Any]] = None,
+    ):
         """
         Parameters
         ----------
@@ -34,35 +37,38 @@ def __init__(
             A dictionary of storage options to pass to the engine for filesystem access. Optional as LakeBench
             will attempt to read from environment variables depeneding on the compute runtime.
         """
-        
+
         super().__init__(schema_or_working_directory_uri, storage_options)
         import polars as pl
+
         self.pl = pl
         self.deltars = DeltaRs()
         self.catalog_name = None
         self.schema_name = None
         self.sql = pl.SQLContext()
         self.version: str = f"{version('polars')} (deltalake=={version('deltalake')})"
-        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, '_autocalc_usd_cost_per_vcore_hour', None)
+        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, "_autocalc_usd_cost_per_vcore_hour", None)
 
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: Optional[str] = None):
+    def load_parquet_to_delta(
+        self,
+        parquet_folder_uri: str,
+        table_name: str,
+        table_is_precreated: bool = False,
+        context_decorator: Optional[str] = None,
+    ):
         table_df = self.pl.scan_parquet(
-            posixpath.join(parquet_folder_uri, '*.parquet'), 
-            storage_options=self.storage_options
+            posixpath.join(parquet_folder_uri, "*.parquet"), storage_options=self.storage_options
         )
         # Cast any Decimal columns to Float64 before collecting — TPC-DS datagen can
         # produce values that exceed the column's declared precision at small scale factors,
         # causing a Rust-level panic in Polars strict decimal enforcement.
-        decimal_cols = [name for name, dtype in table_df.schema.items()
-                        if str(dtype).startswith("Decimal")]
+        decimal_cols = [name for name, dtype in table_df.schema.items() if str(dtype).startswith("Decimal")]
         if decimal_cols:
-            table_df = table_df.with_columns(
-                [self.pl.col(c).cast(self.pl.Float64, strict=False) for c in decimal_cols]
-            )
-        table_df.collect(engine='streaming').write_delta(
-            posixpath.join(self.schema_or_working_directory_uri, table_name), 
-            mode="overwrite", 
-            storage_options=self.storage_options
+            table_df = table_df.with_columns([self.pl.col(c).cast(self.pl.Float64, strict=False) for c in decimal_cols])
+        table_df.collect(engine="streaming").write_delta(
+            posixpath.join(self.schema_or_working_directory_uri, table_name),
+            mode="overwrite",
+            storage_options=self.storage_options,
         )
 
     def register_table(self, table_name: str):
@@ -70,8 +76,7 @@ def register_table(self, table_name: str):
         Register a Delta table LazyFrame in Polars.
         """
         df = self.pl.scan_delta(
-            posixpath.join(self.schema_or_working_directory_uri, table_name), 
-            storage_options=self.storage_options
+            posixpath.join(self.schema_or_working_directory_uri, table_name), storage_options=self.storage_options
         )
         self.sql.register(table_name, df)
 
@@ -79,7 +84,7 @@ def execute_sql_query(self, query: str, context_decorator: Optional[str] = None)
         """
         Execute a SQL query using Polars.
         """
-        result = self.sql.execute(query).collect(engine='streaming')
+        result = self.sql.execute(query).collect(engine="streaming")
 
     def optimize_table(self, table_name: str):
         fact_table = self.deltars.DeltaTable(
@@ -93,4 +98,4 @@ def vacuum_table(self, table_name: str, retain_hours: int = 168, retention_check
             table_uri=posixpath.join(self.schema_or_working_directory_uri, table_name),
             storage_options=self.storage_options,
         )
-        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
\ No newline at end of file
+        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
diff --git a/src/lakebench/engines/sail.py b/src/lakebench/engines/sail.py
index 531f0b4..4039634 100644
--- a/src/lakebench/engines/sail.py
+++ b/src/lakebench/engines/sail.py
@@ -1,12 +1,12 @@
 from __future__ import annotations
-from .base import BaseEngine
-from .delta_rs import DeltaRs
 
 import os
 import posixpath
-from typing import Any, Optional
 from importlib.metadata import version
+from typing import Any, Optional
 
+from .base import BaseEngine
+from .delta_rs import DeltaRs
 
 
 class Sail(BaseEngine):
@@ -15,6 +15,7 @@ class Sail(BaseEngine):
 
     File system support: https://docs.lakesail.com/sail/main/guide/storage/
     """
+
     _SAIL_SERVER = None
     _SPARK = None
     SQLGLOT_DIALECT = "spark"
@@ -26,7 +27,7 @@ def __init__(
         self,
         schema_or_working_directory_uri: str,
         cost_per_vcore_hour: Optional[float] = None,
-        storage_options: Optional[dict[str, Any]] = None
+        storage_options: Optional[dict[str, Any]] = None,
     ):
         """
         Parameters
@@ -41,14 +42,15 @@ def __init__(
             A dictionary of storage options to pass to the engine for filesystem access. Optional as LakeBench
             will attempt to read from environment variables depeneding on the compute runtime.
         """
-        
+
         super().__init__(schema_or_working_directory_uri, storage_options)
         from pysail.spark import SparkConnectServer
         from pyspark.sql import SparkSession
+
         self.deltars = DeltaRs()
         self.catalog_name = None
         self.schema_name = None
-        
+
         # Set Sail specific environment variables
         os.environ["SAIL_OPTIMIZER__ENABLE_JOIN_REORDER"] = "true"
 
@@ -62,9 +64,7 @@ def __init__(
         if Sail._SPARK is None:
             sail_server_hostname, sail_server_port = self.sail_server.listening_address
             try:
-                spark = SparkSession.builder.remote(
-                    f"sc://{sail_server_hostname}:{sail_server_port}"
-                ).getOrCreate()
+                spark = SparkSession.builder.remote(f"sc://{sail_server_hostname}:{sail_server_port}").getOrCreate()
                 spark.conf.set("spark.sql.warehouse.dir", schema_or_working_directory_uri)
                 Sail._SPARK = spark
             except ImportError as ex:
@@ -73,12 +73,8 @@ def __init__(
                 ) from ex
         self.spark = Sail._SPARK
 
-        self.version: str = (
-            f"""{version("pysail")} (deltalake=={version("deltalake")})"""
-        )
-        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(
-            self, "_autocalc_usd_cost_per_vcore_hour", None
-        )
+        self.version: str = f"""{version("pysail")} (deltalake=={version("deltalake")})"""
+        self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, "_autocalc_usd_cost_per_vcore_hour", None)
 
     def load_parquet_to_delta(
         self,
@@ -87,10 +83,9 @@ def load_parquet_to_delta(
         table_is_precreated: bool = False,
         context_decorator: Optional[str] = None,
     ):
-        self.spark.read.parquet(parquet_folder_uri) \
-            .write.format("delta") \
-            .mode("overwrite") \
-            .save(posixpath.join(self.schema_or_working_directory_uri, table_name))
+        self.spark.read.parquet(parquet_folder_uri).write.format("delta").mode("overwrite").save(
+            posixpath.join(self.schema_or_working_directory_uri, table_name)
+        )
 
     def register_table(self, table_name: str):
         """
@@ -127,13 +122,9 @@ def optimize_table(self, table_name: str):
         )
         fact_table.optimize.compact()
 
-    def vacuum_table(
-        self, table_name: str, retain_hours: int = 168, retention_check: bool = True
-    ):
+    def vacuum_table(self, table_name: str, retain_hours: int = 168, retention_check: bool = True):
         fact_table = self.deltars.DeltaTable(
             table_uri=posixpath.join(self.schema_or_working_directory_uri, table_name),
             storage_options=self.storage_options,
         )
-        fact_table.vacuum(
-            retain_hours, enforce_retention_duration=retention_check, dry_run=False
-        )
+        fact_table.vacuum(retain_hours, enforce_retention_duration=retention_check, dry_run=False)
diff --git a/src/lakebench/engines/spark.py b/src/lakebench/engines/spark.py
index 4aeeefa..7e5e60a 100644
--- a/src/lakebench/engines/spark.py
+++ b/src/lakebench/engines/spark.py
@@ -1,9 +1,12 @@
-from .base import BaseEngine
 import os
-from typing import Optional
 import posixpath
+from typing import Optional
+
 import tenacity
 
+from .base import BaseEngine
+
+
 class Spark(BaseEngine):
     """
     Generic Spark Engine
@@ -29,21 +32,21 @@ class Spark(BaseEngine):
     append_array_to_delta(abfss_path: str, array: list)
         Appends a list of data to a Delta table at the specified path.
     """
+
     SQLGLOT_DIALECT = "spark"
     SUPPORTS_MOUNT_PATH = True
     SUPPORTS_ONELAKE = True
     SUPPORTS_SCHEMA_PREP = True
-    
 
     def __init__(
-            self,
-            schema_name: str,
-            catalog_name: Optional[str] = None,
-            schema_uri: Optional[str] = None,
-            spark_measure_telemetry: bool = False,
-            cost_per_vcore_hour: Optional[float] = None,
-            compute_stats_all_cols: bool = False
-            ):
+        self,
+        schema_name: str,
+        catalog_name: Optional[str] = None,
+        schema_uri: Optional[str] = None,
+        spark_measure_telemetry: bool = False,
+        cost_per_vcore_hour: Optional[float] = None,
+        compute_stats_all_cols: bool = False,
+    ):
         """
         Parameters
         ----------
@@ -62,31 +65,29 @@ def __init__(
             Whether to compute statistics for all columns after each table is loaded.
         """
         super().__init__(schema_or_working_directory_uri=schema_uri)
-        from pyspark.sql import SparkSession
         import pyspark.sql.functions as sf
+        from pyspark.sql import SparkSession
+
         self.sf = sf
 
         self.spark = SparkSession.builder
         if self.runtime == "local_unknown":
-            warehouse_dir = posixpath.dirname(schema_uri.rstrip('/').rstrip('\\'))
+            warehouse_dir = posixpath.dirname(schema_uri.rstrip("/").rstrip("\\"))
             self.spark = (
-                self.spark
-                    .master("local[*]")
-                    .config("spark.sql.warehouse.dir", warehouse_dir)
-                    .config("spark.driver.host", "localhost")
-                    .config("spark.driver.bindAddress", "localhost")
-                    .config("spark.ui.enabled", "false")
-                    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
-                    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
-                    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.0")
-                    .config("spark.sql.catalogImplementation", "hive")
+                self.spark.master("local[*]")
+                .config("spark.sql.warehouse.dir", warehouse_dir)
+                .config("spark.driver.host", "localhost")
+                .config("spark.driver.bindAddress", "localhost")
+                .config("spark.ui.enabled", "false")
+                .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+                .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+                .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.0")
+                .config("spark.sql.catalogImplementation", "hive")
             )
             if self.operating_system == "windows":
                 # Windows-specific configurations to avoid native IO issues
-                self.spark = (
-                    self.spark
-                        .config("spark.hadoop.io.native.lib.available", "false")
-                        .config("spark.hadoop.fs.file.impl.disable.cache", "true")
+                self.spark = self.spark.config("spark.hadoop.io.native.lib.available", "false").config(
+                    "spark.hadoop.fs.file.impl.disable.cache", "true"
                 )
 
         self.spark = self.spark.getOrCreate()
@@ -95,32 +96,45 @@ def __init__(
         if spark_measure_telemetry:
             try:
                 from sparkmeasure import StageMetrics
+
                 self.capture_metrics = StageMetrics(self.spark)
             except ModuleNotFoundError:
-                raise ModuleNotFoundError("`sparkmeasure` is not installed, either disable the `spark_measure_telemetry` flag, run `%pip install sparkmeasure==0.24.0`, or install LakeBench with the sparkmeasure option: `%pip install lakebench[sparkmeasure]`.")
+                raise ModuleNotFoundError(
+                    "`sparkmeasure` is not installed, either disable the `spark_measure_telemetry` flag, run `%pip install sparkmeasure==0.24.0`, or install LakeBench with the sparkmeasure option: `%pip install lakebench[sparkmeasure]`."
+                )
         self.spark_measure_telemetry = spark_measure_telemetry
 
         self.version: str = self.spark.sparkContext.version
 
         self.catalog_name = catalog_name if self.runtime != "local_unknown" else None
         self.schema_name = schema_name
-        self.full_catalog_schema_reference : str = f"`{self.catalog_name}`.`{self.schema_name}`" if catalog_name else f"`{self.schema_name}`"
+        self.full_catalog_schema_reference: str = (
+            f"`{self.catalog_name}`.`{self.schema_name}`" if catalog_name else f"`{self.schema_name}`"
+        )
         self.cost_per_vcore_hour = cost_per_vcore_hour
         self.spark_configs = self.__get_spark_session_configs()
-        self.extended_engine_metadata.update({
-            'parquet.block.size': self.spark.sparkContext._jsc.hadoopConfiguration().get("parquet.block.size") or '',
-        })
-        spark_configs_to_log = {k: v for k, v in self.spark_configs.items() if k in [
-            'spark.executor.memory',
-            'spark.databricks.delta.optimizeWrite.enabled',
-            'spark.databricks.delta.optimizeWrite.binSize',
-            'spark.sql.autoBroadcastJoinThreshold',
-            'spark.sql.sources.parallelPartitionDiscovery.parallelism',
-            'spark.sql.cbo.enabled',
-            'spark.sql.shuffle.partitions',
-            'spark.task.cpus',
-            'spark.sql.parquet.compression.codec'
-        ]}
+        self.extended_engine_metadata.update(
+            {
+                "parquet.block.size": self.spark.sparkContext._jsc.hadoopConfiguration().get("parquet.block.size")
+                or "",
+            }
+        )
+        spark_configs_to_log = {
+            k: v
+            for k, v in self.spark_configs.items()
+            if k
+            in [
+                "spark.executor.memory",
+                "spark.databricks.delta.optimizeWrite.enabled",
+                "spark.databricks.delta.optimizeWrite.binSize",
+                "spark.sql.autoBroadcastJoinThreshold",
+                "spark.sql.sources.parallelPartitionDiscovery.parallelism",
+                "spark.sql.cbo.enabled",
+                "spark.sql.shuffle.partitions",
+                "spark.task.cpus",
+                "spark.sql.parquet.compression.codec",
+            ]
+        }
 
         self.extended_engine_metadata.update(spark_configs_to_log)
 
@@ -138,7 +152,7 @@ def __get_spark_session_configs(self) -> dict:
         """
         scala_map = self.spark.conf._jconf.getAll()
         spark_conf_dict = {}
- 
+
         iterator = scala_map.iterator()
         while iterator.hasNext():
             entry = iterator.next()
@@ -146,14 +160,13 @@ def __get_spark_session_configs(self) -> dict:
             value = entry._2()
             spark_conf_dict[key] = value
         return spark_conf_dict
-    
+
     # Use tenacity to retry on NativeIO error common in spark running on local Windows
     @tenacity.retry(
         retry=tenacity.retry_if_exception(
-            lambda e: "java.lang.UnsatisfiedLinkError" in str(e) and 
-                     "NativeIO$POSIX.stat" in str(e)
+            lambda e: "java.lang.UnsatisfiedLinkError" in str(e) and "NativeIO$POSIX.stat" in str(e)
         ),
-        stop=tenacity.stop_after_attempt(2)
+        stop=tenacity.stop_after_attempt(2),
     )
     def create_schema_if_not_exists(self, drop_before_create: bool = True):
         """
@@ -169,7 +182,7 @@ def create_schema_if_not_exists(self, drop_before_create: bool = True):
         Uses tenacity retry decorator to handle NativeIO errors common in Spark
         running on local Windows environments.
         """
-        location_str = f"LOCATION '{self.schema_uri}'" if self.schema_uri is not None else ''
+        location_str = f"LOCATION '{self.schema_uri}'" if self.schema_uri is not None else ""
 
         if drop_before_create:
             self.spark.sql(f"DROP SCHEMA IF EXISTS {self.full_catalog_schema_reference} CASCADE")
@@ -192,16 +205,12 @@ def _create_empty_table(self, table_name: Optional[str], ddl: str):
         Automatically adds 'USING delta' clause if no storage format is specified.
         """
         # Explicitly set the table type to Delta if not already specified
-        if 'using ' not in ddl.lower():
+        if "using " not in ddl.lower():
             # Find the closing parenthesis of the column definitions
             closing_paren_index = ddl.rfind(")")
             if closing_paren_index != -1:
                 # Insert 'USING delta' after the closing parenthesis
-                ddl = (
-                    ddl[:closing_paren_index + 1]
-                    + " using delta"
-                    + ddl[closing_paren_index + 1:]
-                )
+                ddl = ddl[: closing_paren_index + 1] + " using delta" + ddl[closing_paren_index + 1 :]
 
         self.execute_sql_statement(ddl)
 
@@ -209,19 +218,34 @@ def _convert_generic_to_specific_schema(self, generic_schema: list):
         """
         Convert a generic schema to a specific Spark schema.
         """
-        from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DoubleType, BooleanType, TimestampType, MapType, ByteType, ShortType, LongType, DecimalType
+        from pyspark.sql.types import (
+            BooleanType,
+            ByteType,
+            DecimalType,
+            DoubleType,
+            FloatType,
+            IntegerType,
+            LongType,
+            MapType,
+            ShortType,
+            StringType,
+            StructField,
+            StructType,
+            TimestampType,
+        )
+
         type_mapping = {
-            'STRING': StringType(),
-            'TIMESTAMP': TimestampType(),
-            'TINYINT': ByteType(),
-            'SMALLINT': ShortType(),
-            'INT': IntegerType(),
-            'BIGINT': LongType(),
-            'FLOAT': FloatType(),
-            'DOUBLE': DoubleType(),
-            'DECIMAL(18,10)': DecimalType(18,10),  # Spark does not have a specific Decimal type, using DoubleType
-            'BOOLEAN': BooleanType(),
-            'MAP<STRING, STRING>': MapType(StringType(), StringType())
+            "STRING": StringType(),
+            "TIMESTAMP": TimestampType(),
+            "TINYINT": ByteType(),
+            "SMALLINT": ShortType(),
+            "INT": IntegerType(),
+            "BIGINT": LongType(),
+            "FLOAT": FloatType(),
+            "DOUBLE": DoubleType(),
+            "DECIMAL(18,10)": DecimalType(18, 10),  # Spark DecimalType with precision 18, scale 10
+            "BOOLEAN": BooleanType(),
+            "MAP<STRING, STRING>": MapType(StringType(), StringType()),
         }
         return StructType([StructField(name, type_mapping[data_type], True) for name, data_type in generic_schema])
 
@@ -229,50 +253,72 @@ def _append_results_to_delta(self, table_uri: str, results: list, generic_schema
         """
         Append an array to a Delta table.
         """
-        import pyspark.sql.functions as sf
         schema = self._convert_generic_to_specific_schema(generic_schema)
         # Use default order of columns in dictionary
         columns = list(results[0].keys())
         df = self.spark.createDataFrame(results, schema=schema).select(*columns)
-        df.write.format("delta") \
-            .option("mergeSchema", "true") \
-            .option("delta.enableDeletionVectors", "false") \
-            .option("delta.autoOptimize.autoCompact", "true") \
-            .option("delta.autoOptimize.optimizeWrite", "true") \
-            .mode("append") \
-            .save(table_uri)
+        df.write.format("delta").option("mergeSchema", "true").option("delta.enableDeletionVectors", "false").option(
+            "delta.autoOptimize.autoCompact", "true"
+        ).option("delta.autoOptimize.optimizeWrite", "true").mode("append").save(table_uri)
 
     def get_total_cores(self) -> int:
         """
         Returns the total number of CPU cores available in the Spark cluster.
-        
+
         Assumes that the driver and workers nodes are all the same VM size.
         """
-        cores = int(len(set(executor.host() for executor in self.spark.sparkContext._jsc.sc().statusTracker().getExecutorInfos())) * os.cpu_count())
+        cores = int(
+            len(
+                set(
+                    executor.host() for executor in self.spark.sparkContext._jsc.sc().statusTracker().getExecutorInfos()
+                )
+            )
+            * os.cpu_count()
+        )
         return cores
-        
+
     def get_compute_size(self) -> str:
         """
         Returns a formatted string with the compute size.
-        
+
         Assumes that the driver and workers nodes are all the same VM size.
-        """        
+        """
         sc_conf_dict = {key: value for key, value in self.spark.sparkContext.getConf().getAll()}
         executor_count = self.spark.sparkContext._jsc.sc().getExecutorMemoryStatus().size() - 1
-        executor_cores = int(sc_conf_dict.get('spark.executor.cores', os.cpu_count()))
-        vm_host_count = len(set(executor.host() for executor in self.spark.sparkContext._jsc.sc().statusTracker().getExecutorInfos()))
+        executor_cores = int(sc_conf_dict.get("spark.executor.cores", os.cpu_count()))
+        vm_host_count = len(
+            set(executor.host() for executor in self.spark.sparkContext._jsc.sc().statusTracker().getExecutorInfos())
+        )
         worker_count = vm_host_count - 1
         worker_cores = os.cpu_count()
-        as_min_workers = sc_conf_dict.get('spark.dynamicAllocation.initialExecutors') if sc_conf_dict.get('spark.autoscale.executorResourceInfoTag.enabled', 'false') == 'true' else None
-        as_max_workers = sc_conf_dict.get('spark.dynamicAllocation.maxExecutors') if sc_conf_dict.get('spark.autoscale.executorResourceInfoTag.enabled', 'false') == 'true' else None
-        as_enabled = True if as_min_workers != as_max_workers and sc_conf_dict.get('spark.dynamicAllocation.minExecutors', None) != sc_conf_dict.get('spark.dynamicAllocation.maxExecutors', None) else False
-        type = "SingleNode" if vm_host_count == 1 and not as_enabled else 'MultiNode'
-        workers_word = 'Workers' if worker_count > 1 or (as_max_workers is not None and int(as_max_workers) > 1)  else 'Worker'
+        as_min_workers = (
+            sc_conf_dict.get("spark.dynamicAllocation.initialExecutors")
+            if sc_conf_dict.get("spark.autoscale.executorResourceInfoTag.enabled", "false") == "true"
+            else None
+        )
+        as_max_workers = (
+            sc_conf_dict.get("spark.dynamicAllocation.maxExecutors")
+            if sc_conf_dict.get("spark.autoscale.executorResourceInfoTag.enabled", "false") == "true"
+            else None
+        )
+        as_enabled = (
+            True
+            if as_min_workers != as_max_workers
+            and sc_conf_dict.get("spark.dynamicAllocation.minExecutors", None)
+            != sc_conf_dict.get("spark.dynamicAllocation.maxExecutors", None)
+            else False
+        )
+        type = "SingleNode" if vm_host_count == 1 and not as_enabled else "MultiNode"
+        workers_word = (
+            "Workers" if worker_count > 1 or (as_max_workers is not None and int(as_max_workers) > 1) else "Worker"
+        )
         executors_per_worker = int(executor_count / worker_count) if worker_count > 0 else 1
-        executors_word = 'Executors' if executors_per_worker > 1 else 'Executor'
-        executor_str = f"({executors_per_worker} x {executor_cores}vCore {executors_word}{' ea.' if type != 'SingleNode' else ''})"
+        executors_word = "Executors" if executors_per_worker > 1 else "Executor"
+        executor_str = (
+            f"({executors_per_worker} x {executor_cores}vCore {executors_word}{' ea.' if type != 'SingleNode' else ''})"
+        )
 
-        if type == 'SingleNode':
+        if type == "SingleNode":
             cluster_config = f"{worker_cores}vCore {type} {executor_str}"
         elif as_enabled:
             cluster_config = f"{as_min_workers}-{as_max_workers} x {worker_cores}vCore {workers_word} {executor_str}"
@@ -280,20 +326,51 @@ def get_compute_size(self) -> str:
             cluster_config = f"{worker_count} x {worker_cores}vCore {workers_word} {executor_str}"
 
         return cluster_config
-    
-    def load_parquet_to_delta(self, parquet_folder_uri: str, table_name: str, table_is_precreated: bool = False, context_decorator: Optional[str] = None):
+
+    def get_table_columns(self, table_name: str) -> list:
+        """Return the column names of `table_name`, looked up under `full_catalog_schema_reference`."""
+        table_schema = self.spark.table(f"{self.full_catalog_schema_reference}.{table_name}").schema
+        return [field.name for field in table_schema.fields]
+
+    def list_databases(self) -> list:
+        """Return the name of every database/schema reported by `SHOW DATABASES`."""
+        # The result column is `namespace` or `databaseName` depending on the Spark version;
+        # if neither yields a value, fall back to the row's first column.
+        names = []
+        for row in self.spark.sql("SHOW DATABASES").collect():
+            cols = row.asDict()
+            names.append(cols.get("namespace") or cols.get("databaseName") or next(iter(cols.values())))
+        return names
+
+    def list_tables(self, database: str) -> list:
+        """Return the non-empty `tableName` values reported by `SHOW TABLES IN <database>`."""
+        # Quote every dot-separated part on its own: `catalog.schema` (or Fabric's
+        # `workspace.lakehouse.schema`) must stay a multi-part name. A single pair
+        # of backticks around the full string would make it one literal identifier,
+        # which Spark mis-resolves.
+        quoted_parts = ["`" + part + "`" for part in database.split(".")]
+        listing = self.spark.sql("SHOW TABLES IN " + ".".join(quoted_parts)).collect()
+        return [name for name in (row.asDict().get("tableName") for row in listing) if name]
+
+    def load_parquet_to_delta(
+        self,
+        parquet_folder_uri: str,
+        table_name: str,
+        table_is_precreated: bool = False,
+        context_decorator: Optional[str] = None,
+    ):
         df = self.spark.read.parquet(parquet_folder_uri)
         if table_is_precreated:
             df.write.insertInto(table_name, overwrite=True)
         else:
-            df.write.format('delta').mode("append").saveAsTable(table_name)
+            df.write.format("delta").mode("append").saveAsTable(table_name)
 
         if self.run_analyze_after_load:
-            self.spark.sql(f"ANALYZE TABLE {table_name} COMPUTE STATISTICS FOR ALL COLUMNS;")    
+            self.spark.sql(f"ANALYZE TABLE {table_name} COMPUTE STATISTICS FOR ALL COLUMNS;")
 
     def execute_sql_query(self, query: str, context_decorator: Optional[str] = None):
         execute_sql = self.spark.sql(query).collect()
-    
+
     def execute_sql_statement(self, statement: str, context_decorator: Optional[str] = None):
         """
         Execute a SQL statement.
diff --git a/src/lakebench/engines/spark_connect.py b/src/lakebench/engines/spark_connect.py
new file mode 100644
index 0000000..ffbed0b
--- /dev/null
+++ b/src/lakebench/engines/spark_connect.py
@@ -0,0 +1,79 @@
+from typing import Optional
+
+from .base import BaseEngine
+from .spark import Spark
+
+
+class SparkConnect(Spark):
+    """
+    Spark Connect Engine — connects to a remote Spark cluster via Spark Connect protocol.
+
+    Uses the `sc://` URL scheme to establish a remote SparkSession. All Spark-based
+    benchmark implementations work automatically since this inherits from Spark.
+
+    Requires: pyspark[connect]
+
+    Parameters
+    ----------
+    remote : str
+        Spark Connect remote URL (e.g., 'sc://localhost:15002').
+    schema_name : str
+        The name of the schema (database) to use.
+    catalog_name : str, optional
+        The name of the catalog to use.
+    schema_uri : str, optional
+        The URI of the schema.
+    spark_measure_telemetry : bool, default False
+        Whether to enable sparkmeasure telemetry.
+    cost_per_vcore_hour : float, optional
+        Cost per vCore hour for cost estimation.
+    compute_stats_all_cols : bool, default False
+        Whether to compute statistics for all columns after loading.
+    """
+
+    def __init__(
+        self,
+        remote: str,
+        schema_name: str,
+        catalog_name: Optional[str] = None,
+        schema_uri: Optional[str] = None,
+        spark_measure_telemetry: bool = False,
+        cost_per_vcore_hour: Optional[float] = None,
+        compute_stats_all_cols: bool = False,
+    ):
+        import pyspark.sql.functions as sf
+        from pyspark.sql import SparkSession
+
+        # Call BaseEngine.__init__ directly (skip Spark's local session creation)
+        BaseEngine.__init__(self, schema_or_working_directory_uri=schema_uri)
+        self.sf = sf
+
+        # Build (or reuse) the session bound to the Spark Connect remote
+        self.spark = SparkSession.builder.remote(remote).getOrCreate()
+
+        self.schema_uri = schema_uri
+        self._remote_url = remote
+
+        if spark_measure_telemetry:
+            try:
+                from sparkmeasure import StageMetrics
+
+                self.capture_metrics = StageMetrics(self.spark)
+            except ModuleNotFoundError as ex:
+                raise ModuleNotFoundError(
+                    "`sparkmeasure` is not installed. Install with: `pip install lakebench[sparkmeasure]`."
+                ) from ex
+        self.spark_measure_telemetry = spark_measure_telemetry
+
+        self.version = f"spark-connect ({remote})"
+
+        self.catalog_name = catalog_name
+        self.schema_name = schema_name
+        self.full_catalog_schema_reference = (
+            f"`{self.catalog_name}`.`{self.schema_name}`" if catalog_name else f"`{self.schema_name}`"
+        )
+        self.cost_per_vcore_hour = cost_per_vcore_hour
+        self.compute_stats_all_cols = compute_stats_all_cols
+        self.run_analyze_after_load = self.compute_stats_all_cols
+        self.spark_configs = {}
+        self.extended_engine_metadata.update({"spark_connect_remote": remote})
diff --git a/src/lakebench/engines/synapse_spark.py b/src/lakebench/engines/synapse_spark.py
index ed5bc68..8c10d50 100644
--- a/src/lakebench/engines/synapse_spark.py
+++ b/src/lakebench/engines/synapse_spark.py
@@ -1,6 +1,8 @@
-from .spark import Spark
-from typing import Optional
 from decimal import Decimal
+from typing import Optional
+
+from .spark import Spark
+
 
 class SynapseSpark(Spark):
     """
@@ -8,12 +10,12 @@ class SynapseSpark(Spark):
     """
 
     def __init__(
-            self,
-            schema_name: str,
-            schema_uri: Optional[str] = None,
-            spark_measure_telemetry: bool = False,
-            cost_per_vcore_hour: Optional[float] = None
-            ):
+        self,
+        schema_name: str,
+        schema_uri: Optional[str] = None,
+        spark_measure_telemetry: bool = False,
+        cost_per_vcore_hour: Optional[float] = None,
+    ):
         """
         Parameters
         ----------
@@ -29,43 +31,56 @@ def __init__(
         """
 
         super().__init__(
-            catalog_name=None, 
-            schema_name=schema_name, 
+            catalog_name=None,
+            schema_name=schema_name,
             schema_uri=schema_uri,
             spark_measure_telemetry=spark_measure_telemetry,
             cost_per_vcore_hour=cost_per_vcore_hour,
-            compute_stats_all_cols=False
-            )        
+            compute_stats_all_cols=False,
+        )
 
-        if self.runtime != 'synapse':
+        if self.runtime != "synapse":
             raise RuntimeError("This engine is only supports Synapse Spark Pools.")
-        self.version: str = f"{self.spark.sparkContext.version} (vhd_name=={self.spark.conf.get('spark.synapse.vhd.name')})"
-        region = self.spark.conf.get('spark.cluster.region')
-        self.cost_per_vcore_hour = cost_per_vcore_hour if cost_per_vcore_hour is not None else self._get_vm_retail_rate(region=region, sku='vCore')
+        self.version: str = (
+            f"{self.spark.sparkContext.version} (vhd_name=={self.spark.conf.get('spark.synapse.vhd.name')})"
+        )
+        region = self.spark.conf.get("spark.cluster.region")
+        self.cost_per_vcore_hour = (
+            cost_per_vcore_hour
+            if cost_per_vcore_hour is not None
+            else self._get_vm_retail_rate(region=region, sku="vCore")
+        )
         self.cost_per_hour = self.get_total_cores() * self.cost_per_vcore_hour
 
-        self.extended_engine_metadata.update({
-            'spark_history_url': self.spark_configs['spark.tracking.webUrl'],
-            'cost_per_hour': Decimal(self.cost_per_hour).quantize(Decimal('0.0000')),
-            'compute_region': region
-        })
+        self.extended_engine_metadata.update(
+            {
+                "spark_history_url": self.spark_configs["spark.tracking.webUrl"],
+                "cost_per_hour": Decimal(self.cost_per_hour).quantize(Decimal("0.0000")),
+                "compute_region": region,
+            }
+        )
 
-        spark_configs_to_log = {k: v for k, v in self.spark_configs.items() if k in [
-            'spark.microsoft.delta.optimizeWrite.enabled',
-            'spark.microsoft.delta.optimizeWrite.binSize',
-            'spark.synapse.vegas.useCache',
-            'spark.synapse.vegas.cacheSize',
-            'spark.synapse.vhd.name',
-            'spark.synapse.vhd.id',
-            'spark.app.id',
-            'spark.cluster.name'
-        ]}
+        spark_configs_to_log = {
+            k: v
+            for k, v in self.spark_configs.items()
+            if k
+            in [
+                "spark.microsoft.delta.optimizeWrite.enabled",
+                "spark.microsoft.delta.optimizeWrite.binSize",
+                "spark.synapse.vegas.useCache",
+                "spark.synapse.vegas.cacheSize",
+                "spark.synapse.vhd.name",
+                "spark.synapse.vhd.id",
+                "spark.app.id",
+                "spark.cluster.name",
+            ]
+        }
 
         self.extended_engine_metadata.update(spark_configs_to_log)
 
     def _get_vm_retail_rate(self, region: str, sku: str, spot: bool = False) -> float:
         import requests
+
         query = f"armRegionName eq '{region}' and serviceName eq 'Azure Synapse Analytics' and productName eq 'Azure Synapse Analytics Serverless Apache Spark Pool - Memory Optimized'"
         api_url = "https://prices.azure.com/api/retail/prices?"
-        return requests.get(api_url, params={'$filter': query}).json()['Items'][0]['retailPrice']
-    
\ No newline at end of file
+        return requests.get(api_url, params={"$filter": query}).json()["Items"][0]["retailPrice"]
diff --git a/src/lakebench/utils/__init__.py b/src/lakebench/utils/__init__.py
index 9405827..6717ddb 100644
--- a/src/lakebench/utils/__init__.py
+++ b/src/lakebench/utils/__init__.py
@@ -1 +1 @@
-from .path_utils import abfss_to_https, to_unix_path, to_file_uri, _REMOTE_SCHEMES
\ No newline at end of file
+from .path_utils import _REMOTE_SCHEMES, abfss_to_https, to_file_uri, to_unix_path
diff --git a/src/lakebench/utils/path_utils.py b/src/lakebench/utils/path_utils.py
index 8bcd2c4..703c7ce 100644
--- a/src/lakebench/utils/path_utils.py
+++ b/src/lakebench/utils/path_utils.py
@@ -1,34 +1,38 @@
 def abfss_to_https(abfss_path: str) -> str:
     """
     Convert an ABFSS path to an HTTPS URL.
-    
+
     Example:
         abfss_path = "abfss://
     """
     import posixpath
-    storage_account_endpoint = abfss_path.split('@')[1].split('/')[0]
-    container = abfss_path.split('@')[0].split('abfss://')[1]
-    file_path = abfss_path.split('@')[1].split('/')[1:]
-    https_parquet_folder_path = posixpath.join('https://', storage_account_endpoint,  container, '/'.join(file_path))
+
+    storage_account_endpoint = abfss_path.split("@")[1].split("/")[0]
+    container = abfss_path.split("@")[0].split("abfss://")[1]
+    file_path = abfss_path.split("@")[1].split("/")[1:]
+    https_parquet_folder_path = posixpath.join("https://", storage_account_endpoint, container, "/".join(file_path))
 
     return https_parquet_folder_path
 
+
 def to_unix_path(path_str) -> str:
     # Handle Windows drive letters and backslashes
-    result = path_str.replace('\\', '/')
-    
+    result = path_str.replace("\\", "/")
+
     # Remove Windows drive letters (C:, D:, etc.)
-    if len(result) >= 2 and result[1] == ':':
+    if len(result) >= 2 and result[1] == ":":
         result = result[2:]
-    
+
     # Ensure it starts with '/'
-    if not result.startswith('/'):
-        result = '/' + result
-        
+    if not result.startswith("/"):
+        result = "/" + result
+
     return result
 
+
 _REMOTE_SCHEMES = ("abfss://", "wasbs://", "az://", "s3://", "gs://", "file://")
 
+
 def to_file_uri(path: str) -> str:
     """Convert a local filesystem path to a ``file:///`` URI.
 
@@ -44,4 +48,5 @@ def to_file_uri(path: str) -> str:
     if any(path.startswith(s) for s in _REMOTE_SCHEMES):
         return path
     import pathlib
-    return pathlib.Path(path).as_uri()
\ No newline at end of file
+
+    return pathlib.Path(path).as_uri()
diff --git a/src/lakebench/utils/query_utils.py b/src/lakebench/utils/query_utils.py
index 1f192ce..615d52b 100644
--- a/src/lakebench/utils/query_utils.py
+++ b/src/lakebench/utils/query_utils.py
@@ -1,24 +1,231 @@
-def transpile_and_qualify_query(query:str, from_dialect:str, to_dialect:str, catalog:str, schema:str)-> str:
+def transpile_and_qualify_query(
+    query: str,
+    from_dialect: str,
+    to_dialect: str,
+    catalog: str,
+    schema: str,
+) -> str:
+    """Transpile a query from one dialect to another and qualify its tables.
+
+    Tables in the query are written with bare names; this prepends the engine's
+    catalog/schema. Both ``catalog`` and ``schema`` may themselves be multi-part
+    dotted names — e.g. Fabric's ``workspace.lakehouse.schema`` or Unity
+    Catalog's ``catalog.schema`` — yielding 3- and 4-part qualified names.
+
+    For Spark-family dialects each segment is emitted as its own quoted
+    identifier (``\\`a\\`.\\`b\\`.\\`c\\`.tbl``); other dialects use bare dotted
+    segments. CTE/derived-table references are left untouched because
+    ``qualify_tables`` only annotates real base tables.
+    """
     import sqlglot as sg
+    from sqlglot import exp
     from sqlglot.optimizer.qualify_tables import qualify_tables
-    expression = sg.parse_one(query, dialect=from_dialect)
 
-    qualified_sql = qualify_tables(
-        expression, 
-        catalog=catalog, 
-        db=schema, 
-        dialect=from_dialect) \
-    .sql(to_dialect, normalize=False, pretty=True)
+    tree = sg.parse_one(query, dialect=from_dialect)
+
+    # Collect the full namespace prefix (catalog segments, then schema segments).
+    prefix_segments = []
+    if catalog:
+        prefix_segments += [s for s in str(catalog).split(".") if s]
+    if schema:
+        prefix_segments += [s for s in str(schema).split(".") if s]
+
+    if not prefix_segments:
+        return tree.sql(to_dialect, normalize=False, pretty=True)
+
+    # Qualify using only the rightmost segment as the db. This makes
+    # qualify_tables annotate exactly the base tables (and skip CTEs / derived
+    # tables), after which we rebuild the full multi-part prefix ourselves.
+    db_marker = prefix_segments[-1]
+    tree = qualify_tables(tree, db=db_marker, dialect=from_dialect)
+
+    # Spark / Hive / Databricks need backticked identifiers for multi-part
+    # names; other engines (DuckDB, Postgres, …) take bare dotted segments and
+    # sqlglot will quote as its dialect requires.
+    quoted = to_dialect in ("spark", "hive", "databricks")
+
+    def _identifier(name: str) -> exp.Identifier:
+        return exp.to_identifier(name, quoted=quoted)
+
+    for table in tree.find_all(exp.Table):
+        # Only rewrite the base tables we just qualified: db == db_marker and no
+        # catalog yet. Anything else (already-qualified, CTE refs) is left alone.
+        if table.db != db_marker or table.catalog:
+            continue
+
+        table_name = table.name
+        table_alias = table.args.get("alias")
+
+        # Build `seg1`.`seg2`.….`table` as a chained Dot expression so an
+        # arbitrary number of prefix segments is supported.
+        parts = [_identifier(seg) for seg in prefix_segments] + [_identifier(table_name)]
+        node = parts[0]
+        for part in parts[1:]:
+            node = exp.Dot(this=node, expression=part)
+
+        new_table = exp.Table(this=node)
+        if table_alias is not None:
+            new_table.set("alias", table_alias)
+        table.replace(new_table)
+
+    return tree.sql(to_dialect, normalize=False, pretty=True)
 
-    return qualified_sql
 
 def get_table_name_from_ddl(ddl: str) -> str:
     import sqlglot
-    from sqlglot.expressions import Table, Identifier
+    from sqlglot.expressions import Identifier, Table
 
     expression = sqlglot.parse_one(ddl)
     table = expression.find(Table)
     if not table or not isinstance(table.this, Identifier):
         raise ValueError("Table name not found in DDL statement.")
 
-    return table.this.this
\ No newline at end of file
+    return table.this.this
+
+
+def parse_ddl_columns(ddl_text: str) -> dict:
+    """
+    Collect the column names of every CREATE TABLE statement in a ";"-separated DDL script.
+    Unparsable or non-CREATE fragments are skipped; returns {table_name: [col1, ...]}, all lowercased.
+    """
+    import sqlglot
+    from sqlglot.expressions import ColumnDef, Create, Identifier, Table
+
+    columns_by_table = {}
+    for fragment in (piece.strip() for piece in ddl_text.split(";")):
+        if len(fragment) < 8:
+            continue
+        try:
+            statement = sqlglot.parse_one(fragment)
+            if not isinstance(statement, Create):
+                continue
+            target = statement.find(Table)
+            if not target or not isinstance(target.this, Identifier):
+                continue
+            table_name = target.this.this.lower()
+            names = [
+                definition.this.this.lower()
+                for definition in statement.find_all(ColumnDef)
+                if isinstance(definition.this, Identifier)
+            ]
+            if names:
+                columns_by_table[table_name] = names
+        except Exception:
+            continue
+    return columns_by_table
+
+
+def build_column_remap(ddl_columns: dict, actual_schemas: dict) -> dict:
+    """
+    Pair DDL column names absent from the real table with their likely actual names.
+
+    Tables missing from ``actual_schemas`` (or listed with no columns) are skipped. DDL
+    columns are visited in declaration order and each actual column is claimed at most
+    once per table, so the outcome is reproducible from run to run. When no suffix rule
+    applies, the closest unclaimed name is used if its similarity ratio exceeds 0.85.
+
+    Parameters
+    ----------
+    ddl_columns : dict
+        {table_name: [col1, col2, ...]} from DDL (lowercased).
+    actual_schemas : dict
+        {table_name: [col1, col2, ...]} from engine introspection (lowercased).
+
+    Returns
+    -------
+    dict
+        {ddl_col_name: actual_col_name} for mismatched columns. The mapping is flat, so a
+        column remapped in several tables keeps the entry of the last table processed.
+    """
+    remap = {}
+    for table_name, ddl_cols in ddl_columns.items():
+        actual_cols = actual_schemas.get(table_name)
+        if not actual_cols:
+            continue
+        actual_set = set(actual_cols)
+        ddl_set = set(ddl_cols)
+
+        # Ordered, de-duplicated lists instead of set differences: set iteration order for
+        # strings varies between interpreter runs, which made the pairing non-deterministic.
+        missing = [col for col in dict.fromkeys(ddl_cols) if col not in actual_set]
+        unclaimed = [col for col in dict.fromkeys(actual_cols) if col not in ddl_set]
+
+        for m_col in missing:
+            # Exact suffix rules in priority order: strip/add ``_sk``, then strip/add ``_date``.
+            candidates = []
+            if m_col.endswith("_sk"):
+                candidates.append(m_col[:-3])
+            candidates.append(m_col + "_sk")
+            if m_col.endswith("_date"):
+                candidates.append(m_col[:-5])
+            candidates.append(m_col + "_date")
+            match = next((c for c in candidates if c in unclaimed), None)
+
+            if match is None and unclaimed:
+                # Fuzzy fallback: take the single best-scoring name (ties go to the earliest
+                # actual column) rather than the first one that happens to pass the threshold.
+                scored = [(_levenshtein_ratio(m_col, e_col), e_col) for e_col in unclaimed]
+                best_ratio, best_col = max(scored, key=lambda pair: pair[0])
+                if best_ratio > 0.85:
+                    match = best_col
+
+            if match is not None:
+                remap[m_col] = match
+                unclaimed.remove(match)  # don't reuse
+
+    return remap
+
+
+def _levenshtein_ratio(s1: str, s2: str) -> float:
+    """Score how alike two strings are, on a scale from 0.0 to 1.0 (identical).
+
+    The score is ``1 - edit_distance / len(longer string)``; comparing a non-empty
+    string against an empty one yields 0.0.
+    """
+    if s1 == s2:
+        return 1.0
+    if not s1 or not s2:
+        return 0.0
+    # Single-row Levenshtein: row[j] is the distance between the consumed prefix
+    # of s1 and the first j characters of s2.
+    row = list(range(len(s2) + 1))
+    for i, left in enumerate(s1, start=1):
+        diagonal = row[0]  # value from the previous row, one column to the left
+        row[0] = i
+        for j, right in enumerate(s2, start=1):
+            above = row[j]
+            row[j] = diagonal if left == right else 1 + min(diagonal, above, row[j - 1])
+            diagonal = above
+    longest = max(len(s1), len(s2))
+    return 1.0 - (row[-1] / longest)
+
+
+def apply_column_remap(query: str, remap: dict, dialect: str) -> str:
+    """
+    Rename column references in a SQL query by rewriting its sqlglot syntax tree.
+
+    Parameters
+    ----------
+    query : str
+        SQL text to rewrite; parsed with ``dialect``.
+    remap : dict
+        {old_column_name: new_column_name}; lookups use the lowercased column name.
+    dialect : str
+        sqlglot dialect used both to parse ``query`` and to render the result.
+
+    Returns
+    -------
+    str
+        Pretty-printed SQL in which every matching column reference is renamed.
+    """
+    import sqlglot
+    from sqlglot.expressions import Column
+
+    parsed = sqlglot.parse_one(query, dialect=dialect)
+    for reference in parsed.find_all(Column):
+        lookup_key = reference.name.lower()
+        if lookup_key not in remap:
+            continue
+        # Swap the name held by the node stored under the column's ``this`` in place.
+        reference.this.set("this", remap[lookup_key])
+    return parsed.sql(dialect=dialect, normalize=False, pretty=True)
diff --git a/src/lakebench/utils/timer.py b/src/lakebench/utils/timer.py
index 11a429f..39efb7b 100644
--- a/src/lakebench/utils/timer.py
+++ b/src/lakebench/utils/timer.py
@@ -1,15 +1,31 @@
+import logging
 import time
-from datetime import datetime
 from contextlib import contextmanager
+from datetime import datetime
+
 from ..engines.spark import Spark
 
+logger = logging.getLogger(__name__)
+
+
+def _has_spark_context(engine):
+    """True only when ``engine`` is a Spark engine exposing ``sparkContext`` (absent in Databricks Connect)."""
+    usable = isinstance(engine, Spark)
+    if usable:
+        try:
+            engine.spark.sparkContext  # probe only: the attribute access itself may raise
+        except Exception:
+            usable = False
+    return usable
+
+
 @contextmanager
-def timer(phase: str = "Elapsed time", test_item: str = '', engine: str = None):
+def timer(phase: str = "Elapsed time", test_item: str = "", engine: str = None):
     if not hasattr(timer, "results"):
         timer.results = []
 
     iteration = sum(1 for result in timer.results if result[0] == phase and result[1] == test_item) + 1
-    
+
     class TimerContext:
         def __init__(self, phase: str, test_item: str, iteration: int):
             self.execution_telemetry = {}
@@ -17,7 +33,8 @@ def __init__(self, phase: str, test_item: str, iteration: int):
 
     timer_context = TimerContext(phase, test_item, iteration)
 
-    if isinstance(engine, Spark):
+    has_sc = _has_spark_context(engine)
+    if has_sc:
         engine.spark.sparkContext.setJobDescription(timer_context.context_decorator)
         if engine.spark_measure_telemetry:
             engine.capture_metrics.begin()
@@ -29,49 +46,54 @@ def __init__(self, phase: str, test_item: str, iteration: int):
     error_message = None
     error_type = None
 
-
     try:
         yield timer_context
     except Exception as e:
         success = False
         error_message = str(e)
         error_type = type(e).__name__  # Capture the error type
-        print(f"Error during {phase} - {test_item}... {error_type}: {error_message}")
-        
+        logger.error("Error during %s - %s... %s: %s", phase, test_item, error_type, error_message)
+
     finally:
         end = time.time()
         duration = int((end - start) * 1000)
-        print(f"{phase} - {test_item}{f' [i:{iteration}]' if iteration > 1 else ''}: {(duration / 1000):.2f} seconds")
+        logger.info(
+            "%s - %s%s: %.2f seconds",
+            phase,
+            test_item,
+            f" [i:{iteration}]" if iteration > 1 else "",
+            duration / 1000,
+        )
         # Set execution metadata to an empty dict if it is not set or was set to anything other than a dict
         if not isinstance(timer_context.execution_telemetry, dict):
             timer_context.execution_telemetry = {}
 
-        if isinstance(engine, Spark):
+        if has_sc:
             engine.spark.sparkContext.setJobDescription(None)
             if engine.spark_measure_telemetry:
                 engine.capture_metrics.end()
-                listener_metrics = engine.capture_metrics.create_stagemetrics_DF()
                 listener_metrics_agg = engine.capture_metrics.aggregate_stagemetrics_DF()
                 listener_metrics_dict = listener_metrics_agg.toPandas().iloc[0].to_dict()
                 listener_metrics_str_dict = {k: str(v) for k, v in listener_metrics_dict.items()}
                 timer_context.execution_telemetry.update(listener_metrics_str_dict)
-        
 
         timer.results.append(
             (
-                phase, 
-                test_item, 
-                start_datetime, 
-                duration, 
-                iteration, 
-                success, 
-                f"{error_type}: {error_message}" if error_message else '', 
-                timer_context.execution_telemetry
+                phase,
+                test_item,
+                start_datetime,
+                duration,
+                iteration,
+                success,
+                f"{error_type}: {error_message}" if error_message else "",
+                timer_context.execution_telemetry,
             )
         )
 
+
 def _clear_results():
     if hasattr(timer, "results"):
         timer.results = []
 
-timer.clear_results = _clear_results
\ No newline at end of file
+
+timer.clear_results = _clear_results
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 99cee52..5654043 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -19,9 +19,11 @@
 reports/coverage/<engine>.md  whenever report_and_assert is called at least
 once.  Run any integration test to refresh the reports.
 """
+
 import datetime
-import warnings
 import pathlib
+import warnings
+
 import pytest
 
 pytest.importorskip("duckdb", reason="requires lakebench[tpcds_datagen] extra")
@@ -37,8 +39,8 @@
 # Shared reporting helper
 # ---------------------------------------------------------------------------
 
-def report_and_assert(results, benchmark_name: str, engine_label: str,
-                      run_exception=None, min_pass_rate: float = 0.0):
+
+def report_and_assert(results, benchmark_name: str, engine_label: str, run_exception=None, min_pass_rate: float = 0.0):
     """Print a run summary, emit warnings on partial failures, and assert
     pass rate meets *min_pass_rate*.
 
@@ -48,7 +50,7 @@ def report_and_assert(results, benchmark_name: str, engine_label: str,
     Works for both load-and-query benchmarks (TPC-H, TPC-DS, ClickBench) and
     task-based benchmarks (ELTBench).
     """
-    load_results  = [r for r in results if r["phase"] == "Load"]
+    load_results = [r for r in results if r["phase"] == "Load"]
     query_results = [r for r in results if r["phase"] == "Query"]
 
     def _assert_rate(passed, total, unit):
@@ -62,9 +64,7 @@ def _assert_rate(passed, total, unit):
                 f"is below required {min_pass_rate:.0%}."
             )
         else:
-            assert len(passed) > 0, (
-                f"{benchmark_name} [{engine_label}]: ALL {total} {unit} failed."
-            )
+            assert len(passed) > 0, f"{benchmark_name} [{engine_label}]: ALL {total} {unit} failed."
 
     # ELTBench: no Load/Query phases — treat every result as a "task"
     if not load_results and not query_results:
@@ -72,21 +72,21 @@ def _assert_rate(passed, total, unit):
         passed = [r for r in task_results if r["success"]]
         failed = [r for r in task_results if not r["success"]]
 
-        print(f"\n{'='*60}")
+        print(f"\n{'=' * 60}")
         print(f"{benchmark_name} [{engine_label}]")
         print(f"  Tasks : {len(passed)}/{len(task_results)} passed, {len(failed)} failed")
         for r in failed:
             print(f"    x {r['test_item']} ({r['phase']}): {r['error_message'][:120]}")
         if run_exception:
-            print(f"  [WARN] raised before completion: "
-                  f"{type(run_exception).__name__}: {str(run_exception)[:200]}")
-        print(f"{'='*60}")
+            print(f"  [WARN] raised before completion: {type(run_exception).__name__}: {str(run_exception)[:200]}")
+        print(f"{'=' * 60}")
 
         if len(task_results) == 0 and run_exception is not None:
             warnings.warn(
                 f"{benchmark_name} [{engine_label}]: engine crashed before any tasks ran: "
                 f"{type(run_exception).__name__}: {str(run_exception)[:200]}",
-                UserWarning, stacklevel=2,
+                UserWarning,
+                stacklevel=2,
             )
             return
 
@@ -94,35 +94,41 @@ def _assert_rate(passed, total, unit):
             warnings.warn(
                 f"{benchmark_name} [{engine_label}]: {len(failed)} of {len(task_results)} "
                 f"tasks failed: {[r['test_item'] for r in failed]}",
-                UserWarning, stacklevel=2,
+                UserWarning,
+                stacklevel=2,
             )
         _assert_rate(passed, len(task_results), "tasks")
-        _RESULTS.append({
-            "benchmark": benchmark_name, "engine": engine_label,
-            "unit": "tasks", "passed": len(passed), "total": len(task_results),
-            "failed": [{"name": r["test_item"], "phase": r["phase"],
-                        "error": r["error_message"]} for r in failed],
-            "run_exception": str(run_exception) if run_exception else None,
-            "timestamp": datetime.datetime.utcnow().isoformat(),
-        })
+        _RESULTS.append(
+            {
+                "benchmark": benchmark_name,
+                "engine": engine_label,
+                "unit": "tasks",
+                "passed": len(passed),
+                "total": len(task_results),
+                "failed": [{"name": r["test_item"], "phase": r["phase"], "error": r["error_message"]} for r in failed],
+                "run_exception": str(run_exception) if run_exception else None,
+                "timestamp": datetime.datetime.utcnow().isoformat(),
+            }
+        )
         return
 
     # Load-and-query benchmarks (TPC-H, TPC-DS, ClickBench)
     passed = [r for r in query_results if r["success"]]
     failed = [r for r in query_results if not r["success"]]
-    lf     = [r for r in load_results  if not r["success"]]
+    lf = [r for r in load_results if not r["success"]]
 
-    print(f"\n{'='*60}")
+    print(f"\n{'=' * 60}")
     print(f"{benchmark_name} [{engine_label}]")
-    print(f"  Load  : {len(load_results) - len(lf)}/{len(load_results)} tables loaded OK"
-          + (f"  [WARN] failed: {[r['test_item'] for r in lf]}" if lf else ""))
+    print(
+        f"  Load  : {len(load_results) - len(lf)}/{len(load_results)} tables loaded OK"
+        + (f"  [WARN] failed: {[r['test_item'] for r in lf]}" if lf else "")
+    )
     print(f"  Query : {len(passed)}/{len(query_results)} passed, {len(failed)} failed")
     for r in failed:
         print(f"    x {r['test_item']}: {r['error_message'][:120]}")
     if run_exception:
-        print(f"  [WARN] raised before completion: "
-              f"{type(run_exception).__name__}: {str(run_exception)[:200]}")
-    print(f"{'='*60}")
+        print(f"  [WARN] raised before completion: {type(run_exception).__name__}: {str(run_exception)[:200]}")
+    print(f"{'=' * 60}")
 
     if lf and len(lf) == len(load_results) and len(load_results) > 0:
         pytest.fail(
@@ -134,7 +140,8 @@ def _assert_rate(passed, total, unit):
         warnings.warn(
             f"{benchmark_name} [{engine_label}]: engine crashed before any queries ran: "
             f"{type(run_exception).__name__}: {str(run_exception)[:200]}",
-            UserWarning, stacklevel=2,
+            UserWarning,
+            stacklevel=2,
         )
         return
 
@@ -142,24 +149,30 @@ def _assert_rate(passed, total, unit):
         warnings.warn(
             f"{benchmark_name} [{engine_label}]: {len(failed)} of {len(query_results)} "
             f"queries failed: {[r['test_item'] for r in failed]}",
-            UserWarning, stacklevel=2,
+            UserWarning,
+            stacklevel=2,
         )
     _assert_rate(passed, len(query_results), "queries")
-    _RESULTS.append({
-        "benchmark": benchmark_name, "engine": engine_label,
-        "unit": "queries", "passed": len(passed), "total": len(query_results),
-        "failed": [{"name": r["test_item"], "phase": "Query",
-                    "error": r["error_message"]} for r in failed],
-        "load_failed": [{"name": r["test_item"], "error": r["error_message"]} for r in lf],
-        "run_exception": str(run_exception) if run_exception else None,
-        "timestamp": datetime.datetime.utcnow().isoformat(),
-    })
+    _RESULTS.append(
+        {
+            "benchmark": benchmark_name,
+            "engine": engine_label,
+            "unit": "queries",
+            "passed": len(passed),
+            "total": len(query_results),
+            "failed": [{"name": r["test_item"], "phase": "Query", "error": r["error_message"]} for r in failed],
+            "load_failed": [{"name": r["test_item"], "error": r["error_message"]} for r in lf],
+            "run_exception": str(run_exception) if run_exception else None,
+            "timestamp": datetime.datetime.utcnow().isoformat(),
+        }
+    )
 
 
 # ---------------------------------------------------------------------------
 # Shared benchmark runner
 # ---------------------------------------------------------------------------
 
+
 def run_benchmark(engine, BenchmarkCls, input_dir: str, run_mode: str, **kwargs):
     """Instantiate *BenchmarkCls*, run it, and return (results, exception).
 
@@ -184,6 +197,7 @@ def run_benchmark(engine, BenchmarkCls, input_dir: str, run_mode: str, **kwargs)
 # Data fixtures
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture(scope="session")
 def tpch_parquet_dir(tmp_path_factory):
     """Generate TPC-H SF0.1 parquet data once per session."""
@@ -211,8 +225,7 @@ def clickbench_parquet_dir():
     """Return the directory containing the committed ClickBench 100-row sample."""
     data_dir = pathlib.Path(__file__).parent / "data"
     assert (data_dir / "clickbench_sample.parquet").exists(), (
-        "ClickBench sample parquet not found. "
-        "Run: python tests/integration/data/generate_clickbench_sample.py"
+        "ClickBench sample parquet not found. Run: python tests/integration/data/generate_clickbench_sample.py"
     )
     return str(data_dir)
 
@@ -231,27 +244,26 @@ def _engine_slug(label: str) -> str:
 
 
 def _render_engine_report(engine_label: str, records: list) -> str:
-    ordered = sorted(records, key=lambda r: (
-        _BENCHMARK_ORDER.index(r["benchmark"])
-        if r["benchmark"] in _BENCHMARK_ORDER else 99
-    ))
+    ordered = sorted(
+        records, key=lambda r: _BENCHMARK_ORDER.index(r["benchmark"]) if r["benchmark"] in _BENCHMARK_ORDER else 99
+    )
     ts = max(r["timestamp"] for r in records)
     lines = [
         f"# {engine_label} Benchmark Report",
         "",
-        f"_Auto-generated by the LakeBench integration test suite._  ",
+        "_Auto-generated by the LakeBench integration test suite._  ",
         f"_Last updated: {ts[:19].replace('T', ' ')} UTC_",
         "",
         "---",
         "",
     ]
     for r in ordered:
-        bm      = r["benchmark"]
-        passed  = r["passed"]
-        total   = r["total"]
-        unit    = r["unit"]
-        failed  = r.get("failed", [])
-        lf      = r.get("load_failed", [])
+        bm = r["benchmark"]
+        passed = r["passed"]
+        total = r["total"]
+        unit = r["unit"]
+        failed = r.get("failed", [])
+        lf = r.get("load_failed", [])
         exc_str = r.get("run_exception")
 
         rate = passed / total if total > 0 else 0.0
@@ -272,7 +284,7 @@ def _render_engine_report(engine_label: str, records: list) -> str:
                 "|-------|-------|",
             ]
             for item in lf:
-                err = item['error'][:200].replace('\n', ' ').replace('|', '\\|')
+                err = item["error"][:200].replace("\n", " ").replace("|", "\\|")
                 lines.append(f"| `{item['name']}` | {err} |")
             lines.append("")
 
@@ -285,7 +297,7 @@ def _render_engine_report(engine_label: str, records: list) -> str:
                 "|---|---|",
             ]
             for item in failed:
-                err = item['error'][:300].replace('\n', ' ').replace('|', '\\|')
+                err = item["error"][:300].replace("\n", " ").replace("|", "\\|")
                 lines.append(f"| `{item['name']}` | {err} |")
             lines.append("")
 
@@ -307,6 +319,7 @@ def pytest_sessionfinish(session, exitstatus):
         return
 
     from collections import defaultdict
+
     by_engine: dict[str, list] = defaultdict(list)
     for r in _RESULTS:
         by_engine[r["engine"]].append(r)
@@ -314,10 +327,10 @@ def pytest_sessionfinish(session, exitstatus):
     _DOCS_DIR.mkdir(parents=True, exist_ok=True)
     for engine_label, records in by_engine.items():
         slug = _engine_slug(engine_label)
-        out  = _DOCS_DIR / f"{slug}.md"
+        out = _DOCS_DIR / f"{slug}.md"
         # Merge with existing records for other benchmarks not run this session
         existing = _load_existing_records(out)
-        merged   = _merge_records(existing, records)
+        merged = _merge_records(existing, records)
         out.write_text(_render_engine_report(engine_label, merged), encoding="utf-8")
         print(f"\n[report] {out}")
 
diff --git a/tests/integration/test_daft.py b/tests/integration/test_daft.py
index b5953e3..87d2362 100644
--- a/tests/integration/test_daft.py
+++ b/tests/integration/test_daft.py
@@ -5,43 +5,57 @@
     uv sync --group dev --extra daft --extra tpcds_datagen --extra tpch_datagen
     uv run pytest tests/integration/test_tpc_daft.py -v -s
 """
+
 import pytest
-from tests.integration.conftest import report_and_assert, run_benchmark
+
 from lakebench.utils.path_utils import to_file_uri
+from tests.integration.conftest import report_and_assert, run_benchmark
 
-pytest.importorskip("daft",      reason="requires lakebench[daft] extra")
+pytest.importorskip("daft", reason="requires lakebench[daft] extra")
 pytest.importorskip("deltalake", reason="requires lakebench[daft] extra")
 
 
 def _engine(tmp_path, name):
     from lakebench.engines import Daft
+
     return Daft(schema_or_working_directory_uri=str(tmp_path / name))
 
 
 @pytest.mark.integration
 def test_tpch_daft(tpch_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCH
-    results, exc = run_benchmark(_engine(tmp_path, "tpch"), TPCH, to_file_uri(tpch_parquet_dir), "power_test", scale_factor=0.1)
+
+    results, exc = run_benchmark(
+        _engine(tmp_path, "tpch"), TPCH, to_file_uri(tpch_parquet_dir), "power_test", scale_factor=0.1
+    )
     report_and_assert(results, "TPC-H", "Daft", exc)
 
 
 @pytest.mark.integration
 def test_tpcds_daft(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCDS
-    results, exc = run_benchmark(_engine(tmp_path, "tpcds"), TPCDS, to_file_uri(tpcds_parquet_dir), "power_test", scale_factor=0.1)
+
+    results, exc = run_benchmark(
+        _engine(tmp_path, "tpcds"), TPCDS, to_file_uri(tpcds_parquet_dir), "power_test", scale_factor=0.1
+    )
     report_and_assert(results, "TPC-DS", "Daft", exc)
 
 
 @pytest.mark.integration
 def test_clickbench_daft(clickbench_parquet_dir, tmp_path):
     from lakebench.benchmarks import ClickBench
-    results, exc = run_benchmark(_engine(tmp_path, "clickbench"), ClickBench, to_file_uri(clickbench_parquet_dir), "power_test")
+
+    results, exc = run_benchmark(
+        _engine(tmp_path, "clickbench"), ClickBench, to_file_uri(clickbench_parquet_dir), "power_test"
+    )
     report_and_assert(results, "ClickBench", "Daft", exc)
 
 
 @pytest.mark.integration
 def test_eltbench_daft(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import ELTBench
-    results, exc = run_benchmark(_engine(tmp_path, "eltbench"), ELTBench, to_file_uri(tpcds_parquet_dir), "light", scale_factor=0.1)
-    report_and_assert(results, "ELTBench", "Daft", exc, min_pass_rate=1.0)
 
+    results, exc = run_benchmark(
+        _engine(tmp_path, "eltbench"), ELTBench, to_file_uri(tpcds_parquet_dir), "light", scale_factor=0.1
+    )
+    report_and_assert(results, "ELTBench", "Daft", exc, min_pass_rate=1.0)
diff --git a/tests/integration/test_duckdb.py b/tests/integration/test_duckdb.py
index 7c718c9..0509852 100644
--- a/tests/integration/test_duckdb.py
+++ b/tests/integration/test_duckdb.py
@@ -5,21 +5,25 @@
     uv sync --group dev --extra duckdb --extra tpcds_datagen --extra tpch_datagen
     uv run pytest tests/integration/test_tpc_duckdb.py -v -s
 """
+
 import pytest
+
 from tests.integration.conftest import report_and_assert, run_benchmark
 
-pytest.importorskip("duckdb",     reason="requires lakebench[duckdb] extra")
-pytest.importorskip("deltalake",  reason="requires lakebench[duckdb] extra")
+pytest.importorskip("duckdb", reason="requires lakebench[duckdb] extra")
+pytest.importorskip("deltalake", reason="requires lakebench[duckdb] extra")
 
 
 def _engine(tmp_path, name):
     from lakebench.engines import DuckDB
+
     return DuckDB(schema_or_working_directory_uri=str(tmp_path / name))
 
 
 @pytest.mark.integration
 def test_tpch_duckdb(tpch_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCH
+
     results, exc = run_benchmark(_engine(tmp_path, "tpch"), TPCH, tpch_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-H", "DuckDB", exc, min_pass_rate=1.0)
 
@@ -27,6 +31,7 @@ def test_tpch_duckdb(tpch_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_tpcds_duckdb(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCDS
+
     results, exc = run_benchmark(_engine(tmp_path, "tpcds"), TPCDS, tpcds_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-DS", "DuckDB", exc, min_pass_rate=1.0)
 
@@ -34,6 +39,7 @@ def test_tpcds_duckdb(tpcds_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_clickbench_duckdb(clickbench_parquet_dir, tmp_path):
     from lakebench.benchmarks import ClickBench
+
     results, exc = run_benchmark(_engine(tmp_path, "clickbench"), ClickBench, clickbench_parquet_dir, "power_test")
     report_and_assert(results, "ClickBench", "DuckDB", exc, min_pass_rate=1.0)
 
@@ -41,5 +47,6 @@ def test_clickbench_duckdb(clickbench_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_eltbench_duckdb(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import ELTBench
+
     results, exc = run_benchmark(_engine(tmp_path, "eltbench"), ELTBench, tpcds_parquet_dir, "light", scale_factor=0.1)
     report_and_assert(results, "ELTBench", "DuckDB", exc, min_pass_rate=1.0)
diff --git a/tests/integration/test_polars.py b/tests/integration/test_polars.py
index b1029d7..b5f8888 100644
--- a/tests/integration/test_polars.py
+++ b/tests/integration/test_polars.py
@@ -5,21 +5,25 @@
     uv sync --group dev --extra polars --extra tpcds_datagen --extra tpch_datagen
     uv run pytest tests/integration/test_tpc_polars.py -v -s
 """
+
 import pytest
+
 from tests.integration.conftest import report_and_assert, run_benchmark
 
-pytest.importorskip("polars",    reason="requires lakebench[polars] extra")
+pytest.importorskip("polars", reason="requires lakebench[polars] extra")
 pytest.importorskip("deltalake", reason="requires lakebench[polars] extra")
 
 
 def _engine(tmp_path, name):
     from lakebench.engines import Polars
+
     return Polars(schema_or_working_directory_uri=str(tmp_path / name))
 
 
 @pytest.mark.integration
 def test_tpch_polars(tpch_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCH
+
     results, exc = run_benchmark(_engine(tmp_path, "tpch"), TPCH, tpch_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-H", "Polars", exc)
 
@@ -27,6 +31,7 @@ def test_tpch_polars(tpch_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_tpcds_polars(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCDS
+
     results, exc = run_benchmark(_engine(tmp_path, "tpcds"), TPCDS, tpcds_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-DS", "Polars", exc)
 
@@ -34,6 +39,7 @@ def test_tpcds_polars(tpcds_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_clickbench_polars(clickbench_parquet_dir, tmp_path):
     from lakebench.benchmarks import ClickBench
+
     results, exc = run_benchmark(_engine(tmp_path, "clickbench"), ClickBench, clickbench_parquet_dir, "power_test")
     report_and_assert(results, "ClickBench", "Polars", exc)
 
@@ -41,6 +47,6 @@ def test_clickbench_polars(clickbench_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_eltbench_polars(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import ELTBench
+
     results, exc = run_benchmark(_engine(tmp_path, "eltbench"), ELTBench, tpcds_parquet_dir, "light", scale_factor=0.1)
     report_and_assert(results, "ELTBench", "Polars", exc)
-
diff --git a/tests/integration/test_sail.py b/tests/integration/test_sail.py
index b515dfd..86b532a 100644
--- a/tests/integration/test_sail.py
+++ b/tests/integration/test_sail.py
@@ -7,21 +7,25 @@
     uv sync --group dev --extra sail --extra tpcds_datagen --extra tpch_datagen
     uv run pytest tests/integration/test_tpc_sail.py -v -s
 """
+
 import pytest
+
 from tests.integration.conftest import report_and_assert, run_benchmark
 
-pytest.importorskip("pysail",  reason="requires lakebench[sail] extra")
+pytest.importorskip("pysail", reason="requires lakebench[sail] extra")
 pytest.importorskip("pyspark", reason="requires lakebench[sail] extra")
 
 
 def _engine(tmp_path, name):
     from lakebench.engines import Sail
+
     return Sail(schema_or_working_directory_uri=str(tmp_path / name).replace("\\", "/") + "/")
 
 
 @pytest.mark.integration
 def test_tpch_sail(tpch_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCH
+
     results, exc = run_benchmark(_engine(tmp_path, "tpch"), TPCH, tpch_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-H", "Sail", exc, min_pass_rate=1.0)
 
@@ -29,6 +33,7 @@ def test_tpch_sail(tpch_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_tpcds_sail(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCDS
+
     results, exc = run_benchmark(_engine(tmp_path, "tpcds"), TPCDS, tpcds_parquet_dir, "power_test", scale_factor=0.1)
     report_and_assert(results, "TPC-DS", "Sail", exc, min_pass_rate=1.0)
 
@@ -36,6 +41,7 @@ def test_tpcds_sail(tpcds_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_clickbench_sail(clickbench_parquet_dir, tmp_path):
     from lakebench.benchmarks import ClickBench
+
     results, exc = run_benchmark(_engine(tmp_path, "clickbench"), ClickBench, clickbench_parquet_dir, "power_test")
     report_and_assert(results, "ClickBench", "Sail", exc, min_pass_rate=1.0)
 
@@ -43,6 +49,6 @@ def test_clickbench_sail(clickbench_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_eltbench_sail(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import ELTBench
+
     results, exc = run_benchmark(_engine(tmp_path, "eltbench"), ELTBench, tpcds_parquet_dir, "light", scale_factor=0.1)
     report_and_assert(results, "ELTBench", "Sail", exc, min_pass_rate=1.0)
-
diff --git a/tests/integration/test_spark.py b/tests/integration/test_spark.py
index ac7c91c..6018201 100644
--- a/tests/integration/test_spark.py
+++ b/tests/integration/test_spark.py
@@ -8,8 +8,11 @@
     uv sync --group dev --extra spark --extra tpcds_datagen --extra tpch_datagen
     uv run pytest tests/integration/test_tpc_spark.py -v -s
 """
+
 import warnings
+
 import pytest
+
 from tests.integration.conftest import report_and_assert, run_benchmark
 
 pytest.importorskip("pyspark", reason="requires lakebench[spark] extra")
@@ -21,29 +24,28 @@
 # is GC'd, so without this fixture the JVM dies between tests.
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture(scope="module", autouse=True)
 def _spark_session_lifecycle(tmp_path_factory):
-    from pyspark.sql import SparkSession
     import platform
 
+    from pyspark.sql import SparkSession
+
     warehouse = str(tmp_path_factory.mktemp("spark_warehouse")).replace("\\", "/") + "/"
     builder = (
-        SparkSession.builder
-            .master("local[*]")
-            .config("spark.sql.warehouse.dir", warehouse)
-            .config("spark.driver.host", "localhost")
-            .config("spark.driver.bindAddress", "localhost")
-            .config("spark.ui.enabled", "false")
-            .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
-            .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
-            .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.0")
-            .config("spark.sql.catalogImplementation", "hive")
+        SparkSession.builder.master("local[*]")
+        .config("spark.sql.warehouse.dir", warehouse)
+        .config("spark.driver.host", "localhost")
+        .config("spark.driver.bindAddress", "localhost")
+        .config("spark.ui.enabled", "false")
+        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
+        .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.0")
+        .config("spark.sql.catalogImplementation", "hive")
     )
     if platform.system() == "Windows":
-        builder = (
-            builder
-                .config("spark.hadoop.io.native.lib.available", "false")
-                .config("spark.hadoop.fs.file.impl.disable.cache", "true")
+        builder = builder.config("spark.hadoop.io.native.lib.available", "false").config(
+            "spark.hadoop.fs.file.impl.disable.cache", "true"
         )
     spark = builder.getOrCreate()
     yield spark
@@ -57,13 +59,15 @@ def _spark_session_lifecycle(tmp_path_factory):
 # Engine factory — Spark takes schema_name + schema_uri separately
 # ---------------------------------------------------------------------------
 
+
 def _engine(tmp_path, name):
     from lakebench.engines import Spark
+
     schema_uri = str(tmp_path / name).replace("\\", "/") + "/"
     try:
         return Spark(schema_name=name, schema_uri=schema_uri)
     except Exception as e:
-        return e   # caller checks isinstance(engine, Exception)
+        return e  # caller checks isinstance(engine, Exception)
 
 
 def _run(engine_or_exc, BenchmarkCls, input_dir, run_mode, benchmark_name, **kwargs):
@@ -71,7 +75,8 @@ def _run(engine_or_exc, BenchmarkCls, input_dir, run_mode, benchmark_name, **kwa
     if isinstance(engine_or_exc, Exception):
         warnings.warn(
             f"{benchmark_name} [Spark]: JVM unavailable at test start: {engine_or_exc}",
-            UserWarning, stacklevel=2,
+            UserWarning,
+            stacklevel=2,
         )
         return [], None
     return run_benchmark(engine_or_exc, BenchmarkCls, input_dir, run_mode, **kwargs)
@@ -81,9 +86,11 @@ def _run(engine_or_exc, BenchmarkCls, input_dir, run_mode, benchmark_name, **kwa
 # Tests
 # ---------------------------------------------------------------------------
 
+
 @pytest.mark.integration
 def test_tpch_spark(tpch_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCH
+
     engine = _engine(tmp_path, "tpch")
     results, exc = _run(engine, TPCH, tpch_parquet_dir, "power_test", "TPC-H", scale_factor=0.1)
     if results is not None:
@@ -93,6 +100,7 @@ def test_tpch_spark(tpch_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_tpcds_spark(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import TPCDS
+
     engine = _engine(tmp_path, "tpcds")
     results, exc = _run(engine, TPCDS, tpcds_parquet_dir, "power_test", "TPC-DS", scale_factor=0.1)
     if results is not None:
@@ -102,6 +110,7 @@ def test_tpcds_spark(tpcds_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_clickbench_spark(clickbench_parquet_dir, tmp_path):
     from lakebench.benchmarks import ClickBench
+
     engine = _engine(tmp_path, "clickbench")
     results, exc = _run(engine, ClickBench, clickbench_parquet_dir, "power_test", "ClickBench")
     if results is not None:
@@ -111,8 +120,8 @@ def test_clickbench_spark(clickbench_parquet_dir, tmp_path):
 @pytest.mark.integration
 def test_eltbench_spark(tpcds_parquet_dir, tmp_path):
     from lakebench.benchmarks import ELTBench
+
     engine = _engine(tmp_path, "eltbench")
     results, exc = _run(engine, ELTBench, tpcds_parquet_dir, "light", "ELTBench", scale_factor=0.1)
     if results is not None:
         report_and_assert(results, "ELTBench", "Spark", exc, min_pass_rate=1.0)
-
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 5558ccd..e2edd2d 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -1,4 +1,5 @@
 import pytest
+
 from lakebench.engines.base import BaseEngine
 
 
diff --git a/tests/test_path_utils.py b/tests/test_path_utils.py
index fa03ecd..7fa22bb 100644
--- a/tests/test_path_utils.py
+++ b/tests/test_path_utils.py
@@ -1,4 +1,5 @@
 import pytest
+
 from lakebench.utils.path_utils import abfss_to_https, to_unix_path
 
 
diff --git a/tests/test_query_utils.py b/tests/test_query_utils.py
index 6aed90b..b2a73b8 100644
--- a/tests/test_query_utils.py
+++ b/tests/test_query_utils.py
@@ -1,5 +1,6 @@
 import pytest
-from lakebench.utils.query_utils import transpile_and_qualify_query, get_table_name_from_ddl
+
+from lakebench.utils.query_utils import get_table_name_from_ddl, transpile_and_qualify_query
 
 
 class TestTranspileAndQualifyQuery:
@@ -50,6 +51,97 @@ def test_no_catalog_no_schema(self):
         )
         assert "lineitem" in result
 
+    # ---- multi-part (3- and 4-part) name qualification ----
+
+    def test_three_part_schema_no_catalog_spark(self):
+        """Fabric-style workspace.lakehouse.schema → 4 backticked segments."""
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog=None,
+            schema="ws.lakehouse.dbo",
+        )
+        assert "`ws`.`lakehouse`.`dbo`.`orders`" in result
+
+    def test_catalog_plus_two_part_schema_spark(self):
+        """catalog + dotted schema must NOT drop the catalog (the old bug)."""
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog="cat",
+            schema="mid.sch",
+        )
+        assert "`cat`.`mid`.`sch`.`orders`" in result
+
+    def test_two_part_catalog_schema_spark(self):
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog="cat",
+            schema="sch",
+        )
+        assert "`cat`.`sch`.`orders`" in result
+
+    def test_multi_part_applies_to_all_tables_in_join(self):
+        result = transpile_and_qualify_query(
+            query="SELECT a FROM orders o JOIN customers c ON o.id = c.id",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog="cat",
+            schema="mid.sch",
+        )
+        assert "`cat`.`mid`.`sch`.`orders`" in result
+        assert "`cat`.`mid`.`sch`.`customers`" in result
+
+    def test_non_spark_dialect_uses_bare_segments(self):
+        """DuckDB et al. don't get backticks; sqlglot quotes per-dialect."""
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="duckdb",
+            catalog="cat",
+            schema="sch",
+        )
+        assert "`" not in result
+        assert "cat.sch.orders" in result
+
+    def test_cte_reference_is_not_qualified(self):
+        """A CTE name must stay bare; only the real base table is qualified."""
+        result = transpile_and_qualify_query(
+            query="WITH t AS (SELECT * FROM orders) SELECT * FROM t",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog=None,
+            schema="db",
+        )
+        assert "`db`.`orders`" in result
+        # The final `FROM t` must reference the CTE, not `db`.`t`.
+        assert "`db`.`t`" not in result
+
+    def test_schema_with_leading_or_trailing_dots_tolerated(self):
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog=None,
+            schema="ws..dbo.",
+        )
+        # Empty segments are dropped.
+        assert "`ws`.`dbo`.`orders`" in result
+
+    def test_four_part_name_catalog_and_three_part_schema(self):
+        result = transpile_and_qualify_query(
+            query="SELECT * FROM orders",
+            from_dialect="spark",
+            to_dialect="spark",
+            catalog="cat",
+            schema="a.b.c",
+        )
+        assert "`cat`.`a`.`b`.`c`.`orders`" in result
+
 
 class TestGetTableNameFromDdl:
     def test_simple_create_table(self):
diff --git a/uv.lock b/uv.lock
index 39483e4..d097999 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,11 +1,10 @@
 version = 1
 revision = 3
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 resolution-markers = [
     "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version < '3.9' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version < '3.10' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
@@ -13,12 +12,10 @@ resolution-markers = [
     "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version < '3.9' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version < '3.10' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version < '3.9' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version < '3.10' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
 ]
 conflicts = [[
     { package = "lakebench", extra = "sail" },
@@ -30,7 +27,7 @@ name = "arro3-core"
 version = "0.8.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.12') or (python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail') or (python_full_version < '3.9' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark') or (python_full_version >= '3.12' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "typing-extensions", marker = "(python_full_version >= '3.10' and python_full_version < '3.12') or (python_full_version < '3.10' and extra == 'extra-9-lakebench-sail') or (python_full_version >= '3.12' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a5/e7/d84370ea85be641a8c57f4f8296e8465d30e46938cc9480d384a3ee0084c/arro3_core-0.8.0.tar.gz", hash = "sha256:b75d8281b87a87d3b66836bab89951ae06421970e5f880717723a93e38743f40", size = 93557, upload-time = "2026-02-23T15:12:20.622Z" }
 wheels = [
@@ -114,99 +111,46 @@ wheels = [
 ]
 
 [[package]]
-name = "colorama"
-version = "0.4.6"
+name = "cfgv"
+version = "3.4.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" },
 ]
 
 [[package]]
-name = "coverage"
-version = "7.6.1"
+name = "cfgv"
+version = "3.5.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version < '3.9'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/08/7e37f82e4d1aead42a7443ff06a1e406aabf7302c4f00a546e4b320b994c/coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d", size = 798791, upload-time = "2024-08-04T19:45:30.9Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/61/eb7ce5ed62bacf21beca4937a90fe32545c91a3c8a42a30c6616d48fc70d/coverage-7.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16", size = 206690, upload-time = "2024-08-04T19:43:07.695Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/73/041928e434442bd3afde5584bdc3f932fb4562b1597629f537387cec6f3d/coverage-7.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36", size = 207127, upload-time = "2024-08-04T19:43:10.15Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/c8/6ca52b5147828e45ad0242388477fdb90df2c6cbb9a441701a12b3c71bc8/coverage-7.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02", size = 235654, upload-time = "2024-08-04T19:43:12.405Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/da/9ac2b62557f4340270942011d6efeab9833648380109e897d48ab7c1035d/coverage-7.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc", size = 233598, upload-time = "2024-08-04T19:43:14.078Z" },
-    { url = "https://files.pythonhosted.org/packages/53/23/9e2c114d0178abc42b6d8d5281f651a8e6519abfa0ef460a00a91f80879d/coverage-7.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23", size = 234732, upload-time = "2024-08-04T19:43:16.632Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/7e/a0230756fb133343a52716e8b855045f13342b70e48e8ad41d8a0d60ab98/coverage-7.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34", size = 233816, upload-time = "2024-08-04T19:43:19.049Z" },
-    { url = "https://files.pythonhosted.org/packages/28/7c/3753c8b40d232b1e5eeaed798c875537cf3cb183fb5041017c1fdb7ec14e/coverage-7.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c", size = 232325, upload-time = "2024-08-04T19:43:21.246Z" },
-    { url = "https://files.pythonhosted.org/packages/57/e3/818a2b2af5b7573b4b82cf3e9f137ab158c90ea750a8f053716a32f20f06/coverage-7.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959", size = 233418, upload-time = "2024-08-04T19:43:22.945Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/fb/4532b0b0cefb3f06d201648715e03b0feb822907edab3935112b61b885e2/coverage-7.6.1-cp310-cp310-win32.whl", hash = "sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232", size = 209343, upload-time = "2024-08-04T19:43:25.121Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/25/af337cc7421eca1c187cc9c315f0a755d48e755d2853715bfe8c418a45fa/coverage-7.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0", size = 210136, upload-time = "2024-08-04T19:43:26.851Z" },
-    { url = "https://files.pythonhosted.org/packages/ad/5f/67af7d60d7e8ce61a4e2ddcd1bd5fb787180c8d0ae0fbd073f903b3dd95d/coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93", size = 206796, upload-time = "2024-08-04T19:43:29.115Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/0e/e52332389e057daa2e03be1fbfef25bb4d626b37d12ed42ae6281d0a274c/coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3", size = 207244, upload-time = "2024-08-04T19:43:31.285Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/cd/766b45fb6e090f20f8927d9c7cb34237d41c73a939358bc881883fd3a40d/coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff", size = 239279, upload-time = "2024-08-04T19:43:33.581Z" },
-    { url = "https://files.pythonhosted.org/packages/70/6c/a9ccd6fe50ddaf13442a1e2dd519ca805cbe0f1fcd377fba6d8339b98ccb/coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d", size = 236859, upload-time = "2024-08-04T19:43:35.301Z" },
-    { url = "https://files.pythonhosted.org/packages/14/6f/8351b465febb4dbc1ca9929505202db909c5a635c6fdf33e089bbc3d7d85/coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6", size = 238549, upload-time = "2024-08-04T19:43:37.578Z" },
-    { url = "https://files.pythonhosted.org/packages/68/3c/289b81fa18ad72138e6d78c4c11a82b5378a312c0e467e2f6b495c260907/coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56", size = 237477, upload-time = "2024-08-04T19:43:39.92Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/1c/aa1efa6459d822bd72c4abc0b9418cf268de3f60eeccd65dc4988553bd8d/coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234", size = 236134, upload-time = "2024-08-04T19:43:41.453Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/c8/521c698f2d2796565fe9c789c2ee1ccdae610b3aa20b9b2ef980cc253640/coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133", size = 236910, upload-time = "2024-08-04T19:43:43.037Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/30/033e663399ff17dca90d793ee8a2ea2890e7fdf085da58d82468b4220bf7/coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c", size = 209348, upload-time = "2024-08-04T19:43:44.787Z" },
-    { url = "https://files.pythonhosted.org/packages/20/05/0d1ccbb52727ccdadaa3ff37e4d2dc1cd4d47f0c3df9eb58d9ec8508ca88/coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6", size = 210230, upload-time = "2024-08-04T19:43:46.707Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/d4/300fc921dff243cd518c7db3a4c614b7e4b2431b0d1145c1e274fd99bd70/coverage-7.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778", size = 206983, upload-time = "2024-08-04T19:43:49.082Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/ab/6bf00de5327ecb8db205f9ae596885417a31535eeda6e7b99463108782e1/coverage-7.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391", size = 207221, upload-time = "2024-08-04T19:43:52.15Z" },
-    { url = "https://files.pythonhosted.org/packages/92/8f/2ead05e735022d1a7f3a0a683ac7f737de14850395a826192f0288703472/coverage-7.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8", size = 240342, upload-time = "2024-08-04T19:43:53.746Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/ef/94043e478201ffa85b8ae2d2c79b4081e5a1b73438aafafccf3e9bafb6b5/coverage-7.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d", size = 237371, upload-time = "2024-08-04T19:43:55.993Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/0f/c890339dd605f3ebc269543247bdd43b703cce6825b5ed42ff5f2d6122c7/coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca", size = 239455, upload-time = "2024-08-04T19:43:57.618Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/04/7fd7b39ec7372a04efb0f70c70e35857a99b6a9188b5205efb4c77d6a57a/coverage-7.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163", size = 238924, upload-time = "2024-08-04T19:44:00.012Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/bf/73ce346a9d32a09cf369f14d2a06651329c984e106f5992c89579d25b27e/coverage-7.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a", size = 237252, upload-time = "2024-08-04T19:44:01.713Z" },
-    { url = "https://files.pythonhosted.org/packages/86/74/1dc7a20969725e917b1e07fe71a955eb34bc606b938316bcc799f228374b/coverage-7.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d", size = 238897, upload-time = "2024-08-04T19:44:03.898Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/e9/d9cc3deceb361c491b81005c668578b0dfa51eed02cd081620e9a62f24ec/coverage-7.6.1-cp312-cp312-win32.whl", hash = "sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5", size = 209606, upload-time = "2024-08-04T19:44:05.532Z" },
-    { url = "https://files.pythonhosted.org/packages/47/c8/5a2e41922ea6740f77d555c4d47544acd7dc3f251fe14199c09c0f5958d3/coverage-7.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb", size = 210373, upload-time = "2024-08-04T19:44:07.079Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/f9/9aa4dfb751cb01c949c990d136a0f92027fbcc5781c6e921df1cb1563f20/coverage-7.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106", size = 207007, upload-time = "2024-08-04T19:44:09.453Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/67/e1413d5a8591622a46dd04ff80873b04c849268831ed5c304c16433e7e30/coverage-7.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9", size = 207269, upload-time = "2024-08-04T19:44:11.045Z" },
-    { url = "https://files.pythonhosted.org/packages/14/5b/9dec847b305e44a5634d0fb8498d135ab1d88330482b74065fcec0622224/coverage-7.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c", size = 239886, upload-time = "2024-08-04T19:44:12.83Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/b7/35760a67c168e29f454928f51f970342d23cf75a2bb0323e0f07334c85f3/coverage-7.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a", size = 237037, upload-time = "2024-08-04T19:44:15.393Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/95/d2fd31f1d638df806cae59d7daea5abf2b15b5234016a5ebb502c2f3f7ee/coverage-7.6.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060", size = 239038, upload-time = "2024-08-04T19:44:17.466Z" },
-    { url = "https://files.pythonhosted.org/packages/6e/bd/110689ff5752b67924efd5e2aedf5190cbbe245fc81b8dec1abaffba619d/coverage-7.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862", size = 238690, upload-time = "2024-08-04T19:44:19.336Z" },
-    { url = "https://files.pythonhosted.org/packages/d3/a8/08d7b38e6ff8df52331c83130d0ab92d9c9a8b5462f9e99c9f051a4ae206/coverage-7.6.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388", size = 236765, upload-time = "2024-08-04T19:44:20.994Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/6a/9cf96839d3147d55ae713eb2d877f4d777e7dc5ba2bce227167d0118dfe8/coverage-7.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155", size = 238611, upload-time = "2024-08-04T19:44:22.616Z" },
-    { url = "https://files.pythonhosted.org/packages/74/e4/7ff20d6a0b59eeaab40b3140a71e38cf52547ba21dbcf1d79c5a32bba61b/coverage-7.6.1-cp313-cp313-win32.whl", hash = "sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a", size = 209671, upload-time = "2024-08-04T19:44:24.418Z" },
-    { url = "https://files.pythonhosted.org/packages/35/59/1812f08a85b57c9fdb6d0b383d779e47b6f643bc278ed682859512517e83/coverage-7.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129", size = 210368, upload-time = "2024-08-04T19:44:26.276Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/15/08913be1c59d7562a3e39fce20661a98c0a3f59d5754312899acc6cb8a2d/coverage-7.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e", size = 207758, upload-time = "2024-08-04T19:44:29.028Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/ae/b5d58dff26cade02ada6ca612a76447acd69dccdbb3a478e9e088eb3d4b9/coverage-7.6.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962", size = 208035, upload-time = "2024-08-04T19:44:30.673Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/d7/62095e355ec0613b08dfb19206ce3033a0eedb6f4a67af5ed267a8800642/coverage-7.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb", size = 250839, upload-time = "2024-08-04T19:44:32.412Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/1e/c2967cb7991b112ba3766df0d9c21de46b476d103e32bb401b1b2adf3380/coverage-7.6.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704", size = 246569, upload-time = "2024-08-04T19:44:34.547Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/61/a7a6a55dd266007ed3b1df7a3386a0d760d014542d72f7c2c6938483b7bd/coverage-7.6.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b", size = 248927, upload-time = "2024-08-04T19:44:36.313Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/fa/13a6f56d72b429f56ef612eb3bc5ce1b75b7ee12864b3bd12526ab794847/coverage-7.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f", size = 248401, upload-time = "2024-08-04T19:44:38.155Z" },
-    { url = "https://files.pythonhosted.org/packages/75/06/0429c652aa0fb761fc60e8c6b291338c9173c6aa0f4e40e1902345b42830/coverage-7.6.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223", size = 246301, upload-time = "2024-08-04T19:44:39.883Z" },
-    { url = "https://files.pythonhosted.org/packages/52/76/1766bb8b803a88f93c3a2d07e30ffa359467810e5cbc68e375ebe6906efb/coverage-7.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3", size = 247598, upload-time = "2024-08-04T19:44:41.59Z" },
-    { url = "https://files.pythonhosted.org/packages/66/8b/f54f8db2ae17188be9566e8166ac6df105c1c611e25da755738025708d54/coverage-7.6.1-cp313-cp313t-win32.whl", hash = "sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f", size = 210307, upload-time = "2024-08-04T19:44:43.301Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/b0/e0dca6da9170aefc07515cce067b97178cefafb512d00a87a1c717d2efd5/coverage-7.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657", size = 211453, upload-time = "2024-08-04T19:44:45.677Z" },
-    { url = "https://files.pythonhosted.org/packages/81/d0/d9e3d554e38beea5a2e22178ddb16587dbcbe9a1ef3211f55733924bf7fa/coverage-7.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0", size = 206674, upload-time = "2024-08-04T19:44:47.694Z" },
-    { url = "https://files.pythonhosted.org/packages/38/ea/cab2dc248d9f45b2b7f9f1f596a4d75a435cb364437c61b51d2eb33ceb0e/coverage-7.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a", size = 207101, upload-time = "2024-08-04T19:44:49.32Z" },
-    { url = "https://files.pythonhosted.org/packages/ca/6f/f82f9a500c7c5722368978a5390c418d2a4d083ef955309a8748ecaa8920/coverage-7.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b", size = 236554, upload-time = "2024-08-04T19:44:51.631Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/94/d3055aa33d4e7e733d8fa309d9adf147b4b06a82c1346366fc15a2b1d5fa/coverage-7.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3", size = 234440, upload-time = "2024-08-04T19:44:53.464Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/6e/885bcd787d9dd674de4a7d8ec83faf729534c63d05d51d45d4fa168f7102/coverage-7.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de", size = 235889, upload-time = "2024-08-04T19:44:55.165Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/63/df50120a7744492710854860783d6819ff23e482dee15462c9a833cc428a/coverage-7.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6", size = 235142, upload-time = "2024-08-04T19:44:57.269Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/5d/9d0acfcded2b3e9ce1c7923ca52ccc00c78a74e112fc2aee661125b7843b/coverage-7.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569", size = 233805, upload-time = "2024-08-04T19:44:59.033Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/56/50abf070cb3cd9b1dd32f2c88f083aab561ecbffbcd783275cb51c17f11d/coverage-7.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989", size = 234655, upload-time = "2024-08-04T19:45:01.398Z" },
-    { url = "https://files.pythonhosted.org/packages/25/ee/b4c246048b8485f85a2426ef4abab88e48c6e80c74e964bea5cd4cd4b115/coverage-7.6.1-cp38-cp38-win32.whl", hash = "sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7", size = 209296, upload-time = "2024-08-04T19:45:03.819Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/1c/96cf86b70b69ea2b12924cdf7cabb8ad10e6130eab8d767a1099fbd2a44f/coverage-7.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8", size = 210137, upload-time = "2024-08-04T19:45:06.25Z" },
-    { url = "https://files.pythonhosted.org/packages/19/d3/d54c5aa83268779d54c86deb39c1c4566e5d45c155369ca152765f8db413/coverage-7.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255", size = 206688, upload-time = "2024-08-04T19:45:08.358Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/fe/137d5dca72e4a258b1bc17bb04f2e0196898fe495843402ce826a7419fe3/coverage-7.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8", size = 207120, upload-time = "2024-08-04T19:45:11.526Z" },
-    { url = "https://files.pythonhosted.org/packages/78/5b/a0a796983f3201ff5485323b225d7c8b74ce30c11f456017e23d8e8d1945/coverage-7.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2", size = 235249, upload-time = "2024-08-04T19:45:13.202Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/e1/76089d6a5ef9d68f018f65411fcdaaeb0141b504587b901d74e8587606ad/coverage-7.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a", size = 233237, upload-time = "2024-08-04T19:45:14.961Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/6f/eef79b779a540326fee9520e5542a8b428cc3bfa8b7c8f1022c1ee4fc66c/coverage-7.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc", size = 234311, upload-time = "2024-08-04T19:45:16.924Z" },
-    { url = "https://files.pythonhosted.org/packages/75/e1/656d65fb126c29a494ef964005702b012f3498db1a30dd562958e85a4049/coverage-7.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004", size = 233453, upload-time = "2024-08-04T19:45:18.672Z" },
-    { url = "https://files.pythonhosted.org/packages/68/6a/45f108f137941a4a1238c85f28fd9d048cc46b5466d6b8dda3aba1bb9d4f/coverage-7.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb", size = 231958, upload-time = "2024-08-04T19:45:20.63Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/e7/47b809099168b8b8c72ae311efc3e88c8d8a1162b3ba4b8da3cfcdb85743/coverage-7.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36", size = 232938, upload-time = "2024-08-04T19:45:23.062Z" },
-    { url = "https://files.pythonhosted.org/packages/52/80/052222ba7058071f905435bad0ba392cc12006380731c37afaf3fe749b88/coverage-7.6.1-cp39-cp39-win32.whl", hash = "sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c", size = 209352, upload-time = "2024-08-04T19:45:25.042Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/d8/1b92e0b3adcf384e98770a00ca095da1b5f7b483e6563ae4eb5e935d24a1/coverage-7.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca", size = 210153, upload-time = "2024-08-04T19:45:27.079Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/2b/0354ed096bca64dc8e32a7cbcae28b34cb5ad0b1fe2125d6d99583313ac0/coverage-7.6.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df", size = 198926, upload-time = "2024-08-04T19:45:28.875Z" },
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
 ]
 
-[package.optional-dependencies]
-toml = [
-    { name = "tomli", marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
 [[package]]
@@ -214,7 +158,7 @@ name = "coverage"
 version = "7.10.7"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" }
 wheels = [
@@ -325,7 +269,7 @@ wheels = [
 
 [package.optional-dependencies]
 toml = [
-    { name = "tomli", marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "tomli", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 
 [[package]]
@@ -468,7 +412,7 @@ dependencies = [
     { name = "packaging", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "pyarrow", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "tqdm", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "typing-extensions", marker = "python_full_version == '3.10.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/8a/db/32cf6cffa3f9e99a6c0d666fbe32883a1abfa7f1e013ac686c785196a7e2/daft-0.7.3.tar.gz", hash = "sha256:1adfb4301f4417de33b6ffbcfc07c8e8414655141556065d1bf1ab9ae988b90d", size = 2820158, upload-time = "2026-02-13T22:57:25.031Z" }
 wheels = [
@@ -484,8 +428,8 @@ name = "delta-spark"
 version = "3.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "importlib-metadata", marker = "python_full_version >= '3.9'" },
-    { name = "pyspark", version = "3.5.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" },
+    { name = "importlib-metadata" },
+    { name = "pyspark", version = "3.5.8", source = { registry = "https://pypi.org/simple" } },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/38/06/a64cc4e17fe959cf60dc126bf3283fc9f22fc91f000b7f3f5e465338022d/delta-spark-3.2.0.tar.gz", hash = "sha256:641967828e47c64805f8c746513da80bea24b5f19b069cdcf64561cd3692e11d", size = 22147, upload-time = "2024-05-09T17:26:10.754Z" }
 wheels = [
@@ -497,11 +441,11 @@ name = "deltalake"
 version = "1.2.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 dependencies = [
-    { name = "arro3-core", marker = "python_full_version == '3.9.*'" },
-    { name = "deprecated", marker = "python_full_version == '3.9.*'" },
+    { name = "arro3-core", marker = "python_full_version < '3.10'" },
+    { name = "deprecated", marker = "python_full_version < '3.10'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/d0/f2/1ee40a1e1d65386ff8c34b268cd456e9baa5cbfda05f8762f1dd6d2f5700/deltalake-1.2.1.tar.gz", hash = "sha256:76ace48961de01b7d7cc4b1a2b2462271fb49bf74838c8bdfa0c6372e053d905", size = 5144436, upload-time = "2025-10-21T08:49:45.265Z" }
 wheels = [
@@ -549,13 +493,22 @@ name = "deprecated"
 version = "1.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "wrapt", marker = "python_full_version >= '3.10' or (python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail') or (python_full_version < '3.9' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "wrapt", marker = "python_full_version >= '3.10' or extra == 'extra-9-lakebench-sail'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" },
 ]
 
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
+]
+
 [[package]]
 name = "duckdb"
 version = "1.4.4"
@@ -609,14 +562,47 @@ name = "exceptiongroup"
 version = "1.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", version = "4.13.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.9' and python_full_version < '3.11') or (python_full_version < '3.9' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark') or (python_full_version >= '3.11' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
 ]
 
+[[package]]
+name = "filelock"
+version = "3.19.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/bb/0ab3e58d22305b6f5440629d20683af28959bf793d98d11950e305c1c326/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", size = 17687, upload-time = "2025-08-14T16:56:03.016Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" },
+]
+
+[[package]]
+name = "filelock"
+version = "3.29.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" },
+]
+
 [[package]]
 name = "fsspec"
 version = "2025.2.0"
@@ -631,7 +617,7 @@ name = "googleapis-common-protos"
 version = "1.72.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "protobuf", marker = "python_full_version >= '3.9'" },
+    { name = "protobuf" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433, upload-time = "2025-11-06T18:29:24.087Z" }
 wheels = [
@@ -643,7 +629,7 @@ name = "grpcio"
 version = "1.78.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" },
+    { name = "typing-extensions" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/1f/de/de568532d9907552700f80dcec38219d8d298ad9e71f5e0a095abaf2761e/grpcio-1.78.1.tar.gz", hash = "sha256:27c625532d33ace45d57e775edf1982e183ff8641c72e4e91ef7ba667a149d72", size = 12835760, upload-time = "2026-02-20T01:16:10.869Z" }
 wheels = [
@@ -714,21 +700,55 @@ name = "grpcio-status"
 version = "1.78.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "googleapis-common-protos", marker = "python_full_version >= '3.9'" },
-    { name = "grpcio", marker = "python_full_version >= '3.9'" },
-    { name = "protobuf", marker = "python_full_version >= '3.9'" },
+    { name = "googleapis-common-protos" },
+    { name = "grpcio" },
+    { name = "protobuf" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/73/be/0a88b27a058d3a640bbe42e2b4e1323a19cabcedaeab1b3a44af231777e9/grpcio_status-1.78.1.tar.gz", hash = "sha256:47e7fa903549c5881344f1cba23c814b5f69d09233541036eb25642d32497c8e", size = 13814, upload-time = "2026-02-20T01:21:50.761Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/85/dd/08819a8108753e8b2a89aab259d7301dba696ebc581a307a3cd4bb786b57/grpcio_status-1.78.1-py3-none-any.whl", hash = "sha256:5f6660b99063f918b7f84d99cab68084aeb0dd09949e1224a6073026cea6820c", size = 14525, upload-time = "2026-02-20T01:21:35.793Z" },
 ]
 
+[[package]]
+name = "identify"
+version = "2.6.15"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311, upload-time = "2025-10-02T17:43:40.631Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183, upload-time = "2025-10-02T17:43:39.137Z" },
+]
+
+[[package]]
+name = "identify"
+version = "2.6.19"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/52/63/51723b5f116cc04b061cb6f5a561790abf249d25931d515cd375e063e0f4/identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842", size = 99567, upload-time = "2026-04-17T18:39:50.265Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/84/d9273cd09688070a6523c4aee4663a8538721b2b755c4962aafae0011e72/identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a", size = 99397, upload-time = "2026-04-17T18:39:49.221Z" },
+]
+
 [[package]]
 name = "importlib-metadata"
 version = "8.7.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "zipp", marker = "python_full_version >= '3.9'" },
+    { name = "zipp" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" }
 wheels = [
@@ -740,8 +760,7 @@ name = "iniconfig"
 version = "2.1.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
-    "python_full_version < '3.9'",
+    "python_full_version < '3.10'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
 wheels = [
@@ -776,50 +795,49 @@ version = "1.0.1"
 source = { editable = "." }
 dependencies = [
     { name = "fsspec" },
-    { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyarrow" },
     { name = "sqlglot" },
-    { name = "tenacity", version = "8.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "tenacity", version = "9.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "tenacity" },
 ]
 
 [package.optional-dependencies]
 daft = [
     { name = "daft", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "deltalake", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pyarrow", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyarrow" },
 ]
 duckdb = [
     { name = "deltalake", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "duckdb", marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "duckdb" },
+    { name = "pyarrow" },
 ]
 polars = [
     { name = "deltalake", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "polars", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyarrow" },
 ]
 sail = [
-    { name = "deltalake", version = "1.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "deltalake", version = "1.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.10' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "deltalake", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9'" },
+    { name = "pyarrow" },
     { name = "pysail", marker = "python_full_version >= '3.10'" },
-    { name = "pyspark", version = "4.0.2", source = { registry = "https://pypi.org/simple" }, extra = ["connect"], marker = "(python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyspark", version = "4.0.2", source = { registry = "https://pypi.org/simple" }, extra = ["connect"], marker = "(python_full_version < '3.10' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "pyspark", version = "4.1.1", source = { registry = "https://pypi.org/simple" }, extra = ["connect"], marker = "(python_full_version >= '3.10' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 spark = [
-    { name = "delta-spark", marker = "python_full_version >= '3.9'" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9'" },
-    { name = "pyspark", version = "3.5.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" },
+    { name = "delta-spark" },
+    { name = "pyarrow" },
+    { name = "pyspark", version = "3.5.8", source = { registry = "https://pypi.org/simple" } },
 ]
 sparkmeasure = [
     { name = "sparkmeasure" },
 ]
 tpcds-datagen = [
-    { name = "duckdb", marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "duckdb" },
+    { name = "pyarrow" },
 ]
 tpch-datagen = [
     { name = "tpchgen-cli" },
@@ -827,85 +845,59 @@ tpch-datagen = [
 
 [package.dev-dependencies]
 dev = [
-    { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pre-commit", version = "4.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pre-commit", version = "4.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pytest-cov", version = "5.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pytest-cov", version = "7.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pytest-cov" },
+    { name = "ruff" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "daft", marker = "python_full_version >= '3.10' and extra == 'daft'", specifier = "==0.7.3" },
-    { name = "delta-spark", marker = "python_full_version >= '3.9' and extra == 'spark'", specifier = ">=3.2.0,<4.0.0" },
-    { name = "deltalake", marker = "python_full_version >= '3.9' and extra == 'sail'", specifier = ">=1.2.1" },
+    { name = "delta-spark", marker = "extra == 'spark'", specifier = ">=3.2.0,<4.0.0" },
     { name = "deltalake", marker = "python_full_version >= '3.10' and extra == 'daft'", specifier = "==1.3.3" },
     { name = "deltalake", marker = "python_full_version >= '3.10' and extra == 'duckdb'", specifier = "==1.3.3" },
     { name = "deltalake", marker = "python_full_version >= '3.10' and extra == 'polars'", specifier = "==1.3.3" },
-    { name = "duckdb", marker = "python_full_version >= '3.9' and extra == 'duckdb'", specifier = "==1.4.4" },
-    { name = "duckdb", marker = "python_full_version >= '3.9' and extra == 'tpcds-datagen'", specifier = "==1.4.4" },
+    { name = "deltalake", marker = "extra == 'sail'", specifier = ">=1.2.1" },
+    { name = "duckdb", marker = "extra == 'duckdb'", specifier = "==1.4.4" },
+    { name = "duckdb", marker = "extra == 'tpcds-datagen'", specifier = "==1.4.4" },
     { name = "fsspec", specifier = "==2025.2.0" },
     { name = "numpy", specifier = ">=1.24.4" },
     { name = "polars", marker = "python_full_version >= '3.10' and extra == 'polars'", specifier = "==1.38.1" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' and extra == 'duckdb'", specifier = ">=15.0.0" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' and extra == 'polars'", specifier = ">=15.0.0" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' and extra == 'sail'", specifier = ">=15.0.0" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' and extra == 'spark'", specifier = ">=15.0.0" },
-    { name = "pyarrow", marker = "python_full_version >= '3.9' and extra == 'tpcds-datagen'", specifier = ">=15.0.0" },
-    { name = "pyarrow", marker = "python_full_version >= '3.10' and extra == 'daft'", specifier = ">=15.0.0" },
+    { name = "pyarrow", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'daft'", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'duckdb'", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'polars'", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'sail'", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'spark'", specifier = ">=15.0.0" },
+    { name = "pyarrow", marker = "extra == 'tpcds-datagen'", specifier = ">=15.0.0" },
     { name = "pysail", marker = "python_full_version >= '3.10' and extra == 'sail'", specifier = ">=0.5.2" },
-    { name = "pyspark", marker = "python_full_version >= '3.9' and extra == 'spark'", specifier = ">=3.5.0,<4.0.0" },
-    { name = "pyspark", extras = ["connect"], marker = "python_full_version >= '3.9' and extra == 'sail'", specifier = ">=4.0.0" },
+    { name = "pyspark", marker = "extra == 'spark'", specifier = ">=3.5.0,<4.0.0" },
+    { name = "pyspark", extras = ["connect"], marker = "extra == 'sail'", specifier = ">=4.0.0" },
     { name = "sparkmeasure", marker = "extra == 'sparkmeasure'", specifier = "==0.24.0" },
     { name = "sqlglot", specifier = "==26.30.0" },
-    { name = "tenacity", marker = "python_full_version < '3.9'", specifier = ">=8.2.3,<9" },
-    { name = "tenacity", marker = "python_full_version >= '3.9'", specifier = "==9.1.2" },
+    { name = "tenacity", specifier = "==9.1.2" },
     { name = "tpchgen-cli", marker = "extra == 'tpch-datagen'", specifier = ">=2.0.1" },
 ]
 provides-extras = ["duckdb", "polars", "daft", "tpcds-datagen", "tpch-datagen", "sparkmeasure", "spark", "sail"]
 
 [package.metadata.requires-dev]
 dev = [
+    { name = "pre-commit", specifier = ">=3.5.0" },
     { name = "pytest", specifier = ">=7.0.0" },
     { name = "pytest-cov", specifier = ">=4.0.0" },
+    { name = "ruff", specifier = ">=0.6.0" },
 ]
 
 [[package]]
-name = "numpy"
-version = "1.24.4"
+name = "nodeenv"
+version = "1.10.0"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version < '3.9'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229, upload-time = "2023-06-26T13:39:33.218Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140, upload-time = "2023-06-26T13:22:33.184Z" },
-    { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297, upload-time = "2023-06-26T13:22:59.541Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611, upload-time = "2023-06-26T13:23:22.167Z" },
-    { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357, upload-time = "2023-06-26T13:23:51.446Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222, upload-time = "2023-06-26T13:24:13.849Z" },
-    { url = "https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514, upload-time = "2023-06-26T13:24:38.129Z" },
-    { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508, upload-time = "2023-06-26T13:25:08.882Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033, upload-time = "2023-06-26T13:25:33.417Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951, upload-time = "2023-06-26T13:25:55.725Z" },
-    { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923, upload-time = "2023-06-26T13:26:25.658Z" },
-    { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446, upload-time = "2023-06-26T13:26:49.302Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466, upload-time = "2023-06-26T13:27:16.029Z" },
-    { url = "https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722, upload-time = "2023-06-26T13:27:49.573Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102, upload-time = "2023-06-26T13:28:12.288Z" },
-    { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616, upload-time = "2023-06-26T13:28:35.659Z" },
-    { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263, upload-time = "2023-06-26T13:29:09.272Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660, upload-time = "2023-06-26T13:29:33.434Z" },
-    { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112, upload-time = "2023-06-26T13:29:58.385Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549, upload-time = "2023-06-26T13:30:36.976Z" },
-    { url = "https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950, upload-time = "2023-06-26T13:31:01.787Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228, upload-time = "2023-06-26T13:31:26.696Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170, upload-time = "2023-06-26T13:31:56.615Z" },
-    { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918, upload-time = "2023-06-26T13:32:16.8Z" },
-    { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441, upload-time = "2023-06-26T13:32:40.521Z" },
-    { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590, upload-time = "2023-06-26T13:33:10.36Z" },
-    { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744, upload-time = "2023-06-26T13:33:36.703Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290, upload-time = "2023-06-26T13:34:05.409Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
 ]
 
 [[package]]
@@ -913,7 +905,7 @@ name = "numpy"
 version = "2.0.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015, upload-time = "2024-08-26T20:19:40.945Z" }
 wheels = [
@@ -1132,14 +1124,14 @@ version = "2.3.3"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
     "python_full_version == '3.10.*'",
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 dependencies = [
-    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.10' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail') or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "python-dateutil", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" },
-    { name = "pytz", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" },
-    { name = "tzdata", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" },
+    { name = "python-dateutil", marker = "python_full_version < '3.11'" },
+    { name = "pytz", marker = "python_full_version < '3.11'" },
+    { name = "tzdata", marker = "python_full_version < '3.11'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" }
 wheels = [
@@ -1268,25 +1260,24 @@ wheels = [
 ]
 
 [[package]]
-name = "pluggy"
-version = "1.5.0"
+name = "platformdirs"
+version = "4.4.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version < '3.9'",
+    "python_full_version < '3.10'",
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955, upload-time = "2024-04-20T21:34:42.531Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556, upload-time = "2024-04-20T21:34:40.434Z" },
+    { url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" },
 ]
 
 [[package]]
-name = "pluggy"
-version = "1.6.0"
+name = "platformdirs"
+version = "4.10.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
     "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
@@ -1294,11 +1285,18 @@ resolution-markers = [
     "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
     "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/d7/47/e4501f49c178ae1d9f4a75073fda4204f52647993f075a9db4d14930e0c5/platformdirs-4.10.0.tar.gz", hash = "sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7", size = 31224, upload-time = "2026-05-28T03:32:53.587Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/e6/cd9575ac904136b3cbf7aa7ee819ef86eedb7274e46f230e94ea4342e729/platformdirs-4.10.0-py3-none-any.whl", hash = "sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a", size = 22743, upload-time = "2026-05-28T03:32:52.175Z" },
+]
+
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
@@ -1332,6 +1330,54 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" },
 ]
 
+[[package]]
+name = "pre-commit"
+version = "4.3.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "cfgv", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "identify", version = "2.6.15", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "nodeenv", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyyaml", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "virtualenv", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792, upload-time = "2025-08-09T18:56:14.651Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" },
+]
+
+[[package]]
+name = "pre-commit"
+version = "4.6.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
+]
+dependencies = [
+    { name = "cfgv", version = "3.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "identify", version = "2.6.19", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "nodeenv", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pyyaml", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "virtualenv", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8e/22/2de9408ac81acbb8a7d05d4cc064a152ccf33b3d480ebe0cd292153db239/pre_commit-4.6.0.tar.gz", hash = "sha256:718d2208cef53fdc38206e40524a6d4d9576d103eb16f0fec11c875e7716e9d9", size = 198525, upload-time = "2026-04-21T20:31:41.613Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/80/6e/4b28b62ecb6aae56769c34a8ff1d661473ec1e9519e2d5f8b2c150086b26/pre_commit-4.6.0-py2.py3-none-any.whl", hash = "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b", size = 226472, upload-time = "2026-04-21T20:31:40.092Z" },
+]
+
 [[package]]
 name = "protobuf"
 version = "6.33.5"
@@ -1437,10 +1483,10 @@ source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
     "python_full_version >= '3.11'",
     "python_full_version == '3.10.*'",
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 dependencies = [
-    { name = "py4j", marker = "python_full_version >= '3.9'" },
+    { name = "py4j" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/80/5a/3806f44eb47387e8af803508cdd6bbc0df784febf4dc010700be04a1ff89/pyspark-3.5.8.tar.gz", hash = "sha256:54cca0767b21b40e3953ad1d30f8601c53abf9cbda763653289cdcfcac52313c", size = 317817299, upload-time = "2026-01-15T11:46:14.487Z" }
 
@@ -1449,21 +1495,21 @@ name = "pyspark"
 version = "4.0.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 dependencies = [
-    { name = "py4j", marker = "python_full_version == '3.9.*'" },
+    { name = "py4j", marker = "python_full_version < '3.10'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/96/89/408b42c803db71f4a4d8a3f1ab0745a40dfe41aeacdfc453545665a171f4/pyspark-4.0.2.tar.gz", hash = "sha256:938b4a1883383374d331ebfcb5d92debfa1891cf3d7a6d730520a1a2d23f1a90", size = 434209940, upload-time = "2026-02-05T19:31:13.6Z" }
 
 [package.optional-dependencies]
 connect = [
-    { name = "googleapis-common-protos", marker = "python_full_version == '3.9.*'" },
-    { name = "grpcio", marker = "python_full_version == '3.9.*'" },
-    { name = "grpcio-status", marker = "python_full_version == '3.9.*'" },
-    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" },
-    { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" },
-    { name = "pyarrow", marker = "python_full_version == '3.9.*'" },
+    { name = "googleapis-common-protos", marker = "python_full_version < '3.10'" },
+    { name = "grpcio", marker = "python_full_version < '3.10'" },
+    { name = "grpcio-status", marker = "python_full_version < '3.10'" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pyarrow", marker = "python_full_version < '3.10'" },
 ]
 
 [[package]]
@@ -1497,41 +1543,21 @@ connect = [
     { name = "zstandard", marker = "python_full_version >= '3.10'" },
 ]
 
-[[package]]
-name = "pytest"
-version = "8.3.5"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version < '3.9'",
-]
-dependencies = [
-    { name = "colorama", marker = "(python_full_version < '3.9' and sys_platform == 'win32') or (python_full_version >= '3.9' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark') or (sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "exceptiongroup", marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "packaging", marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pluggy", version = "1.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "tomli", marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" },
-]
-
 [[package]]
 name = "pytest"
 version = "8.4.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version == '3.9.*'",
+    "python_full_version < '3.10'",
 ]
 dependencies = [
-    { name = "colorama", marker = "(python_full_version == '3.9.*' and sys_platform == 'win32') or (python_full_version != '3.9.*' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark') or (sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "exceptiongroup", marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "packaging", marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pluggy", version = "1.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pygments", marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "tomli", marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "colorama", marker = "(python_full_version < '3.10' and sys_platform == 'win32') or (python_full_version >= '3.10' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark') or (sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "exceptiongroup", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "packaging", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pluggy", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pygments", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "tomli", marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
 wheels = [
@@ -1560,7 +1586,7 @@ dependencies = [
     { name = "exceptiongroup", marker = "python_full_version == '3.10.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "iniconfig", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "packaging", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pluggy", version = "1.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pluggy", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "pygments", marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "tomli", marker = "python_full_version == '3.10.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
@@ -1569,47 +1595,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
 ]
 
-[[package]]
-name = "pytest-cov"
-version = "5.0.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version < '3.9'",
-]
-dependencies = [
-    { name = "coverage", version = "7.6.1", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/74/67/00efc8d11b630c56f15f4ad9c7f9223f1e5ec275aaae3fa9118c6a223ad2/pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857", size = 63042, upload-time = "2024-03-24T20:16:34.856Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652", size = 21990, upload-time = "2024-03-24T20:16:32.444Z" },
-]
-
 [[package]]
 name = "pytest-cov"
 version = "7.0.0"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-]
 dependencies = [
-    { name = "coverage", version = "7.10.7", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "coverage", version = "7.10.7", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "coverage", version = "7.13.4", source = { registry = "https://pypi.org/simple" }, extra = ["toml"], marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pluggy", version = "1.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
-    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "pluggy" },
+    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
     { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
@@ -1622,13 +1616,28 @@ name = "python-dateutil"
 version = "2.9.0.post0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "six", marker = "python_full_version >= '3.9'" },
+    { name = "six" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
 ]
 
+[[package]]
+name = "python-discovery"
+version = "1.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock", version = "3.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "filelock", version = "3.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "platformdirs", version = "4.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "platformdirs", version = "4.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a6/12/38c1a0b1e64806780c9563e3fc9f6e472251839662587cfbe9bfaf2ae10a/python_discovery-1.4.0.tar.gz", hash = "sha256:eb8bc7daad3c226c147e45bb4e970a1feb1bf4048ee178e6db59e197b8010ce3", size = 68455, upload-time = "2026-05-28T01:15:37.639Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c8/8d/3d316429f65029532bb1e28ff77b797d86b5ac3915bb44ca4e19aa283d43/python_discovery-1.4.0-py3-none-any.whl", hash = "sha256:26ed78d703e234879a66244c7d4114563fb13ec5cd30a2d1357e5fb4850782da", size = 33217, upload-time = "2026-05-28T01:15:36.573Z" },
+]
+
 [[package]]
 name = "pytz"
 version = "2025.2"
@@ -1638,6 +1647,104 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
 ]
 
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227, upload-time = "2025-09-25T21:31:46.04Z" },
+    { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019, upload-time = "2025-09-25T21:31:47.706Z" },
+    { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646, upload-time = "2025-09-25T21:31:49.21Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793, upload-time = "2025-09-25T21:31:50.735Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293, upload-time = "2025-09-25T21:31:51.828Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872, upload-time = "2025-09-25T21:31:53.282Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828, upload-time = "2025-09-25T21:31:54.807Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415, upload-time = "2025-09-25T21:31:55.885Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561, upload-time = "2025-09-25T21:31:57.406Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" },
+    { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" },
+    { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" },
+    { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" },
+    { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
+    { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
+    { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
+    { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
+    { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
+    { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
+    { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
+    { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
+    { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
+    { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
+    { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+    { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+    { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
+    { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
+    { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
+    { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
+    { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
+    { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/62/67fc8e68a75f738c9200422bf65693fb79a4cd0dc5b23310e5202e978090/pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da", size = 184450, upload-time = "2025-09-25T21:33:00.618Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/92/861f152ce87c452b11b9d0977952259aa7df792d71c1053365cc7b09cc08/pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917", size = 174319, upload-time = "2025-09-25T21:33:02.086Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/cd/f0cfc8c74f8a030017a2b9c771b7f47e5dd702c3e28e5b2071374bda2948/pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9", size = 737631, upload-time = "2025-09-25T21:33:03.25Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/b2/18f2bd28cd2055a79a46c9b0895c0b3d987ce40ee471cecf58a1a0199805/pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5", size = 836795, upload-time = "2025-09-25T21:33:05.014Z" },
+    { url = "https://files.pythonhosted.org/packages/73/b9/793686b2d54b531203c160ef12bec60228a0109c79bae6c1277961026770/pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a", size = 750767, upload-time = "2025-09-25T21:33:06.398Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/86/a137b39a611def2ed78b0e66ce2fe13ee701a07c07aebe55c340ed2a050e/pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926", size = 727982, upload-time = "2025-09-25T21:33:08.708Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/62/71c27c94f457cf4418ef8ccc71735324c549f7e3ea9d34aba50874563561/pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7", size = 755677, upload-time = "2025-09-25T21:33:09.876Z" },
+    { url = "https://files.pythonhosted.org/packages/29/3d/6f5e0d58bd924fb0d06c3a6bad00effbdae2de5adb5cda5648006ffbd8d3/pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0", size = 142592, upload-time = "2025-09-25T21:33:10.983Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/0c/25113e0b5e103d7f1490c0e947e303fe4a696c10b501dea7a9f49d4e876c/pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007", size = 158777, upload-time = "2025-09-25T21:33:15.55Z" },
+]
+
+[[package]]
+name = "ruff"
+version = "0.15.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/84/6f/a76f7d96e5c962f5b69cee865e49c15c1116897c01990faa8a57edb62e7f/ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6", size = 4706985, upload-time = "2026-05-28T14:16:57.784Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/9d/3a45c05b8ab04b4705989de70a79008e27c8003296a0feaee9edc18dd7e9/ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b", size = 10710652, upload-time = "2026-05-28T14:16:06.701Z" },
+    { url = "https://files.pythonhosted.org/packages/05/66/da974431624bf3b49f6ee1f9543c02d929ff1cba78b0d5a79c38cf21f744/ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e", size = 11096615, upload-time = "2026-05-28T14:16:23.313Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/09/7443452e5d290230a712103f2fdceeef7184f3ec99a2bd01c8be78aaceb5/ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530", size = 10436683, upload-time = "2026-05-28T14:16:40.974Z" },
+    { url = "https://files.pythonhosted.org/packages/53/01/d330c26a57fa4f3943a14424904027428315b700fe4d14a84bb123a649e5/ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4", size = 10769064, upload-time = "2026-05-28T14:16:28.905Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/85/cc8770f8bdff541b1da8392d1634141fe4a0e3f4ee596605959b7906c27f/ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f", size = 10511987, upload-time = "2026-05-28T14:16:43.732Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/29/8c190c1472b63013583ba391f3342036e02010544c1270455ed8e519bdf3/ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622", size = 11275100, upload-time = "2026-05-28T14:16:55.244Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/6b/7e145ce2cc8e63d6834eca03d83a0e18d121def5c69f91b4cf4011ed4879/ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45", size = 12176903, upload-time = "2026-05-28T14:16:14.368Z" },
+    { url = "https://files.pythonhosted.org/packages/80/a3/d5974637f68e451f7fadf015cf3101d1cd7d8ba5027cffe0b9e3826ebe6b/ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627", size = 11404550, upload-time = "2026-05-28T14:16:20.138Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/1c/e6e5e568f22be4fb05d6244234aba384c06b451252453b821e1a529263cf/ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4", size = 11382027, upload-time = "2026-05-28T14:16:46.615Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/01/170921b49fcd2e8858825593f91cf7146c3e40a5c3e6df763e4bb0484dde/ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c", size = 11366041, upload-time = "2026-05-28T14:16:26.247Z" },
+    { url = "https://files.pythonhosted.org/packages/87/54/a7bad711d7de93254e15e06a4c375b89a03d18de45d3e5dcc86a4472fb1a/ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd", size = 10741795, upload-time = "2026-05-28T14:16:17.11Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/31/38c075963668f8b41c6914ee0f6f318727fbe30ab9145cb29e6df464c5fa/ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f", size = 10511117, upload-time = "2026-05-28T14:16:31.767Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/96/6ff689e1f7e375d1d97075eca022f74c2bab59554a432fe4d2e6f091986a/ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb", size = 10994867, upload-time = "2026-05-28T14:16:35.149Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/c2/5dce0ab9f92a8d534fa62b9bf9caca3eddb8c1a81b616f5e195ada4f0d6e/ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a", size = 11482101, upload-time = "2026-05-28T14:16:49.598Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/c0/1003b60edd697c649faf61f1a34094b1abb38fb3d1181e3f895781250a08/ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9", size = 10716774, upload-time = "2026-05-28T14:16:52.337Z" },
+    { url = "https://files.pythonhosted.org/packages/02/a8/1269eddd6945a06c23f055ef7848886e37cf9d6a8bebb386a3115f01470c/ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4", size = 11868463, upload-time = "2026-05-28T14:16:11.333Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/b2/920464c907b191e37469d477a1aa8bc048b8f36c4c1610dfa4ab87b39e18/ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7", size = 11138498, upload-time = "2026-05-28T14:16:38.425Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -1665,38 +1772,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/13/90/4cf168c31b804e628f11238eb370dcb8a6b3f09e7e7e793a5d192cbef3be/sqlglot-26.30.0-py3-none-any.whl", hash = "sha256:7e6db3a4c4a7c421413339027b2166cfae4504b785dfabcfceb47f5c813ba8d0", size = 472603, upload-time = "2025-06-21T11:06:22.101Z" },
 ]
 
-[[package]]
-name = "tenacity"
-version = "8.5.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version < '3.9'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a3/4d/6a19536c50b849338fcbe9290d562b52cbdcf30d8963d3588a68a4107df1/tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78", size = 47309, upload-time = "2024-07-05T07:25:31.836Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" },
-]
-
 [[package]]
 name = "tenacity"
 version = "9.1.2"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-]
 sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" },
@@ -1789,38 +1868,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
 ]
 
-[[package]]
-name = "typing-extensions"
-version = "4.13.2"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version < '3.9'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" },
-]
-
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra == 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version >= '3.11' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.10.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-    "python_full_version == '3.9.*' and extra != 'extra-9-lakebench-sail' and extra != 'extra-9-lakebench-spark'",
-]
 sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
@@ -1835,6 +1886,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
 ]
 
+[[package]]
+name = "virtualenv"
+version = "21.4.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "distlib" },
+    { name = "filelock", version = "3.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "filelock", version = "3.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "platformdirs", version = "4.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "platformdirs", version = "4.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+    { name = "python-discovery" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-lakebench-sail' and extra == 'extra-9-lakebench-spark')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/f0/b47ecf438211a25a97f8f0e4b23c22bc2496ebfea18dd6ec16210f09cc36/virtualenv-21.4.1.tar.gz", hash = "sha256:2ca543c713b72840ceffd94e9bdedfbd09a661defa1f7f69e5429ad4059442e2", size = 7613344, upload-time = "2026-05-28T04:12:49.905Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ff/dc/ac4f3a987a87e1a18556896f257c4e15c95ed157b7975347ec6b313b75ce/virtualenv-21.4.1-py3-none-any.whl", hash = "sha256:caf4ff72d1b4039057f41d8e8466e859513d67c0400d9c6b62c02c9d1ebc3e12", size = 7594078, upload-time = "2026-05-28T04:12:47.686Z" },
+]
+
 [[package]]
 name = "wrapt"
 version = "2.1.1"