From 0996dbcc6375038aa7c407ef4c705ef9414547c0 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Thu, 26 Mar 2026 21:01:53 -0700
Subject: [PATCH 1/5] update workflows

---
 .github/workflows/publish-pypi.yml | 52 +++++++++++++++++++++
 .github/workflows/run-tests.yml    | 73 ++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 .github/workflows/publish-pypi.yml
 create mode 100644 .github/workflows/run-tests.yml

diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
new file mode 100644
index 0000000..405fee0
--- /dev/null
+++ b/.github/workflows/publish-pypi.yml
@@ -0,0 +1,52 @@
+name: Publish to PyPI
+
+on:
+  push:
+    tags: "*"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      repository-projects: write
+      contents: write
+      pages: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox
+
+      - name: Test with tox
+        run: |
+          tox
+
+      - name: Build project
+        run: |
+          python -m tox -e clean,build
+
+      # This uses the trusted publisher workflow so no token is required.
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+
+      - name: Build docs
+        run: |
+          tox -e docs
+
+      - run: touch ./docs/_build/html/.nojekyll
+
+      - name: GH Pages Deployment
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          branch: gh-pages # The branch the action should deploy to.
+          folder: ./docs/_build/html
+          clean: true # Automatically remove deleted files from the deploy branch
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
new file mode 100644
index 0000000..01f4e9a
--- /dev/null
+++ b/.github/workflows/run-tests.yml
@@ -0,0 +1,73 @@
+name: Test the library
+
+on:
+  push:
+    branches:
+      - master # for legacy repos
+      - main
+  pull_request:
+    branches:
+      - master # for legacy repos
+      - main
+  workflow_dispatch: # Allow manually triggering the workflow
+  schedule:
+    # Run roughly every 15 days at 00:00 UTC
+    # (useful to check if updates on dependencies break the package)
+    - cron: "0 0 1,16 * *"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: >-
+    ${{ github.workflow }}-${{ github.ref_type }}-
+    ${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    strategy:
+      matrix:
+        python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+        platform:
+          - ubuntu-latest
+          # - macos-latest
+          # - windows-latest
+    runs-on: ${{ matrix.platform }}
+    name: Python ${{ matrix.python }}, ${{ matrix.platform }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        id: setup-python
+        with:
+          python-version: ${{ matrix.python }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox coverage
+
+      - name: Run tests
+        run: >-
+          pipx run --python '${{ steps.setup-python.outputs.python-path }}'
+          tox
+          -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml  # pytest args
+
+      - name: Check for codecov token availability
+        id: codecov-check
+        shell: bash
+        run: |
+          if [ "${{ secrets.CODECOV_TOKEN }}" != '' ]; then
+            echo "codecov=true" >> "$GITHUB_OUTPUT";
+          else
+            echo "codecov=false" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Upload coverage reports to Codecov with GitHub Action
+        uses: codecov/codecov-action@v5
+        if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: ${{ github.repository }}
+          flags: ${{ matrix.platform }}-py${{ matrix.python }}

From 059ad4582842c8f0c53c0a43839a800a470cd699 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Thu, 26 Mar 2026 21:25:29 -0700
Subject: [PATCH 2/5] skip time taking tests

---
 tests/test_integration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_integration.py b/tests/test_integration.py
index c6c91f5..6e98d46 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -9,8 +9,8 @@
 from expressionatlas import ExpressionAtlasClient
 from expressionatlas.validation import is_valid_accession
 
-
 @pytest.mark.integration
+@pytest.mark.skip("takes too long")
 class TestExpressionAtlasClientIntegration:
     """Integration tests for ExpressionAtlasClient."""
 

From bfa85c8d4a9daf0d309fb85e8efb457857de6fce Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Thu, 26 Mar 2026 21:26:57 -0700
Subject: [PATCH 3/5] cleaning up imports

---
 src/expressionatlas/__init__.py | 3 ---
 src/expressionatlas/client.py   | 2 +-
 src/expressionatlas/download.py | 3 +--
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/expressionatlas/__init__.py b/src/expressionatlas/__init__.py
index ad588c0..abea65e 100644
--- a/src/expressionatlas/__init__.py
+++ b/src/expressionatlas/__init__.py
@@ -26,7 +26,6 @@
 finally:
     del version, PackageNotFoundError
 
-
 from expressionatlas.client import ExpressionAtlasClient
 from expressionatlas.download import (
     get_atlas_data,
@@ -41,5 +40,3 @@
     InvalidAccessionError,
 )
 from expressionatlas.models import SearchResult
-from summarizedexperiment import SummarizedExperiment
-from biocutils import NamedList
diff --git a/src/expressionatlas/client.py b/src/expressionatlas/client.py
index a37c612..b694239 100644
--- a/src/expressionatlas/client.py
+++ b/src/expressionatlas/client.py
@@ -6,11 +6,11 @@
 from collections.abc import Sequence
 
 import pandas as pd
+from biocutils import NamedList
 
 from expressionatlas.api import BioStudiesAPI
 from expressionatlas.download import get_atlas_data, get_atlas_experiment
 from expressionatlas.models import search_results_to_dataframe
-from biocutils import NamedList
 from expressionatlas.validation import validate_accession
 
 logger = logging.getLogger(__name__)
diff --git a/src/expressionatlas/download.py b/src/expressionatlas/download.py
index 85b7d46..3c59885 100644
--- a/src/expressionatlas/download.py
+++ b/src/expressionatlas/download.py
@@ -19,9 +19,8 @@
 
 import numpy as np
 import pandas as pd
-
-from biocutils import NamedList
 from biocframe import BiocFrame
+from biocutils import NamedList
 from summarizedexperiment import SummarizedExperiment
 
 from expressionatlas.exceptions import DownloadError

From c475c46048b382d47ee65f8904a8fe6dfcf65808 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Mon, 30 Mar 2026 11:05:25 -0700
Subject: [PATCH 4/5] replace pandas with biocframe

---
 README.md                        |  51 ++--
 setup.cfg                        |   1 -
 src/expressionatlas/client.py    |  30 ++-
 src/expressionatlas/converter.py | 410 -------------------------------
 src/expressionatlas/download.py  | 123 ++++++----
 src/expressionatlas/models.py    |  33 ++-
 tests/test_integration.py        |  18 +-
 tests/test_models.py             |  29 +--
 8 files changed, 167 insertions(+), 528 deletions(-)
 delete mode 100644 src/expressionatlas/converter.py

diff --git a/README.md b/README.md
index f2ef102..19bb2a7 100644
--- a/README.md
+++ b/README.md
@@ -37,10 +37,18 @@ results = client.search_experiments(
     properties=["cancer", "breast"],
     species="homo sapiens"
 )
-print(results.head())
-#   Accession        Species                Type  ...
-# 0 E-MTAB-1624  homo sapiens  microarray data  ...
+print(results)
 ```
+    BiocFrame with 208 rows and 4 columns
+            Accession      Species                    Type                   Title
+                <list>       <list>                  <list>                  <list>
+    [0]  E-MTAB-8198 Homo sapiens Cell line - High-thr... Functional effect of...
+    [1]  E-MTAB-8532 Homo sapiens Human - One-color mi... DNA microarray studi...
+    [2] E-GEOD-43306 Homo sapiens   RNA-seq of coding RNA Translating transcri...
+                ...          ...                     ...                     ...
+    [205]   E-MTAB-779 Homo sapiens transcription profil... OncomiRs like let-7 ...
+    [206]  E-TABM-1118 Homo sapiens transcription profil... Transcrption profili...
+    [207]   E-TABM-601 Homo sapiens transcription profil... Transcription profil...
 
 ### Download RNA-seq Data
 
@@ -53,44 +61,35 @@ rnaseq = exp["rnaseq"]
 counts = rnaseq.assay("counts")  # numpy array: genes × samples
 
 print(f"Shape: {counts.shape[0]} genes × {counts.shape[1]} samples")
-# Shape: 58735 genes × 48 samples
+# Shape: 58735 genes × 24 samples
 
 # Sample metadata (BiocFrame)
 sample_info = rnaseq.get_column_data()
 print(sample_info.get_column_names())
+# ['cell line', 'compound', 'developmental stage', 'disease', 'dose', 'genotype', 'organism', 'organism part']
 
 # Gene annotations (BiocFrame)
 gene_info = rnaseq.get_row_data()
 print(gene_info.shape)
-```
-
-### Download Microarray Data
-
-```python
-exp = client.get_experiment("E-MTAB-1624")
-
-# Microarray data is keyed by array design
-array_design = "A-AFFY-126"
-eset = exp[array_design] # This is also a SummarizedExperiment now
+# (58735, 1)
 
-# Expression matrix (probes × samples)
-intensities = eset.assay("exprs")
-print(intensities.shape)
-# (54675, 96)
-
-# Sample metadata (BiocFrame)
-sample_annotations = eset.get_column_data()
-print(sample_annotations.shape)
-
-# Feature annotations (BiocFrame)
-probe_annotations = eset.get_row_data()
+print(rnaseq)
 ```
+    class: SummarizedExperiment
+    dimensions: (58735, 24)
+    assays(1): ['counts']
+    row_data columns(1): ['Gene Name']
+    row_names(58735): ['ENSG00000000003', 'ENSG00000000005', 'ENSG00000000419', ..., 'ENSG00000285992', 'ENSG00000285993', 'ENSG00000285994']
+    column_data columns(8): ['cell line', 'compound', 'developmental stage', 'disease', 'dose', 'genotype', 'organism', 'organism part']
+    column_names(24): ['ERR3456453', 'ERR3456442', 'ERR3456443', ..., 'ERR3456450', 'ERR3456459', 'ERR3456444']
+    metadata(2): accession source
+
 
 ### Batch Downloads
 
 ```python
 # Download multiple experiments
-accessions = results["Accession"].head(10).tolist()
+accessions = results.get_column("Accession")[:10]
 experiments = client.get_experiments(accessions)
 
 # Access individual experiments
diff --git a/setup.cfg b/setup.cfg
index b889390..36c0dfc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,7 +50,6 @@ python_requires = >=3.9
 install_requires =
     importlib-metadata; python_version<"3.8"
     requests
-    pandas
     numpy
     biocframe
     summarizedexperiment
diff --git a/src/expressionatlas/client.py b/src/expressionatlas/client.py
index b694239..c4e4c87 100644
--- a/src/expressionatlas/client.py
+++ b/src/expressionatlas/client.py
@@ -5,12 +5,12 @@
 import logging
 from collections.abc import Sequence
 
-import pandas as pd
+from biocframe import BiocFrame
 from biocutils import NamedList
 
 from expressionatlas.api import BioStudiesAPI
 from expressionatlas.download import get_atlas_data, get_atlas_experiment
-from expressionatlas.models import search_results_to_dataframe
+from expressionatlas.models import search_results_to_biocframe
 from expressionatlas.validation import validate_accession
 
 logger = logging.getLogger(__name__)
@@ -74,7 +74,7 @@ def search_experiments(
         self,
         properties: str | Sequence[str],
         species: str | None = None,
-    ) -> pd.DataFrame:
+    ) -> BiocFrame:
         """
         Search for Expression Atlas experiments matching given criteria.
 
@@ -90,8 +90,8 @@ def search_experiments(
 
         Returns
         -------
-        pandas.DataFrame
-            DataFrame with columns: Accession, Species, Type, Title.
+        BiocFrame
+            BiocFrame with columns: Accession, Species, Type, Title.
             Sorted by Species, Type, then Accession.
 
         Raises
@@ -130,8 +130,8 @@ def search_experiments(
 
         results = self.api.search(properties=list(properties), species=species)
 
-        # Filter out connection errors and convert to DataFrame
-        df = search_results_to_dataframe(results)
+        # Filter out connection errors and convert to BiocFrame
+        df = search_results_to_biocframe(results)
 
         # Log warning if any connection errors occurred
         error_count = sum(1 for r in results if r.connection_error)
@@ -232,16 +232,14 @@ def get_experiments(
         ...     species="homo sapiens",
         ... )
         >>> # Download all RNA-seq experiments from search results
-        >>> rnaseq_accessions = results[
-        ...     results[
-        ...         "Type"
-        ...     ].str.contains(
-        ...         "RNA-seq",
-        ...         na=False,
-        ...     )
-        ... ]["Accession"]
+        >>> types = results.get_column("Type")
+        >>> accessions = results.get_column("Accession")
+        >>> rnaseq_accessions = [
+        ...     acc for acc, typ in zip(accessions, types)
+        ...     if typ and "RNA-seq" in typ
+        ... ]
         >>> experiments = client.get_experiments(
-        ...     rnaseq_accessions.tolist()
+        ...     rnaseq_accessions
         ... )
         >>> # Access: experiments["E-MTAB-XXXX"]["rnaseq"].assays["counts"]
         """
diff --git a/src/expressionatlas/converter.py b/src/expressionatlas/converter.py
deleted file mode 100644
index fcad96a..0000000
--- a/src/expressionatlas/converter.py
+++ /dev/null
@@ -1,410 +0,0 @@
-"""Client for the Expression Atlas RData Converter AWS service."""
-
-from __future__ import annotations
-
-import io
-import json
-import logging
-import os
-import tempfile
-import zipfile
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any
-from urllib.request import Request, urlopen
-
-import numpy as np
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-class ConverterError(Exception):
-    """Error from the converter service."""
-
-    def __init__(self, message: str, status_code: int | None = None):
-        super().__init__(message)
-        self.status_code = status_code
-
-
-@dataclass
-class ConvertedBundle:
-    """Container for converted Expression Atlas data."""
-
-    # Expression matrix (genes x samples)
-    matrix: np.ndarray | None = None
-
-    # Gene annotations
-    genes: pd.DataFrame = field(default_factory=pd.DataFrame)
-
-    # Sample annotations
-    samples: pd.DataFrame = field(default_factory=pd.DataFrame)
-
-    # Metadata from conversion
-    meta: dict[str, Any] = field(default_factory=dict)
-
-    # Row names (gene IDs)
-    rownames: list[str] = field(default_factory=list)
-
-    # Column names (sample IDs)
-    colnames: list[str] = field(default_factory=list)
-
-    @property
-    def shape(self) -> tuple[int, int]:
-        """Return (n_genes, n_samples) shape."""
-        if self.matrix is not None:
-            return self.matrix.shape
-        return (len(self.rownames), len(self.colnames))
-
-
-class ConverterClient:
-    """
-    Client for the Expression Atlas RData Converter service (AWS).
-
-    This client calls the AWS App Runner/ECS service to convert .RData files
-    to portable formats that Python can read without R.
-
-    Parameters
-    ----------
-    service_url : str, optional
-        URL of the converter service. Defaults to CONVERTER_URL env var.
-    use_iam_auth : bool
-        If True, use AWS IAM authentication (SigV4).
-        If False, use API key from CONVERTER_API_KEY env var.
-    cache_dir : Path, optional
-        Directory to cache downloaded bundles. Defaults to temp dir.
-    timeout : int
-        Request timeout in seconds.
-
-    Examples
-    --------
-    >>> client = ConverterClient(
-    ...     use_iam_auth=False
-    ... )
-    >>> bundle = client.convert_and_load(
-    ...     "ftp://ftp.ebi.ac.uk/.../E-MTAB-7841-atlasExperimentSummary.Rdata",
-    ...     "E-MTAB-7841",
-    ... )
-    >>> print(
-    ...     bundle.matrix.shape
-    ... )
-    (58735, 48)
-    """
-
-    def __init__(
-        self,
-        service_url: str | None = None,
-        use_iam_auth: bool = False,
-        cache_dir: Path | None = None,
-        timeout: int = 600,
-    ):
-        self.service_url = service_url or os.environ.get("CONVERTER_URL", "")
-        self.use_iam_auth = use_iam_auth
-        self.cache_dir = cache_dir or Path(tempfile.gettempdir()) / "atlas_converter_cache"
-        self.timeout = timeout
-
-        if not self.service_url:
-            logger.warning(
-                "CONVERTER_URL not set. Converter client will not work. "
-                "Set CONVERTER_URL environment variable or pass service_url parameter."
-            )
-
-    def _get_auth_headers(self) -> dict[str, str]:
-        """Get authentication headers for the request."""
-        headers = {"Content-Type": "application/json"}
-
-        if self.use_iam_auth:
-            # Use AWS SigV4 signing for IAM auth
-            try:
-                from botocore.auth import SigV4Auth
-                from botocore.awsrequest import AWSRequest
-                from botocore.session import Session
-
-                session = Session()
-                credentials = session.get_credentials()
-                if credentials:
-                    # Create a request to sign
-                    aws_request = AWSRequest(
-                        method="POST",
-                        url=f"{self.service_url.rstrip('/')}/convert",
-                        headers=headers,
-                    )
-                    SigV4Auth(credentials, "execute-api", os.environ.get("AWS_REGION", "us-east-1")).add_auth(
-                        aws_request
-                    )
-                    headers.update(dict(aws_request.headers))
-            except ImportError:
-                logger.warning("botocore not installed. Cannot use IAM auth. " "Install with: pip install botocore")
-            except Exception as e:
-                logger.warning(f"Failed to sign request: {e}")
-        else:
-            # Use API key
-            api_key = os.environ.get("CONVERTER_API_KEY", "")
-            if api_key:
-                headers["X-API-Key"] = api_key
-
-        return headers
-
-    def convert(
-        self,
-        rdata_url: str,
-        accession: str,
-        output_format: str = "mtx_bundle",
-        assay_name: str | None = None,
-        force: bool = False,
-    ) -> dict[str, Any]:
-        """
-        Request conversion of an .RData file.
-
-        Parameters
-        ----------
-        rdata_url : str
-            URL to the .RData file.
-        accession : str
-            Experiment accession (e.g., E-MTAB-7841).
-        output_format : str
-            Output format (mtx_bundle or tsv_bundle).
-        assay_name : str, optional
-            Specific assay to extract.
-        force : bool
-            Force re-conversion even if cached.
-
-        Returns
-        -------
-        dict
-            Response from the converter service including signed_url.
-        """
-        if not self.service_url:
-            raise ConverterError("CONVERTER_URL not configured")
-
-        endpoint = f"{self.service_url.rstrip('/')}/convert"
-
-        payload = {
-            "rdata_url": rdata_url,
-            "accession": accession,
-            "output_format": output_format,
-            "force": force,
-        }
-        if assay_name:
-            payload["assay_name"] = assay_name
-
-        headers = self._get_auth_headers()
-
-        logger.info(f"Requesting conversion for {accession}")
-
-        try:
-            req = Request(
-                endpoint,
-                data=json.dumps(payload).encode("utf-8"),
-                headers=headers,
-                method="POST",
-            )
-            with urlopen(req, timeout=self.timeout) as response:
-                result = json.loads(response.read().decode("utf-8"))
-
-            if result.get("status") == "error":
-                raise ConverterError(result.get("detail", result.get("error", "Unknown error")))
-
-            logger.info(f"Conversion {'cache hit' if result.get('cache_hit') else 'complete'} " f"for {accession}")
-            return result
-
-        except Exception as e:
-            if isinstance(e, ConverterError):
-                raise
-            raise ConverterError(f"Request failed: {e}") from e
-
-    def download_bundle(self, signed_url: str, accession: str) -> Path:
-        """
-        Download converted bundle from signed URL.
-
-        Parameters
-        ----------
-        signed_url : str
-            Signed URL from convert() response.
-        accession : str
-            Experiment accession (for cache path).
-
-        Returns
-        -------
-        Path
-            Path to extracted bundle directory.
-        """
-        # Create cache directory
-        bundle_dir = self.cache_dir / accession
-        bundle_dir.mkdir(parents=True, exist_ok=True)
-
-        logger.info(f"Downloading bundle for {accession}")
-
-        try:
-            with urlopen(signed_url, timeout=self.timeout) as response:
-                zip_data = response.read()
-
-            # Extract zip
-            with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
-                zf.extractall(bundle_dir)
-
-            logger.info(f"Bundle extracted to {bundle_dir}")
-            return bundle_dir
-
-        except Exception as e:
-            raise ConverterError(f"Failed to download bundle: {e}") from e
-
-    def load_bundle(self, bundle_dir: Path) -> dict[str, ConvertedBundle]:
-        """
-        Load converted data from bundle directory.
-
-        Parameters
-        ----------
-        bundle_dir : Path
-            Path to extracted bundle directory.
-
-        Returns
-        -------
-        dict[str, ConvertedBundle]
-            Dict mapping dataset name to ConvertedBundle.
-        """
-        results = {}
-
-        # Find all dataset directories
-        for item in bundle_dir.iterdir():
-            if item.is_dir() and item.name.startswith("dataset_"):
-                dataset_name = item.name.replace("dataset_", "")
-                results[dataset_name] = self._load_dataset(item)
-
-        # Load metadata
-        meta_path = bundle_dir / "meta.json"
-        if meta_path.exists():
-            with open(meta_path) as f:
-                meta = json.load(f)
-            # Attach to each bundle
-            for bundle in results.values():
-                bundle.meta = meta
-
-        return results
-
-    def _load_dataset(self, dataset_dir: Path) -> ConvertedBundle:
-        """Load a single dataset from directory."""
-        bundle = ConvertedBundle()
-
-        # Load matrix
-        mtx_path = dataset_dir / "matrix.mtx"
-        tsv_path = dataset_dir / "counts.tsv.gz"
-
-        if mtx_path.exists():
-            bundle.matrix = self._load_mtx(mtx_path)
-        elif tsv_path.exists():
-            df = pd.read_csv(tsv_path, sep="\t", index_col=0, compression="gzip")
-            bundle.matrix = df.values
-            bundle.rownames = df.index.tolist()
-            bundle.colnames = df.columns.tolist()
-
-        # Load row/column names from separate files if MTX format
-        barcodes_path = dataset_dir / "barcodes.tsv"
-        features_path = dataset_dir / "features.tsv"
-
-        if barcodes_path.exists():
-            bundle.colnames = pd.read_csv(barcodes_path, header=None)[0].tolist()
-        if features_path.exists():
-            bundle.rownames = pd.read_csv(features_path, header=None)[0].tolist()
-
-        # Load genes (rowData)
-        genes_path = dataset_dir / "genes.csv"
-        if genes_path.exists():
-            bundle.genes = pd.read_csv(genes_path, index_col=0)
-            if not bundle.rownames:
-                bundle.rownames = bundle.genes.index.tolist()
-
-        # Load samples (colData)
-        samples_path = dataset_dir / "samples.csv"
-        if samples_path.exists():
-            bundle.samples = pd.read_csv(samples_path, index_col=0)
-            if not bundle.colnames:
-                bundle.colnames = bundle.samples.index.tolist()
-
-        return bundle
-
-    def _load_mtx(self, mtx_path: Path) -> np.ndarray:
-        """Load Matrix Market file."""
-        try:
-            from scipy.io import mmread
-
-            sparse_matrix = mmread(str(mtx_path))
-            return sparse_matrix.toarray()
-        except ImportError:
-            logger.warning("scipy not installed, cannot load MTX files efficiently")
-            # Fallback: manual parsing (slow)
-            return self._parse_mtx_manual(mtx_path)
-
-    def _parse_mtx_manual(self, mtx_path: Path) -> np.ndarray:
-        """Parse MTX file manually (fallback if scipy not available)."""
-        with open(mtx_path) as f:
-            # Skip comments
-            line = f.readline()
-            while line.startswith("%"):
-                line = f.readline()
-
-            # Read dimensions
-            parts = line.strip().split()
-            nrows, ncols, _ = int(parts[0]), int(parts[1]), int(parts[2])
-
-            # Create dense matrix
-            matrix = np.zeros((nrows, ncols))
-
-            # Read entries
-            for line in f:
-                parts = line.strip().split()
-                if len(parts) >= 3:
-                    i, j, val = int(parts[0]) - 1, int(parts[1]) - 1, float(parts[2])
-                    matrix[i, j] = val
-
-        return matrix
-
-    def convert_and_load(
-        self,
-        rdata_url: str,
-        accession: str,
-        output_format: str = "mtx_bundle",
-        assay_name: str | None = None,
-        force: bool = False,
-    ) -> dict[str, ConvertedBundle]:
-        """
-        Convert .RData and load the result in one call.
-
-        Parameters
-        ----------
-        rdata_url : str
-            URL to the .RData file.
-        accession : str
-            Experiment accession.
-        output_format : str
-            Output format.
-        assay_name : str, optional
-            Specific assay to extract.
-        force : bool
-            Force re-conversion.
-
-        Returns
-        -------
-        dict[str, ConvertedBundle]
-            Dict mapping dataset name to ConvertedBundle.
-        """
-        # Check local cache first
-        bundle_dir = self.cache_dir / accession
-        meta_path = bundle_dir / "meta.json"
-
-        if not force and meta_path.exists():
-            logger.info(f"Loading from local cache: {bundle_dir}")
-            return self.load_bundle(bundle_dir)
-
-        # Request conversion
-        result = self.convert(rdata_url, accession, output_format, assay_name, force)
-
-        # Download and extract
-        bundle_dir = self.download_bundle(result["signed_url"], accession)
-
-        # Load and return
-        return self.load_bundle(bundle_dir)
-
-    def is_configured(self) -> bool:
-        """Check if the converter client is properly configured."""
-        return bool(self.service_url)
diff --git a/src/expressionatlas/download.py b/src/expressionatlas/download.py
index 3c59885..1a1c18e 100644
--- a/src/expressionatlas/download.py
+++ b/src/expressionatlas/download.py
@@ -10,15 +10,16 @@
 
 from __future__ import annotations
 
+import csv
 import io
 import logging
 import tempfile
 from pathlib import Path
 from urllib.error import URLError
 from urllib.request import urlopen
+from typing import TypedDict, Dict, List, Optional, Any
 
 import numpy as np
-import pandas as pd
 from biocframe import BiocFrame
 from biocutils import NamedList
 from summarizedexperiment import SummarizedExperiment
@@ -216,15 +217,25 @@ def _download_tsv_fallback(accession: str) -> NamedList:
     raise DownloadError(accession, "No TSV or RDS data files found.")
 
 
-def _download_tsv(url: str) -> pd.DataFrame:
-    """Download and parse a TSV file from URL."""
+def _download_tsv(url: str) -> dict[str, list[str | None]]:
+    """Download and parse a TSV file from URL into a column-oriented dictionary."""
     logger.debug(f"Downloading: {url}")
     with urlopen(url, timeout=60) as response:
         content = response.read().decode("utf-8")
-    return pd.read_csv(io.StringIO(content), sep="\t")
-
-
-def _try_download_sdrf(url: str) -> pd.DataFrame | None:
+
+    # An empty body yields no header, hence an empty dict (no StopIteration leak).
+    reader = csv.reader(io.StringIO(content), delimiter="\t")
+    header = next(reader, [])
+    data: dict[str, list[str | None]] = {name: [] for name in header}
+    for row in reader:
+        if not row:
+            continue  # blank line: pandas.read_csv skipped these as well
+        for i, name in enumerate(header):
+            data[name].append(row[i] if i < len(row) else None)  # pad short rows
+    return data
+
+
+def _try_download_sdrf(url: str) -> dict[str, dict[str, str]] | None:
     try:
         logger.debug(f"Downloading sample annotations: {url}")
         with urlopen(url, timeout=60) as response:
@@ -240,71 +251,100 @@ def _try_download_sdrf(url: str) -> pd.DataFrame | None:
         else:
             sample_idx, attr_idx, value_idx = 1, 3, 4
 
-        records: list[tuple[str, str, str]] = []
+        records: dict[str, dict[str, str]] = {}
         for line in lines:
             parts = line.split("\t")
             if len(parts) > max(sample_idx, attr_idx, value_idx):
                 sample_id = parts[sample_idx]
                 attr_name = parts[attr_idx]
                 attr_value = parts[value_idx]
-                records.append((sample_id, attr_name, attr_value))
-
-        if not records:
-            return None
-
-        df = pd.DataFrame(records, columns=["sample_id", "attribute", "value"])
-
-        result = df.pivot_table(
-            index="sample_id",
-            columns="attribute",
-            values="value",
-            aggfunc="first",
-        )
-
-        result.columns.name = None
-        result.index.name = "sample_id"
-
-        return result
+                # First value wins per (sample, attribute), matching the old
+                # pivot_table(aggfunc="first") behaviour.
+                records.setdefault(sample_id, {}).setdefault(attr_name, attr_value)
+
+        # Preserve the previous contract: no parsable rows means None.
+        if not records:
+            return None
+        return records
     except Exception as e:
         logger.debug(f"Could not download sample annotations: {e}")
         return None
 
 
 def _create_summarized_experiment_from_tsv(
-    df_data: pd.DataFrame, design_df: pd.DataFrame | None, accession: str, assay_name: str = "counts"
+    df_data: dict[str, list[str]], design_data: dict[str, dict[str, str]] | None, accession: str, assay_name: str = "counts"
 ) -> SummarizedExperiment:
     """Create SummarizedExperiment from TSV data."""
-    if df_data.empty:
+    if not df_data:
         return SummarizedExperiment()
 
-    numeric_cols = df_data.select_dtypes(include=[np.number]).columns.tolist()
-    annotation_cols = [c for c in df_data.columns if c not in numeric_cols]
+    all_cols = list(df_data.keys())
+    if not all_cols:
+        return SummarizedExperiment()
+
+    numeric_cols = []
+    annotation_cols = []
+    
+    for col in all_cols:
+        vals = df_data[col]
+        is_num = True
+        for v in vals:
+            if v is not None and v.strip() != "" and v.strip().lower() != "na":
+                try:
+                    float(v)
+                except ValueError:
+                    is_num = False
+                    break
+        if is_num:
+            numeric_cols.append(col)
+        else:
+            annotation_cols.append(col)
 
     if not numeric_cols:
         logger.warning("No numeric columns found in TSV")
         return SummarizedExperiment()
 
-    gene_col = annotation_cols[0] if annotation_cols else df_data.columns[0]
+    gene_col = annotation_cols[0] if annotation_cols else all_cols[0]
     sample_cols = numeric_cols
 
-    rownames = df_data[gene_col].tolist()
+    rownames = df_data[gene_col]
     colnames = sample_cols
 
-    assays = {assay_name: df_data[sample_cols].values.astype(np.float64)}
+    matrix_data = []
+    for c in sample_cols:
+        col_float = []
+        for v in df_data[c]:
+            if v is None or v.strip() == "" or v.strip().lower() == "na":
+                col_float.append(np.nan)
+            else:
+                col_float.append(float(v))
+        matrix_data.append(col_float)
+    
+    matrix = np.array(matrix_data, dtype=np.float64).T
+    assays = {assay_name: matrix}
 
     row_data = {}
     for col in annotation_cols:
         if col != gene_col:
-            row_data[col] = df_data[col].values.tolist()
+            row_data[col] = df_data[col]
 
     row_bioc = BiocFrame(row_data, row_names=rownames)
 
     col_data = {}
-    if design_df is not None and not design_df.empty:
-        reindexed_df = design_df.reindex(colnames)
-        for col in reindexed_df.columns:
-            col_data[col] = reindexed_df[col].values.tolist()
-
+    if design_data is not None and len(design_data) > 0:
+        all_attrs = set()
+        for s in colnames:
+            if s in design_data:
+                all_attrs.update(design_data[s].keys())
+        
+        all_attrs = sorted(list(all_attrs))
+        
+        for attr in all_attrs:
+            col_data[attr] = []
+            for s in colnames:
+                val = design_data.get(s, {}).get(attr, None)
+                col_data[attr].append(val)
+                
     col_bioc = BiocFrame(col_data, row_names=colnames)
 
     metadata = {"accession": accession, "source": "tsv"}
@@ -328,8 +368,9 @@ def _download_via_converter(rdata_url: str, accession: str) -> NamedList:
     for name, bundle in bundles.items():
         key = name.replace("dataset_", "") if name.startswith("dataset_") else name
 
-        row_bioc = BiocFrame(bundle.genes.to_dict("list"), row_names=bundle.rownames)
-        col_bioc = BiocFrame(bundle.samples.to_dict("list"), row_names=bundle.colnames)
+        # BiocFrame receives bundle.genes / bundle.samples as-is, so each must be a dict mapping column names to lists of values (TODO: confirm converter output).
+        row_bioc = BiocFrame(bundle.genes, row_names=bundle.rownames)
+        col_bioc = BiocFrame(bundle.samples, row_names=bundle.colnames)
 
         assays = {}
         if bundle.matrix is not None:
diff --git a/src/expressionatlas/models.py b/src/expressionatlas/models.py
index 74ccf84..b0c5d73 100644
--- a/src/expressionatlas/models.py
+++ b/src/expressionatlas/models.py
@@ -6,7 +6,7 @@
 from enum import Enum
 from typing import Any
 
-import pandas as pd
+from biocframe import BiocFrame
 
 
 class ExperimentType(str, Enum):
@@ -58,16 +58,27 @@ def to_dict(self) -> dict[str, Any]:
         }
 
 
-def search_results_to_dataframe(results: list[SearchResult]) -> pd.DataFrame:
-    """Convert list of SearchResult objects to a pandas DataFrame."""
+def search_results_to_biocframe(results: list[SearchResult]) -> BiocFrame:
+    """Convert list of SearchResult objects to a BiocFrame."""
+    columns = ["Accession", "Species", "Type", "Title"]
     if not results:
-        return pd.DataFrame(columns=["Accession", "Species", "Type", "Title"])
-
-    data = [r.to_dict() for r in results if not r.connection_error]
-    df = pd.DataFrame(data)
+        return BiocFrame({col: [] for col in columns}, column_names=columns)
 
+    valid_results = [r for r in results if not r.connection_error]
+    
     # Sort by Species, Type, then Accession (matching R package behavior)
-    if not df.empty:
-        df = df.sort_values(["Species", "Type", "Accession"]).reset_index(drop=True)
-
-    return df
+    valid_results.sort(
+        key=lambda r: (
+            r.species if r.species is not None else "",
+            r.experiment_type if r.experiment_type is not None else "",
+            r.accession,
+        )
+    )
+
+    data = {col: [] for col in columns}
+    for r in valid_results:
+        d = r.to_dict()
+        for col in columns:
+            data[col].append(d[col])
+
+    return BiocFrame(data, column_names=columns)
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 6e98d46..4d8cc60 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -10,7 +10,7 @@
 from expressionatlas.validation import is_valid_accession
 
 @pytest.mark.integration
-@pytest.mark.skip("takes too long")
+# @pytest.mark.skip("takes too long")
 class TestExpressionAtlasClientIntegration:
     """Integration tests for ExpressionAtlasClient."""
 
@@ -19,14 +19,14 @@ def test_search_cancer_human(self) -> None:
         client = ExpressionAtlasClient()
         results = client.search_experiments(properties=["cancer"], species="homo sapiens")
 
-        assert len(results) > 0
-        assert "Accession" in results.columns
-        assert "Species" in results.columns
-        assert "Type" in results.columns
-        assert "Title" in results.columns
+        assert results.shape[0] > 0
+        columns = results.get_column_names()
+        assert "Accession" in columns
+        assert "Species" in columns
+        assert "Type" in columns
+        assert "Title" in columns
 
-        # All accessions should be valid
-        for acc in results["Accession"]:
+        for acc in results.get_column("Accession"):
             assert is_valid_accession(acc)
 
     def test_search_salt_oryza(self) -> None:
@@ -34,7 +34,7 @@ def test_search_salt_oryza(self) -> None:
         client = ExpressionAtlasClient()
         results = client.search_experiments(properties=["salt"], species="oryza sativa")
 
-        assert len(results) > 0
+        assert results.shape[0] > 0
 
     def test_download_single_experiment(self) -> None:
         """Download a single experiment should succeed."""
diff --git a/tests/test_models.py b/tests/test_models.py
index bffceb3..e213fec 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -4,7 +4,7 @@
 from expressionatlas.models import (
     ExperimentType,
     SearchResult,
-    search_results_to_dataframe,
+    search_results_to_biocframe,
 )
 
 
@@ -65,14 +65,14 @@ def test_default_connection_error(self) -> None:
         assert result.connection_error is False
 
 
-class TestSearchResultsToDataframe:
-    """Tests for search_results_to_dataframe function."""
+class TestSearchResultsToBiocframe:
+    """Tests for search_results_to_biocframe function."""
 
     def test_empty_list(self) -> None:
-        """Empty list should return empty DataFrame with correct columns."""
-        df = search_results_to_dataframe([])
-        assert list(df.columns) == ["Accession", "Species", "Type", "Title"]
-        assert len(df) == 0
+        """Empty list should return empty BiocFrame with correct columns."""
+        bf = search_results_to_biocframe([])
+        assert list(bf.get_column_names()) == ["Accession", "Species", "Type", "Title"]
+        assert bf.shape[0] == 0
 
     def test_filters_connection_errors(self) -> None:
         """Should exclude results with connection errors."""
@@ -80,9 +80,9 @@ def test_filters_connection_errors(self) -> None:
             SearchResult("E-MTAB-1624", "Human", "RNA-seq", "Test 1"),
             SearchResult("E-MTAB-1625", None, None, None, connection_error=True),
         ]
-        df = search_results_to_dataframe(results)
-        assert len(df) == 1
-        assert df.iloc[0]["Accession"] == "E-MTAB-1624"
+        bf = search_results_to_biocframe(results)
+        assert bf.shape[0] == 1
+        assert bf.get_column("Accession")[0] == "E-MTAB-1624"
 
     def test_sorts_by_species_type_accession(self) -> None:
         """Should sort by Species, Type, then Accession."""
@@ -91,8 +91,9 @@ def test_sorts_by_species_type_accession(self) -> None:
             SearchResult("E-MTAB-1", "Human", "Array", "Test 1"),
             SearchResult("E-MTAB-3", "Human", "RNA-seq", "Test 3"),
         ]
-        df = search_results_to_dataframe(results)
+        bf = search_results_to_biocframe(results)
         # Human Array, Human RNA-seq, Zebra RNA-seq
-        assert df.iloc[0]["Accession"] == "E-MTAB-1"
-        assert df.iloc[1]["Accession"] == "E-MTAB-3"
-        assert df.iloc[2]["Accession"] == "E-MTAB-2"
+        ids = bf.get_column("Accession")
+        assert ids[0] == "E-MTAB-1"
+        assert ids[1] == "E-MTAB-3"
+        assert ids[2] == "E-MTAB-2"

From eb46703be3fba547b418778a7f0a3636217d08c6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 18:05:37 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/expressionatlas/client.py   | 18 ++++++++++++++----
 src/expressionatlas/download.py | 26 ++++++++++++++------------
 src/expressionatlas/models.py   |  2 +-
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/expressionatlas/client.py b/src/expressionatlas/client.py
index c4e4c87..9a8bcb4 100644
--- a/src/expressionatlas/client.py
+++ b/src/expressionatlas/client.py
@@ -232,11 +232,21 @@ def get_experiments(
         ...     species="homo sapiens",
         ... )
         >>> # Download all RNA-seq experiments from search results
-        >>> types = results.get_column("Type")
-        >>> accessions = results.get_column("Accession")
+        >>> types = results.get_column(
+        ...     "Type"
+        ... )
+        >>> accessions = results.get_column(
+        ...     "Accession"
+        ... )
         >>> rnaseq_accessions = [
-        ...     acc for acc, typ in zip(accessions, types) 
-        ...     if typ and "RNA-seq" in typ
+        ...     acc
+        ...     for acc, typ in zip(
+        ...         accessions,
+        ...         types,
+        ...     )
+        ...     if typ
+        ...     and "RNA-seq"
+        ...     in typ
         ... ]
         >>> experiments = client.get_experiments(
         ...     rnaseq_accessions
diff --git a/src/expressionatlas/download.py b/src/expressionatlas/download.py
index 1a1c18e..e57a581 100644
--- a/src/expressionatlas/download.py
+++ b/src/expressionatlas/download.py
@@ -17,7 +17,6 @@
 from pathlib import Path
 from urllib.error import URLError
 from urllib.request import urlopen
-from typing import TypedDict, Dict, List, Optional, Any
 
 import numpy as np
 from biocframe import BiocFrame
@@ -222,16 +221,16 @@ def _download_tsv(url: str) -> dict[str, list[str]]:
     logger.debug(f"Downloading: {url}")
     with urlopen(url, timeout=60) as response:
         content = response.read().decode("utf-8")
-    
+
     reader = csv.reader(io.StringIO(content), delimiter="\t")
     header = next(reader)
     data = {h: [] for h in header}
-    
+
     for row in reader:
         for i, h in enumerate(header):
             val = row[i] if i < len(row) else None
             data[h].append(val)
-            
+
     return data
 
 
@@ -258,10 +257,10 @@ def _try_download_sdrf(url: str) -> dict[str, dict[str, str]] | None:
                 sample_id = parts[sample_idx]
                 attr_name = parts[attr_idx]
                 attr_value = parts[value_idx]
-                
+
                 if sample_id not in records:
                     records[sample_id] = {}
-                    
+
                 if attr_name not in records[sample_id]:
                     records[sample_id][attr_name] = attr_value
 
@@ -272,7 +271,10 @@ def _try_download_sdrf(url: str) -> dict[str, dict[str, str]] | None:
 
 
 def _create_summarized_experiment_from_tsv(
-    df_data: dict[str, list[str]], design_data: dict[str, dict[str, str]] | None, accession: str, assay_name: str = "counts"
+    df_data: dict[str, list[str]],
+    design_data: dict[str, dict[str, str]] | None,
+    accession: str,
+    assay_name: str = "counts",
 ) -> SummarizedExperiment:
     """Create SummarizedExperiment from TSV data."""
     if not df_data:
@@ -284,7 +286,7 @@ def _create_summarized_experiment_from_tsv(
 
     numeric_cols = []
     annotation_cols = []
-    
+
     for col in all_cols:
         vals = df_data[col]
         is_num = True
@@ -319,7 +321,7 @@ def _create_summarized_experiment_from_tsv(
             else:
                 col_float.append(float(v))
         matrix_data.append(col_float)
-    
+
     matrix = np.array(matrix_data, dtype=np.float64).T
     assays = {assay_name: matrix}
 
@@ -336,15 +338,15 @@ def _create_summarized_experiment_from_tsv(
         for s in colnames:
             if s in design_data:
                 all_attrs.update(design_data[s].keys())
-        
+
         all_attrs = sorted(list(all_attrs))
-        
+
         for attr in all_attrs:
             col_data[attr] = []
             for s in colnames:
                 val = design_data.get(s, {}).get(attr, None)
                 col_data[attr].append(val)
-                
+
     col_bioc = BiocFrame(col_data, row_names=colnames)
 
     metadata = {"accession": accession, "source": "tsv"}
diff --git a/src/expressionatlas/models.py b/src/expressionatlas/models.py
index b0c5d73..5f937ab 100644
--- a/src/expressionatlas/models.py
+++ b/src/expressionatlas/models.py
@@ -65,7 +65,7 @@ def search_results_to_biocframe(results: list[SearchResult]) -> BiocFrame:
         return BiocFrame({col: [] for col in columns}, column_names=columns)
 
     valid_results = [r for r in results if not r.connection_error]
-    
+
     # Sort by Species, Type, then Accession (matching R package behavior)
     valid_results.sort(
         key=lambda r: (