Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/publish-pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Publish to PyPI

on:
push:
tags: "*"

jobs:
build:
runs-on: ubuntu-latest
permissions:
id-token: write
repository-projects: write
contents: write
pages: write

steps:
- uses: actions/checkout@v4

- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: 3.12

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox

- name: Test with tox
run: |
tox

- name: Build Project and Publish
run: |
python -m tox -e clean,build

# This uses the trusted publisher workflow so no token is required.
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1

- name: Build docs
run: |
tox -e docs

- run: touch ./docs/_build/html/.nojekyll

- name: GH Pages Deployment
uses: JamesIves/github-pages-deploy-action@v4
with:
branch: gh-pages # The branch the action should deploy to.
folder: ./docs/_build/html
clean: true # Automatically remove deleted files from the deploy branch
73 changes: 73 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
name: Test the library

on:
push:
branches:
- master # for legacy repos
- main
pull_request:
branches:
- master # for legacy repos
- main
workflow_dispatch: # Allow manually triggering the workflow
schedule:
# Run roughly every 15 days at 00:00 UTC
# (useful to check if updates on dependencies break the package)
- cron: "0 0 1,16 * *"

permissions:
contents: read

concurrency:
group: >-
${{ github.workflow }}-${{ github.ref_type }}-
${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true

jobs:
test:
strategy:
matrix:
python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
platform:
- ubuntu-latest
# - macos-latest
# - windows-latest
runs-on: ${{ matrix.platform }}
name: Python ${{ matrix.python }}, ${{ matrix.platform }}
steps:
- uses: actions/checkout@v4

- uses: actions/setup-python@v5
id: setup-python
with:
python-version: ${{ matrix.python }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox coverage

- name: Run tests
run: >-
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
tox
-- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args

- name: Check for codecov token availability
  id: codecov-check
  shell: bash
  env:
    # Pass the secret through the environment instead of interpolating it
    # into the script text: an empty value no longer produces a malformed
    # `[ != '' ]` test, and the value is never parsed as shell code.
    CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
  run: |
    # Sets the step output `codecov` to true/false so the upload step
    # can be skipped when no token is configured (e.g. on forks).
    if [ -n "$CODECOV_TOKEN" ]; then
      echo "codecov=true" >> "$GITHUB_OUTPUT"
    else
      echo "codecov=false" >> "$GITHUB_OUTPUT"
    fi

- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v5
if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
slug: ${{ github.repository }}
flags: ${{ matrix.platform }} - py${{ matrix.python }}
51 changes: 25 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,18 @@ results = client.search_experiments(
properties=["cancer", "breast"],
species="homo sapiens"
)
print(results.head())
# Accession Species Type ...
# 0 E-MTAB-1624 homo sapiens microarray data ...
print(results)
```
BiocFrame with 208 rows and 4 columns
Accession Species Type Title
<list> <list> <list> <list>
[0] E-MTAB-8198 Homo sapiens Cell line - High-thr... Functional effect of...
[1] E-MTAB-8532 Homo sapiens Human - One-color mi... DNA microarray studi...
[2] E-GEOD-43306 Homo sapiens RNA-seq of coding RNA Translating transcri...
... ... ... ...
[205] E-MTAB-779 Homo sapiens transcription profil... OncomiRs like let-7 ...
[206] E-TABM-1118 Homo sapiens transcription profil... Transcrption profili...
[207] E-TABM-601 Homo sapiens transcription profil... Transcription profil...

### Download RNA-seq Data

Expand All @@ -53,44 +61,35 @@ rnaseq = exp["rnaseq"]
counts = rnaseq.assay("counts") # numpy array: genes × samples

print(f"Shape: {counts.shape[0]} genes × {counts.shape[1]} samples")
# Shape: 58735 genes × 48 samples
# Shape: 58735 genes × 24 samples

# Sample metadata (BiocFrame)
sample_info = rnaseq.get_column_data()
print(sample_info.get_column_names())
# ['cell line', 'compound', 'developmental stage', 'disease', 'dose', 'genotype', 'organism', 'organism part']

# Gene annotations (BiocFrame)
gene_info = rnaseq.get_row_data()
print(gene_info.shape)
```

### Download Microarray Data

```python
exp = client.get_experiment("E-MTAB-1624")

# Microarray data is keyed by array design
array_design = "A-AFFY-126"
eset = exp[array_design] # This is also a SummarizedExperiment now
# (58735, 1)

# Expression matrix (probes × samples)
intensities = eset.assay("exprs")
print(intensities.shape)
# (54675, 96)

# Sample metadata (BiocFrame)
sample_annotations = eset.get_column_data()
print(sample_annotations.shape)

# Feature annotations (BiocFrame)
probe_annotations = eset.get_row_data()
print(rnaseq)
```
class: SummarizedExperiment
dimensions: (58735, 24)
assays(1): ['counts']
row_data columns(1): ['Gene Name']
row_names(58735): ['ENSG00000000003', 'ENSG00000000005', 'ENSG00000000419', ..., 'ENSG00000285992', 'ENSG00000285993', 'ENSG00000285994']
column_data columns(8): ['cell line', 'compound', 'developmental stage', 'disease', 'dose', 'genotype', 'organism', 'organism part']
column_names(24): ['ERR3456453', 'ERR3456442', 'ERR3456443', ..., 'ERR3456450', 'ERR3456459', 'ERR3456444']
metadata(2): accession source


### Batch Downloads

```python
# Download multiple experiments
accessions = results["Accession"].head(10).tolist()
accessions = results.get_column("Accession")[:10]
experiments = client.get_experiments(accessions)

# Access individual experiments
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ python_requires = >=3.9
install_requires =
importlib-metadata; python_version<"3.8"
requests
pandas
numpy
biocframe
summarizedexperiment
Expand Down
3 changes: 0 additions & 3 deletions src/expressionatlas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
finally:
del version, PackageNotFoundError


from expressionatlas.client import ExpressionAtlasClient
from expressionatlas.download import (
get_atlas_data,
Expand All @@ -41,5 +40,3 @@
InvalidAccessionError,
)
from expressionatlas.models import SearchResult
from summarizedexperiment import SummarizedExperiment
from biocutils import NamedList
40 changes: 24 additions & 16 deletions src/expressionatlas/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import logging
from collections.abc import Sequence

import pandas as pd
from biocframe import BiocFrame
from biocutils import NamedList

from expressionatlas.api import BioStudiesAPI
from expressionatlas.download import get_atlas_data, get_atlas_experiment
from expressionatlas.models import search_results_to_dataframe
from biocutils import NamedList
from expressionatlas.models import search_results_to_biocframe
from expressionatlas.validation import validate_accession

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,7 +74,7 @@ def search_experiments(
self,
properties: str | Sequence[str],
species: str | None = None,
) -> pd.DataFrame:
) -> BiocFrame:
"""
Search for Expression Atlas experiments matching given criteria.

Expand All @@ -90,8 +90,8 @@ def search_experiments(

Returns
-------
pandas.DataFrame
DataFrame with columns: Accession, Species, Type, Title.
BiocFrame
BiocFrame with columns: Accession, Species, Type, Title.
Sorted by Species, Type, then Accession.

Raises
Expand Down Expand Up @@ -130,8 +130,8 @@ def search_experiments(

results = self.api.search(properties=list(properties), species=species)

# Filter out connection errors and convert to DataFrame
df = search_results_to_dataframe(results)
# Filter out connection errors and convert to BiocFrame
df = search_results_to_biocframe(results)

# Log warning if any connection errors occurred
error_count = sum(1 for r in results if r.connection_error)
Expand Down Expand Up @@ -232,16 +232,24 @@ def get_experiments(
... species="homo sapiens",
... )
>>> # Download all RNA-seq experiments from search results
>>> rnaseq_accessions = results[
... results[
... "Type"
... ].str.contains(
... "RNA-seq",
... na=False,
>>> types = results.get_column(
... "Type"
... )
>>> accessions = results.get_column(
... "Accession"
... )
>>> rnaseq_accessions = [
... acc
... for acc, typ in zip(
... accessions,
... types,
... )
... ]["Accession"]
... if typ
... and "RNA-seq"
... in typ
... ]
>>> experiments = client.get_experiments(
... rnaseq_accessions.tolist()
... rnaseq_accessions
... )
>>> # Access: experiments["E-MTAB-XXXX"]["rnaseq"].assays["counts"]
"""
Expand Down
Loading
Loading