From 7b332d92c1a05c48de3e665dd7a779b71aad0fd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= <luabidaa@gmail.com>
Date: Fri, 22 May 2026 12:31:33 -0300
Subject: [PATCH] docs: include docstrings with rst format to autogenerate in
 RTD

---
 docs/source/conf.py             |   1 +
 pysus/api/_impl/databases.py    | 272 ++++++++++++++++++++++++-
 pysus/api/client.py             |  99 ++++++++-
 pysus/api/dadosgov/client.py    | 149 ++++++++++++--
 pysus/api/dadosgov/databases.py | 261 ++++++++++++++++++++++--
 pysus/api/dadosgov/models.py    | 135 +++++++++++--
 pysus/api/ducklake/catalog.py   |  92 ++++++++-
 pysus/api/ducklake/client.py    | 188 ++++++++++++++++--
 pysus/api/ducklake/models.py    | 150 ++++++++++++--
 pysus/api/extensions.py         |  68 ++++++-
 pysus/api/ftp/client.py         |  74 ++++++-
 pysus/api/ftp/databases.py      | 342 ++++++++++++++++++++++++++++----
 pysus/api/ftp/models.py         | 213 +++++++++++++++++---
 pysus/api/models.py             |  43 +++-
 pysus/api/utils.py              |  13 ++
 15 files changed, 1913 insertions(+), 187 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7c1af8b..9b2510c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,6 +12,7 @@
     "sphinx.ext.mathjax",
     "sphinx.ext.viewcode",
     "sphinx.ext.intersphinx",
+    "sphinx.ext.napoleon",
     "nbsphinx",
 ]
 
diff --git a/pysus/api/_impl/databases.py b/pysus/api/_impl/databases.py
index 684edce..be44cd3 100644
--- a/pysus/api/_impl/databases.py
+++ b/pysus/api/_impl/databases.py
@@ -38,9 +38,45 @@ def _fetch_data(
     show_progress: bool = True,
     **kwargs,
 ) -> pd.DataFrame:
-    """Query, download, and concatenate Parquet files for a given dataset."""
+    """Query, download, and concatenate Parquet files for a given dataset.
+
+    Internally creates an async event loop, queries the PySUS API for matching
+    files, downloads them, and reads them into a single DataFrame.
+
+    Parameters
+    ----------
+    dataset : str
+        Name of the dataset (e.g. ``"sinan"``, ``"sinasc"``).
+    group : str, optional
+        Group or disease code to filter by.
+    state : str, optional
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int], optional
+        Year or list of years to fetch.
+    month : int | list[int], optional
+        Month or list of months to fetch.
+    show_progress : bool, optional
+        Whether to display a tqdm progress bar during download.  Default is
+        ``True``.
+    **kwargs
+        Additional arguments forwarded to :meth:`PySUS.read_parquet`.
+
+    Returns
+    -------
+    pd.DataFrame
+        Concatenated data from all matching Parquet files.  Returns an empty
+        DataFrame when no files are found.
+
+    Raises
+    ------
+    RuntimeError
+        If an event loop is already running but ``nest_asyncio`` is not
+        installed.
+    """
 
     async def _fetch():
+        """Coroutine that performs the actual API query, download, and read."""
+
         async with PySUS() as pysus:
             years = [year] if isinstance(year, int) else (year or [None])
             months = [month] if isinstance(month, int) else (month or [None])
@@ -157,7 +193,25 @@ def sinan(
     year: int | list[int],
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch SINAN records for a given disease and year(s)."""
+    """Fetch SINAN records for a given disease and year(s).
+
+    SINAN (Sistema de Informação de Agravos de Notificação) is the Brazilian
+    notifiable-disease information system.
+
+    Parameters
+    ----------
+    disease : Literal
+        Disease code (e.g. ``"DENG"`` for dengue, ``"ZIKA"`` for zika).
+    year : int | list[int]
+        Year or list of years to fetch.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        SINAN records for the specified disease and year(s).
+    """
     return _fetch_data(
         dataset="sinan",
         group=disease.upper(),
@@ -171,7 +225,27 @@ def sinasc(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch SINASC birth certificates for a given state, year(s), and group."""
+    """Fetch SINASC birth certificates for a given state, year(s), and group.
+
+    SINASC (Sistema de Informação sobre Nascidos Vivos) is the Brazilian live
+    birth information system.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        SINASC birth records for the specified state, year(s), and group.
+    """
     return _fetch_data(
         dataset="sinasc",
         state=state.upper(),
@@ -186,7 +260,27 @@ def sim(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch SIM mortality records for a given state, year(s), and group."""
+    """Fetch SIM mortality records for a given state, year(s), and group.
+
+    SIM (Sistema de Informação sobre Mortalidade) is the Brazilian mortality
+    information system.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        SIM mortality records for the specified state, year(s), and group.
+    """
     return _fetch_data(
         dataset="sim",
         state=state.upper(),
@@ -202,7 +296,29 @@ def sih(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch SIH hospital admissions for a state, year, month, and group."""
+    """Fetch SIH hospital admissions for a state, year, month, and group.
+
+    SIH (Sistema de Informação Hospitalar) is the Brazilian hospital
+    admission information system.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    month : int | list[int]
+        Month or list of months to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        SIH hospital admission records.
+    """
     return _fetch_data(
         dataset="sih",
         state=state.upper(),
@@ -219,7 +335,29 @@ def sia(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch SIA ambulatory care for a state, year, month, and group."""
+    """Fetch SIA ambulatory care for a state, year, month, and group.
+
+    SIA (Sistema de Informação Ambulatorial) is the Brazilian ambulatory care
+    information system.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    month : int | list[int]
+        Month or list of months to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        SIA ambulatory care records.
+    """
     return _fetch_data(
         dataset="sia",
         state=state.upper(),
@@ -235,7 +373,27 @@ def pni(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch PNI immunisation records for a given state, year(s), and group."""
+    """Fetch PNI immunisation records for a given state, year(s), and group.
+
+    PNI (Programa Nacional de Imunizações) is the Brazilian national
+    immunisation programme.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        PNI immunisation records.
+    """
     return _fetch_data(
         dataset="pni",
         state=state.upper(),
@@ -249,7 +407,25 @@ def ibge(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch IBGE census data for given year(s) and optional group."""
+    """Fetch IBGE census data for given year(s) and optional group.
+
+    IBGE (Instituto Brasileiro de Geografia e Estatística) provides census
+    and demographic data.
+
+    Parameters
+    ----------
+    year : int | list[int]
+        Year or list of years to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Accepted but currently ignored; not forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        IBGE census data for the specified year(s) and group.
+    """
     return _fetch_data(dataset="ibge", group=group, year=year)
 
 
@@ -260,7 +436,29 @@ def cnes(
     group: str | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch CNES health facilities for a state, year, month, and group."""
+    """Fetch CNES health facilities for a state, year, month, and group.
+
+    CNES (Cadastro Nacional de Estabelecimentos de Saúde) is the Brazilian
+    registry of health-care facilities.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    month : int | list[int]
+        Month or list of months to fetch.
+    group : str, optional
+        Additional grouping code.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        CNES health-facility records.
+    """
     return _fetch_data(
         dataset="cnes",
         state=state.upper(),
@@ -277,7 +475,29 @@ def ciha(
     group: str | None = "CIHA",
     **kwargs,
 ) -> pd.DataFrame:
-    """Fetch CIHA hospitalisation records for state, year, month, and group."""
+    """Fetch CIHA hospitalisation records for state, year, month, and group.
+
+    CIHA (Comunicação de Internação Hospitalar) provides hospitalisation
+    records.
+
+    Parameters
+    ----------
+    state : State
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int]
+        Year or list of years to fetch.
+    month : int | list[int]
+        Month or list of months to fetch.
+    group : str, optional
+        Additional grouping code.  Default is ``"CIHA"``.
+    **kwargs
+        Additional arguments forwarded to :func:`_fetch_data`.
+
+    Returns
+    -------
+    pd.DataFrame
+        CIHA hospitalisation records.
+    """
     return _fetch_data(
         dataset="ciha",
         state=state.upper(),
@@ -306,9 +526,39 @@ def list_files(
     month: int | list[int] | None = None,
     **kwargs,
 ) -> pd.DataFrame:
-    """List catalog files filtered by client, group, state, year, and month."""
+    """List catalog files filtered by client, group, state, year, and month.
+
+    Queries the PySUS API metadata and returns a DataFrame with file name,
+    path, dataset, group, year, month, state, and last-modified timestamp for
+    every matching file without downloading the actual data.
+
+    Parameters
+    ----------
+    dataset : Literal
+        Dataset name (e.g. ``"SINAN"``, ``"SINASC"``).
+    client : Literal["FTP", "DadosGov"], optional
+        Data source client to query.
+    group : str, optional
+        Group or disease code to filter by.
+    state : str, optional
+        Two-letter state abbreviation (e.g. ``"RJ"``).
+    year : int | list[int], optional
+        Year or list of years to filter by.
+    month : int | list[int], optional
+        Month or list of months to filter by.
+    **kwargs
+        Additional arguments forwarded to :meth:`PySUS.query`.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with columns ``name``, ``path``, ``dataset``, ``group``,
+        ``year``, ``month``, ``state``, and ``modify``.
+    """
 
     async def _list():
+        """Coroutine that queries the PySUS API and builds the file list."""
+
         async with PySUS() as pysus:
             years = [year] if isinstance(year, int) else (year or [None])
             months = [month] if isinstance(month, int) else (month or [None])
diff --git a/pysus/api/client.py b/pysus/api/client.py
index 1373755..89f8ff1 100644
--- a/pysus/api/client.py
+++ b/pysus/api/client.py
@@ -69,7 +69,17 @@ class PySUS:
     """Central orchestrator for downloading and querying PySUS datasets."""
 
     def __init__(self, db_path: Path = CACHEPATH / "config.db"):
-        """Initialize PySUS with a DuckDB-backed SQLAlchemy engine."""
+        """Initialize the PySUS orchestrator.
+
+        Creates a SQLAlchemy engine backed by DuckDB, initializes the
+        schema, and sets up the session factory.
+
+        Parameters
+        ----------
+        db_path : Path, optional
+            Path to the DuckDB database file. Defaults to
+            ``CACHEPATH / "config.db"``.
+        """
 
         db_path = Path(db_path)
         db_path.parent.mkdir(parents=True, exist_ok=True)
@@ -241,12 +251,31 @@ async def download(
     ) -> BaseLocalFile:
         """Download a remote file and return a local file handle.
 
+        Skips re-download if a matching local copy already exists.
+
         Parameters
         ----------
-        timeout : float | None
-            Maximum seconds to wait for the download. ``None`` (default) means
-            no timeout – use this when the socket-level timeout on the
-            underlying client is sufficient.
+        file : BaseRemoteFile
+            The remote file to download.
+        token : str, optional
+            Access token for authenticated clients (e.g. DadosGov).
+        callback : Callable, optional
+            Progress callback invoked during the download.
+        timeout : float, optional
+            Maximum seconds to wait for the download. ``None`` (default)
+            means no timeout.
+
+        Returns
+        -------
+        BaseLocalFile
+            The downloaded file wrapped in the appropriate handler.
+
+        Raises
+        ------
+        ValueError
+            If the file's client is not recognised.
+        RuntimeError
+            If the download fails for any reason.
         """
 
         from pysus.api.extensions import ExtensionFactory
@@ -332,7 +361,32 @@ async def download_to_parquet(
         timeout: float | None = None,
         add_dv: bool = True,
     ) -> Parquet:
-        """Download a file and convert it to Parquet format."""
+        """Download a file and convert it to Parquet format.
+
+        Parameters
+        ----------
+        file : BaseRemoteFile
+            The remote file to download and convert.
+        token : str, optional
+            Access token for authenticated clients.
+        callback : Callable[[int, int], None], optional
+            Progress callback.
+        timeout : float, optional
+            Maximum seconds to wait for the download.
+        add_dv : bool, optional
+            Whether to apply the IBGE verification digit on load
+            (default True).
+
+        Returns
+        -------
+        Parquet
+            The converted Parquet file handler.
+
+        Raises
+        ------
+        NotImplementedError
+            If the downloaded file type cannot be converted to Parquet.
+        """
 
         local_file = await self.download(
             file=file,
@@ -368,8 +422,13 @@ async def download_to_parquet(
         )
 
     def get_local_hierarchy(self):
-        """
-        Build a nested dict of cached files grouped by client and dataset.
+        """Build a nested dict of cached files grouped by client and dataset.
+
+        Returns
+        -------
+        dict
+            Nested dict keyed by
+            ``{client: {dataset: {group: [files]}}}``.
         """
 
         with self.Session() as session:
@@ -446,10 +505,27 @@ def read_parquet(
 
         Parameters
         ----------
-        add_dv : bool
+        paths : list of Path
+            One or more Parquet file paths to read.
+        sql : str, optional
+            Optional SQL filter expression applied to the result.
+        mode : {"union", "intersection", "strict"}, optional
+            Schema resolution mode (default ``"union"``).
+        add_dv : bool, optional
             When True, automatically applies the IBGE verification digit to
-            municipality code columns. If there are matching columns, a
-            DataFrame is returned instead of a DuckDBPyConnection.
+            municipality code columns. If matching columns are found, a
+            DataFrame is returned instead of a ``DuckDBPyConnection``.
+
+        Returns
+        -------
+        DuckDBPyConnection or pd.DataFrame
+            The query result.
+
+        Raises
+        ------
+        ValueError
+            If no paths are provided, or if the schema mode is ``"strict"``
+            and the files have differing schemas.
         """
 
         from pysus.api.utils import add_dv as _add_dv_fn
@@ -459,6 +535,7 @@ def read_parquet(
             raise ValueError("No paths provided")
 
         def get_columns(path: Path) -> set[tuple[str, str]]:
+            """Return the schema of a Parquet file as (name, type) pairs."""
             result = duckdb.execute(f"SELECT * FROM '{path}' LIMIT 0")
             return {(col[0], str(col[1])) for col in result.description}
 
diff --git a/pysus/api/dadosgov/client.py b/pysus/api/dadosgov/client.py
index d56b57b..5487f80 100644
--- a/pysus/api/dadosgov/client.py
+++ b/pysus/api/dadosgov/client.py
@@ -17,7 +17,19 @@
 
 
 def to_datetime(value: Any) -> datetime | None:
-    """Parse a Brazilian date string into a datetime object."""
+    """Parse a Brazilian date string into a datetime object.
+
+    Parameters
+    ----------
+    value : Any
+        The value to parse, expected to be a date string in Brazilian format
+        (e.g., ``%d/%m/%Y %H:%M:%S`` or ``%d/%m/%Y``).
+
+    Returns
+    -------
+    datetime or None
+        Parsed datetime object, or None if the value cannot be parsed.
+    """
     if not value or not isinstance(value, str) or "Indisponível" in value:
         return None
     for fmt in ("%d/%m/%Y %H:%M:%S", "%d/%m/%Y"):
@@ -29,7 +41,18 @@ def to_datetime(value: Any) -> datetime | None:
 
 
 def to_bool(value: Any) -> bool:
-    """Parse a Brazilian Portuguese boolean value ("sim"/"não") into a bool."""
+    """Parse a Brazilian Portuguese boolean value into a bool.
+
+    Parameters
+    ----------
+    value : Any
+        The value to parse (e.g., ``"sim"``, ``"não"``, ``True``, ``False``).
+
+    Returns
+    -------
+    bool
+        True if the value represents an affirmative, False otherwise.
+    """
     if isinstance(value, bool):
         return value
     return str(value).lower() in ("sim", "true", "1")
@@ -48,26 +71,62 @@ class DadosGov(BaseRemoteClient):
     _client: httpx.AsyncClient | None = PrivateAttr(default=None)
 
     def __init__(self, **data):
-        """Initialize the DadosGov client."""
+        """Initialize the DadosGov client.
+
+        Parameters
+        ----------
+        **data
+            Additional keyword arguments forwarded to the parent constructor.
+        """
         super().__init__(**data)
 
     @property
     def name(self) -> str:
-        """Return the short client name."""
+        """Return the short client name.
+
+        Returns
+        -------
+        str
+            The abbreviated client name ``"DadosGov"``.
+        """
         return "DadosGov"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable client name."""
+        """Return the human-readable client name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the portal.
+        """
         return "Portal Brasileiro de Dados Abertos"
 
     @property
     def description(self) -> str:
-        """Return a description of the client."""
+        """Return a description of the client.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the API interface.
+        """
         return "Interface de acesso ao API do Portal de Dados Abertos"
 
     async def connect(self, token: str | None = None) -> None:
-        """Connect to the dados.gov.br API with the given token."""
+        """Connect to the dados.gov.br API with the given token.
+
+        Parameters
+        ----------
+        token : str, optional
+            The API authentication token. If not provided, uses the
+            previously stored token.
+
+        Raises
+        ------
+        ValueError
+            If no token is provided and none was previously stored.
+        """
         _token = token or self._token
 
         if not _token:
@@ -95,23 +154,62 @@ async def connect(self, token: str | None = None) -> None:
         )
 
     async def login(self, token: str | None = None, **kwargs) -> None:
-        """Authenticate with the API (delegates to connect)."""
+        """Authenticate with the API.
+
+        Delegates to the :meth:`connect` method.
+
+        Parameters
+        ----------
+        token : str, optional
+            The API authentication token.
+        **kwargs
+            Additional keyword arguments (currently unused).
+        """
         await self.connect(token=token)
 
     async def close(self) -> None:
-        """Close the underlying HTTP client."""
+        """Close the underlying HTTP client and release resources."""
         if self._client:
             await self._client.aclose()
             self._client = None
 
     async def datasets(self, **kwargs) -> list[Dataset]:
-        """Return a list of pre-configured health datasets."""
+        """Return a list of pre-configured health datasets.
+
+        Returns
+        -------
+        list[:class:`~pysus.api.dadosgov.models.Dataset`]
+            A list of available :class:`~pysus.api.dadosgov.models.Dataset`
+            instances for known health databases.
+        """
         from .databases import AVAILABLE_DATABASES
 
         return [db_class(client=self) for db_class in AVAILABLE_DATABASES]
 
     async def list_datasets(self, **kwargs) -> list[ConjuntoDados]:
-        """Search and list available datasets from the portal."""
+        """Search and list available datasets from the portal.
+
+        Parameters
+        ----------
+        **kwargs
+            Search parameters. Supported keys:
+
+            - ``pagina`` (int): Page number for pagination.
+            - ``nome_conjunto`` (str): Filter by dataset name.
+            - ``dados_abertos`` (bool): Filter by open data flag.
+            - ``is_privado`` (bool): Filter by private datasets.
+            - ``id_organizacao`` (str): Filter by organisation ID.
+
+        Returns
+        -------
+        list[ConjuntoDados]
+            A list of datasets matching the search criteria.
+
+        Raises
+        ------
+        ConnectionError
+            If the client is not connected.
+        """
         if self._client is None:
             raise ConnectionError(
                 "Client not connected. Call login(token=...) first.",
@@ -136,7 +234,23 @@ async def list_datasets(self, **kwargs) -> list[ConjuntoDados]:
         return [ConjuntoDados(**item, client=self) for item in data]
 
     async def get_dataset(self, id: str) -> ConjuntoDados:
-        """Fetch a single dataset by its ID."""
+        """Fetch a single dataset by its ID.
+
+        Parameters
+        ----------
+        id : str
+            The unique identifier of the dataset.
+
+        Returns
+        -------
+        ConjuntoDados
+            The requested dataset.
+
+        Raises
+        ------
+        ConnectionError
+            If the client is not connected.
+        """
         if self._client is None:
             raise ConnectionError(
                 "Client not connected. Call login(token=...) first.",
@@ -194,7 +308,16 @@ class Recurso(BaseModel):
     file_name: str | None = Field(None, alias="nomeArquivo")
 
     async def get_size(self) -> int:
-        """Retrieve the file size from the remote server."""
+        """Retrieve the file size from the remote server.
+
+        Makes a HEAD request (falling back to GET with a Range header)
+        to determine the Content-Length of the resource.
+
+        Returns
+        -------
+        int
+            The file size in bytes, or 0 if the size could not be determined.
+        """
         async with httpx.AsyncClient(follow_redirects=True) as client:
             response = await client.head(self.url)
 
diff --git a/pysus/api/dadosgov/databases.py b/pysus/api/dadosgov/databases.py
index 192587a..0729709 100644
--- a/pysus/api/dadosgov/databases.py
+++ b/pysus/api/dadosgov/databases.py
@@ -24,6 +24,7 @@
 
 
 def _parse_year(val: str) -> int | None:
+    """Parse a year string into an int, or None if outside 1970-2100."""
     try:
         y = int(val)
         return y if 1970 <= y <= 2100 else None
@@ -32,6 +33,7 @@ def _parse_year(val: str) -> int | None:
 
 
 def _skip(name: str) -> bool:
+    """Return True for names starting with ``get_`` or ending in ``.pdf``."""
     return name.startswith("get_") or name.lower().endswith(".pdf")
 
 
@@ -45,16 +47,35 @@ class CNES(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"CNES"``.
+        """
         return "CNES"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Cadastro Nacional de Estabelecimentos de Saúde"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the CNES information system.
+        """
         return (
             "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o "
             "sistema de informação oficial de cadastramento de informações "
@@ -62,7 +83,19 @@ def description(self) -> str:
         )
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a CNES filename and extract metadata."""
+        """Parse a CNES filename and extract metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip()
             if _skip(name):
@@ -108,20 +141,51 @@ class PNI(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"PNI"``.
+        """
         return "PNI"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Programa Nacional de Imunizações"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the PNI vaccination monitoring system.
+        """
         return "O PNI monitora a cobertura vacinal e doses aplicadas no Brasil."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a PNI vaccination filename into month and year."""
+        """Parse a PNI vaccination filename into month and year.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip().lower()
             if _skip(name):
@@ -147,22 +211,53 @@ class SIA(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"SIA"``.
+        """
         return "SIA"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informações Ambulatoriais"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the SIA outpatient information system.
+        """
         return """
             O SIA acompanha as ações de saúde produzidas no âmbito ambulatorial.
         """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse an SIA filename into year."""
+        """Parse an SIA filename into year.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip().lower()
             if _skip(name):
@@ -214,23 +309,54 @@ class SINAN(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"SINAN"``.
+        """
         return "SINAN"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informação de Agravos de Notificação"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the SINAN notifiable diseases system.
+        """
         return """
             O SINAN é alimentado pela notificação de doenças de notificação
             compulsória
             """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SINAN filename into state and year."""
+        """Parse a SINAN filename into state and year.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip().upper()
             if _skip(name):
@@ -270,22 +396,53 @@ class SIM(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"SIM"``.
+        """
         return "SIM"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informação sobre Mortalidade"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the SIM mortality information system.
+        """
         return """
             O SIM coleta dados sobre óbitos no país para análise epidemiológica.
         """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SIM filename into year."""
+        """Parse a SIM filename into year.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip()
             if _skip(name):
@@ -325,23 +482,54 @@ class SINASC(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"SINASC"``.
+        """
         return "SINASC"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informações sobre Nascidos Vivos"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the SINASC live birth system.
+        """
         return """
             O SINASC fornece subsídios para o diagnóstico de saúde e
             planejamento de políticas de natalidade.
         """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SINASC filename into year."""
+        """Parse a SINASC filename into year.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip()
             if _skip(name):
@@ -377,20 +565,51 @@ class COVID19(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the short name."""
+        """Return the short name.
+
+        Returns
+        -------
+        str
+            The abbreviated dataset name ``"COVID19"``.
+        """
         return "COVID19"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name."""
+        """Return the human-readable name.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Casos Confirmados de COVID-19"
 
     @property
     def description(self) -> str:
+        """Return a description of the dataset.
+
+        Returns
+        -------
+        str
+            A Portuguese description of the COVID-19 confirmed cases dataset.
+        """
         return "Dados anonimizados de casos confirmados de COVID-19."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a COVID-19 filename."""
+        """Parse a COVID-19 filename and extract metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The name of the file to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary with keys ``state``, ``year``, and ``month``.
+            Unrecognised files return ``None`` for all keys.
+        """
         try:
             name = filename.strip().lower()
             if _skip(name) or name.endswith(".xlsx"):
diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py
index a582eb7..4a962ca 100644
--- a/pysus/api/dadosgov/models.py
+++ b/pysus/api/dadosgov/models.py
@@ -59,16 +59,33 @@ class File(BaseRemoteFile):
     _metadata: dict[str, Any] = PrivateAttr(default_factory=dict)
 
     def __init__(self, **data):
-        """Initialize the File with optional metadata."""
+        """Initialize the File with optional metadata.
+
+        Parameters
+        ----------
+        **data
+            Keyword arguments including an optional ``_metadata`` dict
+            that is stored on the private attribute ``_metadata``.
+        """
         metadata = data.pop("_metadata", {})
         super().__init__(**data)
         self._metadata = metadata
 
     def __repr__(self):
+        """Return the file basename as its string representation."""
         return self.basename
 
     def model_post_init(self, __context: Any) -> None:
-        """Fetch remote metadata if size or modify date is missing."""
+        """Fetch remote metadata if size or modify date is missing.
+
+        If either ``api_size`` or ``last_modified`` is falsy, schedules a
+        background task to fetch metadata from the remote server.
+
+        Parameters
+        ----------
+        __context : Any
+            Pydantic validation context (unused).
+        """
         if not self.record.api_size or not self.record.last_modified:
             try:
                 loop = asyncio.get_running_loop()
@@ -80,19 +97,42 @@ def model_post_init(self, __context: Any) -> None:
 
     @property
     def extension(self) -> str:
-        """Return the file extension."""
+        """Return the file extension.
+
+        Returns
+        -------
+        str
+            The file extension (e.g., ``".csv"``, ``".zip"``).
+        """
         if self.record.file_name:
             return pathlib.Path(self.record.file_name).suffix
         return pathlib.Path(self.record.url.split("/")[-1].split("?")[0]).suffix
 
     @property
     def size(self) -> int:
-        """Return the file size in bytes."""
+        """Return the file size in bytes.
+
+        Returns
+        -------
+        int
+            The file size, or 0 if unknown.
+        """
         return self.record.api_size or 0
 
     @property
     def modify(self) -> datetime:
-        """Return the last modification date."""
+        """Return the last modification date.
+
+        Returns
+        -------
+        datetime
+            The last modification datetime.
+
+        Raises
+        ------
+        ValueError
+            If the modification date has not been set.
+        """
         m = self.record.last_modified
         if not m:
             raise ValueError("File requires a modify date")
@@ -100,21 +140,43 @@ def modify(self) -> datetime:
 
     @property
     def year(self) -> int | None:
-        """Return the inferred year from metadata."""
+        """Return the inferred year from metadata.
+
+        Returns
+        -------
+        int or None
+            The year if present in metadata, otherwise None.
+        """
         return self._metadata.get("year")
 
     @property
     def month(self) -> int | None:
-        """Return the inferred month from metadata."""
+        """Return the inferred month from metadata.
+
+        Returns
+        -------
+        int or None
+            The month if present in metadata, otherwise None.
+        """
         return self._metadata.get("month")
 
     @property
     def state(self) -> State | None:
-        """Return the inferred state from metadata."""
+        """Return the inferred state from metadata.
+
+        Returns
+        -------
+        State or None
+            The state abbreviation if present in metadata, otherwise None.
+        """
         return self._metadata.get("state")
 
     async def fetch_metadata(self) -> None:
-        """Fetch file size and last-modified from the remote server."""
+        """Fetch file size and last-modified from the remote server.
+
+        Updates ``record.api_size`` and ``record.last_modified`` in-place.
+        Silently ignores connection errors.
+        """
         try:
             async with httpx.AsyncClient(
                 follow_redirects=True,
@@ -151,7 +213,16 @@ async def _download(
         return await self.client._download_file(self, output, callback=callback)
 
     async def fetch_size(self) -> int:
-        """Fetch the remote file size and update the local record."""
+        """Fetch the remote file size and update the local record.
+
+        Makes a HEAD request (falling back to GET with a Range header)
+        to determine the Content-Length.
+
+        Returns
+        -------
+        int
+            The file size in bytes, or 0 if the size could not be determined.
+        """
         try:
             async with httpx.AsyncClient(
                 follow_redirects=True,
@@ -188,30 +259,59 @@ def __init__(
         dataset: BaseRemoteDataset,
         formatter: Callable | None = None,
     ):
-        """Initialize the Group with a dataset record and optional formatter."""
+        """Initialize the Group with a dataset record and optional formatter.
+
+        Parameters
+        ----------
+        record : ConjuntoDados
+            The API response record for this group.
+        dataset : BaseRemoteDataset
+            The parent dataset this group belongs to.
+        formatter : Callable, optional
+            A callable that extracts metadata from filenames.
+        """
         super().__init__(
             record=record, dataset=dataset  # type: ignore[call-arg]
         )
         self._formatter = formatter
 
     def __repr__(self):
+        """Return the group name as its string representation."""
         return self.name
 
     @property
     def name(self) -> str:
-        """Return the group name, resolved through dataset aliases."""
+        """Return the group name, resolved through dataset aliases.
+
+        Returns
+        -------
+        str
+            The alias for the group slug if defined, otherwise the raw slug.
+        """
         slug = self.record.slug
         aliases = getattr(self.dataset, "group_aliases", {})
         return aliases.get(slug, slug)
 
     @property
     def long_name(self) -> str:
-        """Return the group title."""
+        """Return the group title.
+
+        Returns
+        -------
+        str
+            The title of the underlying API record.
+        """
         return self.record.title
 
     @property
     def description(self) -> str:
-        """Return an empty description."""
+        """Return an empty description for the group.
+
+        Returns
+        -------
+        str
+            An empty string.
+        """
         return ""
 
     async def _fetch_files(self) -> list[BaseRemoteFile]:
@@ -247,13 +347,18 @@ async def _fetch_files(self) -> list[BaseRemoteFile]:
 
 
 class Dataset(BaseRemoteDataset):
-    """A health dataset available through dados.gov.br."""
+    """A health dataset available through dados.gov.br.
+
+    Subclasses define a list of API dataset IDs and an optional
+    :meth:`formatter` that extracts metadata from file names.
+    """
 
     ids: list[str] = []
     client: "DadosGov"
     group_aliases: dict[str, str] = {}
 
     def __repr__(self):
+        """Return the dataset name as its string representation."""
         return self.name
 
     @abstractmethod
diff --git a/pysus/api/ducklake/catalog.py b/pysus/api/ducklake/catalog.py
index cc83ba3..a9c0fd0 100644
--- a/pysus/api/ducklake/catalog.py
+++ b/pysus/api/ducklake/catalog.py
@@ -56,14 +56,36 @@ class CatalogTable(Base):
 
 
 class Origin(enum.Enum):
-    """Origin type for a dataset: FTP or API."""
+    """Origin type for a dataset.
+
+    Attributes
+    ----------
+    FTP : str
+        Dataset sourced from the FTP server.
+    API : str
+        Dataset sourced from an API.
+    """
 
     FTP = "ftp"
     API = "api"
 
 
 class CatalogDataset(CatalogTable):
-    """ORM model for the datasets table, representing a dataset collection."""
+    """ORM model for the datasets table, representing a dataset collection.
+
+    Parameters
+    ----------
+    id : int, optional
+        Primary key (auto-generated by sequence).
+    name : str
+        Unique short name for the dataset.
+    long_name : str
+        Human-readable full name.
+    description : str, optional
+        Optional description of the dataset contents.
+    origin : Origin
+        Whether the dataset originates from FTP or an API.
+    """
 
     __tablename__ = "datasets"
 
@@ -95,7 +117,23 @@ class CatalogDataset(CatalogTable):
 
 
 class ColumnDefinition(CatalogTable):
-    """ORM model for dataset column metadata (name, type, description)."""
+    """ORM model for dataset column metadata.
+
+    Parameters
+    ----------
+    id : int, optional
+        Primary key (auto-generated by sequence).
+    dataset_id : int
+        Foreign key referencing the parent dataset.
+    name : str
+        Column name.
+    type : str
+        Column data type string.
+    description : str, optional
+        Optional description of the column.
+    nullable : bool, optional
+        Whether the column allows null values.
+    """
 
     __tablename__ = "dataset_columns"
 
@@ -129,7 +167,21 @@ class ColumnDefinition(CatalogTable):
 
 
 class DatasetGroup(CatalogTable):
-    """ORM model for dataset groups, grouping related files within a dataset."""
+    """ORM model for dataset groups, grouping related files within a dataset.
+
+    Parameters
+    ----------
+    id : int, optional
+        Primary key (auto-generated by sequence).
+    name : str
+        Short name for the group.
+    dataset_id : int
+        Foreign key referencing the parent dataset.
+    long_name : str
+        Human-readable full name.
+    description : str, optional
+        Optional description of the group contents.
+    """
 
     __tablename__ = "dataset_groups"
 
@@ -162,7 +214,37 @@ class DatasetGroup(CatalogTable):
 
 
 class CatalogFile(CatalogTable):
-    """ORM model for the files table, representing individual data files."""
+    """ORM model for the files table, representing individual data files.
+
+    Parameters
+    ----------
+    id : int, optional
+        Primary key (auto-generated by sequence).
+    dataset_id : int
+        Foreign key referencing the parent dataset.
+    group_id : int, optional
+        Foreign key referencing the parent group.
+    path : str
+        Object storage path to the file.
+    size : int
+        File size in bytes.
+    rows : int
+        Number of rows in the file.
+    modified : datetime
+        Timestamp of the last known modification.
+    origin_modified : datetime, optional
+        Original modification timestamp from the source.
+    origin_path : str
+        Original source path of the file.
+    sha256 : str, optional
+        SHA-256 hex digest for integrity verification.
+    year : int, optional
+        Data year associated with the file.
+    month : int, optional
+        Data month associated with the file.
+    state : str, optional
+        Two-letter state code associated with the file.
+    """
 
     __tablename__ = "files"
 
diff --git a/pysus/api/ducklake/client.py b/pysus/api/ducklake/client.py
index 47ef426..21e9525 100644
--- a/pysus/api/ducklake/client.py
+++ b/pysus/api/ducklake/client.py
@@ -24,7 +24,15 @@
 
 
 class CatalogDatasetAdapter:
-    """Adapter wrapping a CatalogDataset ORM record for use by File objects."""
+    """Adapter wrapping a CatalogDataset ORM record for use by File objects.
+
+    Parameters
+    ----------
+    catalog_dataset : CatalogDataset
+        The ORM record to wrap.
+    ducklake : DuckLake
+        The parent DuckLake client instance.
+    """
 
     def __init__(self, catalog_dataset: CatalogDataset, ducklake):
         self.name = catalog_dataset.name
@@ -36,12 +44,26 @@ def __init__(self, catalog_dataset: CatalogDataset, ducklake):
 
     @property
     def content(self):
-        """Query the DuckLake client for files in this dataset."""
+        """Query the DuckLake client for files in this dataset.
+
+        Returns
+        -------
+        list
+            List of files belonging to this dataset.
+        """
         return self.ducklake.query(dataset=self.name.upper())
 
 
 class DatasetGroupAdapter:
-    """Adapter wrapping a DatasetGroup ORM record for use by File objects."""
+    """Adapter wrapping a DatasetGroup ORM record for use by File objects.
+
+    Parameters
+    ----------
+    dataset_group : DatasetGroup
+        The ORM record to wrap.
+    dataset : CatalogDataset
+        The parent dataset.
+    """
 
     def __init__(self, dataset_group: DatasetGroup, dataset):
         self.name = dataset_group.name
@@ -50,11 +72,24 @@ def __init__(self, dataset_group: DatasetGroup, dataset):
         self.dataset = dataset
 
     def __str__(self):
+        """Return the group name as its string representation.
+
+        Returns
+        -------
+        str
+            The short name of the group.
+        """
         return self.name
 
     @property
     async def files(self):
-        """Return the list of files in this group."""
+        """Return the list of files in this group.
+
+        Returns
+        -------
+        list
+            List of file objects in this group.
+        """
         return []
 
     async def _fetch_files(self):
@@ -62,19 +97,52 @@ async def _fetch_files(self):
         return []
 
     async def search(self, **kwargs):
-        """Search for files within this group matching the given criteria."""
+        """Search for files within this group matching the given criteria.
+
+        Parameters
+        ----------
+        **kwargs
+            Arbitrary filter criteria.
+
+        Returns
+        -------
+        list
+            List of matching file objects.
+        """
         return []
 
 
 class DuckLakeCredentials(BaseModel):
-    """Credentials for authenticating with the S3-compatible object storage."""
+    """Credentials for authenticating with the S3-compatible object storage.
+
+    Parameters
+    ----------
+    access_key : SecretStr
+        The S3 access key ID.
+    secret_key : SecretStr
+        The S3 secret access key.
+    """
 
     access_key: SecretStr
     secret_key: SecretStr
 
 
 class DuckLake(BaseRemoteClient):
-    """Client for the DuckLake S3-based public health dataset catalog."""
+    """Client for the DuckLake S3-based public health dataset catalog.
+
+    Parameters
+    ----------
+    endpoint : str, optional
+        S3-compatible object storage endpoint.
+    region : str, optional
+        Storage region name.
+    bucket : str, optional
+        Bucket name containing the catalog.
+    credentials : DuckLakeCredentials, optional
+        Credentials for authenticated S3 operations.
+    engine : object, optional
+        Pre-configured SQLAlchemy engine to reuse.
+    """
 
     endpoint: str = "nbg1.your-objectstorage.com"
     region: str = "nbg1"
@@ -89,7 +157,15 @@ class DuckLake(BaseRemoteClient):
     _Session: Any = PrivateAttr(default=None)
 
     def __init__(self, engine=None, **data):
-        """Initialize the DuckLake client with an optional existing engine."""
+        """Initialize the DuckLake client with an optional existing engine.
+
+        Parameters
+        ----------
+        engine : object, optional
+            Pre-configured SQLAlchemy engine instead of creating a new one.
+        **data
+            Additional fields passed to the Pydantic base model.
+        """
         super().__init__(**data)
         self._engine = engine
         self._cache_dir = Path(CACHEPATH) / "ducklake"
@@ -98,22 +174,46 @@ def __init__(self, engine=None, **data):
 
     @property
     def name(self) -> str:
-        """Return the short name of this client."""
+        """Return the short name of this client.
+
+        Returns
+        -------
+        str
+            The client short name.
+        """
         return "DuckLake"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name of this client."""
+        """Return the human-readable name of this client.
+
+        Returns
+        -------
+        str
+            The client display name.
+        """
         return "PySUS s3 Client"
 
     @property
     def description(self) -> str:
-        """Return a description of this client."""
+        """Return a description of this client.
+
+        Returns
+        -------
+        str
+            A description string (currently empty).
+        """
         return ""  # TODO:
 
     @property
     def catalog_path(self) -> Path:
-        """Return the local path to the downloaded catalog database."""
+        """Return the local path to the downloaded catalog database.
+
+        Returns
+        -------
+        Path
+            Filesystem path to the local catalog database file.
+        """
         return self._catalog_local
 
     @property
@@ -127,7 +227,18 @@ def _is_authenticated(self) -> bool:
         return self.credentials is not None
 
     async def datasets(self, **kwargs) -> list[DuckDataset]:
-        """Return all datasets from the catalog as DuckDataset instances."""
+        """Return all datasets from the catalog as DuckDataset instances.
+
+        Parameters
+        ----------
+        **kwargs
+            Additional filter arguments (currently unused).
+
+        Returns
+        -------
+        list[DuckDataset]
+            List of all datasets in the catalog.
+        """
         if not self._Session:
             await self.connect()
 
@@ -155,7 +266,17 @@ async def login(
         secret_key: str | None = None,
         **kwargs,
     ) -> None:
-        """Authenticate with S3 credentials and reconnect to the catalog."""
+        """Authenticate with S3 credentials and reconnect to the catalog.
+
+        Parameters
+        ----------
+        access_key : str, optional
+            S3 access key ID. If omitted, credentials are cleared.
+        secret_key : str, optional
+            S3 secret access key. If omitted, credentials are cleared.
+        **kwargs
+            Additional arguments (currently unused).
+        """
         if access_key and secret_key:
             self.credentials = DuckLakeCredentials(
                 access_key=SecretStr(access_key),
@@ -216,7 +337,13 @@ def _setup_engine(self):
         return engine
 
     async def connect(self, force: bool = False):
-        """Connect to the catalog, downloading it first if necessary."""
+        """Connect to the catalog, downloading it first if necessary.
+
+        Parameters
+        ----------
+        force : bool, optional
+            Whether to re-download and re-connect even if already connected.
+        """
         if self._engine and not force:
             if not self._Session:
                 self._Session = sessionmaker(bind=self._engine)
@@ -227,7 +354,13 @@ async def connect(self, force: bool = False):
         self._Session = sessionmaker(bind=self._engine)
 
     async def close(self):
-        """Dispose the engine, then upload the catalog if authenticated."""
+        """Dispose the engine, then upload the catalog if authenticated.
+
+        Raises
+        ------
+        PermissionError
+            If the client is not authenticated but an upload is required.
+        """
         if self._engine:
             await to_thread.run_sync(self._engine.dispose)
 
@@ -341,7 +474,28 @@ async def query(
         year: int | None = None,
         month: int | None = None,
     ) -> list[File]:
-        """Filter catalog files by client, dataset, group, state, year."""
+        """Filter catalog files by client, dataset, group, state, year, month.
+
+        Parameters
+        ----------
+        client : Literal["FTP", "DadosGov"], optional
+            Source client to filter by.
+        dataset : str, optional
+            Dataset name to filter by.
+        group : str, optional
+            Group name pattern to filter by (case-insensitive ILIKE).
+        state : str, optional
+            Two-letter state code to filter by.
+        year : int, optional
+            Year to filter by.
+        month : int, optional
+            Month to filter by.
+
+        Returns
+        -------
+        list[:class:`~pysus.api.ducklake.models.File`]
+            List of matching file objects.
+        """
         if not self._Session:
             await self.connect()
 
diff --git a/pysus/api/ducklake/models.py b/pysus/api/ducklake/models.py
index baf0e66..306a96f 100644
--- a/pysus/api/ducklake/models.py
+++ b/pysus/api/ducklake/models.py
@@ -24,7 +24,19 @@
 
 
 class File(BaseRemoteFile):
-    """A remote file in DuckLake catalog with download and verification."""
+    """A remote file in the DuckLake catalog with download and verification.
+
+    Parameters
+    ----------
+    record : CatalogFile
+        The underlying ORM record.
+    type : str, optional
+        File type identifier (default ``"remote"``).
+    dataset : Any
+        The parent dataset object.
+    group : Any, optional
+        The parent group object, if any.
+    """
 
     record: CatalogFile = Field(exclude=True)
     type: str = "remote"
@@ -33,32 +45,68 @@ class File(BaseRemoteFile):
 
     @property
     def basename(self) -> str:
-        """Return the file name without directory components."""
+        """Return the file name without directory components.
+
+        Returns
+        -------
+        str
+            The base file name.
+        """
         return self.path.name
 
     @property
     def extension(self) -> str:
-        """Return the file extension including the leading dot."""
+        """Return the file extension including the leading dot.
+
+        Returns
+        -------
+        str
+            File extension (e.g. ``'.csv'``).
+        """
         return self.path.suffix
 
     @property
     def size(self) -> int:
-        """Return the file size in bytes."""
+        """Return the file size in bytes.
+
+        Returns
+        -------
+        int
+            File size in bytes.
+        """
         return self.record.size
 
     @property
     def modify(self) -> datetime:
-        """Return the last-modified timestamp."""
+        """Return the last-modified timestamp.
+
+        Returns
+        -------
+        datetime
+            The last modification timestamp.
+        """
         return self.record.modified
 
     @property
     def rows(self) -> int:
-        """Return the number of rows in the file."""
+        """Return the number of rows in the file.
+
+        Returns
+        -------
+        int
+            Row count.
+        """
         return self.record.rows
 
     @property
     def sha256(self) -> str | None:
-        """Return the SHA-256 hash of the file, if available."""
+        """Return the SHA-256 hash of the file, if available.
+
+        Returns
+        -------
+        str or None
+            SHA-256 hex digest, or None if not recorded.
+        """
         return self.record.sha256
 
     async def _download(
@@ -77,7 +125,18 @@ async def _download(
         )
 
     async def verify(self, path: Path) -> bool:
-        """Verify the file matches the recorded SHA-256 hash."""
+        """Verify the file matches the recorded SHA-256 hash.
+
+        Parameters
+        ----------
+        path : Path
+            Path to the downloaded file on disk.
+
+        Returns
+        -------
+        bool
+            True if the hash matches or no hash is recorded, False otherwise.
+        """
         if not self.sha256:
             return True
 
@@ -93,22 +152,49 @@ def _calculate():
 
 
 class DuckDataset(BaseRemoteDataset):
-    """A dataset from the DuckLake catalog, containing groups and files."""
+    """A dataset from the DuckLake catalog, containing groups and files.
+
+    Parameters
+    ----------
+    record : CatalogDataset
+        The underlying ORM record.
+    client : BaseRemoteClient
+        The parent client instance.
+    """
 
     record: CatalogDataset = Field(exclude=True)
     client: BaseRemoteClient = Field(exclude=True)
 
     def __repr__(self) -> str:
+        """Return a string representation of the dataset.
+
+        Returns
+        -------
+        str
+            The uppercased dataset name.
+        """
         return self.name.upper()
 
     @property
     def name(self) -> str:
-        """Return the short name of the dataset."""
+        """Return the short name of the dataset.
+
+        Returns
+        -------
+        str
+            The dataset short name.
+        """
         return self.record.name
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name of the dataset."""
+        """Return the human-readable name of the dataset.
+
+        Returns
+        -------
+        str
+            The dataset display name, falling back to the short name.
+        """
         return (
             self.record.dataset_metadata.long_name
             if self.record.dataset_metadata
@@ -117,7 +203,13 @@ def long_name(self) -> str:
 
     @property
     def description(self) -> str:
-        """Return the description of the dataset."""
+        """Return the description of the dataset.
+
+        Returns
+        -------
+        str
+            The dataset description, or an empty string if unavailable.
+        """
         return (
             self.record.dataset_metadata.description
             if self.record.dataset_metadata
@@ -149,19 +241,39 @@ async def _fetch_content(self) -> list[Union["DuckGroup", File]]:
 
 
 class DuckGroup(BaseRemoteGroup):
-    """A group of related files within a DuckLake dataset."""
+    """A group of related files within a DuckLake dataset.
+
+    Parameters
+    ----------
+    record : DatasetGroup
+        The underlying ORM record.
+    dataset : DuckDataset
+        The parent dataset instance.
+    """
 
     record: DatasetGroup = Field(exclude=True)
     dataset: DuckDataset = Field(exclude=True)
 
     @property
     def name(self) -> str:
-        """Return the short name of the group."""
+        """Return the short name of the group.
+
+        Returns
+        -------
+        str
+            The group short name.
+        """
         return self.record.name
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name of the group."""
+        """Return the human-readable name of the group.
+
+        Returns
+        -------
+        str
+            The group display name, falling back to the short name.
+        """
         return (
             self.record.group_metadata.long_name
             if self.record.group_metadata
@@ -170,7 +282,13 @@ def long_name(self) -> str:
 
     @property
     def description(self) -> str:
-        """Return the description of the group."""
+        """Return the description of the group.
+
+        Returns
+        -------
+        str
+            The group description, or an empty string if unavailable.
+        """
         if self.record.group_metadata:
             return self.record.group_metadata.description
         return ""
diff --git a/pysus/api/extensions.py b/pysus/api/extensions.py
index 0451b42..d918817 100644
--- a/pysus/api/extensions.py
+++ b/pysus/api/extensions.py
@@ -56,6 +56,7 @@ async def stream(
         """Yield the file contents in chunks of the given size."""
 
         def _read_sync():
+            """Read file chunks synchronously in a thread."""
             with open(self.path, "rb") as f:
                 while chunk := f.read(chunk_size):
                     yield chunk
@@ -135,6 +136,7 @@ async def _get_encoding(self) -> str:
         if self._encoding is None:
 
             def detect():
+                """Detect encoding from file bytes synchronously."""
                 with open(self.path, "rb") as f:
                     return chardet.detect(f.read(1024 * 300))
 
@@ -151,6 +153,7 @@ async def _get_sep(self) -> str:
             encoding = await self._get_encoding()
 
             def sniff():
+                """Sniff the CSV delimiter synchronously."""
                 try:
                     with open(self.path, encoding=encoding) as f:
                         sample = f.read(1024 * 10)
@@ -168,6 +171,7 @@ async def load(self) -> pd.DataFrame:
         separator = await self._get_sep()
 
         def _read_sync():
+            """Read the CSV synchronously in a thread."""
             return pd.read_csv(
                 self.path, sep=separator, encoding=encoding, low_memory=False
             )
@@ -183,6 +187,7 @@ async def stream(
         separator = await self._get_sep()
 
         def _get_reader_sync():
+            """Create a CSV chunk reader synchronously in a thread."""
             return pd.read_csv(
                 self.path,
                 sep=separator,
@@ -233,6 +238,7 @@ async def load(self, parse: bool = True) -> pd.DataFrame:
         """Read the entire Parquet file into a DataFrame."""
 
         def _load():
+            """Read the Parquet file synchronously in a thread."""
             df = pd.read_parquet(self.path, engine="pyarrow")
             if parse:
                 df = self.parse_dftypes(df)
@@ -265,12 +271,14 @@ def parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
         """Convert known date and integer columns to their proper types."""
 
         def str_to_int(string):
+            """Convert a string to int, return original if not possible."""
             if pd.isna(string):
                 return string
             clean = str(string).replace(" ", "")
             return int(clean) if clean.isnumeric() else string
 
         def str_to_date(string):
+            """Convert a date string to date or return original on failure."""
             if isinstance(string, str):
                 try:
                     return datetime.strptime(string, "%Y%m%d").date()
@@ -308,7 +316,19 @@ def rows(self) -> int:
         return len(DBFReader(self.path, load=False))
 
     def decode_column(self, value):
-        """Decode a byte string value using cp1252 encoding."""
+        """Decode a raw DBF value, handling byte strings and null bytes.
+
+        Parameters
+        ----------
+        value : bytes or str or Any
+            The value to decode.
+
+        Returns
+        -------
+        str or Any
+            The decoded and stripped string, or the original value if it is
+            neither bytes nor str.
+        """
         if isinstance(value, bytes):
             return (
                 value.decode(encoding="cp1252", errors="replace")
@@ -323,6 +343,7 @@ async def load(self) -> pd.DataFrame:
         """Read the entire DBF file into a DataFrame."""
 
         def _load():
+            """Read the DBF file synchronously in a thread."""
             dbf = DBFReader(self.path, encoding="cp1252", raw=True)
             df = pd.DataFrame(iter(dbf))
             return df.map(self.decode_column)
@@ -336,6 +357,7 @@ async def stream(
         """Yield the DBF records in chunks of the given size."""
 
         def _get_db():
+            """Open the DBF reader synchronously in a thread."""
             return DBFReader(self.path, encoding="cp1252", raw=True)
 
         dbf_file = await to_thread.run_sync(_get_db)
@@ -371,6 +393,7 @@ async def to_parquet(
                 raise RuntimeError(f"Could not parse {out} to Parquet")
 
         async def _stream_to_single_file():
+            """Stream DBF records and write them to a single Parquet file."""
             dbf_reader = DBFReader(self.path, encoding="cp1252", raw=True)
             total_rows = len(dbf_reader)
             writer = None
@@ -538,6 +561,7 @@ async def stream(
         """Yield the PDF file contents in chunks of the given size."""
 
         def _read():
+            """Read PDF file data synchronously."""
             with open(self.path, "rb") as f:
                 if chunk_size:
                     while chunk := f.read(chunk_size):
@@ -563,6 +587,7 @@ async def list_members(self) -> list[str]:
         """Return the list of member names inside the archive."""
 
         def _list():
+            """List ZIP members synchronously in a thread."""
             with zipfile.ZipFile(self.path) as z:
                 return z.namelist()
 
@@ -572,6 +597,7 @@ async def open_member(self, member_name: str) -> bytes:
         """Read and return the contents of a named archive member."""
 
         def _read():
+            """Read a ZIP member synchronously in a thread."""
             with zipfile.ZipFile(self.path) as z:
                 return z.read(member_name)
 
@@ -588,6 +614,7 @@ async def extract(
         target_dir.mkdir(parents=True, exist_ok=True)
 
         def _extract_sync():
+            """Extract ZIP contents synchronously in a thread."""
             with zipfile.ZipFile(self.path) as z:
                 z.extractall(target_dir)
 
@@ -636,6 +663,7 @@ async def _safe_cleanup(self, directory: Path):
         """Remove a temporary directory and its contents."""
 
         def _cleanup():
+            """Remove directory contents synchronously in a thread."""
             if not directory.exists():
                 return
 
@@ -663,6 +691,7 @@ async def load(self) -> bytes:
         """Decompress and read the entire file contents into memory."""
 
         def _read():
+            """Decompress and read synchronously in a thread."""
             with gzip.open(self.path, "rb") as f:
                 return f.read()
 
@@ -688,6 +717,7 @@ async def extract(
         out_file = target_dir / self.path.stem
 
         def _decompress():
+            """Decompress gzip file synchronously in a thread."""
             with (
                 gzip.open(self.path, "rb") as f_in,
                 open(
@@ -714,6 +744,7 @@ async def list_members(self) -> list[str]:
         """Return the list of member names inside the archive."""
 
         def _list():
+            """List Tar members synchronously in a thread."""
             with tarfile.open(self.path) as t:
                 return t.getnames()
 
@@ -723,6 +754,7 @@ async def open_member(self, member_name: str) -> bytes:
         """Read and return the contents of a named archive member."""
 
         def _read():
+            """Read a Tar member synchronously in a thread."""
             with tarfile.open(self.path) as t:
                 f = t.extractfile(member_name)
                 return f.read() if f else b""
@@ -740,6 +772,7 @@ async def extract(
         members = await self.list_members()
 
         def _extract():
+            """Extract Tar contents synchronously in a thread."""
             with tarfile.open(self.path) as t:
                 t.extractall(target_dir)
 
@@ -802,6 +835,7 @@ def stream(
         """Raise ImportError indicating the missing DBC dependency."""
 
         async def _internal_gen():
+            """Yield nothing; always raises ImportError."""
             raise ImportError(self.import_err)
             yield pd.DataFrame()
 
@@ -862,7 +896,21 @@ async def _identify(cls, path: Path) -> type[BaseLocalFile] | None:
 
     @classmethod
     async def get_file_class(cls, path: Path) -> type[BaseLocalFile]:
-        """Return handler class for path, falling back to extension matching."""
+        """Return the file handler class for a given path.
+
+        First attempts MIME-type identification; falls back to extension
+        matching.
+
+        Parameters
+        ----------
+        path : Path
+            The file path to classify.
+
+        Returns
+        -------
+        type[BaseLocalFile]
+            The handler class for the file type.
+        """
         mime_class = await cls._identify(path)
         if mime_class:
             return mime_class
@@ -873,7 +921,21 @@ async def get_file_class(cls, path: Path) -> type[BaseLocalFile]:
 
     @classmethod
     async def instantiate(cls, path: str | Path) -> BaseLocalFile:
-        """Create and return the appropriate file handler for a given path."""
+        """Create and return the appropriate file handler for a path.
+
+        Determines whether the path is a directory or a file, resolves the
+        handler class, and instantiates it.
+
+        Parameters
+        ----------
+        path : str or Path
+            The filesystem path to wrap in a handler.
+
+        Returns
+        -------
+        BaseLocalFile
+            The instantiated file handler.
+        """
         path = Path(path).expanduser().resolve()
         if await to_thread.run_sync(path.is_dir):
             return Directory(path=path, type="DIR")
diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py
index 3c1b46f..76872e5 100644
--- a/pysus/api/ftp/client.py
+++ b/pysus/api/ftp/client.py
@@ -48,17 +48,35 @@ class FTP(BaseRemoteClient):
 
     @property
     def name(self) -> str:
-        """Return the short name of this client."""
+        """Return the short name of this client.
+
+        Returns
+        -------
+        str
+            The client short name ("FTP").
+        """
         return "FTP"
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable name of this client."""
+        """Return the human-readable name of this client.
+
+        Returns
+        -------
+        str
+            The human-readable client name.
+        """
         return "Pysus FTP Client"
 
     @property
     def description(self) -> str:
-        """Return a description of this client's purpose."""
+        """Return a description of this client's purpose.
+
+        Returns
+        -------
+        str
+            A description string explaining the FTP client's capabilities.
+        """
         return """
             O cliente FTP do pysus foi desenvolvido para fornecer uma interface
             assíncrona e moderna para navegação e extração de dados diretamente
@@ -69,11 +87,23 @@ def description(self) -> str:
 
     @property
     def ftp(self) -> FTPLib | None:
-        """Return the underlying ftplib.FTP, or None if not connected."""
+        """Return the underlying ftplib.FTP, or None if not connected.
+
+        Returns
+        -------
+        FTPLib | None
+            The ftplib.FTP instance, or None if not connected.
+        """
         return self._ftp
 
     async def connect(self) -> None:
-        """Establish the FTP connection to the remote host."""
+        """Establish the FTP connection to the remote host.
+
+        Raises
+        ------
+        Exception
+            Any exception raised by ftplib during connection.
+        """
 
         def _connect():
             if self.ftp is None:
@@ -83,11 +113,28 @@ def _connect():
         await to_thread.run_sync(_connect)
 
     async def login(self, **kwargs) -> None:
-        """Authenticate and connect to the FTP server (alias for connect)."""
+        """Authenticate and connect to the FTP server (alias for connect).
+
+        Parameters
+        ----------
+        **kwargs
+            Accepted for interface compatibility; ignored by this client.
+
+        Raises
+        ------
+        Exception
+            Any exception raised by ftplib during authentication.
+        """
         await self.connect()
 
     async def close(self) -> None:
-        """Close the FTP connection and reset the internal client state."""
+        """Close the FTP connection and reset the internal client state.
+
+        Raises
+        ------
+        Exception
+            Any exception raised by ftplib during disconnection.
+        """
 
         def _close():
             if self.ftp:
@@ -101,7 +148,18 @@ def _close():
         await to_thread.run_sync(_close)
 
     async def datasets(self, **kwargs) -> list[Dataset]:
-        """Return a list of all available dataset instances for this client."""
+        """Return a list of all available dataset instances for this client.
+
+        Returns
+        -------
+        list[:class:`~pysus.api.ftp.models.Dataset`]
+            A list of Dataset instances for all available databases.
+
+        Raises
+        ------
+        ConnectionError
+            If the FTP client is not connected.
+        """
         from .databases import AVAILABLE_DATABASES
 
         if self.ftp is None:
diff --git a/pysus/api/ftp/databases.py b/pysus/api/ftp/databases.py
index c08b09d..f84dcd5 100644
--- a/pysus/api/ftp/databases.py
+++ b/pysus/api/ftp/databases.py
@@ -19,17 +19,35 @@ class CIHA(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym (e.g. "CIHA").
+        """
         return "CIHA"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Comunicação de Internação Hospitalar e Ambulatorial"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return (
             "A CIHA foi criada para ampliar o processo de planejamento, "
             "programação, controle, avaliação e regulação da assistência à "
@@ -38,7 +56,19 @@ def description(self) -> str:
         )
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a CIHA filename into group, state, year and month metadata."""
+        """Parse a CIHA filename into group, state, year and month metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw CIHA filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``, ``month``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             group_code = name[:4]
@@ -88,17 +118,35 @@ class CNES(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("CNES").
+        """
         return "CNES"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Cadastro Nacional de Estabelecimentos de Saúde"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return (
             "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o "
             "sistema de informação oficial de cadastramento de informações "
@@ -106,7 +154,19 @@ def description(self) -> str:
         )
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a CNES filename into group, state, year and month metadata."""
+        """Parse a CNES filename into group, state, year and month metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw CNES filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``, ``month``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             group_code = name[:2]
@@ -145,24 +205,54 @@ class SINASC(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("SINASC").
+        """
         return "SINASC"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informações sobre Nascidos Vivos"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return """
             O SINASC fornece subsídios para o diagnóstico de saúde e
             planejamento de políticas.
         """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SINASC filename into group, state and year metadata."""
+        """Parse a SINASC filename into group, state and year metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw SINASC filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             year_short = name[-2:]
@@ -195,21 +285,51 @@ class SIM(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("SIM").
+        """
         return "SIM"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informação sobre Mortalidade"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return "O SIM coleta dados sobre obitos no pais para analise epidemiologica."  # noqa
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SIM filename into group, state and year metadata."""
+        """Parse a SIM filename into group, state and year metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw SIM filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             if "CID9" in filename:
@@ -242,21 +362,51 @@ class PNI(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("PNI").
+        """
         return "PNI"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Programa Nacional de Imunizações"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return "O SI-PNI monitora a cobertura vacinal e doses aplicadas."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a PNI filename into group, state and year metadata."""
+        """Parse a PNI filename into group, state and year metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw PNI filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             group_code, state, year_short = name[:4], name[4:6], name[-2:]
@@ -296,21 +446,51 @@ class IBGEDATASUS(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("IBGE").
+        """
         return "IBGE"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "População Residente e Projeções (IBGE)"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return "Informações sobre a população residente obtidas de Censos."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse an IBGE filename into group and year metadata."""
+        """Parse an IBGE filename into group and year metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw IBGE filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``year``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             year = name[-2:]
@@ -351,21 +531,51 @@ class SIA(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("SIA").
+        """
         return "SIA"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informações Ambulatoriais"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return "O SIA acompanha as ações de saúde produzidas."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse an SIA filename into group, state, year and month metadata."""
+        """Parse an SIA filename into group, state, year and month metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw SIA filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``, ``month``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             digits = "".join([d for d in name if d.isdigit()])
@@ -409,23 +619,53 @@ class SIH(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("SIH").
+        """
         return "SIH"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informações Hospitalares"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return """
             O SIH processa as internações hospitalares financiadas pelo SUS.
         """
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse an SIH filename into group, state, year and month metadata."""
+        """Parse an SIH filename into group, state, year and month metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw SIH filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``state``, ``year``, ``month``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             group_code = name[:2]
@@ -508,21 +748,51 @@ class SINAN(Dataset):
 
     @property
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym ("SINAN").
+        """
         return "SINAN"
 
     @property
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
         return "Sistema de Informação de Agravos de Notificação"
 
     @property
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose in Portuguese.
+        """
         return "O SINAN é alimentado pela notificação de doenças compulsórias."
 
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a SINAN filename into group and year metadata."""
+        """Parse a SINAN filename into group and year metadata.
+
+        Parameters
+        ----------
+        filename : str
+            The raw SINAN filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dict with keys ``group``, ``year``.
+            On parse failure values are set to None.
+        """
         try:
             name = filename.split(".")[0].upper()
             year_short = name[-2:]
diff --git a/pysus/api/ftp/models.py b/pysus/api/ftp/models.py
index 06bf291..88edb1a 100644
--- a/pysus/api/ftp/models.py
+++ b/pysus/api/ftp/models.py
@@ -28,7 +28,14 @@ class File(BaseRemoteFile):
     _info: FTPFileInfo = PrivateAttr()
 
     def __init__(self, **data):
-        """Initialise the File with raw FTP metadata."""
+        """Initialise the File with raw FTP metadata.
+
+        Parameters
+        ----------
+        **data
+            Keyword arguments for BaseRemoteFile. An optional ``_info``
+            entry holding parsed FTP metadata is popped from them first.
+        """
         info = data.pop("_info", None)
         if "path" not in data and info and "path" in info:
             data["path"] = info["path"]
@@ -47,22 +54,51 @@ def __init__(self, **data):
             )
 
     def __repr__(self) -> str:
-        """Return the file name as its string representation."""
+        """Return the file name as its string representation.
+
+        Returns
+        -------
+        str
+            The file name.
+        """
         return self.name
 
     @property
     def extension(self) -> str:
-        """Return the file extension (e.g. .dbc, .dbf)."""
+        """Return the file extension (e.g. .dbc, .dbf).
+
+        Returns
+        -------
+        str
+            The file extension including the leading dot.
+        """
         return Path(self.path).suffix
 
     @property
     def size(self) -> int:
-        """Return the file size in bytes."""
+        """Return the file size in bytes.
+
+        Returns
+        -------
+        int
+            The file size in bytes.
+        """
         return self._info.get("size", 0)
 
     @property
     def modify(self) -> datetime:
-        """Return the last modification timestamp."""
+        """Return the last modification timestamp.
+
+        Returns
+        -------
+        datetime
+            The file's last modification datetime.
+
+        Raises
+        ------
+        ValueError
+            If no modification date is available.
+        """
         m = self._info.get("modify")
         if not m:
             raise ValueError("File requires a modify date")
@@ -70,17 +106,35 @@ def modify(self) -> datetime:
 
     @property
     def year(self) -> int | None:
-        """Return the data year extracted from the filename, if available."""
+        """Return the data year extracted from the filename, if available.
+
+        Returns
+        -------
+        int | None
+            The year as an integer, or None if not available.
+        """
         return self._info.get("year")
 
     @property
     def month(self) -> int | None:
-        """Return the data month extracted from the filename, if available."""
+        """Return the data month extracted from the filename, if available.
+
+        Returns
+        -------
+        int | None
+            The month as an integer, or None if not available.
+        """
         return self._info.get("month")
 
     @property
     def state(self) -> State | None:
-        """Return the state code extracted from the filename, if available."""
+        """Return the state code extracted from the filename, if available.
+
+        Returns
+        -------
+        State | None
+            The state code, or None if not available.
+        """
         return self._info.get("state", None)
 
     async def _download(
@@ -108,7 +162,21 @@ def __init__(
         formatter: Callable | None = None,
         dataset: Dataset | None = None,
     ):
-        """Initialise the Directory with a remote path and optional context."""
+        """Initialise the Directory with a remote path and optional context.
+
+        Parameters
+        ----------
+        path : str
+            The remote directory path.
+        parent : Directory | Dataset | Group | None, optional
+            The parent directory, dataset or group.
+        client : BaseRemoteClient | None, optional
+            The FTP client instance.
+        formatter : Callable | None, optional
+            A filename formatter function.
+        dataset : Dataset | None, optional
+            The dataset this directory belongs to.
+        """
         self.path = os.path.normpath(path)
         self.parent = parent
         self.dataset = dataset or getattr(parent, "dataset", None)
@@ -120,13 +188,25 @@ def __init__(
 
     @property
     async def content(self) -> list[Directory | File]:
-        """Return the directory contents, loading from FTP if not yet cached."""
+        """Return the directory contents, loading from FTP if not yet cached.
+
+        Returns
+        -------
+        list[Directory | File]
+            The list of files and subdirectories.
+        """
         if not self.loaded:
             await self.load()
         return self._content
 
     async def load(self) -> None:
-        """Fetch and parse the directory listing from the FTP server."""
+        """Fetch and parse the directory listing from the FTP server.
+
+        Raises
+        ------
+        ValueError
+            If the client is not an FTP instance.
+        """
         if not isinstance(self.client, FTP):
             raise ValueError("no ftp client found")
         raw_infos = await self.client._list_directory(
@@ -162,11 +242,23 @@ async def load(self) -> None:
         self.loaded = True
 
     def __str__(self) -> str:
-        """Return the normalised directory path."""
+        """Return the normalised directory path.
+
+        Returns
+        -------
+        str
+            The normalised path string.
+        """
         return self.path
 
     def __repr__(self) -> str:
-        """Return a debug representation of this directory."""
+        """Return a debug representation of this directory.
+
+        Returns
+        -------
+        str
+            A debug string with the directory path.
+        """
         return f"<Directory: {self.path}>"
 
 
@@ -188,7 +280,23 @@ def __init__(
         description: str = "",
         **data: Any,
     ):
-        """Initialise the Group with metadata and a directory reference."""
+        """Initialise the Group with metadata and a directory reference.
+
+        Parameters
+        ----------
+        name : str
+            The group short code.
+        path : str
+            The remote directory path for this group.
+        dataset : Dataset
+            The parent dataset.
+        long_name : str
+            The human-readable group name.
+        description : str, optional
+            A description of the group.
+        **data : Any
+            Additional keyword arguments forwarded to the parent initialiser.
+        """
         data.update({"dataset": dataset, "path": path})
         super().__init__(**data)
 
@@ -205,22 +313,46 @@ def __init__(
 
     @property
     def name(self) -> str:
-        """Return the group short code (e.g. 'RD', 'PA')."""
+        """Return the group short code (e.g. 'RD', 'PA').
+
+        Returns
+        -------
+        str
+            The group short code.
+        """
         return self._name
 
     @property
     def long_name(self) -> str:
-        """Return the human-readable group name."""
+        """Return the human-readable group name.
+
+        Returns
+        -------
+        str
+            The human-readable group name.
+        """
         return self._long_name
 
     @property
     def description(self) -> str:
-        """Return the group description."""
+        """Return the group description.
+
+        Returns
+        -------
+        str
+            The group description.
+        """
         return self._description
 
     @property
     async def content(self) -> list[Directory | File]:
-        """Return the contents of the underlying directory."""
+        """Return the contents of the underlying directory.
+
+        Returns
+        -------
+        list[Directory | File]
+            The directory contents.
+        """
         return await self._dir.content
 
     async def _fetch_files(self) -> list[BaseRemoteFile]:
@@ -238,21 +370,50 @@ class Dataset(BaseRemoteDataset, ABC):
     @property
     @abstractmethod
     def name(self) -> str:
-        """Return the dataset short name."""
+        """Return the dataset short name.
+
+        Returns
+        -------
+        str
+            The dataset acronym.
+        """
 
     @property
     @abstractmethod
     def long_name(self) -> str:
-        """Return the dataset full name in Portuguese."""
+        """Return the dataset full name in Portuguese.
+
+        Returns
+        -------
+        str
+            The full Portuguese name of the dataset.
+        """
 
     @property
     @abstractmethod
     def description(self) -> str:
-        """Return a description of the dataset's purpose."""
+        """Return a description of the dataset's purpose.
+
+        Returns
+        -------
+        str
+            A description of the dataset's purpose.
+        """
 
     @abstractmethod
     def formatter(self, filename: str) -> dict[str, Any]:
-        """Parse a filename into metadata (group, state, year, etc.)."""
+        """Parse a filename into metadata (group, state, year, etc.).
+
+        Parameters
+        ----------
+        filename : str
+            The raw filename to parse.
+
+        Returns
+        -------
+        dict[str, Any]
+            A dictionary of parsed metadata fields.
+        """
 
     async def _fetch_content(
         self,
@@ -289,5 +450,11 @@ async def _fetch_content(
         return results
 
     def __repr__(self) -> str:
-        """Return the dataset short name as its string representation."""
+        """Return the dataset short name as its string representation.
+
+        Returns
+        -------
+        str
+            The dataset short name.
+        """
         return self.name
diff --git a/pysus/api/models.py b/pysus/api/models.py
index 5e883c3..6f9029a 100644
--- a/pysus/api/models.py
+++ b/pysus/api/models.py
@@ -56,6 +56,7 @@ def basename(self) -> str:
         return self.path.name
 
     def __str__(self) -> str:
+        """Return the file's basename as its string representation."""
         return self.basename
 
     @property
@@ -84,6 +85,7 @@ class BaseLocalFile(BaseFile, ABC):
 
     @property
     def name(self) -> str:
+        """Return the file name from the path."""
         return self.path.name
 
     async def get_hash(
@@ -91,12 +93,21 @@ async def get_hash(
     ) -> str:
         """Compute the file's hash digest.
 
-        *algorithm* is the hash algorithm name (default "sha256").
-        *chunk_size* is the read chunk size in bytes.
-        Return the hex digest string.
+        Parameters
+        ----------
+        algorithm : str, optional
+            The hash algorithm name (default ``"sha256"``).
+        chunk_size : int, optional
+            Read chunk size in bytes (default 1 MiB).
+
+        Returns
+        -------
+        str
+            The hex digest string.
         """
 
         def _compute_hash():
+            """Hash the file's contents, reading chunk_size bytes at a time."""
             hash_obj = hashlib.new(algorithm)
             with open(self.path, "rb") as f:
                 while chunk := f.read(chunk_size):
@@ -118,14 +129,17 @@ def stream(
 
     @property
     def extension(self) -> str:
+        """Return the file extension from the local path."""
         return self.path.suffix
 
     @property
     def size(self) -> int:
+        """Return the file size in bytes from the local filesystem."""
         return self.path.stat().st_size
 
     @property
     def modify(self) -> datetime:
+        """Return the last modification timestamp from the local filesystem."""
         return datetime.fromtimestamp(self.path.stat().st_mtime)
 
 
@@ -164,10 +178,21 @@ async def to_parquet(
     ) -> Parquet:
         """Convert the file to Parquet format.
 
-        *output_path* is the destination path; defaults to the source path
-        with a .parquet extension.  *chunk_size* controls the streaming chunk
-        size.  *callback* receives (current_rows, total_rows) after each chunk.
-        Return the resulting Parquet wrapper object.
+        Parameters
+        ----------
+        output_path : str or Path, optional
+            Destination path for the Parquet file. Defaults to the source
+            path with a ``.parquet`` extension.
+        chunk_size : int, optional
+            Number of rows per streaming chunk (default 10 000).
+        callback : Callable[[int, int], None], optional
+            Function called after each chunk with
+            ``(current_rows, total_rows)``.
+
+        Returns
+        -------
+        Parquet
+            The resulting Parquet wrapper object.
         """
         from pysus.api.extensions import ExtensionFactory, Parquet
 
@@ -273,7 +298,7 @@ class SearchableMixin:
     """Mixin providing attribute-based filtering for remote objects."""
 
     def _matches(self, obj: Any, **kwargs) -> bool:
-        """Return True if all *kwargs* attributes match on *obj*."""
+        """Return True if all *kwargs* attributes equal those on *obj*."""
         for key, value in kwargs.items():
             obj_value = getattr(obj, key, None)
             if obj_value != value:
@@ -293,6 +318,7 @@ class BaseRemoteFile(BaseFile, SearchableMixin, ABC):
 
     @property
     def name(self) -> str:
+        """Return the basename as the display name."""
         return self.basename
 
     @property
@@ -364,6 +390,7 @@ class BaseRemoteObject(BaseModel, ABC):
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def __str__(self) -> str:
+        """Return the short name as the string representation."""
         return self.name
 
     @property
diff --git a/pysus/api/utils.py b/pysus/api/utils.py
index 1e82735..1a7c0f7 100644
--- a/pysus/api/utils.py
+++ b/pysus/api/utils.py
@@ -17,6 +17,19 @@ def is_geocode_column(name: str) -> bool:
 
 
 def add_dv(geocode: str) -> str:
+    """Add the IBGE check digit to a municipality code.
+
+    Parameters
+    ----------
+    geocode : str
+        The municipality code (6 or 7 digits).
+
+    Returns
+    -------
+    str
+        The code with the check digit appended, or the original
+        string if it cannot be processed.
+    """
     if not geocode or not str(geocode).isdigit():
         return geocode