From 2b7c49a5b8b203633f4876e39b02e2f9b05a9b97 Mon Sep 17 00:00:00 2001 From: kalyanamdewri Date: Thu, 2 Jul 2026 23:16:42 -0700 Subject: [PATCH] GH-30800: [Python][Docs] Document explicit partition schemas --- docs/source/python/dataset.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst index 4e18ea0a51c..0c51458c8e6 100644 --- a/docs/source/python/dataset.rst +++ b/docs/source/python/dataset.rst @@ -374,6 +374,24 @@ altogether if they do not match the filter: 3 8 0.313068 1 b 4 9 -0.854096 2 b +When passing an explicit ``schema`` to :func:`dataset`, include the partition +fields in the schema if they are used in filters or projections. The partition +values come from the file paths, not from the data files, so the fields must be +declared in the dataset schema when schema inference is bypassed: + +.. code-block:: python + + >>> schema = pa.schema([ + ... ("a", pa.int64()), + ... ("b", pa.float64()), + ... ("c", pa.int64()), + ... ("part", pa.string()), + ... ]) + >>> dataset = ds.dataset("parquet_dataset_partitioned", format="parquet", + ... partitioning="hive", schema=schema) + >>> dataset.count_rows(filter=ds.field("part") == "b") + 5 + Different partitioning schemes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~