From bc5d6811f63397cd74362ffb4ba5336f526f4b01 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Tue, 28 Apr 2026 15:35:50 -0700 Subject: [PATCH 1/8] chore: enable testmon for incremental test runs pytest-testmon tracks which tests cover which source files and skips unaffected tests on subsequent runs. Activated via a TESTMON Makefile variable so the default `make check` uses incremental selection while `make check TESTMON=` runs the full suite. Lock the dependency in the dev group, gitignore the local cache file, and thread $(TESTMON) through the test, test-all, and test-only targets. Signed-off-by: Seth Fitzsimmons --- .gitignore | 1 + Makefile | 8 +++++--- pyproject.toml | 1 + uv.lock | 15 +++++++++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 6a40dfafd..26c73e5f7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ docs/docusaurus __pycache__/ .coverage +.testmondata* diff --git a/Makefile b/Makefile index edea9b34d..a88a9917e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ .PHONY: default uv-sync check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines +TESTMON ?= --testmon + default: test-all install: uv-sync @@ -11,13 +13,13 @@ check: uv-sync @$(MAKE) -j test-only doctest-only lint-only mypy-only test-all: uv-sync - @uv run pytest -W error packages/ + @uv run pytest -W error $(TESTMON) packages/ test: uv-sync - @uv run pytest -W error packages/ -x -q --tb=short + @uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short test-only: - @uv run pytest -W error packages/ -x -q --tb=short + @uv run pytest -W error $(TESTMON) packages/ -x -q --tb=short coverage: uv-sync @uv run pytest packages/ --cov overture.schema --cov-report=term --cov-report=html && open htmlcov/index.html diff --git a/pyproject.toml b/pyproject.toml index 6046d76a6..c21f4bc17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dev = [ "pydocstyle>=6.3.0", 
"pytest>=9.0.0", "pytest-cov>=7.0.0", + "pytest-testmon>=2.2.0", "ruff>=0.13.0", ] diff --git a/uv.lock b/uv.lock index 0bbdf1cda..e22235af7 100644 --- a/uv.lock +++ b/uv.lock @@ -934,6 +934,7 @@ dev = [ { name = "pydocstyle" }, { name = "pytest" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "ruff" }, ] @@ -946,6 +947,7 @@ dev = [ { name = "pydocstyle", specifier = ">=6.3.0" }, { name = "pytest", specifier = ">=9.0.0" }, { name = "pytest-cov", specifier = ">=7.0.0" }, + { name = "pytest-testmon", specifier = ">=2.2.0" }, { name = "ruff", specifier = ">=0.13.0" }, ] @@ -1191,6 +1193,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] +[[package]] +name = "pytest-testmon" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/1d/3e4230cc67cd6205bbe03c3527500c0ccaf7f0c78b436537eac71590ee4a/pytest_testmon-2.2.0.tar.gz", hash = "sha256:01f488e955ed0e0049777bee598bf1f647dd524e06f544c31a24e68f8d775a51", size = 23108, upload-time = "2025-12-01T07:30:24.76Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/55/ebb3c2f59fb089f08d00f764830d35780fc4e4c41dffcadafa3264682b65/pytest_testmon-2.2.0-py3-none-any.whl", hash = "sha256:2604ca44a54d61a2e830d9ce828b41a837075e4ebc1f81b148add8e90d34815b", size = 25199, upload-time = "2025-12-01T07:30:23.623Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" From 5c792399f1b207e540bba71f163298b736fa95d3 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 12:12:59 -0700 Subject: [PATCH 2/8] refactor(core): extract VehicleSelectorBase Pull the shared `dimension` and `comparison` 
fields of the five vehicle selector subtypes into a `VehicleSelectorBase` parent, and thread `discriminator="dimension"` through the `VehicleSelector` annotated union. The discriminator turns the union into a Pydantic discriminated union, so it serializes as JSON Schema's `oneOf` + `discriminator` rather than `anyOf`. Regenerated segment_baseline_schema.json captures the new shape. This is a prerequisite for downstream tooling that walks discriminated unions structurally (e.g. PySpark codegen for segment's nested vehicle scoping). Signed-off-by: Seth Fitzsimmons --- .../overture/schema/common/scoping/vehicle.py | 46 +++++++++--------- .../tests/segment_baseline_schema.json | 48 +++++++++++++++---- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py b/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py index 801d35aa3..287b25c55 100644 --- a/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py +++ b/packages/overture-schema-common/src/overture/schema/common/scoping/vehicle.py @@ -38,24 +38,30 @@ class VehicleRelation(str, Enum): @no_extra_fields -class VehicleAxleCountSelector(BaseModel): +class VehicleSelectorBase(BaseModel): """ - Selects vehicles based on the number of axles they have. + Common fields shared by all vehicle selector subtypes. + + See also: `VehicleSelector`. """ - dimension: Literal[VehicleDimension.AXLE_COUNT] + dimension: VehicleDimension comparison: VehicleRelation + + +@no_extra_fields +class VehicleAxleCountSelector(VehicleSelectorBase): + """Selects vehicles based on the number of axles they have.""" + + dimension: Literal[VehicleDimension.AXLE_COUNT] value: uint8 = Field(description="Number of axles on the vehicle") @no_extra_fields -class VehicleHeightSelector(BaseModel): - """ - Selects vehicles based on their height. 
- """ +class VehicleHeightSelector(VehicleSelectorBase): + """Selects vehicles based on their height.""" dimension: Literal[VehicleDimension.HEIGHT] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -66,13 +72,10 @@ class VehicleHeightSelector(BaseModel): @no_extra_fields -class VehicleLengthSelector(BaseModel): - """ - Selects vehicles based on their length. - """ +class VehicleLengthSelector(VehicleSelectorBase): + """Selects vehicles based on their length.""" dimension: Literal[VehicleDimension.LENGTH] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -83,13 +86,10 @@ class VehicleLengthSelector(BaseModel): @no_extra_fields -class VehicleWeightSelector(BaseModel): - """ - Selects vehicles based on their weight. - """ +class VehicleWeightSelector(VehicleSelectorBase): + """Selects vehicles based on their weight.""" dimension: Literal[VehicleDimension.WEIGHT] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -100,13 +100,10 @@ class VehicleWeightSelector(BaseModel): @no_extra_fields -class VehicleWidthSelector(BaseModel): - """ - Selects vehicles based on their width. - """ +class VehicleWidthSelector(VehicleSelectorBase): + """Selects vehicles based on their width.""" dimension: Literal[VehicleDimension.WIDTH] - comparison: VehicleRelation value: Annotated[ float64, Field( @@ -123,7 +120,8 @@ class VehicleWidthSelector(BaseModel): | VehicleWeightSelector | VehicleWidthSelector, Field( - description="Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ discriminator="dimension", + description="Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", ), ] """ diff --git a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json index 4ec108313..20f144003 100644 --- a/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json +++ b/packages/overture-schema-transportation-theme/tests/segment_baseline_schema.json @@ -1862,7 +1862,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing AccessRestrictionRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -1878,8 +1889,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ ] }, "minItems": 1, "title": "Vehicle", @@ -2025,7 +2035,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing ProhibitedTransitionRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -2041,8 +2062,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." + ] }, "minItems": 1, "title": "Vehicle", @@ -2173,7 +2193,18 @@ "vehicle": { "description": "A list of one or more vehicle parameters that limit the vehicles the containing SpeedLimitRule applies to.", "items": { - "anyOf": [ + "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count.", + "discriminator": { + "mapping": { + "axle_count": "#/$defs/VehicleAxleCountSelector", + "height": "#/$defs/VehicleHeightSelector", + "length": "#/$defs/VehicleLengthSelector", + "weight": "#/$defs/VehicleWeightSelector", + "width": "#/$defs/VehicleWidthSelector" + }, + "propertyName": "dimension" + }, + "oneOf": [ { "$ref": "#/$defs/VehicleAxleCountSelector" }, @@ -2189,8 +2220,7 @@ { "$ref": "#/$defs/VehicleWidthSelector" } - ], - "description": "Selects vehicles that a scope applies to based on criteria such as height, weight, or axle count." 
+ ] }, "minItems": 1, "title": "Vehicle", From 8b4e0f532f1cd01d458eb515e09fb62fc4ec4dd3 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 23:58:32 -0700 Subject: [PATCH 3/8] chore(themes): refresh example data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the Tonga-based Division/DivisionArea/DivisionBoundary fixtures with Kauaʻi County samples that exercise admin_level, capital_division_ids, wikidata, and source license alongside the existing fields. Replace the Tonga-based Connector/Segment fixtures with a Vermooten Street junction in Pretoria that exercises access_restrictions with when.vehicle, speed_limits with when.heading, routes with ref, road_surface, and multi-source attribution. Reformat the TOML with 4-space indents and sorted keys to match sibling theme packages. Signed-off-by: Seth Fitzsimmons --- .../pyproject.toml | 143 +++++++++-------- .../pyproject.toml | 151 ++++++++++++++---- 2 files changed, 203 insertions(+), 91 deletions(-) diff --git a/packages/overture-schema-divisions-theme/pyproject.toml b/packages/overture-schema-divisions-theme/pyproject.toml index 0314d8d1b..ccec15fd7 100644 --- a/packages/overture-schema-divisions-theme/pyproject.toml +++ b/packages/overture-schema-divisions-theme/pyproject.toml @@ -1,6 +1,6 @@ [project] maintainers = [ - {name = "Overture Maps Schema Working Group"}, + { name = "Overture Maps Schema Working Group" }, ] dependencies = [ "overture-schema-common", @@ -42,111 +42,126 @@ division_boundary = "overture.schema.divisions:DivisionBoundary" overture_baselines = "overture.schema.system.testing.plugin" [[examples.Division]] -id = "350e85f6-68ba-4114-9906-c2844815988b" -geometry = "POINT (-175.2551522 -21.1353686)" -country = "TO" -version = 1 -subtype = "locality" -class = "village" -region = "TO-04" +admin_level = 2 +capital_division_ids = ["958c67d3-868a-482d-9dd8-1c65b0c1a9e8"] +country = "US" +geometry = "POINT (-159.4945109 22.0557204)" 
hierarchies = [ - [ - {division_id = "fef8748b-0c91-46ad-9f2d-976d8d2de3e9", subtype = "country", name = "Tonga"}, - {division_id = "4d67561a-2292-41bd-8996-7853d276a42c", subtype = "region", name = "Tongatapu"}, - {division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44", subtype = "county", name = "Vahe Kolomotu'a"}, - {division_id = "350e85f6-68ba-4114-9906-c2844815988b", subtype = "locality", name = "Sia'atoutai"}, - ], + [ + { division_id = "f39eb4af-5206-481b-b19e-bd784ded3f05", subtype = "country", name = "United States" }, + { division_id = "a2a08395-e968-4be5-bdd7-7f63db8c2165", subtype = "region", name = "Hawaii" }, + { division_id = "c9b8adc9-4639-4392-8efe-8401eeb19929", subtype = "county", name = "Kauaʻi County" }, + ], ] -parent_division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44" -population = 534 +id = "c9b8adc9-4639-4392-8efe-8401eeb19929" +parent_division_id = "a2a08395-e968-4be5-bdd7-7f63db8c2165" +population = 71735 +region = "US-HI" +subtype = "county" theme = "divisions" type = "division" +version = 5 +wikidata = "Q111517" [examples.Division.bbox] -xmin = -175.25515747070312 -xmax = -175.255126953125 -ymin = -21.1353702545166 -ymax = -21.13536834716797 +xmax = -159.4945068359375 +xmin = -159.49453735351562 +ymax = 22.055721282958984 +ymin = 22.05571746826172 [[examples.Division.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "n3173231082@4" -update_time = "2014-12-18T09:17:03Z" - -[examples.Division.cartography] -prominence = 29 +license = "ODbL-1.0" +property = "" +record_id = "r166560@15" +update_time = "2025-08-16T14:25:55Z" [examples.Division.names] -primary = "Sia'atoutai" +primary = "Kauaʻi County" + +[examples.Division.names.common] +haw = "kalana Kauaʻi" [[examples.Division.names.rules]] +value = "Kauai County" variant = "alternate" -value = "Nafualu" [examples.Division.local_type] -en = "village" +en = "county" [[examples.DivisionArea]] -id = "eb9b112f-ec3c-47f7-b519-6f9f2e6fc2bd" -geometry = "MULTIPOLYGON (((-174.9553949 
-21.4730179, -174.9514163 -21.4719978, -174.9520108 -21.4681253, -174.9566122 -21.4687535, -174.9553949 -21.4730179)), ((-174.9634398 -21.3476807, -174.9753507 -21.3833656, -174.9702168 -21.4037277, -174.950488 -21.4269887, -174.9082983 -21.4577763, -174.9004303 -21.4398142, -174.9048159 -21.3698688, -174.9165467 -21.3035402, -174.9126977 -21.2903268, -174.9199765 -21.2834922, -174.9634398 -21.3476807)))" -country = "TO" -version = 2 -subtype = "region" +admin_level = 2 class = "land" +country = "US" +division_id = "c9b8adc9-4639-4392-8efe-8401eeb19929" +geometry = "MULTIPOLYGON (((-160.5408313 21.6535414, -160.544491 21.6514764, -160.5369253 21.6495783, -160.5408313 21.6535414)), ((-160.0921364 22.005401, -160.1251483 21.9547066, -160.2260792 21.8900136, -160.2470031 21.841157, -160.2339532 21.7921394, -160.2022486 21.7794359, -160.1588705 21.864646, -160.0737812 21.8957962, -160.0826438 21.9302968, -160.05025 21.9848306, -160.0921364 22.005401)), ((-160.0881165 22.0243325, -160.1005193 22.0287469, -160.1026828 22.0166419, -160.0881165 22.0243325)), ((-159.7508422 21.9762242, -159.6679712 21.9531973, -159.604686 21.8923573, -159.5913126 21.9041661, -159.443465 21.8684403, -159.3461084 21.9373297, -159.3624768 21.952083, -159.3309486 21.9590284, -159.3357151 22.0450997, -159.2964497 22.1053479, -159.2926896 22.1434001, -159.335189 22.2040351, -159.4024533 22.2327054, -159.4282799 22.2164343, -159.4864746 22.2298148, -159.5060386 22.2031539, -159.5808829 22.2237075, -159.7228708 22.1497995, -159.7443872 22.099889, -159.7826601 22.0669155, -159.7849578 22.0149525, -159.7508422 21.9762242)))" +id = "109cfa53-bb13-4e37-aeb0-14f9a08c737d" is_land = true is_territorial = false -region = "TO-01" -division_id = "21597af0-b564-463c-a356-42c29e712b7d" +region = "US-HI" +subtype = "county" theme = "divisions" type = "division_area" +version = 7 [examples.DivisionArea.bbox] -xmin = -174.97535705566406 -xmax = -174.90040588378906 -ymin = -21.473018646240234 -ymax = 
-21.283489227294922 +xmax = -159.29266357421875 +xmin = -160.54449462890625 +ymax = 22.23270606994629 +ymin = 21.649578094482422 [[examples.DivisionArea.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r7247527@3" -update_time = "2020-12-30T18:41:56Z" +license = "ODbL-1.0" +property = "" +record_id = "r166560@15" +update_time = "2025-08-16T14:25:55Z" [examples.DivisionArea.names] -primary = "ʻEua" +primary = "Kauaʻi County" + +[examples.DivisionArea.names.common] +haw = "kalana Kauaʻi" + +[[examples.DivisionArea.names.rules]] +value = "Kauai County" +variant = "alternate" [[examples.DivisionBoundary]] -id = "2bdf68e4-860d-3d8c-a472-ccf439a5302a" -geometry = "LINESTRING (-147.064823 -15.4231537, -147.0519131 -15.2885069, -147.048482 -15.1511701)" -country = "PF" -version = 1 -subtype = "county" -class = "maritime" -is_land = false -is_territorial = true +admin_level = 2 +class = "land" +country = "US" division_ids = [ - "ae266459-63a4-4508-8295-0101e27d039b", - "d4a6873d-885a-4f2a-bc0f-37e9d9e874e4" + "a546a5ef-ba43-44ce-a7c3-1999b005b20f", + "0554d43c-de68-433c-9a6c-db58161c8b4a", ] +geometry = "LINESTRING (-157.0195397035514 21.183903480793326, -157.0201257 21.180663, -157.0084983 21.1786971, -157.0053877 21.1769627, -157.0060747 21.1738084, -157.0003054 21.1722773, -156.9809248 21.1746356, -156.9710241 21.172431, -156.9670723 21.1613703, -156.9550334 21.1531141, -156.9552315 21.1564667, -156.9607515 21.1621168, -156.9584334 21.162083, -156.9598705 21.1636745, -156.958371 21.1708964, -156.9602176 21.1762534, -156.9577173 21.1760764, -156.9556416 21.1715505, -156.9554893 21.157221, -156.9528625 21.1523977, -156.9442066 21.1504038, -156.9389218 21.1568278, -156.938089 21.1530661, -156.9348977 21.15157, -156.9283241 21.1419085, -156.9262384 21.1360023, -156.9186457 21.1279636, -156.9124605 21.1262614, -156.8975854 21.1285336, -156.8979892 21.1337635, -156.8958138 21.1376515, -156.8970853 21.1421243, -156.8929598 21.1451006, -156.8928438 21.1467406, 
-156.897116 21.1457141, -156.9036748 21.1485179, -156.9059125 21.1527496, -156.910528 21.1563906, -156.917332 21.1590391, -156.9140393 21.1691869)" +id = "321ed3ec-23c2-376e-a4fd-0df484cb99ca" is_disputed = false +is_land = true +is_territorial = false +region = "US-HI" +subtype = "county" theme = "divisions" type = "division_boundary" +version = 2 [examples.DivisionBoundary.bbox] -xmin = -147.06483459472656 -xmax = -147.04847717285156 -ymin = -15.4231538772583 -ymax = -15.151169776916504 +xmax = -156.89283752441406 +xmin = -157.02012634277344 +ymax = 21.18390655517578 +ymin = 21.12626075744629 [[examples.DivisionBoundary.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r6063055@9" -update_time = "2023-07-20T00:28:40Z" +license = "ODbL-1.0" +property = "" +record_id = "r166564@15" +update_time = "2025-02-21T16:35:23Z" [[examples.DivisionBoundary.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "r6063063@12" -update_time = "2023-07-20T00:28:40Z" +license = "ODbL-1.0" +property = "" +record_id = "r166561@26" +update_time = "2025-08-16T14:01:24Z" diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 547b54401..51614e4ae 100644 --- a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -1,6 +1,6 @@ [project] maintainers = [ - {name = "Overture Maps Schema Working Group"}, + { name = "Overture Maps Schema Working Group" }, ] dependencies = [ "overture-schema-common", @@ -41,62 +41,159 @@ segment = "overture.schema.transportation:Segment" [project.entry-points.pytest11] overture_baselines = "overture.schema.system.testing.plugin" +# Connector: Vermooten Street junction, Pretoria, South Africa (2026-02-18.0) [[examples.Connector]] -id = "39542bee-230f-4b91-b7e5-a9b58e0c59b1" -geometry = "POINT (-176.5472979 -43.9679472)" -version = 1 +geometry = "POINT (30.048398 -25.708697)" 
+id = "73a46c48-dc5a-4162-b9c8-1643298784c3" theme = "transportation" type = "connector" +version = 1 [examples.Connector.bbox] -xmin = -176.54730224609375 -xmax = -176.54727172851562 -ymin = -43.96794891357422 -ymax = -43.96794128417969 +xmax = 30.04840087890625 +xmin = 30.048397064208984 +ymax = -25.708696365356445 +ymin = -25.70870018005371 [[examples.Connector.sources]] -property = "" dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "n252436807@6" +update_time = "2025-01-06T20:44:06Z" +# Road segment: Vermooten Street / R33, Pretoria, South Africa (2026-02-18.0) +# Populates access_restrictions with when.vehicle, speed_limits with +# when.heading, routes with ref, road_surface, and names. [[examples.Segment]] -id = "1bc62f3b-08b5-42b8-89fe-36f685f60455" -geometry = "LINESTRING (-176.5636191 -43.954404, -176.5643637 -43.9538145, -176.5647264 -43.9535274, -176.5649947 -43.953251)" -version = 1 +class = "primary" +geometry = "LINESTRING (30.048398 -25.708697, 30.0485458 -25.708892, 30.0487074 -25.7090728, 30.0488875 -25.709252, 30.049138 -25.7094697, 30.0493666 -25.7096603, 30.0497209 -25.7099369, 30.0509508 -25.710904, 30.0511567 -25.7110786, 30.0515268 -25.7113855, 30.0518399 -25.711661, 30.052143 -25.7119631, 30.0523513 -25.7121619, 30.0526875 -25.7124827, 30.0531992 -25.7129799, 30.0535874 -25.7133575)" +id = "621e0a00-9466-4c3f-bb4a-64a83cd7a934" subtype = "road" -class = "residential" theme = "transportation" type = "segment" +version = 4 [examples.Segment.bbox] -xmin = -176.5650177001953 -xmax = -176.56361389160156 -ymin = -43.954410552978516 -ymax = -43.953250885009766 +xmax = 30.05359 +xmin = 30.048397 +ymax = -25.708696 +ymin = -25.713358 [[examples.Segment.sources]] -property = "" dataset = "OpenStreetMap" -record_id = "w53435546@6" -update_time = "2021-05-03T06:37:03Z" +license = "ODbL-1.0" +property = "/routes" +record_id = "r1808544@180" + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = "ODbL-1.0" 
+property = "/routes" +record_id = "r1808545@177" + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "w338134264@15" +update_time = "2025-04-21T09:53:35Z" [examples.Segment.names] -primary = "Meteorological Lane" +primary = "Vermooten Street" [[examples.Segment.names.rules]] +value = "Vermooten Street" variant = "common" -value = "Meteorological Lane" [[examples.Segment.connectors]] -connector_id = "15b2c131-9137-4add-88c6-2acd3fa61355" at = 0.0 +connector_id = "73a46c48-dc5a-4162-b9c8-1643298784c3" + +[[examples.Segment.connectors]] +at = 0.154695182 +connector_id = "e81188ed-9b2f-48b4-99f2-d894044d88f5" [[examples.Segment.connectors]] -connector_id = "23ae2702-ef77-4d2e-b39d-77360b696d20" -at = 0.523536154 +at = 0.483463065 +connector_id = "11124794-8830-4aff-bd09-f578d3a196b1" + +[[examples.Segment.connectors]] +at = 0.753014135 +connector_id = "cfda5f80-ffe6-4b5c-b219-4f208e0a3832" [[examples.Segment.connectors]] -connector_id = "8e944ce1-4b81-49eb-a823-7d98779c855c" at = 1.0 +connector_id = "a5871213-947e-4342-b486-f560f0ac22f3" + +[[examples.Segment.access_restrictions]] +access_type = "denied" + +[examples.Segment.access_restrictions.when] + +[[examples.Segment.access_restrictions.when.vehicle]] +comparison = "greater_than" +dimension = "height" +unit = "m" +value = 5.2 + +[[examples.Segment.speed_limits]] + +[examples.Segment.speed_limits.max_speed] +unit = "km/h" +value = 60 + +[examples.Segment.speed_limits.when] +heading = "forward" [[examples.Segment.road_surface]] -value = "gravel" +value = "paved" + +[[examples.Segment.routes]] +name = "R33 (northbound)" +network = "za:regional" +ref = "R33" + +[[examples.Segment.routes]] +name = "R33 (southbound)" +network = "za:regional" +ref = "R33" + +# Rail segment: disused railway, Mpulungu, Zambia (2026-02-18.0) +# Populates rail_flags with values to cover the rail_flags[].values xfail. 
+[[examples.Segment]] +class = "unknown" +geometry = "LINESTRING (30.9844394 -12.7185733, 30.9818611 -12.7207838, 30.9815908 -12.7210751)" +id = "2a9415ed-fa07-4734-9d8e-1d8ff69451c2" +subtype = "rail" +theme = "transportation" +type = "segment" +version = 1 + +[examples.Segment.bbox] +xmax = 30.98444 +xmin = 30.98159 +ymax = -12.718572 +ymin = -12.721077 + +[[examples.Segment.sources]] +dataset = "OpenStreetMap" +license = "ODbL-1.0" +property = "" +record_id = "w414442537@2" +update_time = "2026-02-05T14:25:06Z" + +[[examples.Segment.connectors]] +at = 0.0 +connector_id = "2da12352-29c5-479e-932f-68fbe90c8229" + +[[examples.Segment.connectors]] +at = 0.895049489 +connector_id = "feed87bb-7abf-4254-9e14-efd6bdb3e428" + +[[examples.Segment.connectors]] +at = 1.0 +connector_id = "e37ca4ff-ab09-4c84-8d3c-450c703d7308" + +[[examples.Segment.rail_flags]] +values = ["is_disused"] From 5843a254281a8bc8770cd3e7bc3bc82468146e39 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Thu, 7 May 2026 12:14:09 -0700 Subject: [PATCH 4/8] feat(pyspark): introduce package with codegen pipeline and CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce overture-schema-pyspark, a runtime PySpark validation package whose per-feature expression modules and conformance tests are generated from the same Pydantic models that define the schema, along with an `overture-validate` CLI. Runtime (overture-schema-pyspark/src/overture/schema/pyspark/): - check.py — Check, CheckShape, FeatureValidation dataclasses. - schema_check.py — write-first comparison of Spark schemas against an expected StructType, with structural type matching and SchemaMismatch reporting. - validate.py — public API: validate_feature(), evaluate_checks(), explain_errors(). The explain stage UNPIVOTs per-row check results into one row per violation, preserving all input columns for downstream join-back. 
- cli.py — `overture-validate` runs the validation pipeline against a path of GeoParquet files. Output is one row per violation: feature ID, theme/type, failing field, check name, offending value. Single-pass evaluation keeps memory bounded for arbitrarily large inputs. - expressions/ — shared runtime utilities (constraint_expressions, column_patterns, _schema_structs). Per-feature expression modules live under expressions/overture/ and are added by the codegen in a follow-up commit. - tests/_support/ — conformance test infrastructure (scenarios, harness, helpers, mutations). The harness builds one DataFrame per feature, applies all scenarios as deterministic-UUID-tagged rows, runs validation once, and indexes violations back to scenario IDs — O(checks) rather than O(checks * scenarios). CLI filtering options: --theme limit to one theme --feature limit to one feature type --skip-schema-check run only constraint checks (no schema comparison) --count-only print violation counts per check rather than rows --suppress suppress specific (feature, field, check) triples per a YAML config Codegen pipeline (overture-schema-codegen/src/.../pyspark/): FeatureSpec | constraint_dispatch.py map constraints to descriptors | check_builder.py walk FieldSpec -> CheckNode IR; resolve array nesting, variant gating | schema_builder.py FieldSpec -> SchemaField list (StructType source) | renderer.py CheckNode -> per-feature expression module test_renderer.py CheckNode -> per-feature conformance test module synthetic.py FeatureSpec -> BASE_ROW + invalid values | pipeline.py orchestrate, return GeneratedModule list The dispatch tables map every supported constraint (Ge/Gt/Le/Lt/Interval, MinLen/MaxLen, StrippedConstraint, PatternConstraint, UniqueItemsConstraint, GeometryTypeConstraint, JsonPointerConstraint, RequireAnyOfConstraint, RadioGroupConstraint, RequireIfConstraint, ForbidIfConstraint, MinFieldsSetConstraint), NewType (CountryCodeAlpha2, LinearlyReferencedRange, RegionCode), 
and base type (HttpUrl, EmailStr) to constraint_expressions check functions. Discriminated unions (segment is the canonical hard case) split into per-arm test files. The codegen handles arm splitting via generate_arm_rows in synthetic.py and _filter_field_nodes_for_arm in test_renderer.py. The Makefile gains a `generate-pyspark` target and gates `check` on it so a stale generation surfaces immediately. The CLI is exposed as a `[project.scripts]` entry point so `overture-validate` becomes available after `pip install` / `uv sync`. Signed-off-by: Seth Fitzsimmons --- Makefile | 19 +- .../overture-schema-codegen/docs/design.md | 326 ++- .../docs/walkthrough.md | 434 ++-- .../overture-schema-codegen/pyproject.toml | 16 + .../src/overture/schema/codegen/cli.py | 53 +- .../codegen/extraction/case_conversion.py | 41 - .../schema/codegen/extraction/field.py | 172 ++ .../codegen/extraction/field_constraints.py | 26 +- .../schema/codegen/extraction/field_walk.py | 215 ++ .../codegen/extraction/length_constraints.py | 47 + .../codegen/extraction/model_extraction.py | 239 ++- .../codegen/extraction/newtype_extraction.py | 29 +- .../codegen/extraction/numeric_extraction.py | 32 +- .../schema/codegen/extraction/specs.py | 106 +- .../codegen/extraction/type_analyzer.py | 712 ++++--- .../codegen/extraction/type_registry.py | 112 +- .../codegen/extraction/union_extraction.py | 143 +- .../schema/codegen/layout/module_layout.py | 19 +- .../schema/codegen/layout/type_collection.py | 144 +- .../codegen/markdown/link_computation.py | 5 +- .../codegen/markdown/path_assignment.py | 13 +- .../schema/codegen/markdown/pipeline.py | 28 +- .../schema/codegen/markdown/renderer.py | 45 +- .../codegen/markdown/reverse_references.py | 131 +- .../schema/codegen/markdown/type_format.py | 400 ++-- .../schema/codegen/pyspark/__init__.py | 1 + .../schema/codegen/pyspark/_render_common.py | 265 +++ .../schema/codegen/pyspark/check_builder.py | 699 ++++++ .../schema/codegen/pyspark/check_ir.py | 83 + 
.../codegen/pyspark/constraint_dispatch.py | 509 +++++ .../schema/codegen/pyspark/pipeline.py | 256 +++ .../schema/codegen/pyspark/renderer.py | 647 ++++++ .../schema/codegen/pyspark/schema_builder.py | 183 ++ .../templates/_check_function.py.jinja2 | 10 + .../templates/feature_module.py.jinja2 | 83 + .../pyspark/templates/test_module.py.jinja2 | 124 ++ .../codegen/pyspark/test_data/__init__.py | 9 + .../codegen/pyspark/test_data/base_row.py | 648 ++++++ .../pyspark/test_data/invalid_value.py | 129 ++ .../codegen/pyspark/test_data/scaffold.py | 264 +++ .../schema/codegen/pyspark/test_renderer.py | 423 ++++ .../tests/codegen_test_support.py | 148 +- .../overture-schema-codegen/tests/test_cli.py | 56 + .../tests/test_constraint_description.py | 32 +- .../tests/test_example_loader.py | 24 +- .../tests/test_field_walk.py | 164 ++ .../tests/test_golden_markdown.py | 16 +- .../tests/test_integration_real_models.py | 47 +- .../tests/test_markdown_renderer.py | 54 +- .../tests/test_markdown_type_format.py | 202 +- .../tests/test_model_extraction.py | 43 + .../tests/test_model_extractor.py | 119 +- .../tests/test_newtype_extraction.py | 21 +- .../tests/test_numeric_extraction.py | 16 +- .../tests/test_pyspark_base_row.py | 319 +++ .../tests/test_pyspark_check_builder.py | 1877 +++++++++++++++++ .../tests/test_pyspark_constraint_dispatch.py | 385 ++++ .../tests/test_pyspark_e2e.py | 206 ++ .../tests/test_pyspark_invalid_value.py | 175 ++ .../tests/test_pyspark_pipeline.py | 391 ++++ .../tests/test_pyspark_renderer.py | 1097 ++++++++++ .../tests/test_pyspark_scaffold.py | 245 +++ .../tests/test_pyspark_schema_builder.py | 213 ++ .../tests/test_pyspark_test_renderer.py | 880 ++++++++ .../tests/test_reverse_references.py | 51 +- .../tests/test_specs.py | 222 +- .../tests/test_type_analyzer.py | 927 ++++---- .../tests/test_type_collection.py | 16 +- .../tests/test_type_placement.py | 6 +- .../tests/test_type_registry.py | 141 +- .../tests/test_union_extraction.py | 53 +- 
packages/overture-schema-pyspark/README.md | 238 +++ .../overture-schema-pyspark/pyproject.toml | 27 + .../src/overture/__init__.py | 1 + .../src/overture/schema/__init__.py | 1 + .../src/overture/schema/pyspark/__about__.py | 1 + .../src/overture/schema/pyspark/__init__.py | 27 + .../src/overture/schema/pyspark/_registry.py | 67 + .../src/overture/schema/pyspark/check.py | 49 + .../src/overture/schema/pyspark/cli.py | 239 +++ .../schema/pyspark/expressions/__init__.py | 1 + .../pyspark/expressions/_schema_structs.py | 22 + .../pyspark/expressions/column_patterns.py | 94 + .../expressions/constraint_expressions.py | 484 +++++ .../src/overture/schema/pyspark/py.typed | 0 .../overture/schema/pyspark/schema_check.py | 109 + .../src/overture/schema/pyspark/validate.py | 334 +++ .../overture-schema-pyspark/tests/__init__.py | 0 .../tests/_support/__init__.py | 0 .../tests/_support/harness.py | 246 +++ .../tests/_support/helpers.py | 135 ++ .../tests/_support/mutations.py | 388 ++++ .../tests/_support/scenarios.py | 34 + .../overture-schema-pyspark/tests/conftest.py | 50 + .../tests/expressions/__init__.py | 0 .../tests/expressions/test_column_patterns.py | 258 +++ .../test_constraint_expressions.py | 1341 ++++++++++++ .../tests/expressions/test_schema_check.py | 268 +++ .../tests/test_check.py | 20 + .../overture-schema-pyspark/tests/test_cli.py | 475 +++++ .../tests/test_harness.py | 361 ++++ .../tests/test_helpers.py | 147 ++ .../tests/test_mutations.py | 263 +++ .../tests/test_validate.py | 516 +++++ .../src/overture/schema/system/case.py | 26 + .../schema/system/discovery/__init__.py | 10 + .../schema/system/discovery/entry_point.py | 119 ++ .../src/overture/schema/system/field_path.py | 301 +++ .../test_string_constraints.py | 2 +- .../tests/test_case.py} | 10 +- .../tests/test_discovery_entry_point.py | 97 + .../tests/test_field_path.py | 376 ++++ .../pyproject.toml | 1 - .../overture/schema/transportation/models.py | 2 +- pyproject.toml | 1 + uv.lock | 1072 
+++++----- 116 files changed, 21308 insertions(+), 2891 deletions(-) delete mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py create mode 100644 
packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/invalid_value.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py create mode 100644 packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py create mode 100644 packages/overture-schema-codegen/tests/test_field_walk.py create mode 100644 packages/overture-schema-codegen/tests/test_model_extraction.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_base_row.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_check_builder.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_e2e.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_pipeline.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_renderer.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_scaffold.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py create mode 100644 packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py create mode 100644 packages/overture-schema-pyspark/README.md create mode 100644 packages/overture-schema-pyspark/pyproject.toml create mode 100644 packages/overture-schema-pyspark/src/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py create mode 
100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py create mode 100644 packages/overture-schema-pyspark/tests/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/_support/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/_support/harness.py create mode 100644 packages/overture-schema-pyspark/tests/_support/helpers.py create mode 100644 packages/overture-schema-pyspark/tests/_support/mutations.py create mode 100644 packages/overture-schema-pyspark/tests/_support/scenarios.py create mode 100644 packages/overture-schema-pyspark/tests/conftest.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py create mode 100644 packages/overture-schema-pyspark/tests/expressions/test_schema_check.py create mode 100644 packages/overture-schema-pyspark/tests/test_check.py create mode 100644 packages/overture-schema-pyspark/tests/test_cli.py create mode 100644 packages/overture-schema-pyspark/tests/test_harness.py create mode 100644 packages/overture-schema-pyspark/tests/test_helpers.py create 
mode 100644 packages/overture-schema-pyspark/tests/test_mutations.py create mode 100644 packages/overture-schema-pyspark/tests/test_validate.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/case.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/discovery/entry_point.py create mode 100644 packages/overture-schema-system/src/overture/schema/system/field_path.py rename packages/{overture-schema-codegen/tests/test_naming.py => overture-schema-system/tests/test_case.py} (67%) create mode 100644 packages/overture-schema-system/tests/test_discovery_entry_point.py create mode 100644 packages/overture-schema-system/tests/test_field_path.py diff --git a/Makefile b/Makefile index a88a9917e..4488d0125 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default uv-sync check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines +.PHONY: default uv-sync clean-pyspark generate-pyspark check test-all test test-only docformat doctest doctest-only mypy mypy-only lint-only update-baselines TESTMON ?= --testmon @@ -7,9 +7,22 @@ default: test-all install: uv-sync uv-sync: - @uv sync --all-packages 2> /dev/null + @uv sync --all-packages --all-extras 2> /dev/null -check: uv-sync +PYSPARK_EXPRESSIONS := packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated +PYSPARK_GENERATED_TESTS := packages/overture-schema-pyspark/tests/generated + +clean-pyspark: + @rm -rf $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + +generate-pyspark: uv-sync clean-pyspark + @uv run overture-codegen generate --format pyspark \ + --output-dir $(PYSPARK_EXPRESSIONS) \ + --test-output-dir $(PYSPARK_GENERATED_TESTS) + @uv run ruff check --fix --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + @uv run ruff format --quiet $(PYSPARK_EXPRESSIONS) $(PYSPARK_GENERATED_TESTS) + +check: uv-sync generate-pyspark @$(MAKE) -j test-only doctest-only lint-only mypy-only 
test-all: uv-sync diff --git a/packages/overture-schema-codegen/docs/design.md b/packages/overture-schema-codegen/docs/design.md index 662d77fc5..67e78892e 100644 --- a/packages/overture-schema-codegen/docs/design.md +++ b/packages/overture-schema-codegen/docs/design.md @@ -30,9 +30,9 @@ definitions regularly nest `Annotated` inside `NewType` inside `Annotated` -- Annotated[int, Field(ge=...)])` -- and constraints at each depth need to be tagged with the NewType that contributed them. -The code generator solves this by extracting type information once into a flat, -navigable representation (`TypeInfo`), then passing that to renderers that produce -output without touching Python's type system. +The code generator solves this by extracting type information once into a tree-shaped +`FieldShape` IR, then passing that to renderers that produce output without touching +Python's type system. ## Inputs and Outputs @@ -41,10 +41,12 @@ points, plus example data from theme `pyproject.toml` files. Examples serve two purposes: rendered examples in documentation pages, and a starting point for generating tests that verify behavior of generated code. -**Current Outputs**: Markdown documentation pages with field tables, cross-page links, -constraint descriptions, and examples. +**Outputs**: -**Planned outputs**: Arrow schemas, PySpark expressions. +- Markdown documentation pages with field tables, cross-page links, constraint + descriptions, and examples. +- PySpark validation modules: per-feature expression builders, StructType schemas, + a feature registry, and generated conformance test modules. ## Architecture @@ -55,15 +57,21 @@ Rendering Output formatting, all presentation decisions ^ Output Layout What to generate, where it goes, how outputs link ^ -Extraction TypeInfo, FieldSpec, ModelSpec, EnumSpec, ... +Extraction FieldShape, FieldSpec, ModelSpec, EnumSpec, ... 
^ Discovery discover_models() from overture-schema-common ``` -`markdown/pipeline.py` orchestrates the pipeline without I/O: it expands feature trees, -collects supplementary types, builds placement registries, computes reverse references, -and calls renderers -- returning `RenderedPage` objects. The CLI (`cli.py`) is a thin -Click wrapper that calls `generate_markdown_pages()` and writes files to disk. +Each output format has its own pipeline module that orchestrates without I/O: + +- `markdown/pipeline.py` expands feature trees, collects supplementary types, builds + placement registries, computes reverse references, and calls renderers -- returning + `RenderedPage` objects. +- `pyspark/pipeline.py` expands feature trees, builds checks and schemas, renders + expression modules and test modules -- returning `GeneratedModule` objects. + +The CLI (`cli.py`) is a thin Click wrapper that dispatches to the appropriate pipeline +and writes files to disk. ```mermaid graph TD @@ -75,24 +83,24 @@ graph TD subgraph Extraction EX["extraction/type_analyzer / extractors"] - EX -->|"ModelSpec, UnionSpec"| TREE["expand_model_tree()"] end - TREE -->|"FeatureSpec[]"| OL + EX -->|"FeatureSpec[]"| OL + EX -->|"FeatureSpec[]"| PS - subgraph "Output Layout" + subgraph "Output Layout (Markdown)" OL["layout/type_collection"] OL -->|"SupplementarySpec{}"| PA["markdown/path_assignment"] PA -->|"dict[str, Path]"| LC["markdown/link_computation"] RR["markdown/reverse_references"] end - subgraph Rendering + subgraph "Markdown Rendering" R["markdown/renderer"] TR["extraction/type_registry"] -.->|"type name resolution"| R end - subgraph Orchestration + subgraph "Markdown Orchestration" MP["markdown/pipeline"] end @@ -101,39 +109,64 @@ graph TD RR --> MP MP --> R R -->|"RenderedPage[]"| MP + + subgraph "PySpark Pipeline" + PS["pyspark/pipeline"] + CD["constraint_dispatch"] -->|"ExpressionDescriptor"| CB + CB["check_builder"] -->|"Check, ModelCheck"| PR + SB["schema_builder"] -->|"SchemaField[]"| 
PR + CB -->|"Check, ModelCheck"| PTR + SY["test_data/"] -->|"BASE_ROW, scaffold, invalid_value"| PTR + PR["renderer"] + PTR["test_renderer"] + end + + PS --> CD + PS --> CB + PS --> SB + PS --> PR + PS --> PTR + MP -->|"list[RenderedPage]"| CLI["cli.py → disk"] + PS -->|"list[GeneratedModule]"| CLI ``` ## Extraction -### `analyze_type` -- iterative type unwrapping - -`analyze_type(annotation)` is a single iterative function that peels type annotation -layers in a fixed order, accumulating information into an `_UnwrapState`: - -1. **NewType**: Records the outermost name (user-facing semantic identity, e.g. - `FeatureVersion`) and updates the "current" name (used for constraint provenance and - as `base_type` at terminal) -2. **Annotated**: Collects constraints from metadata, each tagged with whichever NewType - was most recently entered. Extracts `Field.description` when present -3. **Union**: Filters out `None` (marks optional), `Sentinel`, and `Literal` sentinel - arms. If multiple concrete `BaseModel` arms remain, classifies as `UNION`; otherwise - continues with the single remaining arm -4. **list / dict**: Increments `list_depth` for each `list[...]` layer, sets dict flags, - continues into element types -5. **Terminal**: Classifies as `PRIMITIVE`, `LITERAL`, `ENUM`, `MODEL`, or `UNION` - -The result is `TypeInfo` -- a flat dataclass that fully describes the unwrapped type: -classification (`TypeKind`), optional/dict flags, `list_depth` (count of `list[...]` -layers), `newtype_outer_list_depth` (list layers outside the outermost NewType boundary), -accumulated constraints with provenance, NewType names, source type, literal values, and -(for UNION kind) the tuple of concrete `BaseModel` member types. Dict types carry -recursively analyzed `TypeInfo` for their key and value types. 
- -Multi-depth `Annotated` layers (common in practice, since NewTypes wrap `Annotated` -types that wrap further NewTypes) are handled naturally by the loop -- each iteration -processes the next wrapper. Constraints from each `Annotated` layer are tagged with the -NewType active at that depth. +### `analyze_type` -- recursive type unwrapping + +`analyze_type(annotation)` recurses through a Python type annotation, peeling one layer +per call frame via the internal `_unwrap` function: + +1. **NewType**: Constructs `_NewTypeCtx` with the NewType's name, recurses into + `__supertype__`, then wraps the result in `NewTypeShape`. `_erase_inner_newtypes` + strips every inner `NewTypeShape` reached through `ArrayOf` layers, so each spine + keeps only its outermost `NewTypeShape` (inner NewType names survive on the + terminal `Primitive.base_type`). +2. **Annotated**: Collects constraints from metadata as `ConstraintSource` objects, + each tagged with the active `_NewTypeCtx`. Extracts `Field.description` when present. + Recurses into the inner annotation, then attaches constraints to the result via + `attach_constraints`, which prepends them to the outermost structural layer. +3. **Union**: Delegates to `_peel_union`, which filters `None` (marks optional), + `Sentinel`, and `Literal` sentinel arms. Multiple concrete `BaseModel` arms invoke + `union_resolver`; a single arm continues with `_unwrap`. +4. **list / dict**: `list[X]` recurses into `X` and wraps in `ArrayOf`. Nested lists + produce nested `ArrayOf` instances -- no numeric depth counter. `dict[K, V]` recurses + for key and value independently and returns `MapOf`. +5. **Terminal**: Classifies as `Primitive`, `LiteralScalar`, `AnyScalar`, `ModelRef`, + or `UnionRef`. + +The result is `tuple[FieldShape, bool, str | None]` -- the structural shape describing +the type as a nested tree, whether the field accepts `None`, and the first +`FieldInfo.description` found during unwrapping. 
`FieldShape` is a discriminated union +of eight variants (`Primitive`, `LiteralScalar`, `AnyScalar`, `ModelRef`, `UnionRef`, +`ArrayOf`, `MapOf`, `NewTypeShape`) nested to describe arbitrary collection and NewType +wrapping. + +Constraints from each `Annotated` layer attach to the shape layer they annotate -- +`attach_constraints` walks past any `NewTypeShape` wrappers to prepend constraints on +the first `ArrayOf`, `MapOf`, or scalar node. This means array-level and element-level +constraints land on structurally distinct nodes without any numeric bookkeeping. ### Extractors by domain @@ -145,12 +178,14 @@ Extraction is split by entity kind: - `extraction/newtype_extraction.py`: NewType -> `NewTypeSpec` - `extraction/union_extraction.py`: Discriminated union alias -> `UnionSpec` - `extraction/numeric_extraction.py`: Numeric types -> `NumericSpec` +- `extraction/pydantic_extraction.py`: Pydantic built-in type -> `PydanticTypeSpec` -Each calls `analyze_type()` for field types. Tree expansion (`expand_model_tree()`) -walks MODEL-kind fields to populate nested model references, with a shared cache and -cycle detection (`starts_cycle=True`). +Each calls `analyze_type()` for field types. `extract_model` recurses into sub-models +and sub-unions during extraction, building `ModelRef`/`UnionRef` terminals with their +specs resolved. A shared cache and cycle detection (`starts_cycle=True`) prevent +infinite recursion and duplicate extraction. -### Unions and the FeatureSpec protocol +### Unions and FeatureSpec Discriminated unions (e.g. `Segment = Annotated[Union[RoadSegment, ...], Discriminator(...)]`) are type aliases, not classes. `UnionSpec` captures the union @@ -159,10 +194,10 @@ Fields shared across all variants appear once; fields present in some variants a wrapped in `AnnotatedField` with `variant_sources` indicating which members contribute them. The common base class is identified so shared fields can be deduplicated. 
-`FeatureSpec` is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. Code that -operates on "any top-level feature" -- tree expansion, supplementary type collection, -rendering dispatch -- uses `FeatureSpec` rather than a concrete type, so union and model -features flow through the same pipeline. +`FeatureSpec` is a type alias `ModelSpec | UnionSpec`. Code that operates on "any +top-level feature" -- supplementary type collection, rendering dispatch -- uses +`FeatureSpec` so union and model features flow through the same pipeline. Consumers +narrow with `isinstance` when arm-specific attributes are needed. ### Constraints @@ -180,9 +215,12 @@ reference each other. ### Supplementary type collection -`collect_all_supplementary_types()` walks the expanded field trees of all feature specs, -extracting enums, semantic NewTypes, and sub-models that need their own output. Returns -`dict[str, SupplementarySpec]`. +`collect_all_supplementary_types()` walks the field trees of all feature specs to extract +the supplementary types that need their own output: enums, semantic NewTypes, sub-models, +and Pydantic built-in types (`HttpUrl`, `EmailStr`). Returns `dict[TypeIdentity, +SupplementarySpec]`, where `SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | +PydanticTypeSpec`. `TypeIdentity` pairs a unique Python object with its display name so +registry lookups remain stable when two distinct types share a name. ### Module-mirrored output paths @@ -195,14 +233,14 @@ directory. ### Link computation -`LinkContext` carries the current output's path and the full type-to-path registry. When -a renderer formats a type reference, it looks up the target in the registry and computes -a relative path. Links exist only for types with registry entries, avoiding broken -references to ungenerated outputs. +`LinkContext` carries the current output's path and the full `dict[TypeIdentity, +PurePosixPath]` registry. 
When a renderer formats a type reference, it looks up the +target by `TypeIdentity` and computes a relative path. Links exist only for types with +registry entries, avoiding broken references to ungenerated outputs. ### Reverse references -`compute_reverse_references()` walks feature specs to build `dict[type_name, +`compute_reverse_references()` walks feature specs to build `dict[TypeIdentity, list[UsedByEntry]]` for "Used By" sections. ## Rendering @@ -221,10 +259,10 @@ to registered primitives. ### Markdown renderer Jinja2 templates for feature, enum, NewType, primitives, and geometry pages. -`render_feature()` expands MODEL-kind fields inline with dot-notation (e.g., -`sources[].dataset`), stopping at cycle boundaries. `format_type()` in -`markdown/type_format.py` converts `TypeInfo` into link-aware display strings using -`LinkContext`. +`render_feature()` walks each field's `FieldShape` tree and expands `ModelRef` +terminals inline with dot-notation (e.g., `sources[].dataset`), stopping at +`ModelRef.starts_cycle`. `format_type()` in `markdown/type_format.py` converts a +`FieldShape` into link-aware display strings using `LinkContext`. ### Constraint prose @@ -247,17 +285,157 @@ need for external schema information -- the model instance itself encodes the ty structure. `augment_missing_fields` appends `(name, None)` entries for union cross-arm fields absent from the concrete variant instance. +## PySpark Pipeline + +The PySpark codegen transforms extracted `FeatureSpec` trees into validation expression +modules and generated conformance test modules. `pyspark/pipeline.py` exposes +`generate_pyspark_module` (single spec) and `generate_pyspark_modules` (all specs). + +### Constraint Dispatch + +`pyspark/constraint_dispatch.py` maps constraint objects to expression descriptors. +Four dispatch mechanisms: + +1. **`dispatch_constraint`** -- field constraints (bounds, min/max length, pattern, + stripped, geometry type, unique items, JSON pointer). 
Returns `ExpressionDescriptor` + with function name, args, kwargs. Returns None for skipped constraints (Reference, + Strict). + +2. **`dispatch_newtype`** -- NewType-level overrides: `LinearlyReferencedRange` -> + three range checks. `CountryCodeAlpha2` and `RegionCode` decompose normally + via their `PatternConstraint` subclasses and return None here. + +3. **`dispatch_base_type`** -- base-type overrides for types with no `Annotated` + constraints: `HttpUrl` -> `check_url_format` + `check_url_length`, + `EmailStr` -> `check_email`, `BBox` -> `check_bbox_completeness` + + `check_bbox_lat_ordering` + `check_bbox_lat_range`. + +4. **`dispatch_model_constraint`** -- model constraints: `RequireAnyOfConstraint`, + `RadioGroupConstraint`, `RequireIfConstraint`, `ForbidIfConstraint`, + `MinFieldsSetConstraint`. Returns `ModelConstraintDescriptor`. Returns None for + `NoExtraFieldsConstraint`. + +### Check Builder + +`pyspark/check_builder.py` walks `FieldSpec` trees to produce `Check` and `ModelCheck` +IR. Resolves the mapping from nested field paths to PySpark array iteration patterns, +producing a `FieldPath` (`ScalarPath` or `ArrayPath`) on each `Check`: + +- **Scalar field** -- `ScalarPath`; renders as `F.col("field")` +- **Top-level array** -- `ArrayPath` with one `ArraySegment`; renders as + `array_check("field", lambda el: ...)` +- **Field inside an array element** -- `ArrayPath` with struct navigation after the + array segment; renders as `array_check("array_col", lambda el: el["field"])` +- **Nested array inside an array** -- `ArrayPath` with multiple `ArraySegment`s; + renders as `nested_array_check("outer", lambda el: array_check(el["inner"], ...))` +- **Multiple nesting levels** -- chained `nested_array_check` with struct segments + navigating between array iterations + +Union handling: variant-specific fields are annotated with `ColumnGuard` or +`ElementGuard` discriminator gates. `Check.guards` is AND-composed at render time. 
+Nested unions (a union field within a union) produce a `ColumnGuard` and an +`ElementGuard` in sequence on the same check. + +`COLUMN_LEVEL_FUNCTIONS` (frozenset) selects checks that split into a +separate `Check`; `_COLUMN_LEVEL_SUFFIXES` (dict) supplies the label +suffix for each: `check_required` (no suffix), `check_array_min_length` +(`_min_length`), `check_array_max_length` (`_max_length`), +`check_struct_unique` (`_unique`). + +### Schema Builder + +`pyspark/schema_builder.py` converts `FieldSpec` trees to `SchemaField` lists for +StructType source generation. Maps types to Spark type expressions via the type registry. +`SHARED_TYPE_REFS` reserves a few base-type names for `_schema_structs.py` constants +when the codegen cannot walk the type -- currently just `BBox` -> `BBOX_STRUCT` (BBox +is a plain class, not a Pydantic `BaseModel`). Pydantic models are inlined into the +StructType expression. Union fields are deduplicated by name with type widening (the +wider Spark numeric type wins). + +### Renderer + +`pyspark/renderer.py` emits per-feature Python modules containing: + +- Private `_fieldname_check()` functions returning `Check(field=, name=, expr=, shape=, root_field=)` +- A public `feature_checks() -> list[Check]` function calling all of them +- A per-feature `FEATURENAME_SCHEMA` StructType constant (e.g. `ADDRESS_SCHEMA`, `SEGMENT_SCHEMA`) +- An `ENTRY_POINT` string, a `PARTITIONS` dict describing the feature's Hive partition + layout (empty when not partitioned), and a `FEATURE_VALIDATION` constant pairing the + schema and checks + +The registry is not generated. `_registry.py` lives hand-written in the +`overture-schema-pyspark` package and walks the `expressions.generated` namespace at +import time, collecting every module that exposes `ENTRY_POINT` and `FEATURE_VALIDATION` +into a `dict[str, FeatureValidation]`. Modules that also expose `PARTITIONS` populate a +parallel partition map keyed by entry point. 
+ +Expression rendering handles scalar expressions, array_check/nested_array_check chains, +variant gating (`F.when(discriminator.isin(...))`), nullable parent gating +(`F.when(gate.isNotNull(), ...)`), and nested lambda variable naming for deep nesting. +Output is formatted with ruff. + +### Test Renderer + +`pyspark/test_renderer.py` emits per-feature pytest modules containing: + +- `BASE_ROW_SPARSE` / `BASE_ROW_POPULATED` -- valid synthetic rows +- `SCENARIOS: list[Scenario]` -- generated test cases, each carrying a + `mutate` callable that produces an invalid row from a merged base +- Fixtures: `checks`, `sparse_results`, `populated_results` +- Tests: `test_baseline_sparse`, `test_baseline_populated`, + `test_scenario_sparse`, `test_scenario_populated` (parametrized). + Schema coverage runs inside `run_validation_pipeline` via + `assert_schema_covers_checks`, not in a separate test. + +Union specs with multiple discriminator arms produce one test module per arm. + +### Test Data Generator + +`pyspark/test_data/` is a subpackage with three modules: + +- `base_row.py` -- `generate_base_row` / `generate_populated_row` produce sparse + (required only) and fully populated valid rows from a `FeatureSpec`. Consults field + constraints to produce constraint-satisfying values (country codes, geometry WKT, + bounds-respecting numbers). `generate_arm_rows` / `generate_populated_arm_rows` + produce one row per discriminator arm for union specs. +- `scaffold.py` -- `generate_scaffold` / `generate_model_scaffold` build sparse dicts + that provide nested structure (optional structs, arrays) needed for test scenarios. +- `invalid_value.py` -- `invalid_value` produces a concrete value that violates each + check function. 
+ +### Known Semantic Gaps + +PySpark validation diverges from Pydantic validation in two documented areas: + +- `UniqueItemsConstraint` uses Spark's `array_distinct`, which compares whole + elements with structural equality (struct- and nested-array-aware) on the raw + stored values. Pydantic compares normalized Python objects -- e.g., + `list[HttpUrl]` is compared after URL normalization (trailing slash, lowercased + scheme/host) -- so it catches duplicates that differ only in normalization. The + PySpark check catches exact duplicates only. + +- `require_any_of` checks `isNotNull` as a proxy for Pydantic's `model_fields_set`. + Parquet has no equivalent of "explicitly provided"; `isNotNull` is stricter (it + rejects fields explicitly set to null). + ## Extension Points -**Adding a new output target** (Arrow schemas next, PySpark expressions after): Add a -column to `TypeMapping` in `extraction/type_registry.py` for type-name resolution. Write -a new renderer module that consumes specs and the type registry. The extraction layer and -output layout are target-independent. +**Adding a new output target**: Add a column to `TypeMapping` in +`extraction/type_registry.py` for type-name resolution. Write a pipeline module that +consumes `FeatureSpec` trees and a renderer that produces output. The extraction layer is +target-independent. Register the format in `cli.py`. -**Adding a new type kind**: Add a variant to `TypeKind` in `extraction/type_analyzer.py`. +**Adding a new type kind**: Add a variant to `FieldShape` in `extraction/field.py`. Handle it in the terminal classification of `analyze_type()`. Add an extraction function -and spec dataclass if needed. Update renderers to handle the new kind. - -**Adding a new constraint type**: The iterative unwrapper collects it automatically (any -`Annotated` metadata becomes a `ConstraintSource`). Add a case to -`describe_field_constraint()` for the prose representation. +and spec dataclass if needed. 
Update `extraction/field_walk.py` traversal helpers and +all renderers to handle the new variant. + +**Adding a new constraint type**: `_unwrap` collects it automatically (any `Annotated` +metadata becomes a `ConstraintSource`). Add a case to +`describe_field_constraint()` for prose and to `dispatch_constraint()` for PySpark +expression mapping. + +**Adding a new PySpark check function**: Add a case in `dispatch_constraint`, +`dispatch_newtype`, or `dispatch_base_type` in `constraint_dispatch.py`. Add an +`invalid_value` case in `test_data/invalid_value.py` for test generation. The check builder and +renderer handle the new descriptor automatically. diff --git a/packages/overture-schema-codegen/docs/walkthrough.md b/packages/overture-schema-codegen/docs/walkthrough.md index 397e082f5..2cd1e2b27 100644 --- a/packages/overture-schema-codegen/docs/walkthrough.md +++ b/packages/overture-schema-codegen/docs/walkthrough.md @@ -23,8 +23,8 @@ Documentation needs all of this. The codegen exists to preserve it. Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` wrappers, union filtering, generic resolution -- is complex. The codegen does it once. -`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent -representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +`analyze_type()` unwraps annotations into `FieldShape`, a tree-shaped target-independent +representation. Extractors build specs from these shapes. Renderers consume specs without re-entering the type system. New output targets add renderers, not extraction logic. The solution decomposes into four layers. Discovery finds models. Extraction unwraps @@ -64,28 +64,15 @@ it. The entry point `overture:transportation:segment` maps to The codegen classifies these at the CLI boundary: `is_model_class` identifies concrete `BaseModel` subclasses, `is_union_alias` calls `analyze_type` to identify discriminated -unions. 
From that point forward both model features and union features satisfy the -`FeatureSpec` protocol and flow through the same pipeline. +unions. From that point forward both model features and union features are `FeatureSpec` values +(`ModelSpec | UnionSpec`) and flow through the same pipeline. ## 2. Leaf utilities -Two modules with no internal dependencies. Both serve multiple layers. - -### extraction/case_conversion.py - -Converts PascalCase to snake_case with two compiled regexes. `_ACRONYM_BOUNDARY` inserts -an underscore between an uppercase run and a capitalized word start: `HTMLParser` -becomes `HTML_Parser` becomes `html_parser`. `_CAMEL_BOUNDARY` inserts between -lowercase-or-digit and uppercase: `buildingPart` becomes `building_part`. -`to_snake_case` applies them in sequence and lowercases. - -`slug_filename` composes the conversion with a file extension. Every output file path in -the system passes through this function. - -```python ->>> slug_filename("HexColor") -'hex_color.md' -``` +One module with no internal dependencies, serving multiple layers. PascalCase to +snake_case conversion lives in `overture.schema.system.case` (used by the pyspark +generator and the markdown path assignment); markdown output filenames are +`f"{to_snake_case(name)}.md"` at the call site. ### extraction/docstring.py @@ -113,92 +100,85 @@ summaries. ## 3. Type analysis This is the module the entire package exists to house. `analyze_type` takes a raw type -annotation and returns `TypeInfo` -- a flat dataclass that fully describes the unwrapped -type without any reference to Python's typing machinery. - -### The loop - -The function runs a single `while True` loop that peels layers in fixed order. Each -iteration handles one wrapper: - -**NewType** records names at two levels. The first NewType encountered becomes -`outermost_newtype_name` (the user-facing identity, e.g. 
"FeatureVersion") and snapshots -the current `list_depth` into `newtype_outer_list_depth` -- capturing how many list -layers appeared before the NewType boundary. Subsequent NewTypes update -`last_newtype_name` (the innermost, used for constraint provenance and as the terminal -`base_type`). The loop unwraps via `__supertype__` and continues. - -**Annotated** collects every metadata object as a `ConstraintSource`, tagging each with -whichever NewType was most recently entered. This is how constraint provenance survives: -when `int32`'s `Annotated` layer contributes `Field(ge=0)`, the constraint records -`source="int32"`. If a `FieldInfo` carries a description, the function captures it -- -first description wins, so the outermost NewType's documentation takes precedence. - -**Union** filters out `NoneType` (marks optional), `Sentinel` instances (Pydantic's -`` marker for undeclared defaults), and `Literal` sentinel arms (like -`Literal[""]` used alongside `HttpUrl`). If multiple concrete `BaseModel` subclasses -remain after filtering, the function classifies the type as `UNION` and returns -immediately with the member tuple. Non-BaseModel multi-type unions raise -`UnsupportedUnionError`. A single remaining arm continues the loop. - -The `Literal` filtering has a guard: when a union contains *only* Literal arms (like -`Optional[Literal["x"]]`), the function keeps them rather than filtering everything out. - -**list/dict** increments `list_depth` for each `list[...]` layer (so `list[list[str]]` -records depth 2), sets dict flags, and continues into element types. Dict is the one -case where `analyze_type` recurses -- it calls itself for key and value types, storing -the results as nested `TypeInfo` objects. 
- -**Terminal** classification in `_classify_terminal` handles what remains after all -wrappers are peeled: `Any` becomes a PRIMITIVE, `Literal` returns with the literal value -(single-value only -- multi-value Literals get `literal_value=None`), `Enum` subclasses -become ENUM, `BaseModel` subclasses become MODEL, everything else becomes PRIMITIVE. +annotation and returns `tuple[FieldShape, bool, str | None]` -- the structural shape, +whether the field is optional, and the first description found in the annotation chain. +`FieldShape` is a discriminated union tree that fully describes the type without any +reference to Python's typing machinery. + +### The recursion + +`_unwrap` peels one annotation layer per call frame and returns a `FieldShape` subtree. +Each case handles one wrapper kind: + +**NewType** constructs a `_NewTypeCtx` carrying the NewType's name and callable +reference, then recurses into `__supertype__` with that context active. After the +recursion returns, `_erase_inner_newtypes` strips every `NewTypeShape` reachable through +the recursion result's `ArrayOf` layers so that exactly one `NewTypeShape` remains per +spine. The frame then wraps the (now wrapper-free) inner shape: +`NewTypeShape(name="FeatureVersion", inner=Primitive(...))`. Inner NewType names +survive as the terminal `Primitive.base_type`. + +**Annotated** collects every metadata object in the `args[1:]` slice as a +`ConstraintSource`, tagging each with the active `newtype_ctx`. If a `FieldInfo` is +present, its `metadata` list contributes additional constraint sources (Pydantic unpacks +`Field(min_length=1)` into annotated-types objects there). Descriptions are captured +from `FieldInfo.description` -- first one found wins, so the outermost annotation's +documentation takes precedence.
The collected constraints are then attached to the +recursion result via `attach_constraints`, which walks any leading `NewTypeShape` +wrappers to prepend the constraints on the first structural layer (`ArrayOf`, `MapOf`, +or scalar terminal) that can hold them. Raw `MinLen` / `MaxLen` constraints are wrapped +into typed `ArrayMinLen` / `ScalarMinLen` (and `MaxLen` variants) matching the attachment +layer, so length-constraint dispatch is type-keyed downstream. + +**Union** delegates to `_peel_union`. That helper filters `NoneType` (marks optional), +`Sentinel` instances, and `Literal` sentinel arms. If multiple concrete `BaseModel` +subclasses remain, it invokes `union_resolver` and returns a `_Resolved` short-circuit. +A single remaining arm returns `_ContinueWith`, and `_unwrap` recurses into it. + +**list** recurses into the element type and wraps the result in `ArrayOf`. Nested lists +(`list[list[str]]`) produce nested `ArrayOf` instances -- there is no numeric depth +counter. Constraints contributed by an `Annotated` wrapper at any particular list level +land on that level's `ArrayOf` node because `attach_constraints` prepends to the +outermost structural layer, which is exactly the `ArrayOf` that was just constructed. + +**dict** recurses separately for key and value types (with `newtype_ctx=None` for both, +since dict keys and values are independent spines) and returns `MapOf`. + +**Terminal** classification in `_terminal` handles the base case: `Any` becomes +`AnyScalar`, `Literal` becomes `LiteralScalar`, `BaseModel` subclasses route through +`model_resolver` (or fall back to `Primitive(source_type=cls)`), everything else becomes +`Primitive(base_type=newtype_ctx.name or annotation.__name__)`. ### Concrete walkthroughs -**Segment (union path).** `analyze_type` receives the `Annotated` alias. Iteration 1 -sees `Annotated` -- collects the `FieldInfo` with discriminator metadata as a -constraint, unwraps to `Union[RoadSegment, RailSegment, WaterSegment]`. 
Iteration 2 sees -the union. No `None` arm, no sentinels. Three concrete `BaseModel` subclasses remain -- -the function classifies the type as `UNION` and returns immediately: `kind=UNION`, -`union_members=(RoadSegment, RailSegment, WaterSegment)`, `base_type="RoadSegment"` (the -first member). Two iterations, done. The union members are raw type objects, not -recursively analyzed -- callers that need field details call `extract_model` on each -member separately. +**Segment (union path).** `_unwrap` receives the `Annotated` alias for Segment. The +`Annotated` case collects discriminator metadata from `FieldInfo`, then sees the inner +annotation is a union. `_peel_union` finds three concrete `BaseModel` arms, invokes +`union_resolver`, and returns `_Resolved(UnionRef(...))` carrying the `UnionSpec` that +the resolver constructed. The `Annotated` handler attaches the discriminator constraints +and returns. Two frames deep, done. **FeatureVersion (NewType chain path).** `FeatureVersion = NewType("FeatureVersion", int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])`. -Iteration 1 sees `FeatureVersion`. It's a NewType -- record -`outermost_newtype_name="FeatureVersion"`, snapshot `newtype_outer_list_depth=0` (no list -layers yet), unwrap to `int32`, continue. Iteration 2 sees -`int32`. Also a NewType -- update `last_newtype_name="int32"`, unwrap to `Annotated[int, -Field(ge=0, ...)]`, continue. Iteration 3 sees `Annotated`. Collect -`ConstraintSource(source="int32", constraint=)`, unwrap to `int`. The -loop breaks on `int` (not a NewType, not Annotated, not a union, not a container). -`_classify_terminal` returns a `TypeInfo` with `base_type="int32"`, -`newtype_name="FeatureVersion"`, `kind=PRIMITIVE`, and a constraint tuple recording the -provenance chain. - -The two paths demonstrate the function's range. Segment exits early on the union branch -with member types for downstream extraction. 
FeatureVersion runs the full loop through -NewType and Annotated layers, accumulating constraint provenance that survives to -rendering. - -### _UnwrapState - -The accumulator dataclass carries state across iterations: optional/dict flags, -`list_depth` (incremented per `list[...]` layer), `newtype_outer_list_depth` (snapshotted -from `list_depth` when the first NewType is entered), the constraint list, both NewType -name slots, and the captured description. Its `build_type_info` method assembles the -final `TypeInfo` from accumulated state, freezing the constraint list into a tuple. - -### walk_type_info - -A shared visitor that recurses into dict key/value `TypeInfo` children. Both type -collection and reverse reference computation use it rather than duplicating the descent -pattern. Union members are raw `type` objects (not `TypeInfo` instances), so callers -handle them directly. +Frame 1 sees `FeatureVersion` -- a NewType. Constructs `_NewTypeCtx("FeatureVersion", +FeatureVersion)`, recurses into `int32`. Frame 2 sees `int32` -- also a NewType. +Constructs `_NewTypeCtx("int32", int32)`, recurses into `Annotated[int, Field(ge=0, +...)]`. Frame 3 sees `Annotated`. Collects `ConstraintSource(source_name="int32", +constraint=Ge(ge=0))`. Recurses into `int`. Frame 4 hits the terminal +`int`. `newtype_ctx` is still `_NewTypeCtx("int32", int32)` -- frame 3 passed frame 2's +context through unchanged, since `Annotated` does not introduce a NewType -- so +`_terminal` uses `newtype_ctx.name` (`"int32"`) as `base_type`. Returns +`Primitive(base_type="int32")`. Frame 3 attaches the constraints: `Primitive` gets the +`ge=0` / `le=2147483647` sources prepended. Frame 2's `_erase_inner_newtypes` sees a +bare `Primitive` -- no `NewTypeShape` to strip -- and wraps the result in +`NewTypeShape(name="int32", inner=Primitive(...))`.
Frame 1's `_erase_inner_newtypes` +strips that inner `NewTypeShape`, yielding `Primitive(...)`, and wraps it in +`NewTypeShape(name="FeatureVersion", inner=Primitive(...))`. + +The two paths demonstrate the function's range. Segment exits after two frames via +`union_resolver`. FeatureVersion recurses four frames through a NewType chain, with +constraint provenance tagging surviving to rendering. ## 4. Data structures @@ -206,10 +186,10 @@ handle them directly. a dataclass with no methods beyond field access and, in `UnionSpec`'s case, one cached property. -**FieldSpec** represents one model field: alias-resolved name, `TypeInfo`, description, -required flag. Two fields populated later by tree expansion: `model` (a reference to the -nested `ModelSpec` for MODEL-kind fields) and `starts_cycle` (true when following this -field's model would create a cycle in the ancestor chain). +**FieldSpec** represents one model field: alias-resolved name, `shape: FieldShape`, +description, required flag. `ModelRef` and `UnionRef` shapes carry their resolved specs +(populated during `extract_model` recursion), so consumers can follow the tree without a +separate expansion pass. **ModelSpec** represents one Pydantic model: class name, cleaned docstring, fields in documentation order, source class reference, the entry point string that located it, and @@ -218,33 +198,37 @@ model-level constraints from decorators like `@require_any_of`. **UnionSpec** represents a discriminated union type alias. Segment's `UnionSpec` carries `members=[RoadSegment, RailSegment, WaterSegment]`, `discriminator_field="subtype"`, and `common_base=TransportationSegment`. Its `annotated_fields` list pairs each `FieldSpec` -with `variant_sources` -- a tuple of class names indicating which union members -contribute that field, or `None` for fields from `TransportationSegment` shared across -all members. The `fields` cached property unwraps this for code that doesn't need -provenance. 
`UnionSpec` uses `eq=False` because it contains mutable lists and a -`cached_property` -- dataclass-generated `__eq__` would be unreliable. - -**FeatureSpec** is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. This is -the pipeline's unifying abstraction. Tree expansion, type collection, rendering -dispatch, and example loading all operate on `FeatureSpec` without knowing which -concrete type they hold. +with `variant_sources` -- a tuple of `BaseModel` subclasses indicating which union +members contribute that field, or `None` for fields from `TransportationSegment` shared +across all members. The `fields` cached property unwraps this for code that doesn't need +provenance. Each member also has its already-extracted `ModelSpec` retained in +`member_specs: list[MemberSpec]` so downstream consumers (check builder, base-row +generator) reuse it instead of re-extracting the subtree. `UnionSpec` uses `eq=False` +because it contains mutable lists and a `cached_property` -- dataclass-generated +`__eq__` would be unreliable. + +**FeatureSpec** is the type alias `ModelSpec | UnionSpec`. Type collection, rendering +dispatch, and example loading all operate on `FeatureSpec`. Consumers narrow with +`isinstance` when they need `UnionSpec`-specific attributes like `discriminator_field`. **EnumSpec** and **EnumMemberSpec** serve enums. **NewTypeSpec** serves NewTypes. **NumericSpec** serves numeric primitives with an `Interval` for bounds and optional `float_bits`. -**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec` -- -the set of non-feature types that need their own output pages. `NumericSpec` and -geometry types are excluded because they render on aggregate pages rather than -individual ones. +**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec | +PydanticTypeSpec` -- the set of non-feature types that need their own output pages. 
+`PydanticTypeSpec` covers Pydantic built-ins like `HttpUrl` and `EmailStr` (carrying the +class plus a pointer back to Pydantic's docs). `NumericSpec` and geometry types are +excluded because they render on aggregate pages rather than individual ones. ### Classification functions -Three functions at the bottom of `extraction/specs.py` classify discovery results. `is_model_class` -is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, BaseModel)`. -`is_union_alias` calls `analyze_type` and checks for `UNION` kind -- the only place -outside the type analyzer that touches Python type annotations. `filter_model_classes` -applies the model guard across the discovery dict's values. +Three functions at the bottom of `extraction/specs.py` classify discovery results. +`is_model_class` is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, +BaseModel)`. `is_union_alias` calls `analyze_type` with a sentinel `union_resolver` that +raises immediately on detection -- the only place outside the type analyzer that touches +Python type annotations. `filter_model_classes` applies the model guard across the +discovery dict's values. ## 5. Type registry @@ -291,27 +275,28 @@ classes. One subtlety: Pydantic strips the `Annotated` wrapper from some fields and moves the metadata to `field_info.metadata`. When this happens, `analyze_type` sees a bare type -and misses the constraints. `_merge_field_metadata` patches them back in, tagging them -with `source=None` since they came from the field's own annotation rather than a NewType -chain. +and misses the constraints. `_attach_field_metadata` routes them through +`attach_constraints` -- tagging them with `source_name=None` since they came from the field's +own annotation rather than a NewType chain -- so length-constraint typing happens here +just as it does during normal `Annotated` unwrapping.
Model-level constraints come from `ModelConstraint.get_model_constraints(model_class)`, which inspects decorators like `@require_any_of` and `@require_if`. -### Tree expansion +### Recursive extraction -`expand_model_tree` is the recursive step that populates `FieldSpec.model` references. -It maintains a shared cache keyed by Python class and an ancestor set for cycle -detection. +`extract_model` recursively resolves sub-models and sub-unions during field extraction, +building `ModelRef`/`UnionRef` shapes with their specs already populated. It maintains a +shared cache keyed by Python class and an ancestor set for cycle detection. The cache insert happens *before* recursion. Without this ordering, a back-edge encounter would find no cached entry and infinite-loop instead of marking -`starts_cycle=True`. The sequence: extract the sub-model, insert it into the cache, then -recurse into its fields. Shared references (the same sub-model used in multiple fields) -reuse the cached `ModelSpec` without marking cycles. +`starts_cycle=True`. The sequence: create the partial `ModelSpec`, insert it into the +cache, then populate its fields. Shared references (the same sub-model used in multiple +fields) reuse the cached `ModelSpec` without marking cycles. -Union-kind fields skip inline expansion -- they appear as a single row in the output, -linking to their members, rather than expanding inline. +`UnionRef` fields resolve via the `union_resolver` callback -- they appear as a single +row in the output, linking to their members, rather than expanding inline. ## 7. Other extractors @@ -326,17 +311,18 @@ per-member check, so members that inherit the class docstring verbatim get ### NewType extraction `extract_newtype` calls `analyze_type` on the NewType callable and extracts the custom -docstring. When the NewType has no explicit docstring, it falls back to -`TypeInfo.description` -- the first `Field.description` found in the `Annotated` +docstring. 
When the NewType has no explicit docstring, it falls back to the description +returned by `analyze_type` -- the first `Field.description` found in the `Annotated` metadata chain. ### Union extraction The most involved extractor. Walk through `Segment` concretely. -`extract_union("Segment", annotation)` calls `analyze_type` on the -`Annotated[Union[RoadSegment, RailSegment, WaterSegment], ...]` alias. The analyzer -returns `kind=UNION` with the three member types. +`extract_union("Segment", annotation)` calls `_union_members`, which runs `analyze_type` +with a capturing `union_resolver` that raises out of the analysis as soon as it sees a +multi-arm union of `BaseModel` subclasses. The captured tuple gives the three member +types plus any description from enclosing `Annotated` layers. Next, `_find_common_base` intersects each member's filtered MRO (BaseModel subclasses only, excluding `BaseModel` itself). All three share `TransportationSegment` in their @@ -348,13 +334,17 @@ The extractor calls `extract_model(TransportationSegment)` to get the shared fie Fields like `id`, `geometry`, `version`, `sources`, and `subtype` appear in the common base. These become shared `AnnotatedField` entries with `variant_sources=None`. -Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Fields not -in the shared set are variant-specific, deduplicated by `(name, type_identity)` where -`type_identity` captures `base_type`, `kind`, `is_optional`, and `list_depth`. If -`RoadSegment` and `WaterSegment` both define a `width` field with the same type -identity, the `AnnotatedField` accumulates both class names: -`variant_sources=("RoadSegment", "WaterSegment")`. Fields unique to one member get a -single-element tuple. +Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Each result +is retained on the `UnionSpec` as a `MemberSpec(member_cls, spec)` so consumers don't +re-extract. 
Fields not in the shared set are variant-specific, deduplicated by +`(name, structural_fingerprint)` where the fingerprint walks the field's `FieldShape` +tree, capturing every wrapper layer plus the terminal type. If `RoadSegment` and +`WaterSegment` both define a `width` field with the same fingerprint, the +`AnnotatedField` accumulates both classes: `variant_sources=(RoadSegment, +WaterSegment)`. Fields unique to one member get a single-element tuple. When two members +declare the same field name with the same structural fingerprint but diverging +constraints, the extractor raises rather than silently dropping one member's +constraints. `extract_discriminator` inspects the `Annotated` metadata for a `FieldInfo` with a discriminator attribute. For Segment, it finds `subtype` and builds the mapping: @@ -435,24 +425,23 @@ discover every referenced type that needs its own output page: enums, semantic N and sub-models. The walk maintains a visited set for models and a feature name set for skip detection. -Types that are themselves top-level features get skipped. For UNION-kind fields, the -function extracts and walks each member's fields. For semantic NewTypes, it walks the -`__supertype__` chain to collect intermediate NewTypes -- `Id` wraps -`NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get their own -pages. The `walk_type_info` visitor handles dict key/value recursion. +Types that are themselves top-level features get skipped. For `UnionRef` fields, the function extracts and walks each member's fields. For +semantic NewTypes, it walks the `__supertype__` chain to collect intermediate NewTypes -- +`Id` wraps `NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get +their own pages. `walk_shape` from `field_walk.py` handles recursion into `ArrayOf`, +`MapOf`, and `NewTypeShape` wrappers. -MODEL-kind fields follow `field_spec.model` references that were populated by -`expand_model_tree`. 
The function raises `RuntimeError` if it encounters a MODEL-kind -field with `model=None` -- a guard against calling collection before tree expansion. +`ModelRef` fields follow their `.model` reference (populated during `extract_model` +recursion) into nested `ModelSpec` trees. A single field matches multiple conditions independently. A semantic NewType wrapping a -MODEL-kind type triggers both NewType extraction and model collection. The checks use +`ModelRef` triggers both NewType extraction and model collection. The checks use independent `if` statements, not `elif`. ## 11. Path assignment -`build_placement_registry` builds the complete mapping from type names to output file -paths. Three tiers: +`build_placement_registry` builds the complete `dict[TypeIdentity, PurePosixPath]` +mapping each type to its output file path. Four tiers: Aggregate pages come first. All numeric primitives point to `system/primitive/primitives.md`. All geometry types point to @@ -460,7 +449,8 @@ Aggregate pages come first. All numeric primitives point to reference page. Feature specs get individual pages. Output directories derive from -`output_dir_for_entry_point`. Filenames use `slug_filename`. +`output_dir_for_entry_point`. Filenames are the snake-case type name with a `.md` +extension. Supplementary specs get module-derived paths from `source_type.__module__`. When a supplementary type's output directory falls under a feature directory, @@ -472,15 +462,20 @@ cluttering feature directories. `_nest_under_types` sorts feature directories by path length (descending) before checking containment, so the most specific match wins. +`PydanticTypeSpec` entries (e.g. `HttpUrl`) bypass module mirroring and land at +`pydantic/<module>/<type_name>.md`, keeping the generated Pydantic reference set +isolated from theme directories. + ## 12. Links and reverse references ### Link computation -`LinkContext` carries the current page's output path and the full type-to-path registry.
-When a renderer formats a type reference, it calls `resolve_link` to compute a relative -path from the current page to the target. Types without registry entries return `None`, -telling renderers to show inline code instead of a broken link. `resolve_link_or_slug` -provides a fallback when a link is required regardless. +`LinkContext` carries the current page's output path and the full `dict[TypeIdentity, +PurePosixPath]` registry. When a renderer formats a type reference, it calls +`resolve_link` with the target's `TypeIdentity` to compute a relative path. Identities +without registry entries return `None`, telling renderers to show inline code instead +of a broken link. `resolve_link_or_slug` provides a fallback when a link is required +regardless. `relative_link` computes `../` navigation between any two paths in the output tree. It finds the common prefix of directory components, counts the levels up from the source @@ -490,8 +485,9 @@ rejects `..` components to prevent path traversal surprises. ### Reverse references `compute_reverse_references` walks all feature fields and supplementary specs to build -`dict[str, list[UsedByEntry]]`. Each entry maps a type name to the list of types that -reference it. Entries sort models before NewTypes, alphabetical within each group. +`dict[TypeIdentity, list[UsedByEntry]]`. Each entry maps a target identity to the list +of types that reference it. Entries sort models before NewTypes, alphabetical within +each group. The function tracks references with sets for deduplication, then sorts into lists at the end. It skips self-references and references to types not in the supplementary spec dict @@ -504,29 +500,28 @@ provenance rather than direct field reference. ## 13. Markdown type formatting -`markdown/type_format.py` converts `TypeInfo` into display strings for markdown output. +`markdown/type_format.py` converts a field's `FieldShape` into display strings for +markdown output. 
-`format_type` handles the full range of field types. Single-value Literals render as -`"value"` in backticks. Semantic NewTypes and enums/models get markdown links via -`_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain +`format_type` handles the full range of field types. Single-value `LiteralScalar`s +render as `"value"` in backticks. Semantic NewTypes and enums/models get markdown links +via `_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain code spans. For types with a linked identity (semantic NewTypes, enums, models), list -rendering depends on where the list layers sit relative to the NewType boundary. -`newtype_outer_list_depth > 0` means the list wraps the NewType (`list[PhoneNumber]`) and -renders as `list<PhoneNumber>`. `is_list` with `newtype_name` set means the NewType -wraps a list internally (`Sources` wrapping `list[SourceItem]`) and renders with a -`(list)` qualifier. Non-NewType identities (enums, models) use `list<X>` syntax. Linked -inner types use broken-backtick syntax (`` `list<` `` ... `` `>` ``) built as a single -wrapper to avoid adjacent backticks that CommonMark would interpret as multi-backtick -code span delimiters. Dict types render as `` `map<K, V>` ``. Qualifiers (optional, list, -map) append in parentheses. - -Union members format independently -- each gets its own link resolution, joined with -pipe separators escaped for table-cell safety. +rendering depends on where the `ArrayOf` layers sit relative to the `NewTypeShape` +boundary. An `ArrayOf` sitting outside the `NewTypeShape` in the shape tree means the +list wraps the NewType (`list[PhoneNumber]`) and renders as `list<PhoneNumber>`. A +`NewTypeShape` with an `ArrayOf` inner means the NewType wraps a list internally +(`Sources` wrapping `list[SourceItem]`) and renders with a `(list)` qualifier. Non-NewType +identities (enums, models) use `list<X>` syntax. Linked inner types use broken-backtick +syntax (`` `list<` `` ...
`` `>` ``) built as a single wrapper to avoid adjacent backticks +that CommonMark would interpret as multi-backtick code span delimiters. `MapOf` shapes +render as `` `map<K, V>` ``. Qualifiers (optional, list, map) append in parentheses. + +`UnionRef` members format independently -- each gets its own link resolution, joined +with pipe separators escaped for table-cell safety. `format_underlying_type` handles NewType page headers. It links enums and models that -have their own pages but skips the outermost NewType name to avoid self-referencing. The -function uses `source_type.__name__` rather than `base_type` for link resolution, since -`base_type` may carry the outermost NewType name when only one NewType wraps a class. +have their own pages but skips the outermost NewType name to avoid self-referencing. ## 14. Markdown rendering @@ -631,26 +626,23 @@ pipeline. `generate_markdown_pages` in `markdown/pipeline.py` is the "main" function. It takes feature specs and a schema root, returns rendered pages without touching the filesystem. -Eight steps: - -1. **Expand model trees** with a shared cache across all features, so sub-models - referenced by multiple features extract once. +Seven steps (tree expansion now happens inside `extract_model`): -2. **Partition primitive and geometry names** from the system primitive module's +1. **Partition primitive and geometry names** from the system primitive module's `__all__` exports. -3. **Collect supplementary types** by walking expanded feature trees. +2. **Collect supplementary types** by walking feature trees. -4. **Build the placement registry** mapping every type to its output file path. +3. **Build the placement registry** mapping every type to its output file path. -5. **Compute reverse references** across all features and supplements. +4. **Compute reverse references** across all features and supplements. -6. **Render each feature** with its `LinkContext`, loaded examples, and used-by entries. +5.
**Render each feature** with its `LinkContext`, loaded examples, and used-by entries. -7. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, - or `render_feature` (for sub-models) based on spec type. +6. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, + `render_feature` (for sub-models), or `render_pydantic_type` based on spec type. -8. **Render aggregate pages** for primitives and geometry. +7. **Render aggregate pages** for primitives and geometry. The return value is `list[RenderedPage]` -- frozen dataclasses carrying content, output path, and a boolean `is_feature` flag. The caller decides what to do with them. @@ -688,36 +680,34 @@ A reader who reached this point has seen every module in isolation. This section entry_point="overture.schema.transportation:Segment")`. **Classification.** The CLI tests each entry. `is_model_class(Segment)` returns false -- -`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type`, which peels -the `Annotated` wrapper and finds three `BaseModel` subclasses in the union. The -analyzer returns `kind=UNION`. The CLI routes Segment to `extract_union`. - -**Extraction.** `extract_union("Segment", annotation)` calls `analyze_type` again (cheap --- the same two-iteration path), gets the three member types, and finds -`TransportationSegment` as the common base via `_find_common_base`. It extracts the -common base's fields as shared, then extracts each member's fields and partitions the -non-shared ones into `AnnotatedField` entries with variant provenance. +`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type` with a sentinel +`union_resolver` that raises on detection. The CLI routes Segment to `extract_union`. + +**Extraction.** `extract_union("Segment", annotation)` calls `_union_members`, which +runs `analyze_type` with a capturing `union_resolver` to grab the three member types +plus the union description. 
`_find_common_base` picks `TransportationSegment` as the +shared parent. The extractor calls `extract_model` on the common base and on each +member -- the results are cached on the `UnionSpec` as `member_specs` -- and partitions +the non-shared fields into `AnnotatedField` entries with variant provenance. `extract_discriminator` finds `subtype` and builds `{"road": RoadSegment, "rail": -RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` satisfying -`FeatureSpec`. +RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` (a `FeatureSpec`). Meanwhile, concrete models like `Building` go through `extract_model`, which calls `analyze_type` on each field annotation. A field typed `FeatureVersion` unwraps through -two NewType layers and an `Annotated` layer, producing a `TypeInfo` with -`base_type="int32"`, `newtype_name="FeatureVersion"`, and constraint provenance linking -`ge=0` back to the `int32` NewType. Both extraction paths produce specs satisfying -`FeatureSpec`. +two NewType layers and an `Annotated` layer, producing a `NewTypeShape(name="FeatureVersion", +inner=Primitive(base_type="int32", constraints=(...)))` shape with constraint provenance +linking `ge=0` back to the `int32` NewType. Both extraction paths produce `FeatureSpec` +values. **Pipeline entry.** The feature specs enter `generate_markdown_pages`. -`expand_model_tree` walks MODEL-kind fields on Segment's `UnionSpec` and populates -`FieldSpec.model` references. The shared cache ensures sub-models referenced by multiple -features (like `Sources`) extract once. Union-kind fields skip inline expansion. +Sub-model `FieldShape` trees are fully resolved -- `ModelRef` nodes already carry their +`ModelSpec` from recursive `extract_model` calls. No separate expansion pass is needed. **Layout.** `partition_numeric_and_geometry_types` reads the system module's exports. 
-`collect_all_supplementary_types` walks Segment's expanded fields and discovers -referenced enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and -sub-models. The walk follows `FieldSpec.model` references down the tree, and for -UNION-kind fields, extracts and walks each member's fields separately. +`collect_all_supplementary_types` walks Segment's field shapes and discovers referenced +enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and sub-models. The +walk follows `ModelRef.model` references down the tree, and for `UnionRef` shapes, +extracts and walks each member's fields separately. `build_placement_registry` assigns Segment's output path from its entry point: `entry_point_module` extracts `overture.schema.transportation`, `compute_output_dir` @@ -732,10 +722,10 @@ populate "Used By" sections: the `Subtype` enum page shows that Segment uses it. full registry. `render_feature` dispatches to `_expand_union_fields` because the spec is a `UnionSpec`. Shared fields from `TransportationSegment` render as plain rows. Variant-specific fields get italic tags: `` `road_class` *(Road)* ``. The renderer -formats each field's type via `format_type`, which resolves links through the +formats each field's `FieldShape` via `format_type`, which resolves links through the `LinkContext` -- `Subtype` gets a relative link to its enum page, `Id` links to its -NewType page. Constraints with `source=None` annotate field rows; constraints with named -sources appear on the source NewType's page instead. +NewType page. Constraints with `source_name=None` annotate field rows; constraints with +named sources appear on the source NewType's page instead. 
The example loader finds `pyproject.toml` in the transportation theme package, reads `[examples.Segment]`, validates each example against the union alias (injecting literal diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml index 3019a6a92..044b592ce 100644 --- a/packages/overture-schema-codegen/pyproject.toml +++ b/packages/overture-schema-codegen/pyproject.toml @@ -20,9 +20,25 @@ name = "overture-schema-codegen" overture-codegen = "overture.schema.codegen.cli:main" [tool.uv.sources] +overture-schema-addresses-theme = { workspace = true } +overture-schema-base-theme = { workspace = true } +overture-schema-buildings-theme = { workspace = true } overture-schema-cli = { workspace = true } overture-schema-common = { workspace = true } +overture-schema-divisions-theme = { workspace = true } +overture-schema-places-theme = { workspace = true } overture-schema-system = { workspace = true } +overture-schema-transportation-theme = { workspace = true } + +[dependency-groups] +test = [ + "overture-schema-addresses-theme", + "overture-schema-base-theme", + "overture-schema-buildings-theme", + "overture-schema-divisions-theme", + "overture-schema-places-theme", + "overture-schema-transportation-theme", +] [tool.hatch.version] path = "src/overture/schema/codegen/__about__.py" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py index 279f22a84..fa2610a04 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -17,6 +17,7 @@ FeatureSpec, is_model_class, is_union_alias, + partitions_from_tags, ) from .extraction.union_extraction import extract_union from .layout.module_layout import ( @@ -26,12 +27,13 @@ entry_point_module, ) from .markdown.pipeline import generate_markdown_pages +from .pyspark.pipeline import 
generate_pyspark_modules log = logging.getLogger(__name__) __all__ = ["cli"] -_OUTPUT_FORMATS = ("markdown",) +_OUTPUT_FORMATS = ("markdown", "pyspark") _FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" @@ -84,7 +86,15 @@ def list_models() -> None: "--output-dir", type=click.Path(path_type=Path), default=None, - help="Write output to directory (default: stdout)", + help="Write output files directly into this directory (default: stdout). " + "For pyspark, writes expression modules (*.py) and a _registry.py. " + "For markdown, writes theme subdirectories.", +) +@click.option( + "--test-output-dir", + type=click.Path(path_type=Path), + default=None, + help="Write test modules (test_*.py) into this directory (pyspark only).", ) def generate( output_format: str, @@ -92,13 +102,13 @@ def generate( filters: tuple[str, ...], excludes: tuple[str, ...], output_dir: Path | None, + test_output_dir: Path | None, ) -> None: """Generate code/docs from discovered models.""" - all_models = discover_models() + if output_format != "pyspark" and test_output_dir is not None: + raise click.UsageError("--test-output-dir is only valid with --format pyspark") - # Schema root from ALL entry points (before tag filters). 
- module_paths = [entry_point_module(k.entry_point) for k in all_models] - schema_root = compute_schema_root(module_paths) + all_models = discover_models() models = filter_models(all_models, build_selector(tags, filters, excludes)) @@ -107,18 +117,27 @@ def generate( feature_specs: list[FeatureSpec] = [] for key, entry in models.items(): + partitions = partitions_from_tags(key.tags) if is_model_class(entry): - feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + feature_specs.append( + extract_model(entry, entry_point=key.entry_point, partitions=partitions) + ) elif is_union_alias(entry): feature_specs.append( extract_union( entry_point_class(key.entry_point), entry, entry_point=key.entry_point, + partitions=partitions, ) ) - _generate_markdown(feature_specs, schema_root, output_dir) + if output_format == "pyspark": + _generate_pyspark(feature_specs, output_dir, test_output_dir) + else: + module_paths = [entry_point_module(k.entry_point) for k in all_models] + schema_root = compute_schema_root(module_paths) + _generate_markdown(feature_specs, schema_root, output_dir) def _generate_markdown( @@ -141,6 +160,24 @@ def _generate_markdown( _write_category_files(output_dir, all_paths, feature_paths) +def _generate_pyspark( + feature_specs: list[FeatureSpec], + output_dir: Path | None, + test_output_dir: Path | None = None, +) -> None: + """Generate PySpark validation modules. + + Output is syntactically valid Python; we assume a code formatter runs + over the written directories afterwards to match existing conventions. 
+ """ + modules = generate_pyspark_modules(feature_specs) + for mod in modules.source: + _write_output(mod.content, output_dir, mod.path) + if test_output_dir is not None: + for mod in modules.test: + _write_output(mod.content, test_output_dir, mod.path) + + def _ancestor_dirs(paths: set[PurePosixPath]) -> set[PurePosixPath]: """Collect all ancestor directories for a set of file paths.""" dirs: set[PurePosixPath] = set() diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py deleted file mode 100644 index 9d06341fb..000000000 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py +++ /dev/null @@ -1,41 +0,0 @@ -"""PascalCase to snake_case conversion for code generation.""" - -import re - -__all__ = ["slug_filename", "to_snake_case"] - -# Insert _ between an acronym run and a capitalized word start (HTML|Parser) -_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])") -# Insert _ between a lowercase/digit and an uppercase letter (building|Part) -_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])") - - -def to_snake_case(name: str) -> str: - """Convert PascalCase to snake_case. - - Handles acronym runs correctly: "HTMLParser" becomes "html_parser", - not "h_t_m_l_parser". - - >>> to_snake_case("HTMLParser") - 'html_parser' - >>> to_snake_case("BuildingPart") - 'building_part' - >>> to_snake_case("simple") - 'simple' - """ - name = _ACRONYM_BOUNDARY.sub(r"\1_\2", name) - name = _CAMEL_BOUNDARY.sub(r"\1_\2", name) - return name.lower() - - -def slug_filename(name: str, ext: str = ".md") -> str: - """Convert a PascalCase type name to a snake_case filename. 
- - >>> slug_filename("HexColor") - 'hex_color.md' - >>> slug_filename("BuildingPart") - 'building_part.md' - >>> slug_filename("BuildingPart", ext=".json") - 'building_part.json' - """ - return f"{to_snake_case(name)}{ext}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py new file mode 100644 index 000000000..1be5d6d7b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field.py @@ -0,0 +1,172 @@ +"""Tree-shaped IR for model field types. + +`FieldShape` is a discriminated union -- `Primitive`, `LiteralScalar`, +`AnyScalar`, `ModelRef`, `UnionRef`, `ArrayOf`, `MapOf`, `NewTypeShape` +-- nested to describe arbitrary list / dict / NewType wrapping. Each +variant carries its own constraints (where meaningful), and walkers +encounter each constraint at the layer it targets. + +The three terminal scalar variants (`Primitive`, `LiteralScalar`, +`AnyScalar`) are grouped under the `Scalar` type alias for consumers +that only need to ask "is this a leaf?". + +`NewTypeShape` wraps an inner shape, so its position relative to +`ArrayOf` is structural: `NewTypeShape(inner=ArrayOf(...))` is a +NewType over `list[X]`, while `ArrayOf(element=NewTypeShape(...))` +is a list of NewType-wrapped values. Consumers pattern-match on +shape to distinguish the two. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, TypeAlias + +if TYPE_CHECKING: + from .specs import ModelSpec, UnionSpec + +__all__ = [ + "AnyScalar", + "ArrayOf", + "ConstraintSource", + "FieldShape", + "LiteralScalar", + "MapOf", + "ModelRef", + "NewTypeShape", + "Primitive", + "Scalar", + "UnionRef", +] + + +@dataclass(frozen=True, slots=True) +class ConstraintSource: + """A constraint paired with the NewType that contributed it. 
+ + `source_ref` and `source_name` identify the NewType that declared + the constraint; both are `None` for constraints contributed directly + on a field annotation rather than through a NewType. `constraint` + is the raw metadata object from `Annotated[..., constraint]`. + """ + + source_ref: object | None + source_name: str | None + constraint: object + + +@dataclass(frozen=True, slots=True) +class Primitive: + """Terminal type with a registry lookup key. + + Covers primitives (`int32`, `str`), enums, Pydantic built-ins + (`HttpUrl`, `EmailStr`), and `BaseModel` subclasses that weren't + resolved to a `ModelRef` (e.g. when no `model_resolver` was + supplied). + """ + + base_type: str + source_type: type | None = None + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class LiteralScalar: + """`Literal[X, ...]` terminal.""" + + values: tuple[object, ...] + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class AnyScalar: + """`typing.Any` terminal.""" + + constraints: tuple[ConstraintSource, ...] = () + + +Scalar: TypeAlias = Primitive | LiteralScalar | AnyScalar +"""Terminal shape: a value that doesn't wrap another shape. + +Consumers that just need "is this a leaf?" check `isinstance(x, Scalar)`; +consumers that need terminal-specific data narrow to a variant. +""" + + +@dataclass(frozen=True, slots=True) +class ModelRef: + """Reference to a Pydantic sub-model. + + `starts_cycle` marks the back-edge of a cycle in the model graph; + consumers that recurse into models must stop at cycle starts. + """ + + model: ModelSpec + starts_cycle: bool = False + + +@dataclass(frozen=True, slots=True) +class UnionRef: + """Reference to a discriminated union of models.""" + + union: UnionSpec + + +@dataclass(frozen=True, slots=True) +class ArrayOf: + """Sequence of values sharing a single element shape. + + Nested arrays are nested `ArrayOf` instances; there is no numeric + depth field. 
`constraints` carries array-level validation rules + (length, uniqueness). Per-element constraints live on `element` + and its descendants. + """ + + element: FieldShape + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class MapOf: + """Mapping from a key shape to a value shape. + + `constraints` carries map-level validation rules. Per-key and + per-value constraints live on `key` / `value` respectively. + """ + + key: FieldShape + value: FieldShape + constraints: tuple[ConstraintSource, ...] = () + + +@dataclass(frozen=True, slots=True) +class NewTypeShape: + """A NewType wrapper around an inner shape. + + Position relative to other wrappers is meaningful: + `NewTypeShape(inner=ArrayOf(...))` is a NewType over `list[X]`; + `ArrayOf(element=NewTypeShape(...))` is a list of NewType-wrapped + values. Consumers distinguish the two by pattern, not a numeric + offset. + + Constraints contributed by the NewType chain attach to the + `Scalar` / `ArrayOf` / `MapOf` layer they target, not to the + wrapper itself. `name` and `ref` identify the NewType for linking + without owning constraint state. + """ + + name: str + ref: object + inner: FieldShape + + +FieldShape: TypeAlias = ( + Primitive + | LiteralScalar + | AnyScalar + | ModelRef + | UnionRef + | ArrayOf + | MapOf + | NewTypeShape +) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py index 0db927065..141af58d2 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -1,37 +1,31 @@ """Convert field-level constraints to display text. 
Handles constraints from Annotated metadata and NewType wrappers: -Ge, Gt, Interval, Le, Lt, MaxLen, MinLen, GeometryTypeConstraint, -Reference, and custom constraint classes. +Ge, Gt, Interval, Le, Lt, ArrayMinLen, ArrayMaxLen, ScalarMinLen, +ScalarMaxLen, GeometryTypeConstraint, Reference, and custom constraint +classes. """ from __future__ import annotations from collections.abc import Callable -from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.system.primitive import GeometryTypeConstraint from overture.schema.system.ref import Reference from .docstring import first_docstring_line +from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen from .specs import TypeIdentity from .type_analyzer import ConstraintSource __all__ = [ "constraint_display_text", - "constraint_pattern", "describe_field_constraint", ] -# Bound attribute names paired with display operators. Each entry maps an -# annotated_types constraint attribute (Ge, Gt, Le, Lt, Interval) to its -# mathematical symbol for prose rendering. -# -# numeric_extraction.py has its own _BOUND_ATTRS for numeric extraction. The -# duplication is deliberate: these modules use the same attribute names for -# unrelated purposes (display formatting vs. numeric bound extraction), and -# coupling them for four string literals adds a dependency without value. +# Bound attribute -> mathematical symbol for prose rendering. _BOUND_OPS: tuple[tuple[str, str], ...] 
= ( ("ge", "≥"), ("gt", ">"), @@ -108,9 +102,9 @@ def describe_field_constraint( result = _first_bound(constraint) if result is not None: return result - if isinstance(constraint, MinLen): + if isinstance(constraint, (ArrayMinLen, ScalarMinLen)): return f"Minimum length: {constraint.min_length}" - if isinstance(constraint, MaxLen): + if isinstance(constraint, (ArrayMaxLen, ScalarMaxLen)): return f"Maximum length: {constraint.max_length}" if _is_opaque_constraint(constraint): @@ -130,7 +124,7 @@ def _constraint_class_description(constraint: object) -> str | None: return line or None -def constraint_pattern(constraint: object) -> str | None: +def _constraint_pattern(constraint: object) -> str | None: """Extract the regex pattern string from a constraint, if present. Traverses two levels: constraint.pattern is a compiled re.Pattern @@ -148,7 +142,7 @@ def constraint_display_text( description = _constraint_class_description(cs.constraint) if _is_opaque_constraint(cs.constraint) and description: cls_name = type(cs.constraint).__name__ - pattern = constraint_pattern(cs.constraint) + pattern = _constraint_pattern(cs.constraint) if pattern: return f"{description} (`{cls_name}`, pattern: `{pattern}`)" return f"{description} (`{cls_name}`)" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py new file mode 100644 index 000000000..86d385d60 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_walk.py @@ -0,0 +1,215 @@ +"""Generic traversal helpers over `FieldShape` trees. + +`shape_children` (one-level child enumeration) and `walk_shape` +(pre-order DFS) cover open-ended traversals; `terminal_of`, +`terminal_scalar`, `list_depth`, `newtype_name`, and `all_constraints` +cover the most common derived views. 
`ModelRef` and `UnionRef` are +leaves -- the walker does not cross model or union boundaries +automatically; that's a per-consumer decision. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterator + +from typing_extensions import assert_never + +from .field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) + +__all__ = [ + "all_constraints", + "has_array_layer", + "list_depth", + "newtype_name", + "shape_children", + "terminal_model_ref", + "terminal_of", + "terminal_primitive", + "terminal_scalar", + "walk_shape", +] + + +def terminal_of(shape: FieldShape) -> FieldShape: + """Unwrap `ArrayOf` and `NewTypeShape` layers to find the terminal shape. + + Returns the innermost shape that isn't a sequence or NewType wrapper. + `Scalar`, `ModelRef`, `UnionRef`, and `MapOf` count as terminals. + """ + while True: + match shape: + case ArrayOf(element=inner) | NewTypeShape(inner=inner): + shape = inner + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return shape + case _: + assert_never(shape) + + +def terminal_scalar(shape: FieldShape) -> Scalar | None: + """Return the terminal `Scalar`, or `None` for non-scalar terminals.""" + terminal = terminal_of(shape) + return terminal if isinstance(terminal, Scalar) else None + + +def terminal_primitive(shape: FieldShape) -> Primitive | None: + """Return the terminal `Primitive`, or `None` for non-primitive terminals. + + Like `terminal_scalar`, but returns `None` for `LiteralScalar` and + `AnyScalar` — use this when the caller needs `base_type` or + `source_type`, which only exist on `Primitive`. 
+ """ + terminal = terminal_of(shape) + return terminal if isinstance(terminal, Primitive) else None + + +def terminal_model_ref(shape: FieldShape) -> ModelRef | None: + """Return the terminal `ModelRef`, or `None` for non-model terminals.""" + terminal = terminal_of(shape) + return terminal if isinstance(terminal, ModelRef) else None + + +def shape_children(shape: FieldShape) -> Iterator[FieldShape]: + """Yield direct child shapes within *shape* (one level deep). + + `Scalar`, `ModelRef`, and `UnionRef` have no children. + """ + match shape: + case ArrayOf(element=element): + yield element + case MapOf(key=key, value=value): + yield key + yield value + case NewTypeShape(inner=inner): + yield inner + case Primitive() | LiteralScalar() | AnyScalar() | ModelRef() | UnionRef(): + return + case _: + assert_never(shape) + + +def walk_shape(shape: FieldShape, visit: Callable[[FieldShape], None]) -> None: + """Pre-order traversal of a `FieldShape` tree. + + Visits *shape*, then descends into each direct child via + `shape_children`. Stops at `ModelRef` / `UnionRef` -- recursion + across model boundaries is the caller's choice. + """ + visit(shape) + for child in shape_children(shape): + walk_shape(child, visit) + + +def list_depth(shape: FieldShape) -> int: + """Total number of `ArrayOf` layers in *shape*, looking through `NewTypeShape`. + + A NewType wrapping a list counts the same as a list wrapping a + NewType. + """ + depth = 0 + cur = shape + while True: + match cur: + case ArrayOf(element=element): + depth += 1 + cur = element + case NewTypeShape(inner=inner): + cur = inner + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return depth + case _: + assert_never(cur) + + +def has_array_layer(shape: FieldShape) -> bool: + """Whether *shape* has any `ArrayOf` layer, looking through `NewTypeShape`. 
+ + Prefer this over `list_depth(shape) > 0` -- callers that only need + "is this array-shaped" don't need to count layers. + """ + cur = shape + while isinstance(cur, NewTypeShape): + cur = cur.inner + return isinstance(cur, ArrayOf) + + +def newtype_name(shape: FieldShape) -> str | None: + """Return the outermost `NewTypeShape` name, looking through `ArrayOf` layers.""" + cur: FieldShape = shape + while isinstance(cur, ArrayOf): + cur = cur.element + match cur: + case NewTypeShape(name=name): + return name + case ( + Primitive() + | LiteralScalar() + | AnyScalar() + | ModelRef() + | UnionRef() + | MapOf() + ): + return None + case _: + assert_never(cur) + + +def all_constraints(shape: FieldShape) -> tuple[ConstraintSource, ...]: + """Concatenate the field's own constraints from every layer of *shape*. + + Walks `NewTypeShape` and `ArrayOf` wrappers to gather constraints + that apply to this field. Stops at `MapOf` (key/value constraints + belong to nested key/value shapes, not to the enclosing field) and + at `ModelRef` / `UnionRef` (which carry no constraints). Constraints + from outer `ArrayOf` layers appear before constraints from inner + layers, matching the structural order of the shape tree. 
+ """ + collected: list[ConstraintSource] = [] + cur = shape + while True: + match cur: + case ArrayOf(element=inner, constraints=cs): + collected.extend(cs) + cur = inner + case NewTypeShape(inner=inner): + cur = inner + case ( + Primitive(constraints=cs) + | LiteralScalar(constraints=cs) + | AnyScalar(constraints=cs) + ): + collected.extend(cs) + return tuple(collected) + case MapOf(constraints=cs): + collected.extend(cs) + return tuple(collected) + case ModelRef() | UnionRef(): + return tuple(collected) + case _: + assert_never(cur) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py new file mode 100644 index 000000000..36e3cfed6 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/length_constraints.py @@ -0,0 +1,47 @@ +"""Internal typed length-constraint classes. + +`annotated_types.MaxLen` and `annotated_types.MinLen` are polysemous: +`MaxLen(10)` on a `str` constrains character count, while `MaxLen(10)` +on a `list[X]` constrains cardinality. The codegen extractor splits +them by attachment layer so each variant carries its own dispatch: +`ArrayMinLen` / `ArrayMaxLen` for `ArrayOf` layers, `ScalarMinLen` / +`ScalarMaxLen` for scalar layers. + +These are codegen-internal classes -- Pydantic users continue to write +`Annotated[X, MinLen(n)]` in their schemas; the wrapping happens inside +`type_analyzer.attach_constraints` when the constraint reaches its +target layer. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from annotated_types import MaxLen, MinLen + +__all__ = [ + "ArrayMaxLen", + "ArrayMinLen", + "ScalarMaxLen", + "ScalarMinLen", +] + + +@dataclass(frozen=True) +class ArrayMinLen(MinLen): + """Cardinality lower bound for an `ArrayOf` layer.""" + + +@dataclass(frozen=True) +class ArrayMaxLen(MaxLen): + """Cardinality upper bound for an `ArrayOf` layer.""" + + +@dataclass(frozen=True) +class ScalarMinLen(MinLen): + """Character-count lower bound for a scalar layer.""" + + +@dataclass(frozen=True) +class ScalarMaxLen(MaxLen): + """Character-count upper bound for a scalar layer.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py index 76807e123..d3ef371e9 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -1,8 +1,8 @@ -"""Model extraction and tree expansion.""" +"""Pydantic model extraction into `ModelSpec`.""" from __future__ import annotations -import dataclasses +from collections.abc import Mapping from pydantic import BaseModel from pydantic.fields import FieldInfo @@ -11,11 +11,22 @@ from overture.schema.system.model_constraint import ModelConstraint from .docstring import clean_docstring -from .specs import FeatureSpec, FieldSpec, ModelSpec, is_model_class -from .type_analyzer import ConstraintSource, TypeInfo, TypeKind, analyze_type +from .field import ( + ConstraintSource, + FieldShape, + ModelRef, + UnionRef, +) +from .specs import FieldSpec, ModelSpec, is_model_class +from .type_analyzer import ( + ModelResolver, + UnionResolver, + analyze_type, + attach_constraints, + unwrap_list, +) __all__ = [ - "expand_model_tree", "extract_model", "resolve_field_alias", ] @@ -37,28 +48,30 
@@ def resolve_field_alias(field_name: str, field_info: FieldInfo) -> str: return field_name -def _merge_field_metadata(type_info: TypeInfo, field_info: FieldInfo) -> TypeInfo: - """Merge constraints from field_info.metadata into TypeInfo. - - Pydantic strips the Annotated wrapper from some fields (non-optional, - non-union) and moves the metadata to field_info.metadata. When this - happens, analyze_type sees a bare type and misses the constraints. - The two sets never overlap: field_info.metadata is empty when the - Annotated wrapper survives in the annotation. - """ - if not field_info.metadata: - return type_info - extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) - return dataclasses.replace(type_info, constraints=type_info.constraints + extra) - - -def _is_field_required(field_info: FieldInfo, type_info: TypeInfo) -> bool: +def _is_field_required(field_info: FieldInfo, is_optional: bool) -> bool: """Determine whether a field is required (no default and not Optional).""" has_default = ( field_info.default is not PydanticUndefined or field_info.default_factory is not None ) - return not has_default and not type_info.is_optional + return not has_default and not is_optional + + +def _attach_field_metadata(shape: FieldShape, field_info: FieldInfo) -> FieldShape: + """Merge constraints from `field_info.metadata` onto *shape*. + + Pydantic strips the outermost Annotated wrapper from some fields + (non-optional, non-union) and moves its metadata to + `field_info.metadata`. When that happens `analyze_type` sees a bare + type and misses those constraints. They anchor at the topmost + constraint-bearing layer, so we route them through + `attach_constraints` so that length-constraint wrapping applies here + just as it does during normal annotation unwrapping. 
+ """ + if not field_info.metadata: + return shape + extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) + return attach_constraints(shape, extra) def _basemodel_bases(cls: type) -> list[type[BaseModel]]: @@ -88,13 +101,13 @@ def _class_order(model_class: type[BaseModel]) -> list[type]: def _field_order(model_class: type[BaseModel]) -> list[str]: - """Return model_fields keys in documentation order. + """Return `model_fields` keys in documentation order. Walks the class hierarchy recursively. At each level of multiple - inheritance, the first base is the "primary chain" and the rest - are "mixins." Primary chain and own fields come first, then mixin - fields in declaration order. Single-inheritance levels use - Pydantic's default reversed-MRO order. + inheritance, the first base is the primary chain and the rest are + mixins. Primary chain and own fields come first, then mixin fields + in declaration order. Single-inheritance levels use Pydantic's + default reversed-MRO order. """ valid_names = set(model_class.model_fields.keys()) result: list[str] = [] @@ -111,94 +124,124 @@ def extract_model( model_class: type[BaseModel], *, entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, ) -> ModelSpec: - """Extract model specification from a Pydantic model class.""" - field_info_map = model_class.model_fields - ordered_keys = _field_order(model_class) - - fields: list[FieldSpec] = [] - for field_name in ordered_keys: - field_info = field_info_map[field_name] - output_name = resolve_field_alias(field_name, field_info) - - # Use field_info.annotation (resolved TypeVars) not get_type_hints - annotation = field_info.annotation - if annotation is None: - continue + """Extract a fully-resolved `ModelSpec` from a Pydantic model class. + + Recurses into sub-models and unions, producing `ModelRef` / + `UnionRef` terminals with their specs resolved. 
Cycles in the + model graph (a field whose source type is an ancestor on the + current extraction stack) produce a `ModelRef` pointing at the + in-progress ancestor spec with `starts_cycle=True` so consumers + stop recursion at the back-edge. + """ + return _extract_model_recursive( + model_class, + entry_point=entry_point, + partitions=partitions or {}, + cache={}, + ancestors=frozenset(), + ) - type_info = _merge_field_metadata(analyze_type(annotation), field_info) - fields.append( - FieldSpec( - name=output_name, - type_info=type_info, - description=field_info.description or type_info.description, - is_required=_is_field_required(field_info, type_info), - ) - ) +def _extract_model_recursive( + model_class: type[BaseModel], + *, + entry_point: str | None, + partitions: Mapping[str, str], + cache: dict[type, ModelSpec], + ancestors: frozenset[type], +) -> ModelSpec: + """Inner recursive helper for `extract_model`. - return ModelSpec( + Inserts the (partial) `ModelSpec` into `cache` before populating + its fields so cycles can find it. `ancestors` is the set of types + currently on the recursion stack -- a sub-field whose source type + appears there is a back-edge and gets `starts_cycle=True`. + """ + spec = ModelSpec( name=model_class.__name__, description=clean_docstring(model_class.__doc__), - fields=fields, + fields=[], source_type=model_class, entry_point=entry_point, + partitions=partitions, constraints=ModelConstraint.get_model_constraints(model_class), ) + cache[model_class] = spec + descendant_ancestors = ancestors | {model_class} + model_resolver, union_resolver = _make_resolvers(cache, descendant_ancestors) -def expand_model_tree( - spec: FeatureSpec, - cache: dict[type, ModelSpec] | None = None, -) -> FeatureSpec: - """Populate model references on MODEL-kind fields, recursively. - - Walks *spec*'s fields and sets `field.model` for fields whose type - is a Pydantic model. Uses *cache* to reuse already-extracted ModelSpecs - and detect shared references. 
Marks fields whose model creates a cycle - in the ancestor chain with `starts_cycle=True`. + fields: list[FieldSpec] = [] + for field_name in _field_order(model_class): + field_info = model_class.model_fields[field_name] + annotation = field_info.annotation + if annotation is None: + continue + shape, is_optional, ti_description = analyze_type( + annotation, + model_resolver=model_resolver, + union_resolver=union_resolver, + ) + shape = _attach_field_metadata(shape, field_info) + fields.append( + FieldSpec( + name=resolve_field_alias(field_name, field_info), + shape=shape, + description=field_info.description or ti_description, + is_required=_is_field_required(field_info, is_optional), + is_optional=is_optional, + ) + ) - Mutates *spec* in place and returns it. - """ - if cache is None: - cache = {} - if isinstance(spec, ModelSpec) and spec.source_type is not None: - cache[spec.source_type] = spec - ancestors = frozenset({spec.source_type}) if spec.source_type else frozenset() - _expand_fields(spec.fields, cache, ancestors) + spec.fields = fields return spec -def _expand_fields( - fields: list[FieldSpec], +def _make_resolvers( cache: dict[type, ModelSpec], ancestors: frozenset[type], -) -> None: - """Recursive helper for expand_model_tree. - - Cache insertion happens before recursion — cycle detection depends - on the ancestor's ModelSpec being in the cache when the back-edge - is encountered. +) -> tuple[ModelResolver, UnionResolver]: + """Build the resolvers that recursively extract sub-models / sub-unions. + + `cache` shares already-extracted sub-specs across a single + extraction so sub-models referenced more than once share a + `ModelSpec`. `ancestors` carries the recursion stack for cycle + detection -- a back-edge produces a `ModelRef` pointing at the + in-progress ancestor spec with `starts_cycle=True`. 
""" - for field_spec in fields: - ti = field_spec.type_info - source = ti.source_type - if ti.kind == TypeKind.UNION: - # Union fields have no single model to recurse into. - # The field row appears in the output; skip inline expansion. - continue - if ti.kind != TypeKind.MODEL or source is None: - continue - if source in ancestors: - # Cycle: reuse existing spec, mark the edge - field_spec.model = cache.get(source) - field_spec.starts_cycle = True - elif source in cache: - # Shared reference: reuse, not a cycle - field_spec.model = cache[source] - else: - sub_spec = extract_model(source) - cache[source] = sub_spec # insert BEFORE recursing - field_spec.model = sub_spec - _expand_fields(sub_spec.fields, cache, ancestors | {source}) + def resolve_model(cls: type[BaseModel]) -> ModelRef: + if cls in ancestors: + return ModelRef(model=cache[cls], starts_cycle=True) + cached = cache.get(cls) + if cached is not None: + return ModelRef(model=cached) + sub_spec = _extract_model_recursive( + cls, + entry_point=None, + partitions={}, + cache=cache, + ancestors=ancestors, + ) + return ModelRef(model=sub_spec) + + def resolve_union( + annotation: object, + members: tuple[type[BaseModel], ...], + _description: str | None, + ) -> UnionRef: + # Late import: extract_union calls back into extract_model for + # member classes. A module-level import would be a cycle. + from .union_extraction import extract_union + + # Recover the union alias name: `analyze_type` reaches the + # union via `members[0].__name__` when the alias name is lost + # (plain `Foo = Annotated[...]` doesn't preserve it pre-PEP-695). + # Convention: members extend `Base`. 
+ placeholder = members[0].__name__ if members else "" + sub_union = extract_union(placeholder, unwrap_list(annotation)) + return UnionRef(union=sub_union) + + return resolve_model, resolve_union diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py index ff11c770a..5e074d259 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py @@ -1,6 +1,7 @@ """NewType extraction.""" from .docstring import clean_docstring, is_custom_docstring +from .field import NewTypeShape from .specs import NewTypeSpec from .type_analyzer import analyze_type @@ -8,19 +9,31 @@ def extract_newtype(newtype_callable: object) -> NewTypeSpec: - """Extract NewType specification from a NewType callable.""" - type_info = analyze_type(newtype_callable) - doc = getattr(newtype_callable, "__doc__", None) - name = type_info.newtype_name or getattr(newtype_callable, "__name__", None) + """Extract a `NewTypeSpec` from a NewType callable. + + `analyze_type(newtype_callable)` returns a shape whose outermost + layer is the NewType's own `NewTypeShape`. We strip that wrapper so + `NewTypeSpec.shape` describes the *underlying* type -- the NewType + isn't a self-reference on its own page. 
+ """ + shape, _, ti_description = analyze_type(newtype_callable) + + name = getattr(newtype_callable, "__name__", None) + if isinstance(shape, NewTypeShape) and shape.name == name: + underlying = shape.inner + else: + underlying = shape + if name is None: msg = f"Cannot determine name for NewType: {newtype_callable!r}" raise ValueError(msg) - description = ( - clean_docstring(doc) if is_custom_docstring(doc) else type_info.description - ) + + doc = getattr(newtype_callable, "__doc__", None) + description = clean_docstring(doc) if is_custom_docstring(doc) else ti_description + return NewTypeSpec( name=name, description=description, - type_info=type_info, + shape=underlying, source_type=newtype_callable, ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py index ae899a4e6..7416d42f8 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/numeric_extraction.py @@ -3,9 +3,10 @@ from annotated_types import Interval from .docstring import first_docstring_line +from .field import FieldShape, Scalar +from .field_walk import terminal_of from .newtype_extraction import extract_newtype from .specs import NumericSpec, TypeIdentity -from .type_analyzer import TypeInfo __all__ = [ "extract_numeric_bounds", @@ -13,24 +14,22 @@ ] -# Bound attribute names on annotated_types constraint objects (Ge, Gt, Le, -# Lt, Interval) used for numeric bound extraction. -# -# field_constraints.py has its own _BOUND_OPS for display formatting. -# The duplication is deliberate: these modules use the same attribute names -# for unrelated purposes (numeric extraction vs. prose rendering), and -# coupling them for four string literals adds a dependency without value. 
+# Bound attribute names on annotated_types constraints (Ge, Gt, Le, Lt, Interval). _BOUND_ATTRS = ("ge", "gt", "le", "lt") -def extract_numeric_bounds(type_info: TypeInfo) -> Interval: - """Extract numeric bounds from a TypeInfo's constraints. +def extract_numeric_bounds(shape: FieldShape) -> Interval: + """Extract numeric bounds from the constraints on a shape's terminal scalar. - Checks for ge, gt, le, and lt attributes on constraint objects. - Stops at the first constraint defining each bound. + Walks `NewTypeShape` / `ArrayOf` wrappers to find the terminal + `Scalar`, then scans its constraints for `ge`, `gt`, `le`, and `lt` + attributes. Stops at the first constraint defining each bound. """ + terminal = terminal_of(shape) + if not isinstance(terminal, Scalar): + return Interval() found: dict[str, int | float] = {} - for cs in type_info.constraints: + for cs in terminal.constraints: c = cs.constraint for attr in _BOUND_ATTRS: if attr not in found: @@ -47,7 +46,10 @@ def extract_numerics( specs: list[NumericSpec] = [] for tid in numeric_ids: newtype_spec = extract_newtype(tid.obj) - bounds = extract_numeric_bounds(newtype_spec.type_info) + # extract_newtype strips the outer NewTypeShape, so the spec's + # terminal scalar already carries the constraints the NewType + # contributed -- extract_numeric_bounds walks straight to it. 
+ bounds = extract_numeric_bounds(newtype_spec.shape) description = first_docstring_line(getattr(tid.obj, "__doc__", None)) float_bits = _extract_float_bits(tid.name) specs.append( @@ -68,5 +70,5 @@ def extract_numerics( def _extract_float_bits(name: str) -> int | None: - """Extract bit width from a float type name like 'float32'.""" + """Extract bit width from a float type name like `float32`.""" return _FLOAT_BITS.get(name) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py index acba1577d..3aac1e648 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -3,15 +3,18 @@ from __future__ import annotations import functools +from collections.abc import Mapping from dataclasses import dataclass, field -from typing import Any, Protocol, TypeGuard, runtime_checkable +from typing import Any, TypeAlias, TypeGuard from annotated_types import Interval from pydantic import BaseModel +from overture.schema.system.discovery.tag import get_values_for_key from overture.schema.system.model_constraint import ModelConstraint -from .type_analyzer import TypeInfo, TypeKind, UnsupportedUnionError, analyze_type +from .field import FieldShape +from .type_analyzer import capture_union_members __all__ = [ "AnnotatedField", @@ -19,6 +22,7 @@ "EnumSpec", "FeatureSpec", "FieldSpec", + "MemberSpec", "ModelSpec", "NewTypeSpec", "NumericSpec", @@ -28,11 +32,22 @@ "filter_model_classes", "is_model_class", "is_pydantic_sourced", - "is_pydantic_type", "is_union_alias", + "partitions_from_tags", ] +def partitions_from_tags(tags: frozenset[str]) -> dict[str, str]: + """Map registry tags to Hive partition columns for a feature. 
+ + Today populated only from `overture:theme=`; the value object is + a generic name -> value map so additional partition keys (e.g. release + version) can be added without changing the surrounding pipeline. + """ + theme = next(iter(get_values_for_key(tags, "overture:theme")), None) + return {"theme": theme} if theme is not None else {} + + @dataclass(frozen=True, eq=False) class TypeIdentity: """Unique identity for a type in the codegen system. @@ -106,31 +121,18 @@ class EnumSpec(_SourceTypeIdentityMixin): @dataclass class FieldSpec: - """Specification for a model field.""" - - name: str - type_info: TypeInfo - description: str | None - is_required: bool - model: ModelSpec | None = None - starts_cycle: bool = False + """Specification for a model field: header metadata plus structural shape. - -@runtime_checkable -class FeatureSpec(Protocol): - """Shared interface for feature-level specs (ModelSpec, UnionSpec).""" + `shape` is the full `FieldShape` tree, including any sub-model + (`ModelRef`) and sub-union (`UnionRef`) references already + resolved during extraction. + """ name: str - description: str | None - source_type: type[BaseModel] | None - entry_point: str | None - constraints: tuple[ModelConstraint, ...] - - @property - def fields(self) -> list[FieldSpec]: ... - - @property - def identity(self) -> TypeIdentity: ... + shape: FieldShape + description: str | None = None + is_required: bool = True + is_optional: bool = False @dataclass @@ -142,6 +144,7 @@ class ModelSpec(_SourceTypeIdentityMixin): fields: list[FieldSpec] = field(default_factory=list) source_type: type[BaseModel] | None = None entry_point: str | None = None + partitions: Mapping[str, str] = field(default_factory=dict) constraints: tuple[ModelConstraint, ...] = () @@ -150,12 +153,24 @@ class AnnotatedField: """A FieldSpec paired with union variant provenance.""" field_spec: FieldSpec - variant_sources: tuple[str, ...] | None + variant_sources: tuple[type[BaseModel], ...] 
| None -# eq=False: contains mutable lists and a cached_property, so -# dataclass-generated __eq__ would be unreliable. -@dataclass(eq=False) +@dataclass +class MemberSpec: + """A union member's class paired with its extracted `ModelSpec`. + + `extract_union` already runs `extract_model` on every member to + build the merged `annotated_fields`; retaining the result here lets + consumers (check builder, base-row generator) reuse it instead of + re-extracting the same subtree. + """ + + member_cls: type[BaseModel] + spec: ModelSpec + + +@dataclass class UnionSpec: """Specification for a discriminated union type alias.""" @@ -167,8 +182,10 @@ class UnionSpec: discriminator_mapping: dict[str, type[BaseModel]] | None source_annotation: object common_base: type[BaseModel] + member_specs: list[MemberSpec] = field(default_factory=list) source_type: type[BaseModel] | None = field(default=None, init=False) entry_point: str | None = None + partitions: Mapping[str, str] = field(default_factory=dict) constraints: tuple[ModelConstraint, ...] = () @functools.cached_property @@ -183,11 +200,16 @@ def identity(self) -> TypeIdentity: @dataclass class NewTypeSpec(_SourceTypeIdentityMixin): - """Specification for a NewType.""" + """Specification for a NewType. + + `shape` is the underlying shape -- i.e. the `inner` of the + NewType's own `NewTypeShape` wrapper, with the wrapper stripped + so the NewType isn't a self-reference on its own page. + """ name: str description: str | None - type_info: TypeInfo + shape: FieldShape source_type: object | None = None @@ -219,6 +241,13 @@ def docs_url(self) -> str: ) +FeatureSpec: TypeAlias = ModelSpec | UnionSpec +"""Top-level feature types passed through the extraction pipeline. + +Consumers narrow with `isinstance` when an arm-specific attribute +is needed (e.g. `UnionSpec.discriminator_field`). +""" + SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec """Non-feature types referenced by feature models. 
@@ -232,15 +261,6 @@ def is_pydantic_sourced(source_type: type | None) -> bool: return getattr(source_type, "__module__", "").startswith("pydantic") -def is_pydantic_type(ti: TypeInfo) -> bool: - """Check whether a TypeInfo represents a Pydantic built-in type.""" - return ( - ti.kind == TypeKind.PRIMITIVE - and ti.source_type is not None - and is_pydantic_sourced(ti.source_type) - ) - - def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: """Check whether *obj* is a concrete BaseModel subclass (not a type alias).""" return isinstance(obj, type) and issubclass(obj, BaseModel) @@ -248,11 +268,7 @@ def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: def is_union_alias(obj: object) -> bool: """Check whether *obj* is a discriminated union type alias of BaseModel subclasses.""" - try: - ti = analyze_type(obj) - except (TypeError, UnsupportedUnionError): - return False - return ti.kind == TypeKind.UNION + return capture_union_members(obj) is not None def filter_model_classes(models: dict[Any, Any]) -> list[type[BaseModel]]: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py index a0cd5314f..349f1a375 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -1,344 +1,526 @@ -"""Iterative type unwrapping for Pydantic model annotations.""" +"""Annotation-to-`FieldShape` analysis. + +`analyze_type` recurses through a Python type annotation, peeling +`NewType`, `Annotated`, `Optional`, `list`, and `dict` layers one frame +at a time, and produces a `FieldShape` describing the structure with +constraints attached to the layer they target. 
+ +Each `Annotated` frame attaches its metadata to the shape its inner +annotation unwraps to, so that, e.g., the inner and outer `MinLen` in +`Annotated[list[Annotated[str, MinLen(2)]], MinLen(3)]` land on +different layers as different typed variants: `ArrayMinLen(3)` on the +`ArrayOf`, `ScalarMinLen(2)` on the `Primitive`. + +MODEL and UNION terminals are resolved via optional callbacks. When +no resolver is supplied a MODEL terminal falls back to +`Primitive(source_type=cls)`; a multi-arm UNION raises +`UnsupportedUnionError`. Callers that need to recurse into sub-models +pass resolvers that build a `ModelRef`/`UnionRef` with the resolved +spec. +""" from __future__ import annotations import types from collections.abc import Callable -from dataclasses import dataclass, field -from enum import Enum, auto -from typing import Annotated, Any, Literal, Union, get_args, get_origin +from dataclasses import dataclass, replace +from typing import Annotated, Any, Literal, NoReturn, Union, get_args, get_origin +from annotated_types import MaxLen, MinLen from pydantic import BaseModel from pydantic.fields import FieldInfo -from typing_extensions import Sentinel +from typing_extensions import Sentinel, assert_never from .docstring import clean_docstring +from .field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + NewTypeShape, + Primitive, +) +from .field_walk import terminal_of +from .length_constraints import ArrayMaxLen, ArrayMinLen, ScalarMaxLen, ScalarMinLen + + +@dataclass(frozen=True, slots=True) +class _ContinueWith: + """`_peel_union` result: next annotation to keep peeling.""" + + annotation: object + is_optional: bool + + +@dataclass(frozen=True, slots=True) +class _Resolved: + """`_peel_union` result: finished shape, short-circuit the unwrap.""" + + shape: FieldShape + is_optional: bool + + +@dataclass(frozen=True, slots=True) +class _NewTypeCtx: + """The innermost NewType currently in scope.""" + + name: str + ref: 
object + __all__ = [ "ConstraintSource", - "TypeKind", - "TypeInfo", + "ModelResolver", + "UnionResolver", "UnsupportedUnionError", "analyze_type", + "attach_constraints", + "capture_union_members", "is_newtype", "single_literal_value", - "walk_type_info", + "unwrap_list", ] class UnsupportedUnionError(TypeError): - """Raised when analyze_type encounters a multi-type union it cannot represent.""" + """Raised when `analyze_type` encounters a multi-type union it cannot represent.""" -class TypeKind(Enum): - """Classification of type kinds.""" +ModelResolver = Callable[[type[BaseModel]], FieldShape] +"""Resolver invoked when `analyze_type` reaches a `BaseModel` terminal.""" - PRIMITIVE = auto() - LITERAL = auto() - ENUM = auto() - MODEL = auto() - UNION = auto() +UnionResolver = Callable[[object, tuple[type[BaseModel], ...], str | None], FieldShape] +"""Resolver invoked at a multi-arm union terminal. +Receives the original union annotation, the tuple of member classes, +and the description accumulated from enclosing `Annotated` layers. +""" -@dataclass(slots=True) -class ConstraintSource: - """A constraint paired with the NewType that contributed it.""" - source_ref: object | None - source_name: str | None - constraint: object +def is_newtype(annotation: object) -> bool: + """Check whether *annotation* is a `typing.NewType`. + NewType creates a callable with a `__supertype__` attribute pointing + to the wrapped type. No public API exists for this check. + """ + return callable(annotation) and hasattr(annotation, "__supertype__") -@dataclass(slots=True) -class TypeInfo: - """Information about a type annotation.""" - base_type: str - kind: TypeKind - is_optional: bool = False - list_depth: int = 0 - newtype_outer_list_depth: int = 0 - is_dict: bool = False - dict_key_type: TypeInfo | None = None - dict_value_type: TypeInfo | None = None - constraints: tuple[ConstraintSource, ...] = () - literal_values: tuple[object, ...] 
| None = None - source_type: type | None = None - newtype_name: str | None = None - newtype_ref: object | None = None - union_members: tuple[type[BaseModel], ...] | None = None - description: str | None = None +class _UnionCaptured(Exception): # noqa: N818 - control flow, not a true error + """Raised by the capturing union resolver to short-circuit analyze_type.""" - @property - def is_list(self) -> bool: - """Whether this type has any list wrapping.""" - return self.list_depth > 0 + def __init__( + self, members: tuple[type[BaseModel], ...], description: str | None + ) -> None: + self.members = members + self.description = description -def walk_type_info(ti: TypeInfo, visitor: Callable[[TypeInfo], None]) -> None: - """Call *visitor* on *ti*, then recurse into dict key/value types. +def capture_union_members( + annotation: object, +) -> tuple[tuple[type[BaseModel], ...], str | None] | None: + """Peel wrappers from *annotation* and return its union members. - Captures the shared recursive descent pattern used by type collection - and reverse reference computation. Union members are `type` objects - (not `TypeInfo`), so callers handle them directly. + Returns `(members, description)` when *annotation* (possibly wrapped + in `Annotated`) terminates in a multi-arm union of `BaseModel` + subclasses, otherwise `None`. Internally drives `analyze_type` with + a capturing resolver and unwinds via an exception once the union + terminal is reached. The resolver fires only after every enclosing + `Annotated` layer is peeled, so the captured description matches what + `analyze_type` would return. 
""" - visitor(ti) - if ti.dict_key_type is not None: - walk_type_info(ti.dict_key_type, visitor) - if ti.dict_value_type is not None: - walk_type_info(ti.dict_value_type, visitor) + def _capture( + _ann: object, + members: tuple[type[BaseModel], ...], + description: str | None, + ) -> NoReturn: + raise _UnionCaptured(members, description) -def is_newtype(annotation: object) -> bool: - """Check if annotation is a typing.NewType. - - NewType creates a callable with a __supertype__ attribute pointing - to the wrapped type. No public API exists for this check. - """ - return callable(annotation) and hasattr(annotation, "__supertype__") + try: + analyze_type(annotation, union_resolver=_capture) + except _UnionCaptured as captured: + return captured.members, captured.description + except (TypeError, UnsupportedUnionError): + return None + return None def _is_union(origin: object) -> bool: - """Check if an origin represents a union type (X | Y or Union[X, Y]).""" + """Whether an origin represents a union type (`X | Y` or `Union[X, Y]`).""" return origin in (types.UnionType, Union) -@dataclass(slots=True) -class _UnwrapState: - """Accumulated state from iterative type unwrapping. +def _filter_sentinel_arms(args: tuple[object, ...]) -> list[object]: + """Remove `NoneType` and `Sentinel` arms from union type arguments.""" + return [a for a in args if a is not types.NoneType and not isinstance(a, Sentinel)] + + +def analyze_type( + annotation: object, + *, + model_resolver: ModelResolver | None = None, + union_resolver: UnionResolver | None = None, +) -> tuple[FieldShape, bool, str | None]: + """Analyze an annotation into a `FieldShape` plus field-level metadata. + + Parameters + ---------- + annotation + The annotation to analyze. + model_resolver + Optional callback invoked when the terminal is a `BaseModel` + subclass. Returns the `FieldShape` to use at that position -- + typically a `ModelRef` with a resolved `ModelSpec`. 
Defaults to + a `Scalar` carrying the class as `source_type` for callers that + cannot resolve sub-models (e.g. dict key/value analysis). + union_resolver + Optional callback invoked when the terminal is a multi-arm + union of `BaseModel` subclasses. Returns the `FieldShape` to + use -- typically a `UnionRef` with a resolved `UnionSpec`. + Required to support unions; raises otherwise. + + Returns + ------- + tuple[FieldShape, bool, str | None] + The structural shape, whether the field accepts `None`, and + the first `FieldInfo.description` encountered during unwrapping. + """ + return _unwrap( + annotation, + newtype_ctx=None, + model_resolver=model_resolver, + union_resolver=union_resolver, + ) + - Tracks NewType names and refs during unwrapping: - - `outermost_newtype_name` / `outermost_newtype_ref`: the first - NewType encountered, exposed as `TypeInfo.newtype_name` / `newtype_ref`. - - `last_newtype_name`: the most recently entered NewType name, used - as the resolved `base_type` for the terminal type. - - `last_newtype_ref`: the most recently entered NewType callable, - used as constraint provenance (which NewType contributed each constraint). - - `newtype_outer_list_depth`: list layers accumulated before entering - the outermost NewType boundary. +def _unwrap( + annotation: object, + *, + newtype_ctx: _NewTypeCtx | None, + model_resolver: ModelResolver | None, + union_resolver: UnionResolver | None, +) -> tuple[FieldShape, bool, str | None]: + """Recurse one annotation layer, returning its `FieldShape` subtree. + + Parameters + ---------- + newtype_ctx + The innermost `NewType` currently in scope, or None. Sets the + terminal `Primitive.base_type` and tags constraints with their + contributing `NewType`. + + Returns + ------- + tuple + The shape subtree, whether this layer or any descendant accepts + `None`, and the first `FieldInfo.description` found. 
""" - is_optional: bool = False - list_depth: int = 0 - newtype_outer_list_depth: int = 0 - is_dict: bool = False - dict_key_type: TypeInfo | None = None - dict_value_type: TypeInfo | None = None - constraints: list[ConstraintSource] = field(default_factory=list) - outermost_newtype_name: str | None = None - outermost_newtype_ref: object | None = None - last_newtype_name: str | None = None - last_newtype_ref: object | None = None - description: str | None = None - - def add_constraint(self, constraint: object) -> None: - self.constraints.append( - ConstraintSource(self.last_newtype_ref, self.last_newtype_name, constraint) + def _recurse( + annotation: object, newtype_ctx: _NewTypeCtx | None + ) -> tuple[FieldShape, bool, str | None]: + """Recurse into a child annotation, carrying the invariant resolvers.""" + return _unwrap( + annotation, + newtype_ctx=newtype_ctx, + model_resolver=model_resolver, + union_resolver=union_resolver, ) - def build_type_info( - self, - *, - base_type: str, - kind: TypeKind, - literal_values: tuple[object, ...] | None = None, - source_type: type | None = None, - union_members: tuple[type[BaseModel], ...] 
| None = None, - ) -> TypeInfo: - return TypeInfo( - base_type=base_type, - kind=kind, - is_optional=self.is_optional, - list_depth=self.list_depth, - newtype_outer_list_depth=self.newtype_outer_list_depth, - is_dict=self.is_dict, - dict_key_type=self.dict_key_type, - dict_value_type=self.dict_value_type, - constraints=tuple(self.constraints), - literal_values=literal_values, - source_type=source_type, - newtype_name=self.outermost_newtype_name, - newtype_ref=self.outermost_newtype_ref, - union_members=union_members, - description=self.description, + origin = get_origin(annotation) + + if is_newtype(annotation): + ctx = _NewTypeCtx(annotation.__name__, annotation) # type: ignore[attr-defined] + inner, opt, desc = _recurse(annotation.__supertype__, ctx) # type: ignore[attr-defined] + inner = _erase_inner_newtypes(inner) + return NewTypeShape(name=ctx.name, ref=ctx.ref, inner=inner), opt, desc + + if origin is Annotated: + args = get_args(annotation) + inner_annotation = args[0] + own_desc: str | None = None + collected: list[ConstraintSource] = [] + for c in args[1:]: + if isinstance(c, FieldInfo): + if c.description is not None and own_desc is None: + own_desc = clean_docstring(c.description) + for m in c.metadata: + collected.append(_constraint_source(m, newtype_ctx)) + else: + collected.append(_constraint_source(c, newtype_ctx)) + + # Pick the annotation to recurse into and the optionality this + # Annotated layer contributes. A directly-wrapped union is peeled + # here so the resolver still sees the Annotated form; a `_Resolved` + # union short-circuits with the constraints attached. 
+ next_annotation = inner_annotation + layer_optional = False + if _is_union(get_origin(inner_annotation)): + result = _peel_union( + inner_annotation, + union_resolver, + resolver_annotation=annotation, + description=own_desc, + ) + match result: + case _Resolved(shape): + return ( + attach_constraints(shape, tuple(collected)), + result.is_optional, + own_desc, + ) + case _ContinueWith(next_annotation, layer_optional): + pass + case _: + assert_never(result) + + inner, opt, desc = _recurse(next_annotation, newtype_ctx) + inner = attach_constraints(inner, tuple(collected)) + return ( + inner, + opt or layer_optional, + own_desc if own_desc is not None else desc, ) + if _is_union(origin): + result = _peel_union(annotation, union_resolver) + match result: + case _Resolved(shape): + return shape, result.is_optional, None + case _ContinueWith(next_annotation, is_optional): + inner, opt, desc = _recurse(next_annotation, newtype_ctx) + return inner, opt or is_optional, desc + case _: + assert_never(result) + + if origin is list: + args = get_args(annotation) + if not args: + raise TypeError("Bare list without type argument is not supported") + element, opt, desc = _recurse(args[0], newtype_ctx) + return ArrayOf(element=element, constraints=()), opt, desc + + if origin is dict: + args = get_args(annotation) + if not args: + raise TypeError("Bare dict without type arguments is not supported") + key_shape, _, _ = _recurse(args[0], None) + value_shape, _, _ = _recurse(args[1], None) + return MapOf(key=key_shape, value=value_shape, constraints=()), False, None + + return _terminal(annotation, newtype_ctx, model_resolver), False, None + + +def _constraint_source( + constraint: object, newtype_ctx: _NewTypeCtx | None +) -> ConstraintSource: + return ConstraintSource( + source_ref=newtype_ctx.ref if newtype_ctx else None, + source_name=newtype_ctx.name if newtype_ctx else None, + constraint=constraint, + ) -def analyze_type(annotation: object) -> TypeInfo: - """Analyze a type 
annotation and return TypeInfo. - Iteratively unwraps type wrappers (Annotated, Optional, list, NewType) until - reaching a terminal type. - """ - state = _UnwrapState() - - while True: - origin = get_origin(annotation) - - # Handle NewType (e.g., int32 = NewType("int32", Annotated[int, ...])) - if is_newtype(annotation): - name = annotation.__name__ # type: ignore[attr-defined] - state.last_newtype_name = name - state.last_newtype_ref = annotation - if state.outermost_newtype_name is None: - state.newtype_outer_list_depth = state.list_depth - state.outermost_newtype_name = name - state.outermost_newtype_ref = annotation - annotation = annotation.__supertype__ # type: ignore[attr-defined] - continue - - # Handle Annotated types (Annotated[X, metadata...]) - if origin is Annotated: - args = get_args(annotation) - annotation = args[0] - for c in args[1:]: - if isinstance(c, FieldInfo): - if c.description is not None and state.description is None: - state.description = clean_docstring(c.description) - for m in c.metadata: - state.add_constraint(m) - else: - state.add_constraint(c) - continue - - # Handle union types (X | None or Optional[X]) - if _is_union(origin): - args = get_args(annotation) - # Filter out None, Sentinel types (Pydantic's ), and - # Literal alternatives (e.g., HttpUrl | Literal[""] where the - # Literal is a special-value sentinel, not the primary type). - if any(a is types.NoneType for a in args): - state.is_optional = True - - non_none_args = [ - a - for a in args - if a is not types.NoneType and not isinstance(a, Sentinel) - ] - - # Only filter out Literal arms when a concrete (non-Literal) type - # exists. Without this guard, Optional[Literal["x"]] would lose - # all args because the Literal *is* the primary type. 
- concrete_args = [a for a in non_none_args if get_origin(a) is not Literal] - real_args = concrete_args if concrete_args else non_none_args - - if len(real_args) > 1: - # Check if all real args are BaseModel subclasses - # (unwrap Annotated wrappers to get the actual class) - members: list[type[BaseModel]] = [] - for arg in real_args: - inner = arg - if get_origin(inner) is Annotated: - inner = get_args(inner)[0] - if isinstance(inner, type) and issubclass(inner, BaseModel): - members.append(inner) - else: - raise UnsupportedUnionError( - f"Multi-type unions not supported: {annotation}" - ) - return state.build_type_info( - base_type=members[0].__name__, - kind=TypeKind.UNION, - union_members=tuple(members), - ) +def _erase_inner_newtypes(shape: FieldShape) -> FieldShape: + """Drop every `NewTypeShape` reachable through `ArrayOf` layers. - if not real_args: - raise UnsupportedUnionError( - f"Union with no concrete types: {annotation}" - ) + A `NewType` chain — including NewTypes nested as list elements — + collapses to a single `NewTypeShape` (the outermost), with inner + NewType names surviving only as the terminal `Primitive.base_type`. + Each `NewType` frame calls this on its recursion result so that by + the time the outermost frame returns, exactly one `NewTypeShape` + remains per spine. - annotation = real_args[0] - continue + Recurses through `ArrayOf.element` but stops at `MapOf` — `dict` + key/value are independent spines, each keeping its own outermost + `NewTypeShape` — and at scalar / `ModelRef` / `UnionRef` terminals. + """ + match shape: + case NewTypeShape(inner=inner): + return _erase_inner_newtypes(inner) + case ArrayOf(element=element): + return replace(shape, element=_erase_inner_newtypes(element)) + case _: + return shape + + +def attach_constraints( + shape: FieldShape, constraints: tuple[ConstraintSource, ...] +) -> FieldShape: + """Prepend `constraints` to the outermost non-`NewTypeShape` layer. 
+ + Skips any number of leading `NewTypeShape` wrappers, then prepends + to the `.constraints` of the first `ArrayOf`, `MapOf`, `Primitive`, + `LiteralScalar`, or `AnyScalar` reached. Does not descend into + `ArrayOf.element` or `MapOf.key` / `.value`. `ModelRef` / `UnionRef` + carry no constraints -- constraints destined for a model terminal + are dropped (preserved verbatim from current behavior). + + Length constraints (`annotated_types.MinLen` / `MaxLen`) are wrapped + into the typed `length_constraints` variants matching the + attachment layer: `ArrayMinLen` / `ArrayMaxLen` on `ArrayOf`, + `ScalarMinLen` / `ScalarMaxLen` on scalar layers. `MapOf` raises: + map-length constraints have no current schema use and would + otherwise silently take the scalar path. + """ + if not constraints: + return shape + match shape: + case NewTypeShape(inner=inner): + return replace(shape, inner=attach_constraints(inner, constraints)) + case ArrayOf(): + wrapped = tuple(_wrap_length_for_array(cs) for cs in constraints) + return replace(shape, constraints=wrapped + shape.constraints) + case MapOf(): + _reject_length_on_map(constraints) + return replace(shape, constraints=constraints + shape.constraints) + case Primitive() | LiteralScalar() | AnyScalar(): + wrapped = tuple(_wrap_length_for_scalar(cs) for cs in constraints) + return replace(shape, constraints=wrapped + shape.constraints) + case _: + return shape + + +def _wrap_length_for_array(cs: ConstraintSource) -> ConstraintSource: + """Replace a raw `MinLen`/`MaxLen` with its `ArrayOf`-layer variant. + + Uses exact-type checks so already-wrapped variants (`ArrayMinLen`, + `ScalarMinLen`, etc.) are returned unchanged. 
+ """ + if type(cs.constraint) is MinLen: + return replace(cs, constraint=ArrayMinLen(min_length=cs.constraint.min_length)) + if type(cs.constraint) is MaxLen: + return replace(cs, constraint=ArrayMaxLen(max_length=cs.constraint.max_length)) + return cs - # Handle list types (list[X]) - if origin is list: - args = get_args(annotation) - if not args: - raise TypeError("Bare list without type argument is not supported") - state.list_depth += 1 - annotation = args[0] - continue - - # Handle dict types (dict[K, V]) - if origin is dict: - args = get_args(annotation) - if not args: - raise TypeError("Bare dict without type arguments is not supported") - state.is_dict = True - state.dict_key_type = analyze_type(args[0]) - state.dict_value_type = analyze_type(args[1]) - base_type = state.last_newtype_name or "dict" - return state.build_type_info( - base_type=base_type, - kind=TypeKind.PRIMITIVE, - source_type=dict, - ) - break +def _wrap_length_for_scalar(cs: ConstraintSource) -> ConstraintSource: + """Replace a raw `MinLen`/`MaxLen` with its scalar-layer variant. - return _classify_terminal(annotation, state) + Uses exact-type checks so already-wrapped variants (`ArrayMinLen`, + `ScalarMinLen`, etc.) are returned unchanged. 
+ """ + if type(cs.constraint) is MinLen: + return replace(cs, constraint=ScalarMinLen(min_length=cs.constraint.min_length)) + if type(cs.constraint) is MaxLen: + return replace(cs, constraint=ScalarMaxLen(max_length=cs.constraint.max_length)) + return cs + + +def _reject_length_on_map(constraints: tuple[ConstraintSource, ...]) -> None: + """Raise on `MinLen`/`MaxLen` attached to a `MapOf` layer.""" + for cs in constraints: + if isinstance(cs.constraint, (MinLen, MaxLen)): + raise NotImplementedError( + f"{type(cs.constraint).__name__} on a Map type is not supported" + ) -def _classify_terminal(annotation: object, state: _UnwrapState) -> TypeInfo: - """Classify a fully-unwrapped terminal type into a TypeInfo.""" - # typing.Any -- treat as an opaque primitive +def _terminal( + annotation: object, + newtype_ctx: _NewTypeCtx | None, + model_resolver: ModelResolver | None, +) -> FieldShape: + """Classify a fully-unwrapped terminal annotation into a shape.""" if annotation is Any: - return state.build_type_info( - base_type="Any", - kind=TypeKind.PRIMITIVE, - ) - - # Literal types (e.g., Literal["value"] or Literal["a", "b"]) + return AnyScalar(constraints=()) if get_origin(annotation) is Literal: - args = get_args(annotation) - return state.build_type_info( - base_type="Literal", - kind=TypeKind.LITERAL, - literal_values=tuple(args), - ) - + return LiteralScalar(values=tuple(get_args(annotation)), constraints=()) if not isinstance(annotation, type): raise TypeError(f"Unsupported annotation type: {type(annotation)}") - if issubclass(annotation, list): raise TypeError("Bare list without type argument is not supported") - if issubclass(annotation, dict): raise TypeError("Bare dict without type arguments is not supported") + if issubclass(annotation, BaseModel) and model_resolver is not None: + return model_resolver(annotation) + base_type = newtype_ctx.name if newtype_ctx else annotation.__name__ + return Primitive(base_type=base_type, source_type=annotation, 
constraints=()) + + +def _peel_union( + annotation: object, + union_resolver: UnionResolver | None, + *, + resolver_annotation: object | None = None, + description: str | None = None, +) -> _ContinueWith | _Resolved: + """Process one union layer. + + Filters out `None` / `Sentinel` arms (recording `is_optional`), then + drops `Literal[...]` arms when a concrete (non-Literal) arm exists. + A single remaining arm is returned as `_ContinueWith`; multiple arms + invoke `union_resolver` and the result is returned as `_Resolved` + (raising `UnsupportedUnionError` when no resolver is supplied). + + `resolver_annotation` is passed to `union_resolver` instead of + `annotation` when set. This lets the `Annotated` branch forward the + full `Annotated[X | Y, ...]` form so resolvers can recover + discriminator metadata that the `Annotated` peeling step consumed. + """ + args = get_args(annotation) + is_optional = any(a is types.NoneType for a in args) + + non_none_args = _filter_sentinel_arms(args) + concrete_args = [a for a in non_none_args if get_origin(a) is not Literal] + real_args = concrete_args if concrete_args else non_none_args + + if len(real_args) > 1: + members: list[type[BaseModel]] = [] + for arg in real_args: + inner = arg + if get_origin(inner) is Annotated: + inner = get_args(inner)[0] + if isinstance(inner, type) and issubclass(inner, BaseModel): + members.append(inner) + else: + raise UnsupportedUnionError( + f"Multi-type unions not supported: {annotation}" + ) + if union_resolver is None: + raise UnsupportedUnionError( + f"No union_resolver supplied for multi-arm union: {annotation}" + ) + return _Resolved( + union_resolver( + resolver_annotation or annotation, tuple(members), description + ), + is_optional, + ) - # Determine kind from type hierarchy - if issubclass(annotation, Enum): - kind = TypeKind.ENUM - elif issubclass(annotation, BaseModel): - kind = TypeKind.MODEL - else: - kind = TypeKind.PRIMITIVE + if not real_args: + raise 
UnsupportedUnionError(f"Union with no concrete types: {annotation}") - base_type = state.last_newtype_name or annotation.__name__ + return _ContinueWith(real_args[0], is_optional) - return state.build_type_info( - base_type=base_type, - kind=kind, - source_type=annotation, - ) + +def unwrap_list(annotation: object) -> object: + """Strip `| None`, `Sentinel`, and outermost `list[]` wrappers.""" + if _is_union(get_origin(annotation)): + args = _filter_sentinel_arms(get_args(annotation)) + if len(args) == 1: + annotation = args[0] + + while get_origin(annotation) is list: + annotation = get_args(annotation)[0] + return annotation def single_literal_value(annotation: object) -> object | None: - """Extract a single literal value from a type annotation, or None. + """Extract a single literal value from a type annotation, or `None`. - Delegates to analyze_type for all unwrapping, then checks - whether the result is a single-value Literal. Multi-value - Literals return None — callers needing all values should use - `analyze_type` and read `literal_values` directly. + Returns `None` for multi-value Literals -- callers needing all + values should use `analyze_type` and inspect the terminal + `LiteralScalar`'s `values`. 
""" try: - ti = analyze_type(annotation) + shape, _, _ = analyze_type(annotation) except (TypeError, UnsupportedUnionError): return None - if ( - ti.kind == TypeKind.LITERAL - and ti.literal_values - and len(ti.literal_values) == 1 - ): - return ti.literal_values[0] + terminal = terminal_of(shape) + if isinstance(terminal, LiteralScalar) and len(terminal.values) == 1: + return terminal.values[0] return None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py index 505657866..19a3007e0 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -2,7 +2,8 @@ from dataclasses import dataclass -from .type_analyzer import TypeInfo +from .field import FieldShape +from .field_walk import newtype_name, terminal_primitive __all__ = [ "TypeMapping", @@ -18,96 +19,79 @@ class TypeMapping: """Maps a type to its representation in different targets.""" markdown: str - - def for_target(self, target: str) -> str: - """Get the type representation for a named target.""" - if target != "markdown": - raise ValueError(f"Unknown target {target!r}, expected 'markdown'") - return self.markdown + spark: str | None = None PRIMITIVE_TYPES: dict[str, TypeMapping] = { # Signed integers - "int8": TypeMapping(markdown="int8"), - "int16": TypeMapping(markdown="int16"), - "int32": TypeMapping(markdown="int32"), - "int64": TypeMapping(markdown="int64"), + "int8": TypeMapping(markdown="int8", spark="IntegerType()"), + "int16": TypeMapping(markdown="int16", spark="IntegerType()"), + "int32": TypeMapping(markdown="int32", spark="IntegerType()"), + "int64": TypeMapping(markdown="int64", spark="LongType()"), # Unsigned integers - "uint8": TypeMapping(markdown="uint8"), - "uint16": TypeMapping(markdown="uint16"), - "uint32": 
TypeMapping(markdown="uint32"), + "uint8": TypeMapping(markdown="uint8", spark="IntegerType()"), + "uint16": TypeMapping(markdown="uint16", spark="IntegerType()"), + "uint32": TypeMapping(markdown="uint32", spark="IntegerType()"), # Floating point - "float32": TypeMapping(markdown="float32"), - "float64": TypeMapping(markdown="float64"), + "float32": TypeMapping(markdown="float32", spark="FloatType()"), + "float64": TypeMapping(markdown="float64", spark="DoubleType()"), # Basic types - "str": TypeMapping(markdown="string"), - "bool": TypeMapping(markdown="boolean"), + "str": TypeMapping(markdown="string", spark="StringType()"), + "bool": TypeMapping(markdown="boolean", spark="BooleanType()"), # Python builtins (aliases to their portable equivalents) - "int": TypeMapping(markdown="int64"), - "float": TypeMapping(markdown="float64"), + "int": TypeMapping(markdown="int64", spark="LongType()"), + "float": TypeMapping(markdown="float64", spark="DoubleType()"), # Geometry types - "Geometry": TypeMapping(markdown="geometry"), + "Geometry": TypeMapping(markdown="geometry", spark="BinaryType()"), "BBox": TypeMapping(markdown="bbox"), } -def is_semantic_newtype(type_info: TypeInfo) -> bool: - """Whether a type represents a semantic NewType that should be displayed by name. +def is_semantic_newtype(shape: FieldShape) -> bool: + """Whether a shape's outermost NewType should be displayed by name. - Returns True for unregistered NewTypes (HexColor, Sources) and NewTypes - that wrap a different base type (FeatureVersion wrapping int32, Id wrapping - NoWhitespaceString). Returns False for registered primitives (int32, Geometry). + Returns True for unregistered NewTypes (HexColor, Sources) and + NewTypes that wrap a different base type (FeatureVersion wrapping + int32, Id wrapping NoWhitespaceString). Returns False for + registered primitives (int32, Geometry). 
""" - if type_info.newtype_name is None: + nt_name = newtype_name(shape) + if nt_name is None: return False - if type_info.newtype_name != type_info.base_type: + terminal = terminal_primitive(shape) + if terminal is None: + return True + if nt_name != terminal.base_type: return True - return get_type_mapping(type_info.base_type) is None + return get_type_mapping(terminal.base_type) is None def get_type_mapping(type_name: str) -> TypeMapping | None: """Look up a type mapping by name. - Parameters - ---------- - type_name : str - The type name to look up (e.g., "int32", "str", "Geometry"). - Also accepts Python builtin names ("int" -> int64, "float" -> float64). - - Returns - ------- - TypeMapping or None - The TypeMapping for the type, or None if not found. + Accepts portable type names (`int32`, `str`, `Geometry`) and Python + builtin names (`int` -> int64, `float` -> float64). """ return PRIMITIVE_TYPES.get(type_name) -def resolve_type_name(type_info: TypeInfo, target: str) -> str: - """Resolve a TypeInfo to the base type string for a given target. - - Looks up the type in the registry first (trying source_type if base_type - has no mapping). Falls back to the base_type name as-is. - - Parameters - ---------- - type_info : TypeInfo - The analyzed type information. - target : str - The output target ("markdown"). +def resolve_type_name(shape: FieldShape) -> str: + """Resolve a shape to its markdown base type name string. - Returns - ------- - str - The resolved base type name string for the target. + Looks up the terminal scalar's `base_type` in the registry first, + falling back to `source_type.__name__`. Semantic NewTypes wrapping + unregistered types resolve to the underlying class name (e.g. + `Sources` wrapping `SourceItem` -> `SourceItem`). 
""" - mapping = get_type_mapping(type_info.base_type) - if mapping is None and type_info.source_type is not None: - mapping = get_type_mapping(type_info.source_type.__name__) + terminal = terminal_primitive(shape) + if terminal is None: + return "?" + mapping = get_type_mapping(terminal.base_type) + if mapping is None and terminal.source_type is not None: + mapping = get_type_mapping(terminal.source_type.__name__) if mapping is not None: - return mapping.for_target(target) + return mapping.markdown - # Semantic NewType wrapping an unregistered type (e.g., Sources wrapping - # SourceItem): use the underlying class name rather than the NewType alias. - if type_info.newtype_name and type_info.source_type is not None: - return type_info.source_type.__name__ - return type_info.base_type + if newtype_name(shape) and terminal.source_type is not None: + return terminal.source_type.__name__ + return terminal.base_type diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py index c555fdba0..cd3870a5e 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/union_extraction.py @@ -2,6 +2,8 @@ from __future__ import annotations +from collections.abc import Mapping +from enum import Enum from typing import Annotated, get_args, get_origin from pydantic import BaseModel @@ -9,9 +11,24 @@ from overture.schema.system.feature import resolve_discriminator_field_name +from .field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from .field_walk import list_depth, terminal_of, walk_shape from .model_extraction import extract_model, resolve_field_alias -from .specs import AnnotatedField, UnionSpec, is_model_class -from .type_analyzer import 
TypeInfo, TypeKind, analyze_type, single_literal_value +from .specs import AnnotatedField, FieldSpec, MemberSpec, UnionSpec, is_model_class +from .type_analyzer import ( + capture_union_members, + single_literal_value, +) __all__ = ["extract_discriminator", "extract_union"] @@ -39,7 +56,7 @@ def max_mro_index(cls: type) -> int: def _find_field_by_alias(model: type[BaseModel], alias: str) -> FieldInfo | None: - """Find a field in model_fields by alias-resolved name.""" + """Find a field in `model_fields` by alias-resolved name.""" direct = model.model_fields.get(alias) if direct is not None: return direct @@ -73,18 +90,76 @@ def extract_discriminator( if field_info and field_info.annotation is not None: lit_val = single_literal_value(field_info.annotation) if lit_val is not None: - mapping[str(lit_val)] = member + key = lit_val.value if isinstance(lit_val, Enum) else str(lit_val) + mapping[key] = member return disc_field_name, mapping or None -_TypeShape = tuple[str, TypeKind, bool, int] +_TypeShape = tuple[object, ...] _FieldKey = tuple[str, _TypeShape] -def _type_shape(ti: TypeInfo) -> _TypeShape: - """Structural shape for dedup -- excludes source_type which varies across members.""" - return (ti.base_type, ti.kind, ti.is_optional, ti.list_depth) +def _structural_fingerprint(spec: FieldSpec) -> _TypeShape: + """Structural shape for dedup: ignores per-variant source_type variation. + + Two fields with the same name and same `(terminal_base_type, + terminal_kind, is_optional, list_depth)` collapse to a single + `AnnotatedField` whose `variant_sources` lists the contributing + members. + + `terminal_of` unwraps `ArrayOf` / `NewTypeShape`, so the terminal is + always one of the six leaf variants below; an unrecognized one + raises instead of silently collapsing into a shared fingerprint. 
+ """ + depth = list_depth(spec.shape) + base_type: object + terminal = terminal_of(spec.shape) + match terminal: + case Primitive(base_type=bt): + base_type, kind = bt, "scalar" + case LiteralScalar(values=values): + base_type, kind = ("Literal", values), "scalar" + case AnyScalar(): + base_type, kind = "Any", "scalar" + case ModelRef(model=model): + base_type, kind = model.name, "model" + case UnionRef(union=union): + base_type, kind = union.name, "union" + case MapOf(): + base_type, kind = "dict", "map" + case _: + raise TypeError(f"Unexpected terminal shape: {terminal!r}") + return (base_type, kind, spec.is_optional, depth) + + +def _constraints_fingerprint(spec: FieldSpec) -> frozenset[str]: + """Constraints declared anywhere in *spec*'s shape tree, as a comparable set. + + `_structural_fingerprint` deliberately ignores constraints so that + members declaring the same field with per-variant `Annotated` + metadata still collapse to one `AnnotatedField`. This captures what + that ignores, so collisions with diverging constraints fail loudly + instead of silently keeping the last member's `FieldSpec`. 
+ """ + constraints: list[str] = [] + + def collect(shape: FieldShape) -> None: + match shape: + case ( + Primitive(constraints=cs) + | LiteralScalar(constraints=cs) + | AnyScalar(constraints=cs) + | ArrayOf(constraints=cs) + | MapOf(constraints=cs) + ): + for source in cs: + constraints.append(repr(source.constraint)) + case ModelRef() | UnionRef() | NewTypeShape(): + pass + + walk_shape(spec.shape, collect) + return frozenset(constraints) def extract_union( @@ -92,39 +167,65 @@ def extract_union( annotation: object, *, entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, ) -> UnionSpec: - """Extract a UnionSpec from a discriminated union type alias.""" - ti = analyze_type(annotation) - if ti.kind != TypeKind.UNION or ti.union_members is None: + """Extract a `UnionSpec` from a discriminated union type alias.""" + extracted = capture_union_members(annotation) + if extracted is None: raise TypeError(f"{name} is not a union type alias") + member_tuple, description = extracted + members = list(member_tuple) - members = list(ti.union_members) common_base = _find_common_base(members) + # Plain Python type aliases (`Foo = Annotated[...]`) don't preserve + # the alias name in the annotation. The nested-union path (called + # from extract_model for UNION-kind fields) passes `members[0].__name__` + # as the placeholder name. Recover the alias by convention: members + # extend `Base`, so stripping that suffix yields the alias. + # Top-level unions go through the CLI, which supplies the real name + # and skips this fallback. + # + # PEP 695 (`type Foo = Annotated[...]`) preserves `__name__` as + # `"Foo"` on 3.12+; after migrating, the placeholder hack can go. 
+ member_names = {m.__name__ for m in members} + if name in member_names: + base_name = common_base.__name__ + name = ( + base_name.removesuffix("Base") if base_name.endswith("Base") else base_name + ) + base_spec = extract_model(common_base) shared_field_names = {f.name for f in base_spec.fields} - member_specs = [(m, extract_model(m)) for m in members] + member_specs = [MemberSpec(m, extract_model(m)) for m in members] annotated_fields: list[AnnotatedField] = [] - # Shared fields first (from common base) for fs in base_spec.fields: annotated_fields.append(AnnotatedField(field_spec=fs, variant_sources=None)) - # Variant-specific fields: collect by (name, type identity) for dedup seen: dict[_FieldKey, AnnotatedField] = {} - for member_cls, member_spec in member_specs: - for fs in member_spec.fields: + for member in member_specs: + member_cls = member.member_cls + for fs in member.spec.fields: if fs.name in shared_field_names: continue - key = (fs.name, _type_shape(fs.type_info)) + key = (fs.name, _structural_fingerprint(fs)) existing = seen.get(key) + if existing is not None: + existing_constraints = _constraints_fingerprint(existing.field_spec) + if _constraints_fingerprint(fs) != existing_constraints: + raise ValueError( + f"Union {name!r} field {fs.name!r} has the same structural " + f"shape across members but diverging constraints; dedup " + f"would silently drop one member's constraints" + ) prior_sources = existing.variant_sources or () if existing else () seen[key] = AnnotatedField( field_spec=fs, - variant_sources=(*prior_sources, member_cls.__name__), + variant_sources=(*prior_sources, member_cls), ) annotated_fields.extend(seen.values()) @@ -133,12 +234,14 @@ def extract_union( return UnionSpec( name=name, - description=ti.description, + description=description, annotated_fields=annotated_fields, members=members, + member_specs=member_specs, discriminator_field=disc_field, discriminator_mapping=disc_mapping, source_annotation=annotation, 
common_base=common_base, entry_point=entry_point, + partitions=partitions or {}, ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py index bb6b92379..f15bb0120 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/module_layout.py @@ -10,6 +10,8 @@ from collections.abc import Iterable, Mapping from pathlib import PurePosixPath +from overture.schema.system.discovery import split_entry_point + __all__ = [ "OUTPUT_ROOT", "compute_output_dir", @@ -24,26 +26,13 @@ OUTPUT_ROOT = PurePosixPath(".") -def _split_entry_point(entry_point_path: str) -> tuple[str, str]: - """Split `"module.path:ClassName"` into its two parts. - - >>> _split_entry_point("overture.schema.buildings:Building") - ('overture.schema.buildings', 'Building') - """ - if ":" not in entry_point_path: - msg = f"Expected 'module:Class' format, got {entry_point_path!r}" - raise ValueError(msg) - module, cls = entry_point_path.split(":", 1) - return module, cls - - def entry_point_module(entry_point_path: str) -> str: """Extract module path from entry-point-style path. 
>>> entry_point_module("overture.schema.buildings:Building") 'overture.schema.buildings' """ - return _split_entry_point(entry_point_path)[0] + return split_entry_point(entry_point_path)[0] def entry_point_class(entry_point_path: str) -> str: @@ -52,7 +41,7 @@ def entry_point_class(entry_point_path: str) -> str: >>> entry_point_class("overture.schema.buildings:Building") 'Building' """ - return _split_entry_point(entry_point_path)[1] + return split_entry_point(entry_point_path)[1] def compute_schema_root(module_paths: Iterable[str]) -> str: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py index b9072da64..621249ec1 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/layout/type_collection.py @@ -1,14 +1,26 @@ -"""Supplementary type discovery by walking expanded feature trees. +"""Supplementary type discovery by walking feature trees. -Walks FieldSpec.model references for sub-models (already extracted), -and extracts enums and NewTypes on first encounter. +Walks `FieldShape` trees to extract referenced enums, NewTypes, +Pydantic built-ins, and union member sub-models. `ModelRef` and +`UnionRef` carry their resolved specs structurally, so recursion +follows the shape directly. 
""" from collections.abc import Sequence +from enum import Enum from typing import Annotated, get_args, get_origin +from pydantic import BaseModel + from ..extraction.enum_extraction import extract_enum -from ..extraction.model_extraction import expand_model_tree, extract_model +from ..extraction.field import ( + FieldShape, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from ..extraction.field_walk import walk_shape from ..extraction.newtype_extraction import extract_newtype from ..extraction.pydantic_extraction import extract_pydantic_type from ..extraction.specs import ( @@ -17,15 +29,9 @@ ModelSpec, SupplementarySpec, TypeIdentity, - is_pydantic_type, -) -from ..extraction.type_analyzer import ( - TypeInfo, - TypeKind, - analyze_type, - is_newtype, - walk_type_info, + is_pydantic_sourced, ) +from ..extraction.type_analyzer import analyze_type, is_newtype from ..extraction.type_registry import is_semantic_newtype __all__ = ["collect_all_supplementary_types"] @@ -36,19 +42,16 @@ def collect_all_supplementary_types( ) -> dict[TypeIdentity, SupplementarySpec]: """Collect supplementary types by walking expanded feature trees. - Requires that expand_model_tree has been called on all feature specs - first. Walks FieldSpec.model references for sub-models (already - extracted), and extracts enums and NewTypes on first encounter. - - Returns a dict mapping TypeIdentity to extracted specs. Two types - with the same class name from different modules are keyed separately. + Walks `ModelRef` references for sub-models (already extracted), + and extracts enums and NewTypes on first encounter. Two types + with the same class name from different modules are keyed + separately. """ feature_objs: set[object] = {spec.identity.obj for spec in feature_specs} all_specs: dict[TypeIdentity, SupplementarySpec] = {} visited_models: set[object] = set() def _register_newtype(newtype_ref: object, name: str) -> bool: - """Register a NewType if not already present. 
Returns True if registered.""" nt_id = TypeIdentity(newtype_ref, name) if nt_id in all_specs: return False @@ -66,91 +69,52 @@ def _collect_from_model(model_spec: ModelSpec) -> None: _collect_from_fields(model_spec.fields) def _collect_inner_newtypes(newtype_ref: object) -> None: - """Walk a NewType's __supertype__ chain for intermediate semantic NewTypes.""" + """Walk a NewType's `__supertype__` chain for nested semantic NewTypes.""" annotation = getattr(newtype_ref, "__supertype__", None) while annotation is not None: if get_origin(annotation) is Annotated: annotation = get_args(annotation)[0] continue if is_newtype(annotation): - inner_ti = analyze_type(annotation) - if ( - inner_ti.newtype_ref is not None - and inner_ti.newtype_name is not None - and is_semantic_newtype(inner_ti) + inner_shape, _, _ = analyze_type(annotation) + if isinstance(inner_shape, NewTypeShape) and is_semantic_newtype( + inner_shape ): - _register_newtype(inner_ti.newtype_ref, inner_ti.newtype_name) + _register_newtype(inner_shape.ref, inner_shape.name) annotation = getattr(annotation, "__supertype__", None) continue break - def _collect_from_type_info(ti: TypeInfo) -> None: - """Collect supplementary types from a single TypeInfo. - - Uses walk_type_info for dict key/value recursion. Handles all - TypeKind variants without early returns so newtype extraction - and dict recursion apply regardless of kind. - """ - - def _visit(node: TypeInfo) -> None: - # UNION, ENUM, and pydantic (PRIMITIVE) are mutually exclusive - # by TypeKind. NewType extraction is orthogonal -- a node can be - # a NewType-wrapped ENUM, for instance. - if node.kind == TypeKind.UNION and node.union_members: - # Walk each member's fields for supplementary types. - # Members that are also top-level feature specs are skipped - # by the feature_objs guard in _collect_from_model. 
- for member_cls in node.union_members: - member_spec = extract_model(member_cls) - expand_model_tree(member_spec) - _collect_from_model(member_spec) - elif node.kind == TypeKind.ENUM and node.source_type is not None: - enum_id = TypeIdentity.of(node.source_type) - if enum_id not in all_specs: - all_specs[enum_id] = extract_enum(node.source_type) - elif is_pydantic_type(node): - if node.source_type is None: - raise TypeError( - "is_pydantic_type returned True but source_type is None" - ) - pid = TypeIdentity.of(node.source_type) - if pid not in all_specs: - all_specs[pid] = extract_pydantic_type(node.source_type) - - # Semantic NewTypes always get extracted, including intermediate - # NewTypes in the wrapping chain (e.g., Id wraps NoWhitespaceString - # wraps str -- both Id and NoWhitespaceString get pages). - if ( - node.newtype_ref is not None - and node.newtype_name is not None - and is_semantic_newtype(node) - ): - newly_registered = _register_newtype( - node.newtype_ref, node.newtype_name - ) - if newly_registered: - _collect_inner_newtypes(node.newtype_ref) - - walk_type_info(ti, _visit) + def _collect_from_shape(shape: FieldShape) -> None: + """Walk *shape* and register every supplementary type it touches.""" + + def _visit(node: FieldShape) -> None: + match node: + case NewTypeShape(name=name, ref=ref): + if _register_newtype(ref, name): + _collect_inner_newtypes(ref) + case UnionRef(union=u): + for member in u.member_specs: + _collect_from_model(member.spec) + case ModelRef(model=m, starts_cycle=False): + _collect_from_model(m) + case Primitive(source_type=cls) if cls is not None and isinstance( + cls, type + ): + if issubclass(cls, Enum): + eid = TypeIdentity.of(cls) + if eid not in all_specs: + all_specs[eid] = extract_enum(cls) + elif is_pydantic_sourced(cls) and not issubclass(cls, BaseModel): + pid = TypeIdentity.of(cls) + if pid not in all_specs: + all_specs[pid] = extract_pydantic_type(cls) + + walk_shape(shape, _visit) def 
_collect_from_fields(fields: list[FieldSpec]) -> None: - # A single field can match multiple conditions (e.g., Sources is both - # a semantic NewType and wraps a MODEL-kind type), so checks are - # independent `if` statements, not `elif`. for field_spec in fields: - ti = field_spec.type_info - _collect_from_type_info(ti) - - # MODEL-kind fields (whether direct or via NewType wrapper) get expanded - if ti.kind == TypeKind.MODEL and ti.source_type is not None: - if field_spec.model is None: - msg = ( - f"MODEL-kind field {field_spec.name!r} has source_type " - f"but model=None — call expand_model_tree first" - ) - raise RuntimeError(msg) - if not field_spec.starts_cycle: - _collect_from_model(field_spec.model) + _collect_from_shape(field_spec.shape) for spec in feature_specs: _collect_from_fields(spec.fields) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py index bf09950c4..a5c34fef7 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/link_computation.py @@ -3,7 +3,8 @@ from dataclasses import dataclass from pathlib import PurePosixPath -from ..extraction.case_conversion import slug_filename +from overture.schema.system.case import to_snake_case + from ..extraction.specs import TypeIdentity __all__ = ["LinkContext", "relative_link"] @@ -28,7 +29,7 @@ def resolve_link_or_slug(self, identity: TypeIdentity) -> str: Always returns a usable link string. Use when the caller needs a link regardless of whether the type has a registered page. 
""" - return self.resolve_link(identity) or slug_filename(identity.name) + return self.resolve_link(identity) or f"{to_snake_case(identity.name)}.md" def _is_normalized(path: PurePosixPath) -> bool: diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py index f0d224ee4..9f38f63a1 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py @@ -7,7 +7,8 @@ from collections.abc import Sequence from pathlib import PurePosixPath -from ..extraction.case_conversion import slug_filename +from overture.schema.system.case import to_snake_case + from ..extraction.specs import ( FeatureSpec, PydanticTypeSpec, @@ -54,10 +55,8 @@ def build_placement_registry( if tid in registry: continue if isinstance(supp_spec, PydanticTypeSpec): - registry[tid] = ( - PurePosixPath("pydantic") - / supp_spec.source_module - / slug_filename(tid.name) + registry[tid] = _md_path( + PurePosixPath("pydantic") / supp_spec.source_module, tid.name ) continue source_module = getattr(supp_spec.source_type, "__module__", None) @@ -77,7 +76,7 @@ def resolve_output_path( """Look up a type's output path from the registry, with flat-file fallback.""" if registry is not None and identity in registry: return registry[identity] - return PurePosixPath(slug_filename(identity.name)) + return _md_path(PurePosixPath(""), identity.name) def _aggregate_page_entries( @@ -112,4 +111,4 @@ def _nest_under_types( def _md_path(directory: PurePosixPath, name: str) -> PurePosixPath: """Build a .md file path from a directory and a PascalCase type name.""" - return directory / slug_filename(name) + return directory / f"{to_snake_case(name)}.md" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py 
b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py index f7c676c06..8a6bb8348 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py @@ -14,7 +14,6 @@ from overture.schema.system.primitive import GeometryType from ..extraction.examples import ExampleRecord, load_examples -from ..extraction.model_extraction import expand_model_tree from ..extraction.numeric_extraction import extract_numerics from ..extraction.specs import ( EnumSpec, @@ -97,16 +96,19 @@ def _render_supplement( ctx = LinkContext(output_path, registry) used_by = reverse_refs.get(tid) - if isinstance(spec, EnumSpec): - content = render_enum(spec, link_ctx=ctx, used_by=used_by) - elif isinstance(spec, NewTypeSpec): - content = render_newtype(spec, ctx, used_by=used_by) - elif isinstance(spec, ModelSpec): - content = render_feature(spec, ctx, used_by=used_by) - elif isinstance(spec, PydanticTypeSpec): - content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) - else: - raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}") + match spec: + case EnumSpec(): + content = render_enum(spec, link_ctx=ctx, used_by=used_by) + case NewTypeSpec(): + content = render_newtype(spec, ctx, used_by=used_by) + case ModelSpec(): + content = render_feature(spec, ctx, used_by=used_by) + case PydanticTypeSpec(): + content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) + case _: + raise TypeError( + f"Unhandled SupplementarySpec variant: {type(spec).__name__}" + ) return RenderedPage(content=content, path=output_path) @@ -143,10 +145,6 @@ def generate_markdown_pages( I/O, frontmatter injection, and any output-format-specific concerns (like Docusaurus category files). 
""" - cache: dict[type, ModelSpec] = {} - for spec in feature_specs: - expand_model_tree(spec, cache) - numeric_names, geometry_names = partition_numeric_and_geometry_types( _system_primitive ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py index 0e829d1f4..0a5c9d08f 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py @@ -14,7 +14,9 @@ from typing_extensions import NotRequired from ..extraction.examples import ExampleRecord +from ..extraction.field import ConstraintSource from ..extraction.field_constraints import constraint_display_text +from ..extraction.field_walk import all_constraints, list_depth, terminal_model_ref from ..extraction.model_constraints import analyze_model_constraints from ..extraction.specs import ( AnnotatedField, @@ -28,9 +30,6 @@ TypeIdentity, UnionSpec, ) -from ..extraction.type_analyzer import ( - ConstraintSource, -) from .link_computation import LinkContext from .reverse_references import UsedByEntry from .type_format import ( @@ -237,14 +236,14 @@ def _annotate_field_constraints( ) -> None: """Annotate a field row with constraints from the field's own annotation. - Shows constraints where source is None — those applied directly to + Shows constraints where source is None -- those applied directly to the field, not inherited from NewType chains. NewType-inherited constraints appear on the NewType's own page instead. 
""" link_fn = _link_fn_from_ctx(ctx) notes = [ constraint_display_text(cs, link_fn=link_fn) - for cs in field.type_info.constraints + for cs in all_constraints(field.shape) if cs.source_ref is None ] if notes: @@ -253,13 +252,11 @@ def _annotate_field_constraints( def _expandable_list_suffix(field_spec: FieldSpec) -> str: """Return `"[]"` per nesting level for list-of-model fields expanded inline.""" - if ( - field_spec.type_info.is_list - and field_spec.model - and not field_spec.starts_cycle - ): - return "[]" * field_spec.type_info.list_depth - return "" + model_ref = terminal_model_ref(field_spec.shape) + if model_ref is None or model_ref.starts_cycle: + return "" + depth = list_depth(field_spec.shape) + return "[]" * depth if depth > 0 else "" def _expand_sub_model( @@ -269,10 +266,13 @@ def _expand_sub_model( result: list[_FieldRow], ) -> None: """Expand sub-model fields inline, appending child rows to *result*.""" - sub = field_spec.model if not field_spec.starts_cycle else None - if sub is not None: - child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." - result.extend(_expand_model_fields(sub.fields, ctx, prefix=child_prefix)) + model_ref = terminal_model_ref(field_spec.shape) + if model_ref is None or model_ref.starts_cycle: + return + child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." 
+ result.extend( + _expand_model_fields(model_ref.model.fields, ctx, prefix=child_prefix) + ) def _annotate_top_level_constraints( @@ -341,7 +341,7 @@ def _variant_tag(annotated: AnnotatedField, union_name: str) -> str | None: if annotated.variant_sources is None: return None short_names = [ - _short_variant_name(v, union_name) for v in annotated.variant_sources + _short_variant_name(v.__name__, union_name) for v in annotated.variant_sources ] return f" *({', '.join(short_names)})*" @@ -385,9 +385,8 @@ def render_feature( examples: list[ExampleRecord] | None = None, used_by: list[UsedByEntry] | None = None, ) -> str: - """Render a FeatureSpec (ModelSpec or UnionSpec) as Markdown documentation. + """Render a feature spec as Markdown documentation. - For ModelSpec, requires expand_model_tree to have been called first. For UnionSpec, adds inline variant tags to variant-specific fields. """ template = _get_jinja_env().get_template("feature.md.jinja2") @@ -491,13 +490,13 @@ def render_newtype( link_ctx: LinkContext | None = None, used_by: list[UsedByEntry] | None = None, ) -> str: - """Render a NewTypeSpec as Markdown documentation.""" + """Render a `NewTypeSpec` as Markdown documentation.""" template = _get_jinja_env().get_template("newtype.md.jinja2") - ti = newtype_spec.type_info - underlying = format_underlying_type(ti, link_ctx) + shape = newtype_spec.shape + underlying = format_underlying_type(shape, link_ctx) constraints = [ _format_constraint(cs, newtype_spec.source_type, link_ctx) - for cs in ti.constraints + for cs in all_constraints(shape) ] return template.render( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py index 2ad471fc1..39f841345 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py +++ 
b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -6,6 +6,17 @@ from dataclasses import dataclass from enum import Enum +from pydantic import BaseModel + +from ..extraction.field import ( + FieldShape, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_of, walk_shape from ..extraction.specs import ( FeatureSpec, FieldSpec, @@ -14,9 +25,8 @@ SupplementarySpec, TypeIdentity, UnionSpec, - is_pydantic_type, + is_pydantic_sourced, ) -from ..extraction.type_analyzer import TypeInfo, TypeKind, walk_type_info __all__ = [ "UsedByEntry", @@ -51,98 +61,84 @@ def compute_reverse_references( Parameters ---------- - feature_specs : Sequence[FeatureSpec] + feature_specs Feature-level specs (ModelSpec or UnionSpec). - all_specs : Mapping[TypeIdentity, SupplementarySpec] + all_specs Supplementary types (enums, newtypes, sub-models). - - Returns - ------- - dict[TypeIdentity, list[UsedByEntry]] - Dict mapping TypeIdentity to sorted lists of UsedByEntry. 
""" - # Track references with sets to deduplicate references: dict[TypeIdentity, set[UsedByEntry]] = {} def add_reference( target: TypeIdentity, referrer: TypeIdentity, kind: UsedByKind ) -> None: - """Add a reference from referrer to target, with deduplication.""" if target == referrer or target not in all_specs: return references.setdefault(target, set()).add(UsedByEntry(referrer, kind)) - def collect_from_type_info( - ti: TypeInfo, referrer: TypeIdentity, referrer_kind: UsedByKind + def collect_from_shape( + shape: FieldShape, + referrer: TypeIdentity, + referrer_kind: UsedByKind, ) -> None: - """Collect references from a TypeInfo.""" - - def _visit(node: TypeInfo) -> None: - if node.newtype_ref is not None and node.newtype_name is not None: - add_reference( - TypeIdentity(node.newtype_ref, node.newtype_name), - referrer, - referrer_kind, - ) - - # ENUM, MODEL, pydantic (PRIMITIVE), and UNION are mutually - # exclusive by TypeKind. - if ( - node.kind in (TypeKind.ENUM, TypeKind.MODEL) - and node.source_type is not None - ): - add_reference( - TypeIdentity.of(node.source_type), - referrer, - referrer_kind, - ) - elif is_pydantic_type(node): - add_reference( - TypeIdentity.of(node.source_type), referrer, referrer_kind - ) - elif node.union_members is not None: - for member_cls in node.union_members: + """Walk a shape and add references for every type it touches.""" + + def _visit(node: FieldShape) -> None: + match node: + case NewTypeShape(name=name, ref=ref): + add_reference(TypeIdentity(ref, name), referrer, referrer_kind) + case ModelRef(model=m) if m.source_type is not None: add_reference( - TypeIdentity.of(member_cls), - referrer, - referrer_kind, + TypeIdentity.of(m.source_type), referrer, referrer_kind ) - - walk_type_info(ti, _visit) + case UnionRef(union=u): + for member_cls in u.members: + add_reference( + TypeIdentity.of(member_cls), referrer, referrer_kind + ) + case Primitive(source_type=cls) if cls is not None: + if isinstance(cls, type) and ( + 
issubclass(cls, Enum) + or issubclass(cls, BaseModel) + or is_pydantic_sourced(cls) + ): + add_reference(TypeIdentity.of(cls), referrer, referrer_kind) + + walk_shape(shape, _visit) def collect_from_fields( - fields: list[FieldSpec], referrer: TypeIdentity, referrer_kind: UsedByKind + fields: list[FieldSpec], + referrer: TypeIdentity, + referrer_kind: UsedByKind, ) -> None: - """Collect references from model fields.""" + """Collect references from each field's shape.""" for field_spec in fields: - collect_from_type_info(field_spec.type_info, referrer, referrer_kind) + collect_from_shape(field_spec.shape, referrer, referrer_kind) def collect_from_model_spec(spec: ModelSpec, referrer: TypeIdentity) -> None: - """Collect references from a ModelSpec.""" collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) def collect_from_union_spec(spec: UnionSpec) -> None: - """Collect references from a UnionSpec.""" referrer = spec.identity # Union features reference their members for member_cls in spec.members: - add_reference( - TypeIdentity.of(member_cls), - referrer, - UsedByKind.MODEL, - ) - # Also walk fields for other supplementary types + add_reference(TypeIdentity.of(member_cls), referrer, UsedByKind.MODEL) collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None: - """Collect references from a NewTypeSpec.""" - collect_from_type_info(spec.type_info, referrer, UsedByKind.NEWTYPE) - - # Collect inherited NewTypes from constraint sources - for cs in spec.type_info.constraints: - if cs.source_ref is not None and cs.source_name is not None: - ref_id = TypeIdentity(cs.source_ref, cs.source_name) - add_reference(ref_id, referrer, UsedByKind.NEWTYPE) + # The NewType's own identity isn't added here (self-reference). + # spec.shape already has the outer NewTypeShape stripped. 
+ collect_from_shape(spec.shape, referrer, UsedByKind.NEWTYPE) + + # Inherited NewTypes from constraint sources (constraint chains). + terminal = terminal_of(spec.shape) + if isinstance(terminal, Scalar): + for cs in terminal.constraints: + if cs.source_ref is not None and cs.source_name is not None: + add_reference( + TypeIdentity(cs.source_ref, cs.source_name), + referrer, + UsedByKind.NEWTYPE, + ) # Collect from features for spec in feature_specs: @@ -151,17 +147,14 @@ def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None elif isinstance(spec, UnionSpec): collect_from_union_spec(spec) - # Collect from supplementary specs (NewTypes and sub-models reference - # other types; enums do not, so they need no processing here) + # Collect from supplementary specs (enums have no outgoing references) for tid, supp_spec in all_specs.items(): if isinstance(supp_spec, NewTypeSpec): collect_from_newtype_spec(supp_spec, tid) elif isinstance(supp_spec, ModelSpec): collect_from_model_spec(supp_spec, tid) - # Sort into deterministic lists. (kind, name) handles the common case; - # module breaks ties when two referrers share the same display name - # (e.g. identically-named types from different themes/modules). + # Sort into deterministic lists. 
result: dict[TypeIdentity, list[UsedByEntry]] = {} for target, ref_set in references.items(): entries = sorted( diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py index b6bd7a6ec..baaff8668 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -1,16 +1,33 @@ -"""Format TypeInfo as markdown type strings with cross-page links.""" +"""Format `FieldShape` trees as markdown type strings with cross-page links.""" from __future__ import annotations +from collections.abc import Sequence +from enum import Enum + from pydantic import BaseModel -from ..extraction.specs import FieldSpec, TypeIdentity -from ..extraction.type_analyzer import TypeInfo, TypeKind -from ..extraction.type_registry import is_semantic_newtype, resolve_type_name +from ..extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.specs import FieldSpec, TypeIdentity, is_pydantic_sourced +from ..extraction.type_registry import ( + get_type_mapping, + is_semantic_newtype, + resolve_type_name, +) from .link_computation import LinkContext __all__ = [ - "format_dict_type", "format_type", "format_underlying_type", "resolve_type_link", @@ -18,17 +35,17 @@ def _code_link(name: str, href: str) -> str: - """Format a markdown link with inline-code text: [`name`](href).""" + """Format a markdown link with inline-code text: `[``name``](href)`.""" return f"[`{name}`]({href})" def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> str: - """Resolve a TypeIdentity to a linked code span or plain code span. + """Resolve a `TypeIdentity` to a linked or plain code span. 
- When *ctx* is provided, links only to types in the registry (types - without pages render as inline code). Without context, renders as - inline code -- producing a link requires a placement registry to - compute correct relative paths. + With `ctx`, links only to types in the registry (types without + pages render as inline code). Without context, renders as inline + code -- producing a link requires a placement registry to compute + correct relative paths. """ if ctx: href = ctx.resolve_link(identity) @@ -40,9 +57,9 @@ def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> def _wrap_list_n(inner: str, depth: int) -> str: """Wrap an inner type string in `list<...>` markdown syntax *depth* times. - Builds a single broken-backtick wrapper rather than nesting iteratively. - Iterative nesting creates adjacent backticks that CommonMark - interprets as multi-backtick code span delimiters. + Builds a single broken-backtick wrapper rather than nesting + iteratively, since iterative nesting creates adjacent backticks + that CommonMark interprets as multi-backtick code span delimiters. """ return f"`{'list<' * depth}`{inner}`{'>' * depth}`" @@ -52,165 +69,238 @@ def _plain_list_type(base: str, depth: int) -> str: return f"`{'list<' * depth}{base}{'>' * depth}`" -def _linked_type_identity(ti: TypeInfo) -> TypeIdentity | None: - """Return the TypeIdentity to use for a markdown link, or None for non-linked types.""" - if ( - is_semantic_newtype(ti) - and ti.newtype_ref is not None - and ti.newtype_name is not None - ): - return TypeIdentity(ti.newtype_ref, ti.newtype_name) - if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type is not None: - return TypeIdentity(ti.source_type, ti.base_type) - return None - - -def _try_primitive_link( - ti: TypeInfo, display_name: str, ctx: LinkContext | None -) -> str | None: - """Try to link a PRIMITIVE type to its page via registry lookup. 
+def _peel_arrays(shape: FieldShape) -> tuple[int, FieldShape]: + """Strip outer `ArrayOf` layers; return (count, inner).""" + depth = 0 + while isinstance(shape, ArrayOf): + depth += 1 + shape = shape.element + return depth, shape - Registered primitives (int32, Geometry) and Pydantic types (HttpUrl) - can have pages in the registry. Uses the type registry display name - (e.g. `geometry` not `Geometry`) for the link text. - """ - if ti.kind != TypeKind.PRIMITIVE or not ctx: - return None - candidate = ti.newtype_ref or ti.source_type - if candidate is None: - return None - href = ctx.resolve_link(TypeIdentity(candidate, display_name)) - if href: - return _code_link(display_name, href) - return None - -def _markdown_type_name(ti: TypeInfo) -> str: - """Return the markdown display name for a type. - - Uses the semantic NewType name when present (e.g. `LanguageTag`), - otherwise falls back to the resolved markdown type (e.g. `string`). - """ - name = ti.newtype_name if is_semantic_newtype(ti) else None - return name or resolve_type_name(ti, "markdown") - - -def format_dict_type(ti: TypeInfo) -> str: - """Format a dict TypeInfo as bare `map` using resolved markdown names.""" - if ti.dict_key_type is None or ti.dict_value_type is None: - msg = f"format_dict_type requires dict key/value types, got {ti}" - raise ValueError(msg) - key = _markdown_type_name(ti.dict_key_type) - value = _markdown_type_name(ti.dict_value_type) - return f"map<{key}, {value}>" +def _format_literal(values: tuple[object, ...]) -> str: + """Format Literal values for display.""" + if len(values) == 1: + return f'`"{values[0]}"`' + return r" \| ".join(f'`"{v}"`' for v in values) def _format_union_members( - members: tuple[type[BaseModel], ...], + members: Sequence[type[BaseModel]], ctx: LinkContext | None, separator: str = r" \| ", ) -> str: - r"""Format union members as individually linked/backticked names. + r"""Format union members as individually linked / backticked names. 
- Each member is resolved independently so members with pages get linked - while others render as plain code spans. *separator* is inserted between - members (default is `\|` for table-cell safety). + Each member is resolved independently so members with pages get + linked while others render as plain code spans. `separator` is + inserted between members (default is `\|` for table-cell safety). """ return separator.join(resolve_type_link(TypeIdentity.of(m), ctx) for m in members) -def format_type( - field: FieldSpec, - ctx: LinkContext | None = None, -) -> str: - """Format a field's type for markdown display, with links and qualifiers.""" - ti = field.type_info - qualifiers: list[str] = [] +def _model_link(model_ref: ModelRef, ctx: LinkContext | None) -> str: + """Resolve a `ModelRef` to a markdown link or fallback code span.""" + src = model_ref.model.source_type + if src is not None: + return resolve_type_link(TypeIdentity(src, model_ref.model.name), ctx) + return f"`{model_ref.model.name}`" - if ti.kind == TypeKind.LITERAL and ti.literal_values: - if len(ti.literal_values) == 1: - return f'`"{ti.literal_values[0]}"`' - return r" \| ".join(f'`"{v}"`' for v in ti.literal_values) - - identity = _linked_type_identity(ti) - - if ti.kind == TypeKind.UNION and ti.union_members: - display = _format_union_members(ti.union_members, ctx) - if ti.is_list: - qualifiers.append("list") - elif ti.is_dict: - if identity: - display = resolve_type_link(identity, ctx) - qualifiers.append("map") - else: - display = f"`{format_dict_type(ti)}`" - elif identity: - display = resolve_type_link(identity, ctx) - # List layers outside a NewType wrap with list<> syntax (e.g., list[PhoneNumber] - # renders as list). List layers inside a NewType use a (list) - # qualifier instead (e.g., Sources wrapping list[SourceItem] renders as - # Sources (list)), since the list-ness is an implementation detail of the type. 
- if ti.newtype_outer_list_depth > 0: - display = _wrap_list_n(display, ti.newtype_outer_list_depth) - elif ti.is_list and ti.newtype_name is not None: # list is inside the NewType - qualifiers.append("list") - elif ti.is_list: - display = _wrap_list_n(display, ti.list_depth) - else: - # Fallback: types without a linked identity. Registered primitives (int32, - # Geometry) and Pydantic types (HttpUrl) may still link to aggregate pages - # via the placement registry. Unregistered primitives render as plain code. - base = resolve_type_name(ti, "markdown") - link = _try_primitive_link(ti, base, ctx) - if link and ti.is_list: - display = _wrap_list_n(link, ti.list_depth) - elif link: - display = link - elif ti.is_list: - display = _plain_list_type(base, ti.list_depth) - else: - display = f"`{base}`" +def _scalar_identity(scalar: Primitive) -> TypeIdentity | None: + """Return a linkable identity for a `Primitive`'s `source_type`, if any.""" + src = scalar.source_type + if src is None: + return None + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) or is_pydantic_sourced(src) + ): + return TypeIdentity.of(src) + return None + + +def _scalar_display(scalar: Scalar, ctx: LinkContext | None) -> tuple[str, bool]: + """Render a `Scalar` variant as a markdown string; second value is True if linked. + + Linked when the scalar is a `Primitive` with an Enum / BaseModel / + Pydantic-sourced `source_type` whose identity resolves to a page. + Otherwise renders as the registry-resolved markdown name. + """ + if isinstance(scalar, Primitive): + identity = _scalar_identity(scalar) + if identity is not None and ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href), True + if identity is not None: + return f"`{identity.name}`", False + return f"`{_registry_name(scalar)}`", False + + +def _registry_name(scalar: Scalar) -> str: + """Resolve a scalar to its markdown registry name (e.g. 
`int64`).""" + if isinstance(scalar, LiteralScalar): + return "Literal" + if isinstance(scalar, AnyScalar): + return "Any" + mapping = get_type_mapping(scalar.base_type) + if mapping is None and scalar.source_type is not None: + mapping = get_type_mapping(scalar.source_type.__name__) + if mapping is not None: + return mapping.markdown + return scalar.base_type + + +def _format_map(shape: MapOf, ctx: LinkContext | None) -> str: + """Format a `MapOf` as a bare `map` code span (no outer wrappers).""" + key = _markdown_name_for_shape(shape.key) + value = _markdown_name_for_shape(shape.value) + return f"`map<{key}, {value}>`" + + +def _markdown_name_for_shape(shape: FieldShape) -> str: + """Return a bare markdown name (no link, no backticks) for a shape. + + Used inside `map` rendering. Picks the semantic NewType name + when wrapping a registered primitive, otherwise the registry name + of the terminal scalar. + """ + if isinstance(shape, NewTypeShape): + return shape.name + if isinstance(shape, Scalar): + return _registry_name(shape) + if isinstance(shape, ModelRef): + return shape.model.name + if isinstance(shape, ArrayOf): + inner = _markdown_name_for_shape(shape.element) + return f"list<{inner}>" + if isinstance(shape, MapOf): + return ( + f"map<{_markdown_name_for_shape(shape.key)}, " + f"{_markdown_name_for_shape(shape.value)}>" + ) + return "?" + + +def format_type(field: FieldSpec, ctx: LinkContext | None = None) -> str: + """Format a field's type for markdown display, with links and qualifiers.""" + qualifiers: list[str] = [] + display = _format_shape(field.shape, ctx, qualifiers) if not field.is_required: qualifiers.append("optional") - if qualifiers: return f"{display} ({', '.join(qualifiers)})" return display -def _linked_or_backticked(ti: TypeInfo, ctx: LinkContext | None) -> tuple[str, bool]: - """Return (formatted_string, has_link) for a TypeInfo component. 
+def _format_shape( + shape: FieldShape, ctx: LinkContext | None, qualifiers: list[str] +) -> str: + """Format a `FieldShape`, possibly appending qualifiers like `list`, `map`.""" + outer_depth, inner = _peel_arrays(shape) + + match inner: + case LiteralScalar(values=values): + if outer_depth > 0: + inside = " | ".join(f'"{v}"' for v in values) + return _plain_list_type(inside, outer_depth) + return _format_literal(values) + + case UnionRef(union=u): + if outer_depth > 0: + qualifiers.append("list") + return _format_union_members(u.members, ctx) + + case MapOf() as m: + map_str = _format_map(m, ctx) + if outer_depth > 0: + return _wrap_list_n(map_str.strip("`"), outer_depth) + return map_str + + case ModelRef() as m: + link = _model_link(m, ctx) + if outer_depth > 0: + return _wrap_list_n(link, outer_depth) + return link + + case NewTypeShape(name=name, ref=ref, inner=nt_inner): + link = resolve_type_link(TypeIdentity(ref, name), ctx) + if outer_depth > 0: + return _wrap_list_n(link, outer_depth) + if isinstance(nt_inner, ArrayOf): + qualifiers.append("list") + elif isinstance(nt_inner, MapOf): + qualifiers.append("map") + return link + + case Primitive() | AnyScalar() as s: + text, linked = _scalar_display(s, ctx) + if outer_depth > 0: + if linked: + return _wrap_list_n(text, outer_depth) + return _plain_list_type(text.strip("`"), outer_depth) + return text + + raise TypeError(f"Unhandled FieldShape: {shape!r}") + + +# ---- Underlying-type rendering for NewType pages ---- + + +def _peel_to_terminal(shape: FieldShape) -> FieldShape: + """Strip `NewTypeShape` / `ArrayOf` layers to find the terminal shape.""" + while True: + if isinstance(shape, NewTypeShape): + shape = shape.inner + elif isinstance(shape, ArrayOf): + shape = shape.element + else: + return shape + - Used by format_underlying_type to decide whether container types - need broken-backtick formatting (interleaving backtick runs with - linked text). 
+def _linked_or_backticked( + shape: FieldShape, ctx: LinkContext | None +) -> tuple[str, bool]: + """Return (formatted_string, has_link) for a shape component. - When `has_link` is True, `formatted_string` is a markdown link - ready for broken-backtick container syntax. When False, it is a raw - name that the caller embeds inside backticks. + Used by NewType page rendering to format the underlying type with + a link to its source page when one exists. """ - identity = _linked_type_identity(ti) + identity: TypeIdentity | None = None + _, cur = _peel_arrays(shape) + if isinstance(cur, NewTypeShape) and is_semantic_newtype(shape): + identity = TypeIdentity(cur.ref, cur.name) + elif isinstance(cur, Primitive) and cur.source_type is not None: + src = cur.source_type + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) + ): + identity = TypeIdentity(src, cur.base_type) if identity and ctx: href = ctx.resolve_link(identity) if href: return _code_link(identity.name, href), True - return _markdown_type_name(ti), False + return _markdown_name_for_underlying(shape), False -def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: - """Format a NewType's underlying type for the page header, with links. +def _markdown_name_for_underlying(shape: FieldShape) -> str: + """Bare markdown display name for a NewType's underlying type.""" + if is_semantic_newtype(shape): + _, cur = _peel_arrays(shape) + if isinstance(cur, NewTypeShape): + return cur.name + return resolve_type_name(shape) - Links enums and models that have their own pages. Does not link the - outermost NewType (which would self-reference). Dict key/value types - use full link resolution since they reference other types. 
- """ - if ti.kind == TypeKind.UNION and ti.union_members: - return _format_union_members(ti.union_members, ctx, separator=" | ") - if ti.is_dict and ti.dict_key_type and ti.dict_value_type: - key_str, key_linked = _linked_or_backticked(ti.dict_key_type, ctx) - val_str, val_linked = _linked_or_backticked(ti.dict_value_type, ctx) +def format_underlying_type(shape: FieldShape, ctx: LinkContext | None = None) -> str: + """Format a NewType's underlying type for the page header, with links.""" + terminal = _peel_to_terminal(shape) + if isinstance(terminal, UnionRef): + return _format_union_members(terminal.union.members, ctx, separator=" | ") + + if isinstance(terminal, MapOf): + key_str, key_linked = _linked_or_backticked(terminal.key, ctx) + val_str, val_linked = _linked_or_backticked(terminal.value, ctx) if key_linked or val_linked: if not key_linked: key_str = f"`{key_str}`" @@ -219,22 +309,28 @@ def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: return f"`map<`{key_str}`,`{val_str}`>`" return f"`map<{key_str}, {val_str}>`" - # Only link enums and models -- skip is_semantic_newtype to avoid - # self-linking (this TypeInfo belongs to the NewType being rendered). - identity = ( - TypeIdentity.of(ti.source_type) - if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type - else None - ) + # For underlying-type rendering on a NewType's own page, skip the + # is_semantic_newtype path to avoid self-linking: this shape + # belongs to the NewType being rendered. 
+ identity: TypeIdentity | None = None + if isinstance(terminal, Primitive) and terminal.source_type is not None: + src = terminal.source_type + if isinstance(src, type) and ( + issubclass(src, Enum) or issubclass(src, BaseModel) + ): + identity = TypeIdentity.of(src) + + depth, _ = _peel_arrays(shape) + if identity and ctx: href = ctx.resolve_link(identity) if href: linked = _code_link(identity.name, href) - if ti.is_list: - return _wrap_list_n(linked, ti.list_depth) + if depth > 0: + return _wrap_list_n(linked, depth) return linked - base = identity.name if identity else resolve_type_name(ti, "markdown") - if ti.is_list: - return _plain_list_type(base, ti.list_depth) + base = identity.name if identity else resolve_type_name(shape) + if depth > 0: + return _plain_list_type(base, depth) return f"`{base}`" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py new file mode 100644 index 000000000..13a0e841a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/__init__.py @@ -0,0 +1 @@ +"""PySpark codegen pipeline: FeatureSpec to expression and test modules.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py new file mode 100644 index 000000000..fc1f68e57 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/_render_common.py @@ -0,0 +1,265 @@ +"""Shared rendering primitives used by `renderer` and `test_renderer`. + +Concerns: + +- `jinja_env` -- the cached Jinja2 environment. +- `py_literal` / `tuple_literal` -- render Python values back to source code. +- `parse_field_eq` -- unwrap a `FieldEqCondition` / `Not(FieldEqCondition)`. 
+- check/label naming -- `check_name`, `field_label`, `column_level_suffix`, + `model_constraint_field_label`, `COLUMN_LEVEL_FUNCTIONS` (membership), + and `_COLUMN_LEVEL_SUFFIXES` (label suffix lookup). +- collision disambiguation -- `disambiguate` (function names) and + `compute_label_suffixes` (violation labels). +""" + +from __future__ import annotations + +import functools +from collections import Counter +from collections.abc import Hashable, Iterable +from enum import Enum +from pathlib import Path +from typing import NamedTuple, TypeVar + +from jinja2 import Environment, FileSystemLoader + +from overture.schema.system.field_path import ArrayPath +from overture.schema.system.model_constraint import ( + Condition, + FieldEqCondition, + Not, +) + +from .check_ir import Check, ModelCheck +from .constraint_dispatch import ForbidIf, RequireIf, model_constraint_function + +__all__ = [ + "COLUMN_LEVEL_FUNCTIONS", + "FieldEq", + "check_name", + "column_level_suffix", + "compute_label_suffixes", + "disambiguate", + "field_label", + "jinja_env", + "model_constraint_field_label", + "parse_field_eq", + "py_literal", + "tuple_literal", +] + +_K = TypeVar("_K", bound=Hashable) + +# Constraint functions that emit a column-level check (one per field +# rather than per element), used by the check builder to split them +# into their own `Check` IR nodes. +COLUMN_LEVEL_FUNCTIONS: frozenset[str] = frozenset( + { + "check_required", + "check_array_min_length", + "check_array_max_length", + "check_struct_unique", + } +) + +# Violation label suffix per column-level check that shares its +# field's structural path. `check_required` lands on its field's own +# path, so it stays absent from this table. 
+_COLUMN_LEVEL_SUFFIXES: dict[str, str] = { + "check_array_min_length": "_min_length", + "check_array_max_length": "_max_length", + "check_struct_unique": "_unique", +} + +_TEMPLATES_DIR = Path(__file__).parent / "templates" + + +@functools.lru_cache(maxsize=1) +def jinja_env() -> Environment: + """Return the shared Jinja2 environment for PySpark code generation templates.""" + env = Environment( + loader=FileSystemLoader(_TEMPLATES_DIR), + trim_blocks=True, + lstrip_blocks=True, + keep_trailing_newline=True, + autoescape=False, + ) + env.filters["py_literal"] = py_literal + return env + + +_CHECK_PREFIX = "check_" + + +def tuple_literal(rendered_items: Iterable[str]) -> str: + """Wrap pre-rendered items as a Python tuple literal source. + + A single-element tuple needs a trailing comma; this helper applies + that rule so callers rendering enum-like values that don't fit + `py_literal` can still share its tuple-formatting behaviour. + """ + items = list(rendered_items) + joined = ", ".join(items) + return f"({joined},)" if len(items) == 1 else f"({joined})" + + +def py_literal(value: object) -> str: + """Render a Python value as source code. + + Recurses into containers to extract `Enum.value` (since `repr()` of + an Enum member is not valid Python). Quote style and line wrapping + are left to `ruff format`. 
+ """ + if isinstance(value, Enum): + return py_literal(value.value) + if isinstance(value, dict): + items = ", ".join(f"{py_literal(k)}: {py_literal(v)}" for k, v in value.items()) + return "{" + items + "}" + if isinstance(value, list): + return "[" + ", ".join(py_literal(v) for v in value) + "]" + if isinstance(value, tuple): + return tuple_literal(py_literal(v) for v in value) + return repr(value) + + +class FieldEq(NamedTuple): + """An unwrapped `FieldEqCondition`, with `negated` set when wrapped in `Not`.""" + + field_name: str + value: object + negated: bool + + +def parse_field_eq(condition: Condition) -> FieldEq | None: + """Unwrap a `FieldEqCondition` or `Not(FieldEqCondition)`. + + Returns a `FieldEq` triple for either shape, or `None` for any + other condition. `negated` is True iff the condition was wrapped + in `Not`. + """ + match condition: + case Not(inner=FieldEqCondition(field_name=fn, value=v)): + return FieldEq(fn, v, True) + case FieldEqCondition(field_name=fn, value=v): + return FieldEq(fn, v, False) + case _: + return None + + +def check_name(function: str, override: str | None = None) -> str: + """Strip the `check_` prefix to produce a human-readable check name.""" + if override is not None: + return override + return function.removeprefix(_CHECK_PREFIX) + + +def column_level_suffix(check: Check) -> str: + """Return the column-level label suffix for `check`, or empty string. + + Column-level checks (`check_array_min_length`, `check_struct_unique`, + etc.) share their structural path with the field they constrain; the + suffix differentiates the violation label so each check reports a + distinct `Check.field`. + """ + if not check.descriptors: + return "" + return _COLUMN_LEVEL_SUFFIXES.get(check.descriptors[0].function, "") + + +def field_label(check: Check) -> str: + """Render the violation label for a Check. + + Combines the structural field path with any column-level suffix + (`_min_length`, `_unique`, etc.) 
so each check reports a distinct + `Check.field` even when several share a structural path. + """ + return f"{check.target}{column_level_suffix(check)}" + + +def _model_check_base_label(check: ModelCheck) -> str: + """Compute the violation field label sans collision suffix. + + - `require_if` / `forbid_if` produce a per-target label + (`field_required` / `path.field_forbidden`) since each descriptor + now carries a single target field (multi-field decorators split + at dispatch time). + - Other kinds (`require_any_of`, `radio_group`, `min_fields_set`) + name the whole constraint; on `ArrayPath` targets they use the + path itself so anchors are distinguishable across nestings. + """ + match check.descriptor: + case RequireIf(): + kind_suffix = "_required" + case ForbidIf(): + kind_suffix = "_forbidden" + case _: + if isinstance(check.target, ArrayPath): + return str(check.target) + return check_name(model_constraint_function(check.descriptor)) + target = check.descriptor.field_names[0] + if not isinstance(check.target, ArrayPath): + return f"{target}{kind_suffix}" + return f"{check.target}.{target}{kind_suffix}" + + +def model_constraint_field_label(check: ModelCheck, label_suffix: str) -> str: + """Compute the field label for a model constraint check. + + `label_suffix` (from `compute_label_suffixes`) disambiguates labels + that would otherwise collide -- e.g. two `@require_any_of` on the + same model, or two `@require_if(["x"], ...)` with different + conditions. + """ + return f"{_model_check_base_label(check)}{label_suffix}" + + +def _occurrence_indices(keys: list[_K]) -> list[tuple[int, int]]: + """Pair each key with `(occurrence_index, total_count)`. + + `occurrence_index` is the 0-based position of the key among its + equal siblings; `total_count` is how many times the key appears in + `keys`. Both `disambiguate` and `compute_label_suffixes` need this + "where am I within my collision group" view. 
+ """ + counts: Counter[_K] = Counter(keys) + seen: Counter[_K] = Counter() + result: list[tuple[int, int]] = [] + for key in keys: + result.append((seen[key], counts[key])) + seen[key] += 1 + return result + + +def disambiguate(names: list[str]) -> list[str]: + """Make a list of names unique by appending `_N` to repeated entries. + + The first occurrence of a name is left bare; the second becomes + `name_1`, the third `name_2`, and so on. Names that appear once are + untouched. + + Assumes no input name already matches a generated `name_N` form; a + collision there would reintroduce a duplicate. Field names in + practice never carry that suffix, so the assumption holds. + """ + return [ + f"{name}_{idx}" if total > 1 and idx > 0 else name + for name, (idx, total) in zip(names, _occurrence_indices(names), strict=True) + ] + + +def compute_label_suffixes(model_checks: list[ModelCheck]) -> list[str]: + """Pre-compute field label suffixes, adding counters only for collisions. + + Unlike `disambiguate`, every colliding entry receives a `_N` suffix + including the first one (`_0`, `_1`, ...). This is symmetric on + purpose: violation labels for a colliding group all share the same + base name, so each needs an explicit collision index to stay + distinct. `disambiguate` operates on Python function names where + leaving the first occurrence bare preserves readable identifiers + for the common no-collision case. 
+ """ + base_labels = [_model_check_base_label(check) for check in model_checks] + return [ + f"_{idx}" if total > 1 else "" + for idx, total in _occurrence_indices(base_labels) + ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py new file mode 100644 index 000000000..9e736a67c --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -0,0 +1,699 @@ +"""Walk FieldSpec trees to produce Check/ModelCheck IR for rendering. + +Consults the constraint dispatch table to map each constraint to a +descriptor, then applies composition rules the dispatch table can't see: + +- Coalesce ordering: gather descriptors for the same field into one + `Check` (required first, then enum, then dispatched constraints), + deduplicate, and split column-level checks into separate suffixed checks. +- Target resolution: a shape walker descends each field's `FieldShape` + tree, building the `ScalarPath` or `ArrayPath` target by appending + segments as it goes -- so the path read in the code is the path that + lands in the IR. Entering a `list[...]` layer promotes the path's + terminal struct segment to an iterated `ArraySegment`. +- Subtype gating: annotate variant-specific fields with discriminator + `Guard`s, synthesize forbid_if/require_if for absent or required + variants, and gate check_required under nullable struct ancestors. 
+""" + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, replace +from enum import Enum + +from pydantic import BaseModel +from typing_extensions import assert_never + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + ScalarPath, + promote_terminal_array, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ModelConstraint, + Not, +) + +from ..extraction.field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_primitive +from ..extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ..extraction.type_registry import PRIMITIVE_TYPES +from ._render_common import COLUMN_LEVEL_FUNCTIONS +from .check_ir import ( + Check, + ColumnGuard, + ElementGuard, + Guard, + ModelCheck, +) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + RequireIf, + dispatch_base_type, + dispatch_constraint, + dispatch_model_constraint, + dispatch_newtype, + forbid_if_field_shapes, +) + +__all__ = [ + "build_checks", +] + + +def _dispatch_layer_constraints( + constraints: tuple[ConstraintSource, ...], + base_type: str | None, +) -> list[ExpressionDescriptor]: + """Dispatch one shape layer's constraints, skipping primitive-inherent ones.""" + descriptors: list[ExpressionDescriptor] = [] + for cs in constraints: + if cs.source_name is not None and cs.source_name in PRIMITIVE_TYPES: + continue + desc = dispatch_constraint(cs.constraint, base_type=base_type) + if desc is not None: + descriptors.append(desc) + return descriptors + + +def _enum_values(scalar: Scalar) -> list[object] | None: + """Return enum/literal values for a terminal `Scalar`, or `None`.""" + if isinstance(scalar, LiteralScalar): + return list(scalar.values) + if isinstance(scalar, Primitive): + src = 
scalar.source_type + if isinstance(src, type) and issubclass(src, Enum): + return [m.value for m in src] + return None + + +def _required_descriptor(gate: FieldPath | None) -> ExpressionDescriptor: + return ExpressionDescriptor(function="check_required", gate=gate) + + +@dataclass(frozen=True, slots=True) +class _ShapeTerminal: + """A `ModelRef`/`UnionRef` terminal and the path the walker reached it at. + + The `FieldSpec` recursion uses `path` directly as the prefix for the + sub-model's or sub-union's fields. The walker returns `None` instead + of a `_ShapeTerminal` for terminals it fully handles itself (scalars, + maps, and NewTypes with a dispatch override). + """ + + ref: ModelRef | UnionRef + path: FieldPath + + +def _walk_field_shape( + shape: FieldShape, + path: FieldPath, + *, + base_type: str | None, + required: bool, + required_gate: FieldPath | None, + carried_element: list[ExpressionDescriptor], +) -> tuple[list[Check], _ShapeTerminal | None]: + """Descend a `FieldShape`, emitting the field's own Checks. + + Builds the `FieldPath` target structurally: `ArrayOf` promotes the + path's terminal segment, `NewTypeShape` passes the path through, + terminals emit at the path reached. Returns the emitted Checks plus, + at a `ModelRef`/`UnionRef` terminal, a `_ShapeTerminal` for the + `FieldSpec` recursion (`None` for terminals the walker fully handles). + + Parameters + ---------- + path + The path reached so far, promoted once per `ArrayOf` layer + crossed. `required` and `path` move together: a field's path + starts as a plain struct path and is promoted exactly when the + first `ArrayOf` clears `required`, so while `required` holds the + path is still the plain struct path -- a standalone + `check_required` always lands there. + required + Whether the field still needs a `check_required`. Cleared by the + first `ArrayOf`: before it, `check_required` merges into the + terminal Check; from it on, it is a standalone column-level Check. 
+ carried_element + Element-level descriptors from `ArrayOf` layers above, prepended + to the terminal's own element-level descriptors. + """ + match shape: + case NewTypeShape(name=name, inner=inner): + nt_descriptors = dispatch_newtype(name) + if nt_descriptors is not None: + if isinstance(path.segments[-1], ArraySegment): + # A NewType with a dispatch override nested under a list + # layer has no schema field; raise to keep the gap loud + # rather than emit an untested target (cf. list[list[Union]]). + raise NotImplementedError( + f"NewType with a dispatch override ({name}) nested " + "under a list layer is not supported" + ) + descriptors = list(nt_descriptors) + if required: + descriptors.insert(0, _required_descriptor(required_gate)) + return [Check(descriptors=tuple(descriptors), target=path)], None + return _walk_field_shape( + inner, + path, + base_type=base_type, + required=required, + required_gate=required_gate, + carried_element=carried_element, + ) + + case ArrayOf(element=element, constraints=constraints): + layer_descriptors = _dispatch_layer_constraints( + constraints, + base_type, + ) + column_descriptors = list( + dict.fromkeys( + d for d in layer_descriptors if d.function in COLUMN_LEVEL_FUNCTIONS + ) + ) + element_descriptors = [ + d for d in layer_descriptors if d.function not in COLUMN_LEVEL_FUNCTIONS + ] + checks: list[Check] = [] + if required: + checks.append( + Check( + descriptors=(_required_descriptor(required_gate),), + target=path, + ) + ) + checks.extend( + Check(descriptors=(d,), target=path) for d in column_descriptors + ) + sub_checks, terminal = _walk_field_shape( + element, + promote_terminal_array(path), + base_type=base_type, + required=False, + required_gate=required_gate, + carried_element=[*carried_element, *element_descriptors], + ) + return [*checks, *sub_checks], terminal + + case UnionRef(): + terminal_seg = path.segments[-1] + if isinstance(terminal_seg, ArraySegment) and terminal_seg.iter_count > 1: + # 
`list[list[Union]]` would build a multi-iter union target, + # but no schema field has that shape. The walker raises to + # keep the gap loud rather than silently emit one. + raise NotImplementedError( + "Union nested under multiple list layers " + "(list[list[Union]]) is not supported" + ) + return _ref_terminal_checks(shape, path, required, required_gate) + + case ModelRef(): + return _ref_terminal_checks(shape, path, required, required_gate) + + case Primitive() | LiteralScalar() | AnyScalar() | MapOf(): + # `MapOf` shares this arm: a map is a terminal the walker + # does not descend into. Length constraints on a MapOf are + # rejected at extraction (`attach_constraints` raises). + # No schema field exercises map-level constraints today. + constraints = shape.constraints + element_descriptors = list(carried_element) + enum_values = _enum_values(shape) if isinstance(shape, Scalar) else None + if enum_values is not None: + element_descriptors.append( + ExpressionDescriptor( + function="check_enum", + args=(tuple(enum_values),), + ) + ) + element_descriptors.extend( + _dispatch_layer_constraints(constraints, base_type) + ) + if base_type is not None: + base_descriptors = dispatch_base_type(base_type) + if base_descriptors is not None: + element_descriptors.extend(base_descriptors) + element_descriptors = list(dict.fromkeys(element_descriptors)) + + if required: + return [ + Check( + descriptors=( + _required_descriptor(required_gate), + *element_descriptors, + ), + target=path, + ) + ], None + if element_descriptors: + return [ + Check(descriptors=tuple(element_descriptors), target=path) + ], None + return [], None + + assert_never(shape) + + +def _ref_terminal_checks( + ref: ModelRef | UnionRef, + path: FieldPath, + required: bool, + required_gate: FieldPath | None, +) -> tuple[list[Check], _ShapeTerminal]: + """Handle a `ModelRef`/`UnionRef` terminal: emit `check_required`, hand back the ref. 
+ + A required model or union field always gets a standalone + `check_required` Check; `required` holds only before any `ArrayOf`, + so `path` is the field's plain struct path. The sub-fields are the + caller's job, reached via the returned `_ShapeTerminal`. + """ + checks: list[Check] = [] + if required: + checks.append( + Check( + descriptors=(_required_descriptor(required_gate),), + target=path, + ) + ) + return checks, _ShapeTerminal(ref=ref, path=path) + + +def _build_field_checks( + field_spec: FieldSpec, + prefix: FieldPath = ScalarPath(), + *, + nullable_gate: FieldPath | None = None, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Build Checks for a single field by walking its shape tree. + + `arm` is the singleton union-arm discriminator value the field belongs + to (when it lives in exactly one arm), or `None` when the field is + shared. It propagates to any model constraints discovered through this + field's sub-models so per-arm test modules can filter them correctly. 
+ """ + path = prefix.append_struct(field_spec.name) + checks, terminal = _walk_field_shape( + field_spec.shape, + path, + base_type=( + p.base_type + if (p := terminal_primitive(field_spec.shape)) is not None + else None + ), + required=field_spec.is_required, + required_gate=nullable_gate, + carried_element=[], + ) + + model_checks: list[ModelCheck] = [] + match terminal: + case None: + pass + case _ShapeTerminal(ref=UnionRef(union=union_spec), path=terminal_path): + sub_field_checks, sub_model_checks = _recurse_into_union( + union_spec, terminal_path, arm=arm + ) + checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + case _ShapeTerminal(ref=ModelRef(model=model_spec), path=terminal_path): + sub_field_checks, sub_model_checks = _recurse_into_model( + model_spec, + terminal_path, + field_spec.is_optional, + nullable_gate, + arm=arm, + ) + checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + case _ShapeTerminal(ref=ref): + raise AssertionError( + f"unhandled _ShapeTerminal.ref variant: {type(ref).__name__}" + ) + + return checks, model_checks + + +def _recurse_into_model( + model_spec: ModelSpec, + prefix: FieldPath = ScalarPath(), + is_optional: bool = False, + nullable_gate: FieldPath | None = None, + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Walk a MODEL-kind field's children plus its model-level constraints. + + `prefix` is the terminal path the shape walker reached the `ModelRef` + at, defaulting to the empty `ScalarPath()` at the row root. Its terminal + segment is an `ArraySegment` exactly when the field is itself a list, + which resets the nullable gate (array iteration handles element + nullability). + + `arm` propagates from the union arm whose variant-specific field led + here, so model constraints declared on the sub-model are tagged with + that arm rather than `None` (which would route them to every per-arm + test). 
+ """ + last_seg = prefix.segments[-1] if prefix.segments else None + field_is_list = isinstance(last_seg, ArraySegment) + if field_is_list: + child_gate: FieldPath | None = None + else: + child_gate = prefix if is_optional else nullable_gate + + field_checks: list[Check] = [] + model_checks: list[ModelCheck] = [] + for sub_field in model_spec.fields: + sub_field_checks, sub_model_checks = _build_field_checks( + sub_field, + prefix=prefix, + nullable_gate=child_gate, + arm=arm, + ) + field_checks.extend(sub_field_checks) + model_checks.extend(sub_model_checks) + + if model_spec.constraints: + sub_model_constraint_checks = _dispatch_model_constraints( + model_spec.constraints, + model_spec.fields, + target=_model_constraint_target(prefix), + arm=arm, + ) + if sub_model_constraint_checks: + _guard_struct_nested_anchor(prefix, model_spec.name) + model_checks.extend(sub_model_constraint_checks) + return field_checks, model_checks + + +def _guard_struct_nested_anchor(prefix: FieldPath, name: str) -> None: + """Raise when emitting a model constraint at a struct-only prefix. + + See `_model_constraint_target`: in that case the constraint's target + collapses to the row root, which is wrong for any non-skipped + constraint. Today only `NoExtraFieldsConstraint` reaches here (and + dispatches to None); a real descriptor at this depth is a renderer + gap, not a normal case. + """ + if not isinstance(prefix, ArrayPath) and prefix.segments: + raise NotImplementedError( + f"Model constraint on struct-nested {name!r} " + f"(reached at {prefix!r}) -- the renderer has no anchor " + "for nested-struct model constraints." + ) + + +def _recurse_into_union( + union_spec: UnionSpec, + prefix: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Walk a UNION-kind field's variants, gathering Checks and ModelChecks. 
+ + `prefix` is the terminal path the shape walker reached the `UnionRef` + at; the union's variant fields live directly under it. An `ArrayPath` + prefix means the union is reached through array iteration, so variant + gates are element-level and model constraints target that path. + + `arm` is the outer union arm whose variant-specific field reached this + inner union. It tags any model constraints discovered here so they + aren't propagated to other arms' test modules. + """ + mapping = union_spec.discriminator_mapping or {} + value_by_class = {cls: value for value, cls in mapping.items()} + union_target = _model_constraint_target(prefix) + + field_checks, field_model_checks = _field_checks_for_union( + union_spec, value_by_class, prefix=prefix, arm=arm + ) + union_level_checks = _model_checks_for_union( + union_spec, value_by_class, union_target, arm=arm + ) + exclusivity_checks = _exclusivity_checks_for_union( + union_spec, value_by_class, union_target, arm=arm + ) + if union_level_checks or exclusivity_checks: + _guard_struct_nested_anchor(prefix, union_spec.name) + return field_checks, union_level_checks + field_model_checks + exclusivity_checks + + +def _model_constraint_target(prefix: FieldPath) -> FieldPath: + """Where a model constraint's check should be anchored. + + Two supported cases: + + - `ArrayPath` -- constraints on a sub-model reached through array + iteration target the array path (so the renderer wraps the check + in `array_check`). + - Empty or struct-only `ScalarPath` -- constraints anchor at the row + root. Pure struct nesting (e.g. `Names` reached at + `ScalarPath('names')`) collapses here because the renderer has no + anchor for nested-struct model constraints. 
The only constraint kind + currently reachable through pure struct nesting is + `NoExtraFieldsConstraint`, which `dispatch_model_constraint` + discards before the target is consulted, so the collapse is + observationally inert today; a non-skipped constraint at this depth + would surface as a wrong-anchor bug. + """ + return prefix if isinstance(prefix, ArrayPath) else ScalarPath() + + +def _dispatch_model_constraints( + constraints: tuple[ModelConstraint, ...], + fields: list[FieldSpec], + *, + target: FieldPath = ScalarPath(), + arm: str | None = None, +) -> list[ModelCheck]: + """Dispatch model constraints to ModelChecks.""" + return [ + ModelCheck(descriptor=desc, target=target, arm=arm) + for mc in constraints + for desc in dispatch_model_constraint(mc, fields) + ] + + +def _singleton_arm(values: tuple[str, ...]) -> str | None: + """Return the sole arm in `values`, or None when there isn't exactly one. + + No real schema today has a variant-specific field belonging to a + proper subset of arms (2-of-N): every variant-specific field is + declared on exactly one arm. If a future schema introduces a 2-of-N + field whose sub-model declares model constraints, this collapse + would broadcast those constraints to every arm (including the ones + the field doesn't belong to). `TestMultiArmVariantSourcesPolicy` + pins the current behaviour as a tombstone. + """ + return values[0] if len(values) == 1 else None + + +def _field_checks_for_union( + spec: UnionSpec, + value_by_class: dict[type[BaseModel], str], + prefix: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> tuple[list[Check], list[ModelCheck]]: + """Build field checks for a union spec's annotated fields. + + `arm` is the outer-union arm threaded through from an enclosing + `_recurse_into_union`. 
When present, every sub-model constraint + reached from here inherits that arm -- the inner union's own + discriminator is irrelevant to per-arm test filtering, which always + keys on the outermost union's discriminator. + """ + guard_cls: type[Guard] = ( + ElementGuard if isinstance(prefix, ArrayPath) else ColumnGuard + ) + field_checks: list[Check] = [] + model_checks: list[ModelCheck] = [] + discriminator = spec.discriminator_field + for af in spec.annotated_fields: + values: tuple[str, ...] = () + if af.variant_sources is not None and discriminator is not None: + values = tuple( + value_by_class[src] + for src in af.variant_sources + if src in value_by_class + ) + # Outer arm dominates: when this is a nested union, every sub-model + # constraint discovered here belongs to the outer arm. Only the + # outermost union picks a `field_arm` from its own variant sources, + # and only when the field is variant-specific to a single arm. + field_arm = arm if arm is not None else _singleton_arm(values) + checks, sub_model_checks = _build_field_checks( + af.field_spec, prefix=prefix, arm=field_arm + ) + model_checks.extend(sub_model_checks) + if values and discriminator is not None: + # Outer guards land first so the renderer composes + # outer-then-inner (e.g. a `ColumnGuard` from a parent union, + # then an `ElementGuard` from the nested union the field + # lives in). + guard: Guard = guard_cls(discriminator=discriminator, values=values) + checks = [replace(ck, guards=(guard, *ck.guards)) for ck in checks] + field_checks.extend(checks) + return field_checks, model_checks + + +def _model_checks_for_union( + spec: UnionSpec, + arm_by_class: dict[type[BaseModel], str], + target: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> list[ModelCheck]: + """Build ModelChecks for the union itself plus each member's own constraints. + + When `arm` is None (top-level union): union-level constraints carry + `arm=None` because they apply regardless of which arm matches. 
+ Member-class constraints (e.g. `@radio_group` on `RoadSegment`) are + tagged with the discriminator value mapped to that class so the test + renderer can confine them to the right per-arm test module. + + When `arm` is set (nested union reached from an outer arm): every + check produced -- union-level and member-level -- inherits that outer + arm. The inner union's own discriminator is irrelevant to per-arm + test filtering, which always keys on the outermost union's + discriminator. + """ + model_checks = _dispatch_model_constraints( + spec.constraints, + spec.fields, + target=target, + arm=arm, + ) + for member in spec.member_specs: + member_constraints = ModelConstraint.get_model_constraints(member.member_cls) + member_arm = arm if arm is not None else arm_by_class.get(member.member_cls) + model_checks.extend( + _dispatch_model_constraints( + member_constraints, + member.spec.fields, + target=target, + arm=member_arm, + ) + ) + return model_checks + + +def _exclusivity_checks_for_union( + spec: UnionSpec, + value_by_class: dict[type[BaseModel], str], + target: FieldPath = ScalarPath(), + *, + arm: str | None = None, +) -> list[ModelCheck]: + """Generate forbid_if/require_if checks from union variant structure. + + Unlike `dispatch_model_constraint` (which maps user-declared + `ModelConstraint` objects to descriptors), this synthesizes + `ForbidIf`/`RequireIf` descriptors directly from the union's variant + grouping. The input is a structural property of the union, not a + declared constraint, so there is no source `ModelConstraint` to + dispatch from. + + `arm` is the outer-union arm threaded through when this union is + nested inside another. Inner exclusivity checks belong to that outer + arm rather than being broadcast to every arm. 
+ """ + if spec.discriminator_mapping is None or spec.discriminator_field is None: + return [] + + all_values = set(spec.discriminator_mapping) + + grouped: dict[str, set[type[BaseModel]]] = defaultdict(set) + required_by_field: dict[str, set[type[BaseModel]]] = defaultdict(set) + shape_by_field: dict[str, FieldShape] = {} + for af in spec.annotated_fields: + if af.variant_sources is None: + continue + name = af.field_spec.name + shape_by_field[name] = af.field_spec.shape + for src in af.variant_sources: + if src in value_by_class: + grouped[name].add(src) + if af.field_spec.is_required: + required_by_field[name].add(src) + + def forbid_check(field_name: str, condition: FieldEqCondition | Not) -> ModelCheck: + return ModelCheck( + descriptor=ForbidIf( + field_names=(field_name,), + condition=condition, + field_shapes=forbid_if_field_shapes((field_name,), shape_by_field), + ), + target=target, + arm=arm, + ) + + def require_check(field_name: str, condition: FieldEqCondition | Not) -> ModelCheck: + return ModelCheck( + descriptor=RequireIf(field_names=(field_name,), condition=condition), + target=target, + arm=arm, + ) + + checks: list[ModelCheck] = [] + disc_field = spec.discriminator_field + for field_name, variant_classes in grouped.items(): + variant_values = {value_by_class[cls] for cls in variant_classes} + excluded_values = all_values - variant_values + if not excluded_values: + continue + + if len(variant_values) == 1 and len(excluded_values) > 1: + (sole_value,) = variant_values + checks.append( + forbid_check(field_name, Not(FieldEqCondition(disc_field, sole_value))) + ) + else: + for exc_val in sorted(excluded_values): + checks.append( + forbid_check(field_name, FieldEqCondition(disc_field, exc_val)) + ) + + required_classes = required_by_field[field_name] + required_values = {value_by_class[cls] for cls in required_classes} + for req_val in sorted(required_values): + checks.append( + require_check(field_name, FieldEqCondition(disc_field, req_val)) + ) + + 
return checks + + +def build_checks( + spec: FeatureSpec, +) -> tuple[list[Check], list[ModelCheck]]: + """Build all check IR for a feature spec. + + Roots the walk at the empty `ScalarPath()` and delegates to the same + helpers used at every nested level (`_recurse_into_union` for unions, + `_recurse_into_model` for models), so the row-root and nested cases + share one path. + """ + if isinstance(spec, UnionSpec): + return _recurse_into_union(spec) + return _recurse_into_model(spec) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py new file mode 100644 index 000000000..e9029c632 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py @@ -0,0 +1,83 @@ +"""Tree-shaped IR for PySpark check expressions. + +Sum types describe each check's structural placement: + +- `Check.target: FieldPath` -- a `ScalarPath` or `ArrayPath` locating + where the descriptor's expression is evaluated. The choice of variant + signals whether the renderer wraps the expression in `array_check` / + `nested_array_check`. +- `Guard` -- a single discriminator gate. `Check.guards` is a tuple + of `Guard`s AND-composed at render time; nested-union gating + composes one `ColumnGuard` with one `ElementGuard`. + +The check_builder produces these types and the renderer consumes them. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TypeAlias + +from overture.schema.system.field_path import FieldPath, ScalarPath + +from .constraint_dispatch import ExpressionDescriptor, ModelConstraintDescriptor + +__all__ = [ + "Check", + "ColumnGuard", + "ElementGuard", + "Guard", + "ModelCheck", +] + + +@dataclass(frozen=True, slots=True) +class ColumnGuard: + """Discriminator gate where the discriminator is a top-level row column.""" + + discriminator: str + values: tuple[str, ...] 
+ + +@dataclass(frozen=True, slots=True) +class ElementGuard: + """Discriminator gate where the discriminator is a struct field inside an array element.""" + + discriminator: str + values: tuple[str, ...] + + +Guard: TypeAlias = ColumnGuard | ElementGuard + + +@dataclass(frozen=True, slots=True) +class Check: + """A field-level validation check.""" + + descriptors: tuple[ExpressionDescriptor, ...] + target: FieldPath + guards: tuple[Guard, ...] = () + + +@dataclass(frozen=True, slots=True) +class ModelCheck: + """A model-level validation check (cross-field constraint). + + `target` locates the model the constraint applies to: an empty + `ScalarPath()` for row-root constraints, or an `ArrayPath` when the + constrained model is reached by iterating one or more arrays. The + default `ScalarPath()` makes the row-root case ergonomic at + construction sites and is the common case; `Check.target` has no + sensible default and is required. + + `arm` records the discriminator value of the union member that + contributed the constraint, or `None` when the constraint applies to + every arm. The test renderer filters per-arm test modules by this + value. Constraints discovered through a variant-specific field's + sub-model or sub-union inherit the contributing outer arm, so they + land only in that arm's test module. + """ + + descriptor: ModelConstraintDescriptor + target: FieldPath = ScalarPath() + arm: str | None = None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py new file mode 100644 index 000000000..b02f5b735 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/constraint_dispatch.py @@ -0,0 +1,509 @@ +"""Constraint type to PySpark expression descriptor dispatch. + +Pure mapping from constraint objects to expression descriptors. 
+No awareness of field paths, list depth, or struct nesting -- +those are composition concerns handled by check_builder. +""" + +from __future__ import annotations + +import re +from collections.abc import Callable, Mapping +from dataclasses import dataclass +from typing import Any, TypeAlias + +from annotated_types import Ge, Gt, Interval, Le, Lt +from pydantic import Strict + +from overture.schema.system.case import to_snake_case +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_constraint.string import ( + JsonPointerConstraint, + PatternConstraint, + StrippedConstraint, +) +from overture.schema.system.field_path import FieldPath +from overture.schema.system.model_constraint import ( + Condition, + ForbidIfConstraint, + MinFieldsSetConstraint, + NoExtraFieldsConstraint, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryTypeConstraint +from overture.schema.system.ref import Reference + +from ..extraction.field import FieldShape, ModelRef +from ..extraction.field_walk import has_array_layer, terminal_of +from ..extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) +from ..extraction.specs import FieldSpec + +__all__ = [ + "ExpressionDescriptor", + "ForbidIf", + "MinFieldsSet", + "ModelConstraintDescriptor", + "RadioGroup", + "RequireAnyOf", + "RequireIf", + "dispatch_base_type", + "dispatch_constraint", + "dispatch_model_constraint", + "dispatch_newtype", + "forbid_if_field_shapes", + "model_constraint_function", + "model_mutation_function", +] + + +@dataclass(frozen=True, slots=True) +class ExpressionDescriptor: + """Describes a constraint_expressions function call. + + `function` names the function (e.g., `"check_bounds"`). + `args` are positional arguments after `col` and `field`. 
+ `kwargs` are keyword arguments, stored as a tuple of `(name, value)` + pairs so the descriptor is hashable -- consumers convert with `dict()` + when they need mapping access. + `constraint_type` is the Python class of the constraint that + produced this descriptor (e.g., `NoWhitespaceConstraint`), + used by test generators to pick pattern-appropriate mutation values. + `gate` is the structural path to a nullable ancestor struct; when set, + the renderer wraps the expression in `F.when(gate.isNotNull(), ...)`. + `label` is a human-readable description used in error messages + (e.g., `"ISO 3166-1 alpha-2 country code"`). + `check_name` overrides the Check.name derivation in error_key; + when None, the renderer strips the `check_` prefix from `function`. + """ + + function: str + args: tuple[object, ...] = () + kwargs: tuple[tuple[str, object], ...] = () + constraint_type: type | None = None + gate: FieldPath | None = None + label: str | None = None + check_name: str | None = None + + +_BASE_TYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { + "HttpUrl": ( + ExpressionDescriptor(function="check_url_format"), + ExpressionDescriptor(function="check_url_length"), + ), + "EmailStr": (ExpressionDescriptor(function="check_email"),), + "BBox": ( + ExpressionDescriptor(function="check_bbox_completeness"), + ExpressionDescriptor(function="check_bbox_lat_ordering"), + ExpressionDescriptor(function="check_bbox_lat_range"), + ), +} + +_NEWTYPE_DISPATCH: dict[str, tuple[ExpressionDescriptor, ...]] = { + "LinearlyReferencedRange": ( + ExpressionDescriptor(function="check_linear_range_length"), + ExpressionDescriptor(function="check_linear_range_bounds"), + ExpressionDescriptor(function="check_linear_range_order"), + ), +} + + +def _normalize_anchor(pattern: str) -> str: + """Replace trailing `$` with `\\z` for Java/Spark regex compatibility. + + Leaves an escaped trailing `\\$` (literal dollar match) untouched. 
+ """ + if pattern.endswith("$") and not pattern.endswith(r"\$"): + return pattern[:-1] + r"\z" + return pattern + + +def _pattern_check_name(constraint: PatternConstraint) -> str: + """Derive a snake_case check name from the constraint class name.""" + if type(constraint) is PatternConstraint: + return "pattern" + return to_snake_case(type(constraint).__name__.removesuffix("Constraint")) + + +def _pattern_label(constraint: PatternConstraint) -> str: + """Extract a human-readable label from a PatternConstraint.""" + if constraint.description: + return constraint.description + doc = type(constraint).__doc__ + if doc: + return doc.strip().split("\n")[0].rstrip(".") + name = type(constraint).__name__.removesuffix("Constraint") + return re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", name).lower() + + +_ConstraintHandler = Callable[[Any, str | None], ExpressionDescriptor | None] + + +_BOUND_ATTRS = ("ge", "gt", "le", "lt") + +_FLOAT_BASE_TYPES = frozenset({"float", "float32", "float64"}) + + +def _dispatch_bounds( + constraint: Ge | Gt | Le | Lt | Interval, + base_type: str | None, +) -> ExpressionDescriptor: + """Extract bound kwargs from an annotated_types constraint. + + Coerces integer bound values to float on float-typed columns so + that generated test mutations match the Spark DoubleType column. 
+ """ + is_float = base_type in _FLOAT_BASE_TYPES + kwargs: list[tuple[str, object]] = [] + for attr in _BOUND_ATTRS: + value = getattr(constraint, attr, None) + if value is not None: + if is_float and isinstance(value, int) and not isinstance(value, bool): + value = float(value) + kwargs.append((attr, value)) + return ExpressionDescriptor(function="check_bounds", kwargs=tuple(kwargs)) + + +def _dispatch_pattern( + constraint: PatternConstraint, + _base_type: str | None, +) -> ExpressionDescriptor: + """Map a PatternConstraint (or subclass) to a check_pattern descriptor.""" + return ExpressionDescriptor( + function="check_pattern", + args=(_normalize_anchor(constraint.pattern.pattern),), + constraint_type=type(constraint), + label=_pattern_label(constraint), + check_name=_pattern_check_name(constraint), + ) + + +# Ordered: the first matching entry wins, so any subclass relationship +# between keys must place the more-specific class first. StrippedConstraint +# subclasses PatternConstraint, so it must appear before the PatternConstraint +# fallback entry. 
+_CONSTRAINT_DISPATCH: list[tuple[type | tuple[type, ...], _ConstraintHandler]] = [ + ((Reference, Strict), lambda _c, _bt: None), + ((Ge, Gt, Le, Lt, Interval), _dispatch_bounds), + ( + ArrayMinLen, + lambda c, _bt: ExpressionDescriptor( + function="check_array_min_length", args=(c.min_length,) + ), + ), + ( + ArrayMaxLen, + lambda c, _bt: ExpressionDescriptor( + function="check_array_max_length", args=(c.max_length,) + ), + ), + ( + ScalarMinLen, + lambda c, _bt: ExpressionDescriptor( + function="check_string_min_length", args=(c.min_length,) + ), + ), + ( + ScalarMaxLen, + lambda c, _bt: ExpressionDescriptor( + function="check_string_max_length", args=(c.max_length,) + ), + ), + ( + StrippedConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_stripped"), + ), + ( + JsonPointerConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_json_pointer"), + ), + (PatternConstraint, _dispatch_pattern), + # check_struct_unique uses Spark's array_distinct: structural equality on + # whole elements, against the raw stored values. Pydantic's + # UniqueItemsConstraint on list[HttpUrl] compares *normalized* URLs + # (trailing-slash, lowercase host/scheme), so it catches duplicates that + # differ only in normalization. We accept that difference -- the PySpark + # check catches exact duplicates only. + ( + UniqueItemsConstraint, + lambda _c, _bt: ExpressionDescriptor(function="check_struct_unique"), + ), + ( + GeometryTypeConstraint, + lambda c, _bt: ExpressionDescriptor( + function="check_geometry_type", args=tuple(c.allowed_types) + ), + ), +] + + +def dispatch_constraint( + constraint: object, + *, + base_type: str | None = None, +) -> ExpressionDescriptor | None: + """Map a constraint object to an expression descriptor. + + Parameters + ---------- + constraint + The constraint object from `ConstraintSource.constraint`. 
Length + constraints arrive as `ArrayMinLen` / `ArrayMaxLen` / + `ScalarMinLen` / `ScalarMaxLen` -- the typed variants emitted + by `extraction.type_analyzer.attach_constraints`. + base_type + The field's terminal-scalar base type, used to detect float + bounds. + + Returns + ------- + ExpressionDescriptor or None + `None` for explicitly skipped constraints (Reference, Strict). + + Raises + ------ + TypeError + For unrecognized constraint types. + """ + for key_types, handler in _CONSTRAINT_DISPATCH: + if isinstance(constraint, key_types): + return handler(constraint, base_type) + raise TypeError(f"Unhandled constraint type: {type(constraint).__name__}") + + +def dispatch_newtype(newtype_name: str) -> tuple[ExpressionDescriptor, ...] | None: + """Look up a NewType-level expression override. + + Returns None when the NewType decomposes normally into + individual constraint dispatches. + """ + return _NEWTYPE_DISPATCH.get(newtype_name) + + +def dispatch_base_type(base_type: str) -> tuple[ExpressionDescriptor, ...] | None: + """Look up a base-type-level expression override. + + Handles primitive types like HttpUrl and EmailStr that carry no + Annotated constraints but need semantic validation functions. + """ + return _BASE_TYPE_DISPATCH.get(base_type) + + +@dataclass(frozen=True, slots=True) +class RequireAnyOf: + """Descriptor for `check_require_any_of`: at least one field must be set.""" + + field_names: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class RadioGroup: + """Descriptor for `check_radio_group`: exactly one boolean field must be True.""" + + field_names: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class RequireIf: + """Descriptor for `check_require_if`: field required when condition holds.""" + + field_names: tuple[str, ...] + condition: Condition + + +@dataclass(frozen=True, slots=True) +class ForbidIf: + """Descriptor for `check_forbid_if`: field must be absent when condition holds. 
+ + `field_shapes` pairs non-string field names with their `FieldShape` so + the test renderer can emit type-appropriate `fill_values` literals. + Stored as a tuple of `(name, shape)` pairs so the descriptor is + hashable; consumers convert with `dict()` when they need mapping + access. String fields are omitted because the renderer defaults to + `""` for them without needing the shape. + """ + + field_names: tuple[str, ...] + condition: Condition + field_shapes: tuple[tuple[str, FieldShape], ...] + + +@dataclass(frozen=True, slots=True) +class MinFieldsSet: + """Descriptor for `check_min_fields_set`: at least `count` fields set. + + Matches Pydantic's `model_fields_set` semantics: required fields are + always set (the constructor requires them) and contribute to the count + alongside any explicitly-set optional fields. Both kinds are passed to + the runtime check. + """ + + field_names: tuple[str, ...] + count: int + + +ModelConstraintDescriptor: TypeAlias = ( + RequireAnyOf | RadioGroup | RequireIf | ForbidIf | MinFieldsSet +) +"""One variant per model-constraint kind. + +Each variant carries only the fields meaningful for that constraint; +`ForbidIf` adds `field_shapes` for non-string targets so the test +renderer can emit type-appropriate `fill_values` literals. +""" + + +def _first_required_leaf(field_spec: FieldSpec) -> str | None: + """Return the name of the first required field in a MODEL-kind `FieldSpec`. + + Returns `None` for fields whose terminal is anything but a + `ModelRef` (scalars, arrays, `UnionRef`s, etc.). The + `RequireAnyOf` unwrapping uses this to drill into a struct's + required leaf when one exists; non-struct terminals leave the + field name unwrapped, which is the correct behavior for scalars + and arrays. `UnionRef` returns `None` because picking one arm's + required leaf would silently bias the constraint to that arm. 
+ """ + if has_array_layer(field_spec.shape): + return None + terminal = terminal_of(field_spec.shape) + if not isinstance(terminal, ModelRef): + return None + for sub in terminal.model.fields: + if sub.is_required: + return sub.name + return None + + +def _unwrap_require_any_of_names( + field_names: tuple[str, ...], + by_name: dict[str, FieldSpec], +) -> tuple[str, ...]: + """Replace struct field names with their first required leaf path.""" + result = [] + for name in field_names: + field_spec = by_name.get(name) + leaf = _first_required_leaf(field_spec) if field_spec is not None else None + result.append(f"{name}.{leaf}" if leaf is not None else name) + return tuple(result) + + +def _is_compound_shape(shape: FieldShape) -> bool: + """Whether `shape` needs a non-`{}` fill value in mutation helpers.""" + if has_array_layer(shape): + return True + return isinstance(terminal_of(shape), ModelRef) + + +def forbid_if_field_shapes( + field_names: tuple[str, ...], + shape_by_name: Mapping[str, FieldShape], +) -> tuple[tuple[str, FieldShape], ...]: + """Build the `field_shapes` pairs for non-string ForbidIf targets. + + Keeps only fields whose shape is compound (an array or a model + reference); string fields are omitted because the test renderer + defaults their fill value to `""` without needing the shape. + """ + return tuple( + (name, shape) + for name in field_names + if (shape := shape_by_name.get(name)) is not None and _is_compound_shape(shape) + ) + + +def dispatch_model_constraint( + constraint: object, + fields: list[FieldSpec], +) -> tuple[ModelConstraintDescriptor, ...]: + """Map a model-level constraint to fully constructed typed descriptors. + + Parameters + ---------- + constraint + The model constraint object. + fields + All fields of the model. Branches consult them as needed -- + `RequireAnyOf` and `ForbidIf` index by name, `MinFieldsSet` + enumerates every field (required and optional). 
+ + Returns + ------- + tuple of ModelConstraintDescriptor + Empty tuple for explicitly skipped constraints (NoExtraFields). + Most kinds return a single-element tuple. Multi-field + `@require_if` / `@forbid_if` split into one descriptor per + target field because the runtime check functions take a single + target column each. + + Raises + ------ + TypeError + For unrecognized constraint types. + """ + match constraint: + case NoExtraFieldsConstraint(): + return () + case RequireAnyOfConstraint(): + unwrapped = _unwrap_require_any_of_names( + constraint.field_names, {f.name: f for f in fields} + ) + return (RequireAnyOf(field_names=unwrapped),) + case RadioGroupConstraint(): + return (RadioGroup(field_names=constraint.field_names),) + case RequireIfConstraint(): + # `@require_if(["a", "b"], cond)` means "all of a, b required when + # cond" -- one runtime check per field, since check_require_if + # takes a single target column. + return tuple( + RequireIf(field_names=(name,), condition=constraint.condition) + for name in constraint.field_names + ) + case ForbidIfConstraint(): + shapes_by_field = forbid_if_field_shapes( + constraint.field_names, + {f.name: f.shape for f in fields}, + ) + per_field_shapes = dict(shapes_by_field) + return tuple( + ForbidIf( + field_names=(name,), + condition=constraint.condition, + field_shapes=( + ((name, per_field_shapes[name]),) + if name in per_field_shapes + else () + ), + ) + for name in constraint.field_names + ) + case MinFieldsSetConstraint(): + all_names = tuple(f.name for f in fields) + return (MinFieldsSet(field_names=all_names, count=constraint.count),) + case _: + raise TypeError(f"Unhandled model constraint: {type(constraint).__name__}") + + +_MODEL_CONSTRAINT_DISPATCH: dict[type[ModelConstraintDescriptor], tuple[str, str]] = { + RequireAnyOf: ("check_require_any_of", "mutate_require_any_of"), + RadioGroup: ("check_radio_group", "mutate_radio_group"), + RequireIf: ("check_require_if", "mutate_require_if"), + ForbidIf: 
("check_forbid_if", "mutate_forbid_if"), + MinFieldsSet: ("check_min_fields_set", "mutate_min_fields_set"), +} + + +def model_constraint_function(d: ModelConstraintDescriptor) -> str: + """Map a `ModelConstraintDescriptor` variant to its runtime function name.""" + return _MODEL_CONSTRAINT_DISPATCH[type(d)][0] + + +def model_mutation_function(d: ModelConstraintDescriptor) -> str: + """Map a `ModelConstraintDescriptor` variant to its test mutation helper.""" + return _MODEL_CONSTRAINT_DISPATCH[type(d)][1] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py new file mode 100644 index 000000000..a6033c0db --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/pipeline.py @@ -0,0 +1,256 @@ +"""PySpark generation pipeline: produce modules without I/O. + +Orchestrates check building, schema building, and rendering into +GeneratedModule objects. The caller decides what to do with them (write +to disk, stream to stdout, etc.). 
+""" + +from __future__ import annotations + +from collections.abc import Iterable, Sequence +from dataclasses import dataclass +from pathlib import PurePosixPath + +from overture.schema.system.case import to_snake_case +from overture.schema.system.discovery import entry_point_to_path +from overture.schema.system.primitive import GeometryType + +from ..extraction.specs import FeatureSpec, UnionSpec +from .check_builder import build_checks +from .check_ir import Check, ModelCheck +from .renderer import render_feature_module +from .schema_builder import build_schema +from .test_data.base_row import ( + generate_arm_rows, + generate_base_row, + generate_populated_arm_rows, + generate_populated_row, +) +from .test_renderer import render_test_module + +__all__ = [ + "GeneratedModule", + "PipelineOutput", + "generate_pyspark_module", + "generate_pyspark_modules", +] + + +@dataclass(frozen=True, slots=True) +class GeneratedModule: + """A generated Python module with its content and output path.""" + + content: str + path: PurePosixPath + + +@dataclass(frozen=True, slots=True) +class PipelineOutput: + """PySpark modules emitted by the pipeline, split by output tree. + + The source and test trees are written to separate directories and + mirror the same relative layout, so a path is meaningful only + relative to its tree. Splitting at the boundary keeps each tree + self-contained -- in practice the overlap today is just + `__init__.py`, but any path duplicated between trees would be + ambiguous in a single flat list. + """ + + source: list[GeneratedModule] + test: list[GeneratedModule] + + +_OUTPUT_PACKAGE = "overture.schema.pyspark.expressions.generated" + +# Dots in `from ...x import y` from a generated test module to reach +# `tests/`: one to leave the file's package, one to leave `generated/`. +# Each additional directory component under `generated/` adds another. 
+_DOTS_FROM_TEST_TO_TESTS_ROOT = 2 + + +def _support_prefix(directory: PurePosixPath) -> str: + """Relative-import prefix used by generated test modules to reach `_support`. + + Each leading dot climbs one package level; the first two dots step + out of `tests/generated/` to `tests/`, and an extra dot is appended + for every component of *directory* under `generated/`. + """ + return "." * (len(directory.parts) + _DOTS_FROM_TEST_TO_TESTS_ROOT) + + +def _require_entry_point(spec: FeatureSpec) -> str: + """Return *spec*'s entry point or raise if it's missing.""" + if spec.entry_point is None: + msg = f"FeatureSpec {spec.name!r} has no entry_point." + raise ValueError(msg) + return spec.entry_point + + +def _directory_and_feature_name(spec: FeatureSpec) -> tuple[PurePosixPath, str]: + """Return the output directory and snake_case feature name for a spec. + + Both halves derive from the entry-point's class name so filenames + and symbol names stay in sync with what the runtime registry + discovers. + """ + directory, cls_name = entry_point_to_path(_require_entry_point(spec)) + return directory, to_snake_case(cls_name) + + +def _extract_geometry_types( + field_checks: list[Check], +) -> tuple[GeometryType, ...]: + """Collect allowed geometry types from every `check_geometry_type` descriptor. + + A feature may carry multiple `check_geometry_type` descriptors -- e.g. + one per union arm with a distinct allowed-types set. The result is the + union of all of them, sorted by name for deterministic output. + """ + seen: set[GeometryType] = set() + for check in field_checks: + for desc in check.descriptors: + if desc.function != "check_geometry_type": + continue + for arg in desc.args: + if isinstance(arg, GeometryType): + seen.add(arg) + return tuple(sorted(seen, key=lambda g: g.name)) + + +def _init_modules(paths: Iterable[PurePosixPath]) -> list[GeneratedModule]: + """Emit empty `__init__.py` for every directory of `paths`. 
def generate_pyspark_module(spec: FeatureSpec) -> GeneratedModule:
    """Generate the PySpark validation module for a single feature spec.

    Parameters
    ----------
    spec
        The extracted feature spec to generate from.

    Returns
    -------
    GeneratedModule
        The rendered module text together with its output path.
    """
    checks = build_checks(spec)
    return _render_module(spec, checks)


def generate_pyspark_modules(
    feature_specs: Sequence[FeatureSpec],
) -> PipelineOutput:
    """Generate source and test modules for every feature spec.

    Parameters
    ----------
    feature_specs
        Extracted feature specs to generate from.

    Returns
    -------
    PipelineOutput
        The feature modules and the test modules, each list followed by
        the `__init__.py` modules produced for its own paths.
    """
    # Checks are built once per spec and shared by both renderers.
    built = [(spec, build_checks(spec)) for spec in feature_specs]
    source_modules = [_render_module(spec, checks) for spec, checks in built]
    test_modules = [
        module
        for spec, checks in built
        for module in _render_test_modules(spec, checks)
    ]
    source_inits = _init_modules(module.path for module in source_modules)
    test_inits = _init_modules(module.path for module in test_modules)
    return PipelineOutput(
        source=source_modules + source_inits,
        test=test_modules + test_inits,
    )
def _render_module(
    spec: FeatureSpec,
    checks: tuple[list[Check], list[ModelCheck]],
) -> GeneratedModule:
    """Render the feature module for *spec* from its already-built checks."""
    field_checks, model_checks = checks
    schema_fields = build_schema(spec)
    geometry_types = _extract_geometry_types(field_checks)
    directory, feature_name = _directory_and_feature_name(spec)
    return GeneratedModule(
        content=render_feature_module(
            feature_name,
            field_checks,
            model_checks,
            schema_fields,
            geometry_types,
            entry_point=_require_entry_point(spec),
            partitions=spec.partitions,
        ),
        path=directory / f"{feature_name}.py",
    )


def _select_arm_rows(
    spec: FeatureSpec,
) -> dict[str | None, tuple[dict[str, object], dict[str, object]]]:
    """Pair up the sparse and populated base rows for each test module.

    A `UnionSpec` with a discriminator field yields one entry per arm,
    keyed by the arm; any other spec yields a single entry under `None`.
    """
    if not (isinstance(spec, UnionSpec) and spec.discriminator_field):
        return {None: (generate_base_row(spec), generate_populated_row(spec))}
    sparse_by_arm = generate_arm_rows(spec)
    populated_by_arm = generate_populated_arm_rows(spec)
    return {arm: (sparse_by_arm[arm], populated_by_arm[arm]) for arm in sparse_by_arm}


def _render_test_modules(
    spec: FeatureSpec,
    checks: tuple[list[Check], list[ModelCheck]],
) -> list[GeneratedModule]:
    """Render one test module per entry returned by `_select_arm_rows`.

    A truthy arm key is appended to the file name as `_<arm>`; the arm is
    also handed to `render_test_module` along with the full check lists.
    """
    field_checks, model_checks = checks
    directory, feature_name = _directory_and_feature_name(spec)
    expression_import = ".".join((_OUTPUT_PACKAGE, *directory.parts, feature_name))
    support_prefix = _support_prefix(directory)

    generated: list[GeneratedModule] = []
    for arm, rows in _select_arm_rows(spec).items():
        sparse_row, populated_row = rows
        suffix = f"_{arm}" if arm else ""
        content = render_test_module(
            feature_name,
            field_checks,
            model_checks,
            base_row_sparse=sparse_row,
            base_row_populated=populated_row,
            arm=arm,
            spec=spec,
            expression_import=expression_import,
            support_prefix=support_prefix,
        )
        generated.append(
            GeneratedModule(
                content=content,
                path=directory / f"test_{feature_name}{suffix}.py",
            )
        )
    return generated
+ + For union specs with multiple discriminator arms, produces one + test module per arm. Each arm's test includes the field and + model checks tagged with that arm (or untagged), filtered by + `render_test_module`. + """ + field_checks, model_checks = checks + directory, feature_name = _directory_and_feature_name(spec) + expression_import = ".".join([_OUTPUT_PACKAGE, *directory.parts, feature_name]) + support_prefix = _support_prefix(directory) + + modules: list[GeneratedModule] = [] + for arm, (base_row_sparse, base_row_populated) in _select_arm_rows(spec).items(): + suffix = f"_{arm}" if arm else "" + modules.append( + GeneratedModule( + content=render_test_module( + feature_name, + field_checks, + model_checks, + base_row_sparse=base_row_sparse, + base_row_populated=base_row_populated, + arm=arm, + spec=spec, + expression_import=expression_import, + support_prefix=support_prefix, + ), + path=directory / f"test_{feature_name}{suffix}.py", + ) + ) + return modules diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py new file mode 100644 index 000000000..9728a499a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -0,0 +1,647 @@ +"""Render Check / ModelCheck IR into complete Python modules.""" + +from __future__ import annotations + +import re +from collections.abc import Mapping +from enum import Enum + +from overture.schema.system.field_path import ( + ArrayPath, + FieldPath, + ScalarPath, +) +from overture.schema.system.model_constraint import Condition +from overture.schema.system.primitive import GeometryType + +from ._render_common import ( + check_name, + compute_label_suffixes, + disambiguate, + field_label, + jinja_env, + model_constraint_field_label, + parse_field_eq, + py_literal, + tuple_literal, +) +from .check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, 
+) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, + model_constraint_function, +) +from .schema_builder import SHARED_TYPE_REFS, SchemaField + +__all__ = [ + "render_feature_module", +] + +# Descriptor function names that resolve to helpers from the +# `column_patterns` runtime module (rather than `constraint_expressions`). +# Used to route imports to the correct module. Distinct from +# `_render_common.COLUMN_LEVEL_FUNCTIONS`, which classifies checks that +# emit one Check per field rather than per array element. +_COLUMN_PATTERN_HELPERS = frozenset( + {"array_check", "nested_array_check", "check_struct_unique"} +) + +_SHARED_STRUCT_REFS = frozenset(SHARED_TYPE_REFS.values()) + +_SPARK_TYPES = frozenset( + { + "ArrayType", + "BinaryType", + "BooleanType", + "ByteType", + "DateType", + "DoubleType", + "FloatType", + "IntegerType", + "LongType", + "MapType", + "ShortType", + "StringType", + "StructField", + "StructType", + "TimestampType", + } +) + + +# Collapses runs of `.`, `[`, `]`, `_` to a single `_` for identifier sanitization. 
_PATH_SEPARATOR_RUN = re.compile(r"[.\[\]_]+")


def _sanitize_field_name(field: str) -> str:
    """Turn an encoded field-path string into an identifier fragment.

    Runs of `.`, `[`, `]` and `_` become a single `_`, and underscores at
    either end are dropped.
    """
    collapsed = _PATH_SEPARATOR_RUN.sub("_", field)
    return collapsed.strip("_")


def _render_condition_desc(condition: Condition) -> str:
    """Describe a Condition as `field = value` / `field != value` text.

    Enum values are shown by their `.value`. Raises `TypeError` when
    `parse_field_eq` does not recognise the condition.
    """
    parsed = parse_field_eq(condition)
    if parsed is None:
        raise TypeError(f"Unhandled condition type: {type(condition).__name__}")
    raw = parsed.value
    shown = raw.value if isinstance(raw, Enum) else raw
    comparator = "!=" if parsed.negated else "="
    return f"{parsed.field_name} {comparator} {shown!r}"


def _render_condition(
    condition: Condition, *, in_array: bool = False, var: str = "el"
) -> str:
    """Render a Condition as the source of a PySpark comparison.

    Raises `TypeError` when `parse_field_eq` does not recognise the
    condition.
    """
    parsed = parse_field_eq(condition)
    if parsed is None:
        raise TypeError(f"Unhandled condition type: {type(condition).__name__}")
    lhs = _render_field_ref(parsed.field_name, in_array=in_array, var=var)
    comparator = "!=" if parsed.negated else "=="
    rhs = py_literal(parsed.value)
    return f"{lhs} {comparator} {rhs}"


def _render_field_ref(
    field_name: str,
    *,
    in_array: bool,
    struct_path: tuple[str, ...] = (),
    var: str = "el",
) -> str:
    """Render a reference to a field, either top-level or inside an element.

    Outside an array the dotted name is passed whole to `F.col(...)`.
    Inside an array the name is split on `.` and appended to
    `struct_path`, producing a bracket accessor rooted at *var*.
    """
    if in_array:
        segments = (*struct_path, *field_name.split("."))
        return _element_accessor(var, segments)
    return f'F.col("{field_name}")'


def _geometry_type_literal(g: GeometryType) -> str:
    """Spell a geometry type as `GeometryType.NAME` source text.

    `repr(g)` yields the angle-bracketed `<GeometryType.NAME: ...>` form,
    which is not a valid expression.
    """
    member_name = g.name
    return f"GeometryType.{member_name}"
# Check functions whose first positional argument is a list of allowed
# values. Descriptors carry it as a tuple; the renderer emits a list literal
# for that one position.
_LIST_FIRST_ARG_FUNCTIONS = frozenset({"check_enum"})


def _render_arg(arg: object) -> str:
    """Render one descriptor argument as Python source."""
    return (
        _geometry_type_literal(arg)
        if isinstance(arg, GeometryType)
        else py_literal(arg)
    )


def _render_expr_call(
    desc: ExpressionDescriptor,
    col_expr: str,
) -> str:
    """Render the call for *desc* with *col_expr* as its first argument.

    Positional args follow, then `key=value` kwargs, then `label=` when
    the descriptor has a label.
    """
    unwrap_first = desc.function in _LIST_FIRST_ARG_FUNCTIONS
    rendered = [col_expr]
    for position, arg in enumerate(desc.args):
        if position == 0 and unwrap_first and isinstance(arg, tuple):
            rendered.append(py_literal(list(arg)))
        else:
            rendered.append(_render_arg(arg))
    rendered.extend(f"{key}={py_literal(value)}" for key, value in desc.kwargs)
    if desc.label is not None:
        rendered.append(f"label={py_literal(desc.label)}")
    return f"{desc.function}({', '.join(rendered)})"


def _element_accessor(var: str, path: tuple[str, ...]) -> str:
    """Append one `["segment"]` subscript to *var* per entry of *path*."""
    subscripts = [f'["{segment}"]' for segment in path]
    return var + "".join(subscripts)


def _iter_var_name(idx: int, total: int) -> str:
    """Name the lambda variable for iteration frame *idx* out of *total*.

    Frame 0 is always `"el"`, the last frame of a nested iteration is
    `"inner"`, and frames in between are `"el2"`, `"el3"`, ...
    """
    if idx == 0:
        return "el"
    return "inner" if idx == total - 1 else f"el{idx + 1}"


def _wrap_element_gate(body: str, var: str, gate_parts: tuple[str, ...]) -> str:
    """Guard *body* with `F.when(<var accessor>.isNotNull(), ...)`."""
    gate = _element_accessor(var, gate_parts)
    return f"F.when({gate}.isNotNull(), {body})"


def _wrap_in_array_iteration(
    column_path: str,
    inner_struct_paths: tuple[tuple[str, ...], ...],
    body: str,
    *,
    gate_parts: tuple[str, ...] = (),
) -> str:
    """Nest *body* inside one array-iteration call per iteration level.

    There is one level for `column_path` plus one per entry of
    `inner_struct_paths`; each entry is the accessor from the enclosing
    element to the next array. The innermost level uses `array_check`,
    all others `nested_array_check`. A non-empty `gate_parts` wraps the
    outermost lambda body in an element gate.
    """
    depth = 1 + len(inner_struct_paths)
    loop_vars = [_iter_var_name(level, depth) for level in range(depth)]
    # sources[level] is what level `level` iterates over: the quoted column
    # first, then an accessor built from the enclosing level's variable.
    sources = [f'"{column_path}"']
    sources.extend(
        _element_accessor(loop_vars[level], struct_path)
        for level, struct_path in enumerate(inner_struct_paths)
    )

    # Assemble from the innermost call outwards.
    expr = body
    for level in range(depth - 1, -1, -1):
        helper = "array_check" if level == depth - 1 else "nested_array_check"
        if level == 0 and gate_parts:
            expr = _wrap_element_gate(expr, loop_vars[0], gate_parts)
        expr = f"{helper}({sources[level]}, lambda {loop_vars[level]}: {expr})"
    return expr


def _render_array_check_expr(
    target: ArrayPath,
    desc: ExpressionDescriptor,
    *,
    element_guards: tuple[ElementGuard, ...] = (),
    gate_parts: tuple[str, ...] = (),
) -> str:
    """Render the array-iteration expression for *desc* at an array target.

    The descriptor call is made on the leaf accessor of the innermost
    iteration variable. Element guards are applied around that call, also
    on the innermost variable, so each guard's discriminator is looked up
    on the innermost element.
    """
    struct_paths = target.iter_struct_paths
    depth = 1 + len(struct_paths)
    innermost = _iter_var_name(depth - 1, depth)

    guarded = _render_expr_call(desc, _element_accessor(innermost, target.leaf))
    # The first guard in the tuple ends up outermost.
    for guard in element_guards[::-1]:
        guarded = _render_variant_expr(
            guarded, guard.values, guard.discriminator, in_array=True, var=innermost
        )

    return _wrap_in_array_iteration(
        target.column_path, struct_paths, guarded, gate_parts=gate_parts
    )
def _render_variant_expr(
    inner_expr: str,
    variant_values: tuple[str, ...],
    discriminator_field: str,
    *,
    in_array: bool = False,
    var: str = "el",
) -> str:
    """Gate *inner_expr* on the discriminator being one of *variant_values*.

    The discriminator is read from the element variable when `in_array`
    is set, otherwise from a top-level column.
    """
    allowed = py_literal(list(variant_values))
    if in_array:
        discriminator = f'{var}["{discriminator_field}"]'
    else:
        discriminator = f'F.col("{discriminator_field}")'
    return f"F.when({discriminator}.isin({allowed}), {inner_expr})"


def _render_column_gate(expr: str, gate: FieldPath) -> str:
    """Gate *expr* on the column named by *gate* being non-null."""
    gate_column = f'F.col("{gate}")'
    return f"F.when({gate_column}.isNotNull(), {expr})"


def _model_check_func_name(check: ModelCheck, idx: int) -> str:
    """Name the private function generated for a model-constraint check.

    Non-array targets give `_{fn}_{idx}_check`. Array targets add a
    sanitized prefix: the full encoded path when the target has inner
    struct paths or a leaf, otherwise just the column path.
    """
    fn = model_constraint_function(check.descriptor)
    target = check.target
    if not isinstance(target, ArrayPath):
        return f"_{fn}_{idx}_check"
    is_nested = bool(target.iter_struct_paths or target.leaf)
    prefix = _sanitize_field_name(str(target) if is_nested else target.column_path)
    return f"_{prefix}_{fn}_{idx}_check"
Array targets prefix the + column path -- using the full encoded `FieldPath` when the check is + reached via inner iteration or leaf struct navigation, otherwise the + outer column name alone -- so collisions across nested contexts get + distinct identifiers. + """ + fn = model_constraint_function(check.descriptor) + match check.target: + case ArrayPath() as target: + has_nested_path = bool(target.iter_struct_paths) or bool(target.leaf) + prefix_source = str(target) if has_nested_path else target.column_path + prefix = _sanitize_field_name(prefix_source) + return f"_{prefix}_{fn}_{idx}_check" + case _: + return f"_{fn}_{idx}_check" + + +def _root_field_for_target(target: FieldPath) -> str | None: + """Top-level schema column for a Check/ModelCheck target. + + Returns the first segment's name, or `None` for an empty path. + """ + return target.segments[0].name if target.segments else None + + +def _check_shape_token(target: FieldPath) -> str: + """Token naming the runtime `CheckShape` member for a target path. + + Mirrors the member names of `overture.schema.pyspark.check.CheckShape`; + the check-function template prefixes `CheckShape.` to the result. An + `ArrayPath` target renders to an `array` expression, every + other path to a nullable string. + """ + return "ARRAY" if isinstance(target, ArrayPath) else "SCALAR" + + +def _render_check_expr(check: Check, descriptor_idx: int) -> str: + """Render the PySpark expression for one descriptor of `check`.""" + desc = check.descriptors[descriptor_idx] + column_guards = tuple(g for g in check.guards if isinstance(g, ColumnGuard)) + element_guards = tuple(g for g in check.guards if isinstance(g, ElementGuard)) + + match check.target: + case ScalarPath(): + expr = _render_expr_call(desc, f'F.col("{check.target}")') + if desc.gate: + expr = _render_column_gate(expr, desc.gate) + case ArrayPath(): + gate_parts: tuple[str, ...] 
= () + if desc.gate is not None: + # check_builder zeros the nullable gate when descending into + # a list (see `_recurse_into_model`), so a gate paired with + # an ArrayPath target should never occur today. If it does, + # the column-level fallback below would silently hide a + # codegen bug -- raise instead. + element_relative = check.target.element_relative_gate(desc.gate) + if element_relative is None: + raise AssertionError( + f"ArrayPath target with column-level gate is not " + f"produced by check_builder (gate={desc.gate!r}, " + f"target={check.target!r})" + ) + gate_parts = element_relative + expr = _render_array_check_expr( + check.target, + desc, + element_guards=element_guards, + gate_parts=gate_parts, + ) + case _: + raise TypeError( + f"Unhandled FieldPath variant: {type(check.target).__name__}" + ) + + for guard in reversed(column_guards): + expr = _render_variant_expr(expr, guard.values, guard.discriminator) + return expr + + +def _check_function_context( + *, target: FieldPath, func_name: str, field: str, name: str, expr: str +) -> dict[str, object]: + """Assemble the template context dict for one check function.""" + return { + "func_name": func_name, + "field": field, + "check_name": name, + "expr": expr, + "shape": _check_shape_token(target), + "root_field": _root_field_for_target(target), + } + + +def _render_check_function_context( + check: Check, func_name: str, descriptor_idx: int = 0 +) -> dict[str, object]: + """Build the template context for a per-field check function from a Check.""" + desc = check.descriptors[descriptor_idx] + return _check_function_context( + target=check.target, + func_name=func_name, + field=field_label(check), + name=check_name(desc.function, desc.check_name), + expr=_render_check_expr(check, descriptor_idx), + ) + + +def _render_model_constraint_function_context( + check: ModelCheck, idx: int, label_suffix: str +) -> dict[str, object]: + """Build the template context for a model-constraint check function.""" + desc 
= check.descriptor + target = check.target + match target: + case ArrayPath(): + in_array = True + var = "inner" if target.iter_struct_paths else "el" + struct_path: tuple[str, ...] = target.leaf + case _: + in_array = False + var, struct_path = "el", () + + def _field_ref(field_name: str) -> str: + return _render_field_ref( + field_name, in_array=in_array, struct_path=struct_path, var=var + ) + + fn = model_constraint_function(desc) + + def _cols_and_names() -> tuple[str, str]: + cols_list = "[" + ", ".join(_field_ref(f) for f in desc.field_names) + "]" + names_list = py_literal(list(desc.field_names)) + return cols_list, names_list + + match desc: + case RequireAnyOf() | RadioGroup(): + cols_list, names_list = _cols_and_names() + inner_expr = f"{fn}({cols_list}, {names_list})" + case RequireIf() | ForbidIf(): + target_name = desc.field_names[0] + condition_expr = _render_condition( + desc.condition, in_array=in_array, var=var + ) + condition_desc = _render_condition_desc(desc.condition) + target_ref = _field_ref(target_name) + inner_expr = ( + f"{fn}({target_ref}, {condition_expr}, {py_literal(condition_desc)})" + ) + case MinFieldsSet(): + cols_list, names_list = _cols_and_names() + inner_expr = f"{fn}({cols_list}, {names_list}, {desc.count})" + case _: + raise TypeError(f"Unhandled model constraint descriptor: {desc!r}") + + if isinstance(target, ArrayPath): + expr = _wrap_in_array_iteration( + target.column_path, target.iter_struct_paths, inner_expr + ) + else: + expr = inner_expr + + return _check_function_context( + target=target, + func_name=_model_check_func_name(check, idx), + field=model_constraint_field_label(check, label_suffix), + name=check_name(fn), + expr=expr, + ) + + +def _collect_constraint_expr_imports( + field_checks: list[Check], + model_checks: list[ModelCheck], +) -> set[str]: + """Collect all constraint_expressions function names needed. 
+ + Field-descriptor names go through a `_COLUMN_PATTERN_HELPERS` + filter so column-pattern helpers route to their own import bucket. + Model-constraint function names (`check_require_any_of`, + `check_radio_group`, ...) are disjoint from that set, so they pass + through unfiltered. + """ + names: set[str] = { + desc.function + for check in field_checks + for desc in check.descriptors + if desc.function not in _COLUMN_PATTERN_HELPERS + } + for mc in model_checks: + names.add(model_constraint_function(mc.descriptor)) + return names + + +def _needs_geometry_type_import(field_checks: list[Check]) -> bool: + """Return True when any descriptor arg is a GeometryType instance.""" + for check in field_checks: + for desc in check.descriptors: + if any(isinstance(a, GeometryType) for a in desc.args): + return True + return False + + +def _pattern_imports_for(target: FieldPath) -> set[str]: + """Column-pattern helpers needed to iterate `target`.""" + match target: + case ArrayPath(): + names = {"array_check"} + if target.iter_struct_paths: + names.add("nested_array_check") + return names + case _: + return set() + + +def _collect_column_pattern_imports( + field_checks: list[Check], + model_checks: list[ModelCheck], +) -> set[str]: + """Collect column_patterns function names needed.""" + names: set[str] = set() + for check in field_checks: + names |= _pattern_imports_for(check.target) + for desc in check.descriptors: + if desc.function in _COLUMN_PATTERN_HELPERS: + names.add(desc.function) + for mc in model_checks: + names |= _pattern_imports_for(mc.target) + return names + + +_IDENTIFIER_TOKEN = re.compile(r"[A-Z][A-Za-z0-9_]*") + + +def _identifier_tokens(expr: str) -> set[str]: + """Tokenize a Spark type expression into capitalized identifiers.""" + return set(_IDENTIFIER_TOKEN.findall(expr)) + + +def _collect_spark_type_imports(schema_fields: list[SchemaField]) -> set[str]: + """Collect Spark type class names from schema field type expressions.""" + if not schema_fields: 
def _collect_schema_struct_imports(schema_fields: list[SchemaField]) -> set[str]:
    """Find the shared struct constants named in the field type expressions."""
    return {
        token
        for sf in schema_fields
        for token in _identifier_tokens(sf.type_expr) & _SHARED_STRUCT_REFS
    }


def _field_check_function_entries(
    field_checks: list[Check],
) -> list[dict[str, object]]:
    """Build one template context per descriptor of every field check.

    Function names start from the sanitized field label, gain the check
    name as a suffix when a check has several descriptors, and are then
    passed through `disambiguate`.
    """
    pending: list[tuple[Check, int]] = []
    candidate_names: list[str] = []
    for check in field_checks:
        label = field_label(check)
        has_many = len(check.descriptors) > 1
        for position, desc in enumerate(check.descriptors):
            if has_many:
                suffix = f"_{check_name(desc.function, desc.check_name)}"
            else:
                suffix = ""
            candidate_names.append(f"_{_sanitize_field_name(label)}{suffix}_check")
            pending.append((check, position))

    final_names = disambiguate(candidate_names)
    entries: list[dict[str, object]] = []
    for (check, position), func_name in zip(pending, final_names, strict=True):
        entries.append(_render_check_function_context(check, func_name, position))
    return entries


def _model_check_function_entries(
    model_checks: list[ModelCheck],
) -> list[dict[str, object]]:
    """Build one template context per model-level check."""
    suffixes = compute_label_suffixes(model_checks)
    entries: list[dict[str, object]] = []
    for position, model_check in enumerate(model_checks):
        entries.append(
            _render_model_constraint_function_context(
                model_check, position, suffixes[position]
            )
        )
    return entries


def render_feature_module(
    feature_name: str,
    field_checks: list[Check],
    model_checks: list[ModelCheck],
    schema_fields: list[SchemaField],
    geometry_types: tuple[GeometryType, ...] = (),
    *,
    entry_point: str = "tests.placeholder:Placeholder",
    partitions: Mapping[str, str] | None = None,
) -> str:
    """Render the full source of a feature's validation module.

    Gathers the import lists, the check-function contexts and the schema
    details, then renders them through the `feature_module.py.jinja2`
    template.
    """
    constraint_imports = sorted(
        _collect_constraint_expr_imports(field_checks, model_checks)
    )
    pattern_imports = sorted(
        _collect_column_pattern_imports(field_checks, model_checks)
    )
    spark_imports = sorted(_collect_spark_type_imports(schema_fields))
    struct_imports = sorted(_collect_schema_struct_imports(schema_fields))
    wants_geometry_type = _needs_geometry_type_import(field_checks) or bool(
        geometry_types
    )
    if geometry_types:
        geometry_literal = _render_geometry_types(geometry_types)
    else:
        geometry_literal = None

    # Field checks come first, model checks after.
    functions = _field_check_function_entries(field_checks)
    functions = functions + _model_check_function_entries(model_checks)

    context = {
        "feature_name": feature_name,
        "feature_title": feature_name.replace("_", " ").title(),
        "constraint_expr_fns": constraint_imports,
        "column_pattern_fns": pattern_imports,
        "spark_types": spark_imports,
        "schema_struct_refs": struct_imports,
        "geometry_type": wants_geometry_type,
        "check_functions": functions,
        "schema_const_name": f"{feature_name.upper()}_SCHEMA",
        "schema_fields": schema_fields,
        "geometry_types_literal": geometry_literal,
        "entry_point": entry_point,
        "partitions": dict(partitions) if partitions else {},
    }
    template = jinja_env().get_template("feature_module.py.jinja2")
    return template.render(**context)


def _render_geometry_types(geo: tuple[GeometryType, ...]) -> str:
    """Render *geo* as a tuple literal of `GeometryType.NAME` members.

    Members are spelled out through `_geometry_type_literal` and handed to
    `tuple_literal`.
    """
    return tuple_literal(_geometry_type_literal(member) for member in geo)
+ """ + return tuple_literal(_geometry_type_literal(g) for g in geo) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py new file mode 100644 index 000000000..194119145 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/schema_builder.py @@ -0,0 +1,183 @@ +"""Build StructType schema source from FeatureSpec field trees.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + +from ..extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + Scalar, + UnionRef, +) +from ..extraction.field_walk import terminal_scalar +from ..extraction.specs import FeatureSpec, FieldSpec, UnionSpec +from ..extraction.type_registry import get_type_mapping + +__all__ = [ + "SHARED_TYPE_REFS", + "SchemaField", + "build_schema", + "spark_type_rank", +] + +# Types whose base_type name maps to a _schema_structs.py StructType constant. +# Reserved for types the codegen cannot walk (BBox is a plain class, not a +# Pydantic BaseModel). Pydantic BaseModels are inlined. +SHARED_TYPE_REFS: dict[str, str] = { + "BBox": "BBOX_STRUCT", +} + +# Literal and Enum fields both serialize as strings in Parquet. +_STRING_FALLBACK = "StringType()" + + +@dataclass(frozen=True, slots=True) +class SchemaField: + """One field in the generated StructType. + + Parameters + ---------- + name + Column name. + type_expr + Spark type expression string (e.g. `"StringType()"`) or + a `_schema_structs.py` constant name. + """ + + name: str + type_expr: str + + +def _spark_for_base(base_type: str, source_type: type | None) -> str: + """Return a Spark type expression for a primitive base type. + + Tries `base_type` first, then falls back to `source_type.__name__`. + Returns `StringType()` when neither maps to a known Spark type. 
def _spark_for_scalar(scalar: Scalar) -> str:
    """Resolve a `Scalar` variant to a Spark type expression.

    Literal and "any" scalars are strings. For primitives, a base type
    listed in `SHARED_TYPE_REFS` resolves to its shared constant, an Enum
    source type resolves to a string, and everything else goes through
    `_spark_for_base`.
    """
    if isinstance(scalar, (LiteralScalar, AnyScalar)):
        return _STRING_FALLBACK
    base_type = scalar.base_type
    if base_type in SHARED_TYPE_REFS:
        return SHARED_TYPE_REFS[base_type]
    source_type = scalar.source_type
    if isinstance(source_type, type) and issubclass(source_type, Enum):
        return _STRING_FALLBACK
    return _spark_for_base(base_type, source_type)


# Spark numeric type widening precedence (higher rank = wider type).
_SPARK_TYPE_WIDENING: dict[str, int] = {
    "IntegerType()": 0,
    "LongType()": 1,
    "DoubleType()": 2,
}


def spark_type_rank(field_spec: FieldSpec) -> int:
    """Rank the field's resolved Spark type for widening purposes.

    Returns the rank from `_SPARK_TYPE_WIDENING`, or -1 when the field's
    terminal scalar is not a `Primitive` or its Spark type is not listed.
    """
    scalar = terminal_scalar(field_spec.shape)
    if isinstance(scalar, Primitive):
        spark_expr = _spark_for_base(scalar.base_type, scalar.source_type)
        return _SPARK_TYPE_WIDENING.get(spark_expr, -1)
    return -1


def _deduplicate_by_name(fields: list[FieldSpec]) -> list[FieldSpec]:
    """Reduce *fields* to one FieldSpec per name.

    When a name repeats, the later field replaces the kept one only if its
    `spark_type_rank` is strictly higher. Names keep the position of their
    first occurrence.
    """
    winners: dict[str, FieldSpec] = {}
    for candidate in fields:
        incumbent = winners.get(candidate.name)
        if incumbent is not None and spark_type_rank(candidate) <= spark_type_rank(
            incumbent
        ):
            continue
        winners[candidate.name] = candidate
    return list(winners.values())
Parquet stores one column per name, so the schema needs + exactly one entry. When two fields share a name, the one with the + wider Spark type wins (matching Parquet's type-widening behavior). + """ + seen: dict[str, FieldSpec] = {} + for f in fields: + existing = seen.get(f.name) + if existing is None or spark_type_rank(f) > spark_type_rank(existing): + seen[f.name] = f + return list(seen.values()) + + +def _struct_type_expr(fields: list[FieldSpec]) -> str: + """Build an inline `StructType([...])` expression from a list of fields.""" + parts = [ + f'StructField("{f.name}", {_shape_to_spark(f.shape)}, True)' for f in fields + ] + return f"StructType([{', '.join(parts)}])" + + +def _shape_to_spark(shape: FieldShape) -> str: + """Convert a FieldShape to a Spark type expression string.""" + match shape: + case ArrayOf(element=element): + return f"ArrayType({_shape_to_spark(element)}, True)" + case NewTypeShape(inner=inner): + return _shape_to_spark(inner) + case ModelRef(model=m): + return _struct_type_expr(m.fields) + case UnionRef(union=u): + return _struct_type_expr(_deduplicate_by_name(u.fields)) + case MapOf(key=k, value=v): + return f"MapType({_shape_to_spark(k)}, {_shape_to_spark(v)}, True)" + case Primitive() | LiteralScalar() | AnyScalar() as s: + return _spark_for_scalar(s) + raise TypeError(f"Unhandled FieldShape: {shape!r}") + + +def build_schema(spec: FeatureSpec) -> list[SchemaField]: + """Build schema fields for a feature spec. + + Walks the field tree and maps types to Spark type expressions. + Recognizes shared types and emits fields in model order. + + Parameters + ---------- + spec + The feature spec to build schema fields for. + + Returns + ------- + list[SchemaField] + One entry per schema column in model order. 
+ """ + source_fields = ( + _deduplicate_by_name(spec.fields) + if isinstance(spec, UnionSpec) + else spec.fields + ) + return [ + SchemaField(name=f.name, type_expr=_shape_to_spark(f.shape)) + for f in source_fields + ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 new file mode 100644 index 000000000..8c15ed9d9 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/_check_function.py.jinja2 @@ -0,0 +1,10 @@ +{%- macro check_function(c) -%} +def {{ c.func_name }}() -> Check: + return Check( + field="{{ c.field }}", + name="{{ c.check_name }}", + expr={{ c.expr }}, + shape=CheckShape.{{ c.shape }}, + root_field={{ c.root_field | py_literal }}, + ) +{% endmacro %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 new file mode 100644 index 000000000..1a28d39b2 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/feature_module.py.jinja2 @@ -0,0 +1,83 @@ +{% from '_check_function.py.jinja2' import check_function -%} +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""{{ feature_title }} validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +{% if spark_types %} +from pyspark.sql.types import ( +{% for t in spark_types %} + {{ t }}, +{% endfor %} +) +{% endif %} +{% if geometry_type %} +from overture.schema.system.primitive import GeometryType + +{% endif %} +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +{% if schema_struct_refs %} +from overture.schema.pyspark.expressions._schema_structs import ( +{% for r in schema_struct_refs %} + {{ r }}, +{% endfor %} +) +{% endif %} +{% if column_pattern_fns %} +from overture.schema.pyspark.expressions.column_patterns import ( +{% for f in column_pattern_fns %} + {{ f }}, +{% endfor %} +) +{% endif %} +{% if constraint_expr_fns %} +from overture.schema.pyspark.expressions.constraint_expressions import ( +{% for f in constraint_expr_fns %} + {{ f }}, +{% endfor %} +) +{% endif %} + + +{% for c in check_functions %} +{{ check_function(c) }} +{% endfor %} + +def {{ feature_name }}_checks() -> list[Check]: + """All validation checks for {{ feature_name }}.""" +{% if check_functions %} + return [ +{% for c in check_functions %} + {{ c.func_name }}(), +{% endfor %} + ] +{% else %} + return [] +{% endif %} + + +{{ schema_const_name }} = StructType( + [ +{%- for sf in schema_fields %} + StructField("{{ sf.name }}", {{ sf.type_expr }}, True), +{%- endfor %} + ] +) +{% if geometry_types_literal %} + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= {{ geometry_types_literal }} +{% endif %} + +ENTRY_POINT = "{{ entry_point }}" + +PARTITIONS: dict[str, str] = {{ partitions | py_literal }} + +FEATURE_VALIDATION = FeatureValidation( + schema={{ schema_const_name }}, + checks={{ feature_name }}_checks, +{%- if geometry_types_literal %} + geometry_types=GEOMETRY_TYPES, +{%- endif %} +) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 new file mode 100644 index 000000000..c69f146a8 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/templates/test_module.py.jinja2 @@ -0,0 +1,124 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for {{ feature_name }}.""" + +from __future__ import annotations + +import pytest +from {{ expression_import }} import ( + {{ feature_name | upper }}_SCHEMA, + {{ feature_name }}_checks, +) +from pyspark.sql import SparkSession + +from {{ support_prefix }}_support.harness import ( + ValidationResults, + run_validation_pipeline, +) +{% if mutation_imports %} +from {{ support_prefix }}_support.mutations import {{ mutation_imports | join(", ") }} +{% endif %} +{% if needs_set_at_path %} +from {{ support_prefix }}_support.helpers import set_at_path +{% endif %} +from {{ support_prefix }}_support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = {{ base_row_sparse }} + + +BASE_ROW_POPULATED: dict = {{ base_row_populated }} + + +SCENARIOS: list[Scenario] = [ +{% for entry in scenarios %} + Scenario( + {% for k, v in entry %} + {{ k }}={{ v }}, + {% endfor %} + ), +{% endfor %} +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return {{ feature_name }}_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + {{ schema_name }}, + checks, + 
BASE_ROW_SPARSE, + SCENARIOS, + feature_name="{{ feature_name }}", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + {{ schema_name }}, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="{{ feature_name }}", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("{{ feature_name }}::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("{{ feature_name }}::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py new file mode 100644 index 000000000..fa271d7b8 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/__init__.py @@ -0,0 +1,9 @@ +"""Test-data generation for the rendered PySpark conformance tests. + +Three modules cover three flavors of data: + +- `invalid_value`: constraint-violating values for triggering each check. +- `base_row`: minimal and fully populated valid rows. +- `scaffold`: sparse path scaffolds that supply the nested intermediates + (optional structs, arrays) a check's field path requires. 
+""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py new file mode 100644 index 000000000..6af5b0855 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/base_row.py @@ -0,0 +1,648 @@ +"""Generate valid base rows for the rendered conformance tests. + +`generate_base_row` produces a minimal valid row (required fields only) +from a `FeatureSpec`. `generate_populated_row` produces a fully +populated row including optional fields. `generate_arm_rows` and +`generate_populated_arm_rows` do the same for each arm of a discriminated +union. +""" + +from __future__ import annotations + +import uuid +from collections.abc import Callable +from enum import Enum +from typing import Any + +from overture.schema.common.scoping.lr import LinearReferenceRangeConstraint +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + JsonPointerConstraint, + LanguageTagConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + StrippedConstraint, + WikidataIdConstraint, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive.geom import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) + +from ...extraction.field import ( + AnyScalar, + ArrayOf, + ConstraintSource, + FieldShape, + LiteralScalar, + MapOf, + ModelRef, + NewTypeShape, + Primitive, + UnionRef, +) +from ...extraction.field_walk import has_array_layer, terminal_primitive +from ...extraction.length_constraints import ArrayMinLen +from ...extraction.specs import FeatureSpec, FieldSpec, ModelSpec, UnionSpec +from ..constraint_dispatch import ExpressionDescriptor, 
dispatch_constraint +from ..schema_builder import spark_type_rank + +__all__ = [ + "generate_arm_rows", + "generate_base_row", + "generate_populated_arm_rows", + "generate_populated_row", + "value_for_field", +] + +_BASE_ROW_NAMESPACE = uuid.uuid5( + uuid.NAMESPACE_DNS, "overturemaps.org/codegen/base_row" +) + + +# WKT strings for each allowed geometry type (valid side) +_VALID_GEOMETRY_WKT: dict[GeometryType, str] = { + GeometryType.POINT: "POINT (0 0)", + GeometryType.LINE_STRING: "LINESTRING (0 0, 1 1)", + GeometryType.POLYGON: "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + GeometryType.MULTI_POLYGON: "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + GeometryType.MULTI_LINE_STRING: "MULTILINESTRING ((0 0, 1 1))", +} + + +_PRIMITIVE_DEFAULTS: dict[str, object] = { + "str": "", + "NoWhitespaceString": "", + "HttpUrl": "https://example.com/", + "EmailStr": "user@example.com", + "bool": False, + "bytes": b"", + "datetime": "2024-01-01T00:00:00Z", + "date": "2024-01-01", +} + + +def _bbox_value() -> dict[str, float]: + return {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0} + + +# Field-name overrides applied before any shape-based value generation in +# `value_for_field`. Each builder receives `(field_spec, spec_name)`. +_SPECIAL_FIELD_VALUES: dict[str, Callable[[FieldSpec, str], object]] = { + "id": lambda _f, spec_name: str(uuid.uuid5(_BASE_ROW_NAMESPACE, spec_name)), + "bbox": lambda _f, _spec_name: _bbox_value(), +} + + +def _is_geometry_terminal(terminal: Primitive) -> bool: + """Whether this terminal represents a geometry value. + + Only fires for the `Geometry` source class. Fields wanting a + geometry value must declare `Geometry`; ad-hoc forms like + `Annotated[bytes, GeometryTypeConstraint(...)]` aren't recognized. + """ + return terminal.source_type is Geometry + + +def generate_base_row(spec: FeatureSpec, *, index: int = 0) -> dict[str, Any]: + """Produce a minimal valid row from a feature spec (required fields only). 
def generate_arm_rows(spec: FeatureSpec) -> dict[str, dict[str, Any]]:
    """Build a sparse (required-fields-only) valid row for every union arm.

    Parameters
    ----------
    spec
        An expanded union spec.

    Returns
    -------
    dict[str, dict[str, Any]]
        `{arm_value: row}`, where each row passes TypeAdapter validation
        against the union's source annotation.

    Raises
    ------
    TypeError
        If *spec* is not a `UnionSpec`.
    """
    union_spec = _require_union(spec)
    return _build_arm_rows(union_spec, populate_optional=False)
def _build_row(
    spec: FeatureSpec,
    *,
    index: int = 0,
    populate_optional: bool,
    name_override: str | None = None,
) -> dict[str, Any]:
    """Assemble one row dict for *spec* and reconcile it with model constraints.

    Parameters
    ----------
    spec
        The spec whose fields populate the row.
    index
        Forwarded to `value_for_field` for every generated value.
    populate_optional
        When False, fields that are not required are left out of the row.
    name_override
        Spec name handed to `value_for_field` in place of `spec.name`
        (used when a member row is generated on behalf of a union).
    """
    owner_name = name_override if name_override else spec.name
    row: dict[str, Any] = {
        field.name: value_for_field(
            field, owner_name, index=index, populate_optional=populate_optional
        )
        for field in spec.fields
        if populate_optional or field.is_required
    }
    # Model-level constraints may add or remove fields after the first pass.
    _satisfy_model_constraints(row, spec)
    return row
def _row_satisfies_condition(row: dict[str, Any], condition: object) -> bool:
    """Report whether *row* currently meets a `FieldEqCondition`.

    Any other kind of condition is treated as not satisfied. Enum-valued
    conditions are compared by their `.value`, since rows hold raw values.
    """
    if isinstance(condition, FieldEqCondition):
        expected = condition.value
        if isinstance(expected, Enum):
            expected = expected.value
        return row.get(condition.field_name) == expected
    return False
def value_for_field(
    field: FieldSpec,
    spec_name: str,
    *,
    index: int = 0,
    populate_optional: bool = False,
) -> object:
    """Return one valid value for *field*.

    Strategies are tried in order and the first that applies wins:

    1. a field-name override from `_SPECIAL_FIELD_VALUES`;
    2. a WKT literal when the shape's terminal primitive is a geometry;
    3. for shapes without an array layer, a value derived from the
       terminal scalar's constraints (e.g. a valid country code instead
       of an empty string);
    4. the generic shape walk in `_value_for_shape`.

    Parameters
    ----------
    field
        The field spec to produce a value for.
    spec_name
        The name of the containing spec, used for deterministic UUID generation.
    index
        Position within a parent list. Non-zero values suffix string fields
        to ensure uniqueness across list items.
    populate_optional
        When True, MODEL and UNION sub-rows include optional fields via
        `generate_populated_row`. When False (default), sub-rows are sparse
        via `generate_base_row`.
    """
    override = _SPECIAL_FIELD_VALUES.get(field.name)
    if override is not None:
        return override(field, spec_name)

    shape = field.shape
    terminal = terminal_primitive(shape)

    if terminal is not None:
        # Geometry short-circuits to WKT: PySpark's Geometry validator
        # parses WKT via `from_wkt`; the field is stored as BinaryType
        # (WKB) downstream.
        if _is_geometry_terminal(terminal):
            return _geometry_wkt_from_shape_constraints(terminal.constraints)

        # Without a list layer the terminal scalar carries the field's
        # constraints directly, so try those before type defaults. Lists
        # instead go through the recursive walk below so array-level and
        # per-element constraints both get a chance to drive the value.
        if not has_array_layer(shape):
            candidate = _value_from_scalar_constraints(terminal)
            if candidate is not None:
                needs_suffix = index > 0 and isinstance(candidate, str)
                return f"{candidate}{index}" if needs_suffix else candidate

    return _value_for_shape(
        shape,
        index=index,
        check_constraints=False,
        populate_optional=populate_optional,
    )
def _row_from_model_spec(
    spec: ModelSpec,
    *,
    index: int = 0,
    populate_optional: bool = False,
) -> dict[str, Any]:
    """Build a row for an already-extracted model spec.

    Picks the populated or the sparse row generator according to
    *populate_optional* and forwards *index* to it.
    """
    build = generate_populated_row if populate_optional else generate_base_row
    return build(spec, index=index)
When False (default), sub-rows are + sparse via `generate_base_row`. + """ + match shape: + case ArrayOf(element=element, constraints=array_constraints): + list_val = _list_value_from_shape_constraints(array_constraints) + if list_val is not None: + return list_val + count = _min_length_from_shape_constraints(array_constraints) + return [ + _value_for_shape(element, index=i, populate_optional=populate_optional) + for i in range(count) + ] + + case NewTypeShape(inner=inner): + return _value_for_shape( + inner, + index=index, + check_constraints=check_constraints, + populate_optional=populate_optional, + ) + + case MapOf(): + return {} + + case LiteralScalar(values=values): + val = values[0] + return val.value if isinstance(val, Enum) else val + + case Primitive(source_type=cls) if ( + cls is not None and isinstance(cls, type) and issubclass(cls, Enum) + ): + return list(cls)[0].value # type: ignore[call-overload] + + case ModelRef(model=m): + return _row_from_model_spec( + m, index=index, populate_optional=populate_optional + ) + + case UnionRef(union=u): + # The selected member's discriminator field is a `Literal[X] = "x"` + # with a default, so it has `is_required=False`. In the populated + # case the LiteralScalar branch writes the literal explicitly; in + # the sparse case the field is omitted from the dict and Pydantic + # supplies the default during `TypeAdapter.validate_python()`. + return _row_from_model_spec( + _widest_union_member(u), + index=index, + populate_optional=populate_optional, + ) + + case AnyScalar(): + # Unreachable today: the only `AnyScalar` is a `MapOf` value + # type, and the `MapOf` case returns `{}` without descending. 
def _value_from_check_bounds(
    desc: ExpressionDescriptor, scalar: Primitive, cs: ConstraintSource
) -> object | None:
    """Return a value satisfying a semantic bounds check, or None to skip it.

    Bounds whose source is the scalar's own base type (numeric primitive
    NewTypes such as int32 or uint8) are structural: they mirror the
    Spark/Parquet type, which already enforces the range. Only semantic
    bounds (field-level constraints or semantic NewTypes like
    FeatureVersion) produce a value.
    """
    is_structural = cs.source_name == scalar.base_type
    return None if is_structural else _valid_bound_for_base_row(desc)
+_DESCRIPTOR_VALUE_BUILDERS: dict[ + str, Callable[[ExpressionDescriptor, Primitive, ConstraintSource], object | None] +] = { + "check_enum": _value_from_check_enum, + "check_bounds": _value_from_check_bounds, + "check_string_min_length": _value_from_check_string_min_length, +} + + +_CONSTRAINT_VALID_VALUES: dict[type, object] = { + CountryCodeAlpha2Constraint: "US", + HexColorConstraint: "#aabbcc", + JsonPointerConstraint: "/valid/pointer", + LanguageTagConstraint: "en", + PhoneNumberConstraint: "+1 555-555-5555", + RegionCodeConstraint: "US-CA", + SnakeCaseConstraint: "snake_case", + StrippedConstraint: "clean", + WikidataIdConstraint: "Q42", +} + +_CONSTRAINT_VALID_LIST_VALUES: dict[type, list[object]] = { + LinearReferenceRangeConstraint: [0.0, 1.0], +} + + +def _value_from_scalar_constraints(scalar: Primitive) -> object | None: + """Return a value satisfying the first dispatched constraint. + + Maps known constraint types to valid values directly, then dispatches + remaining constraints through `_DESCRIPTOR_VALUE_BUILDERS` keyed on + the `ExpressionDescriptor` function name. Assumes constraints on a + single field don't conflict; no schema today mixes constraints in a + way that would expose a conflict. 
def _list_value_from_shape_constraints(
    constraints: tuple[ConstraintSource, ...],
) -> list[object] | None:
    """Return the fixed list value demanded by a list-level constraint, if any.

    Constraints are scanned in order; the first one whose type has an
    entry in `_CONSTRAINT_VALID_LIST_VALUES` supplies the result. Returns
    None when no constraint has one.
    """
    lookups = (
        _CONSTRAINT_VALID_LIST_VALUES.get(type(source.constraint))
        for source in constraints
    )
    return next((found for found in lookups if found is not None), None)
def _valid_bound_for_base_row(desc: ExpressionDescriptor) -> object:
    """Produce a value satisfying a bounds check for base row generation.

    Preference order is `ge`, `gt`, `le`, `lt`; with no bound at all the
    result is `0`.

    Parameters
    ----------
    desc
        A `check_bounds` descriptor whose `kwargs` hold any of the keys
        `ge`, `gt`, `le`, `lt`.

    Returns
    -------
    object
        A number inside the described range. For a strict lower bound
        this is `gt + 1`, unless that would break an upper bound declared
        in the same descriptor, in which case the midpoint between the
        two bounds is returned.
    """
    kwargs = dict(desc.kwargs)
    if "ge" in kwargs:
        return kwargs["ge"]
    if "gt" in kwargs:
        lower = kwargs["gt"]
        candidate = lower + 1  # type: ignore[operator]
        # `gt + 1` can overshoot a narrow range such as (0, 1); check it
        # against whichever upper bounds were declared alongside `gt`.
        overshoots = ("le" in kwargs and candidate > kwargs["le"]) or (
            "lt" in kwargs and candidate >= kwargs["lt"]
        )
        if overshoots:
            upper = min(kwargs[key] for key in ("le", "lt") if key in kwargs)
            if upper > lower:
                return lower + (upper - lower) / 2
        # Either no overshoot, or the range is empty and nothing can
        # satisfy it; keep the simple candidate.
        return candidate
    if "le" in kwargs:
        return kwargs["le"]
    if "lt" in kwargs:
        return kwargs["lt"] - 1  # type: ignore[operator]
    return 0
rendered conformance tests. + +`invalid_value` returns a concrete value that violates a given check. The +generated tests inject these into otherwise-valid rows to confirm that +each constraint produces the expected violation. +""" + +from __future__ import annotations + +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + HexColorConstraint, + LanguageTagConstraint, + NoWhitespaceConstraint, + PhoneNumberConstraint, + RegionCodeConstraint, + SnakeCaseConstraint, + WikidataIdConstraint, +) +from overture.schema.system.primitive.geom import GeometryType + +from ..constraint_dispatch import ExpressionDescriptor + +__all__ = ["invalid_value"] + +# Ordered candidates for the invalid geometry side (first not in allowed set wins) +_INVALID_GEOMETRY_CANDIDATES: tuple[tuple[GeometryType, str], ...] = ( + (GeometryType.POINT, "POINT (0 0)"), + (GeometryType.LINE_STRING, "LINESTRING (0 0, 1 1)"), + (GeometryType.GEOMETRY_COLLECTION, "GEOMETRYCOLLECTION EMPTY"), +) + + +# Pattern-constraint -> sample value that violates the pattern. +# Used by `check_pattern` whose constraint_type identifies which validator. +_INVALID_PATTERN_VALUES: dict[type, str] = { + NoWhitespaceConstraint: "has whitespace", + CountryCodeAlpha2Constraint: "99", + RegionCodeConstraint: "99-999", + SnakeCaseConstraint: "HAS SPACES", + PhoneNumberConstraint: "1234567890", + WikidataIdConstraint: "P999", + HexColorConstraint: "not-hex", + LanguageTagConstraint: "123", +} + +# Direct lookup: check function name -> invalid value (no descriptor inspection). 
def invalid_value(desc: ExpressionDescriptor) -> object:
    """Return a Python value that violates `desc`'s check function.

    Every call returns a fresh object: literal-table entries are copied
    and list elements are built individually, so callers may mutate the
    result without affecting later calls.

    Parameters
    ----------
    desc
        The expression descriptor to produce an invalid value for.

    Raises
    ------
    ValueError
        For unrecognised check function names or when all geometry candidates
        are in the allowed set.
    """
    import copy

    fn = desc.function
    if fn in _INVALID_LITERALS:
        # Several table entries are lists/dicts; hand out a copy rather
        # than the shared module-level object.
        return copy.deepcopy(_INVALID_LITERALS[fn])
    if fn == "check_bounds":
        return _invalid_bound(desc)
    if fn == "check_pattern":
        return _INVALID_PATTERN_VALUES.get(desc.constraint_type, "!!!INVALID!!!")  # type: ignore[arg-type]
    if fn == "check_array_max_length":
        max_len = int(desc.args[0])  # type: ignore[call-overload]
        # One element too many. A comprehension gives each slot its own
        # dict, whereas `[{}] * n` would repeat a single dict object.
        return [{} for _ in range(max_len + 1)]
    if fn == "check_string_max_length":
        max_len = int(desc.args[0])  # type: ignore[call-overload]
        return "x" * (max_len + 1)
    if fn == "check_geometry_type":
        return _invalid_geometry(desc)
    raise ValueError(f"No invalid value defined for check function: {fn!r}")
For + `ge=0` this returns `-1`, which violates the bound but would also + underflow an unsigned base type. No schema today combines `ge=0` with + an unsigned terminal -- if that ever changes, the caller will need to + consult the base type and pick a sentinel (e.g. a string or null) for + the violating value. + """ + kwargs = dict(desc.kwargs) + if "ge" in kwargs: + return kwargs["ge"] - 1 # type: ignore[operator] + if "gt" in kwargs: + return kwargs["gt"] + if "le" in kwargs: + return kwargs["le"] + 1 # type: ignore[operator] + if "lt" in kwargs: + return kwargs["lt"] + raise ValueError(f"No recognised bound key in kwargs: {kwargs!r}") + + +def _invalid_geometry(desc: ExpressionDescriptor) -> str: + allowed = set(desc.args) + for geom_type, wkt in _INVALID_GEOMETRY_CANDIDATES: + if geom_type not in allowed: + return wkt + raise ValueError( + f"All geometry candidates are in the allowed set: {allowed!r}. " + "Cannot produce an invalid geometry value." + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py new file mode 100644 index 000000000..d78cf3c43 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_data/scaffold.py @@ -0,0 +1,264 @@ +"""Generate sparse path scaffolds for the rendered conformance tests. + +`generate_scaffold` builds a sparse dict that, when merged with a base +row, supplies the nested intermediates (optional structs, arrays) the +base row lacks but a check's field path requires. +`generate_model_scaffold` does the same for model-level constraints. +`leaf_list_depth` reports unaccounted-for list depth on a target field. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + PathSegment, +) + +from ...extraction.field_walk import has_array_layer, list_depth, terminal_model_ref +from ...extraction.specs import FeatureSpec, FieldSpec +from ..check_ir import ( + Check, + ElementGuard, + ModelCheck, +) +from .base_row import value_for_field + +__all__ = [ + "generate_model_scaffold", + "generate_scaffold", + "leaf_list_depth", +] + + +@dataclass(frozen=True, slots=True) +class _ElementDiscriminator: + """Discriminator value to seed at one nesting depth of the scaffold.""" + + field: str + value: str + depth: int + + +def _find_field_spec(fields: list[FieldSpec], name: str) -> FieldSpec | None: + """Find a FieldSpec by name in a list.""" + for f in fields: + if f.name == name: + return f + return None + + +def leaf_list_depth(field_path: FieldPath, spec: FeatureSpec) -> int: + """Return the unaccounted-for list depth of the leaf field. + + Walks the spec's field tree along *field_path* and returns the + leaf's `list_depth(shape)` minus any `iter_count` on the terminal + path segment. Paths whose terminal segment is itself an array + target the array's elements, so the mutation already operates one + level deep. Returns 0 when *field_path* is empty or when any + segment fails to resolve against *spec* (e.g. union arms that + don't share the path's intermediate fields). 
+ """ + segments = field_path.segments + if not segments: + return 0 + fields = list(spec.fields) + for seg in segments[:-1]: + field = _find_field_spec(fields, seg.name) + if field is None: + return 0 + model_ref = terminal_model_ref(field.shape) + if model_ref is None: + return 0 + fields = model_ref.model.fields + leaf_seg = segments[-1] + leaf = _find_field_spec(fields, leaf_seg.name) + if leaf is None: + return 0 + terminal_iter = leaf_seg.iter_count if isinstance(leaf_seg, ArraySegment) else 0 + return max(0, list_depth(leaf.shape) - terminal_iter) + + +def _required_siblings( + fields: list[FieldSpec], exclude: str, spec_name: str +) -> dict[str, Any]: + """Populate required siblings at one nesting level, excluding the target.""" + result: dict[str, Any] = {} + for f in fields: + if f.name == exclude or not f.is_required: + continue + result[f.name] = value_for_field(f, spec_name) + return result + + +def _walk_to_target( + segments: tuple[PathSegment, ...], + fields: list[FieldSpec], + spec_name: str, + *, + discriminator: _ElementDiscriminator | None, + current_depth: int = 0, +) -> dict[str, Any]: + """Recursively build the scaffold dict along the path segments.""" + if not segments: + return {} + + seg = segments[0] + remaining = segments[1:] + field_spec = _find_field_spec(fields, seg.name) + + inner: Any + child_model = ( + terminal_model_ref(field_spec.shape) if field_spec is not None else None + ) + if remaining and child_model is not None: + child_fields = child_model.model.fields + inner = _walk_to_target( + remaining, + child_fields, + spec_name, + discriminator=discriminator, + current_depth=current_depth + 1, + ) + siblings = _required_siblings(child_fields, remaining[0].name, spec_name) + inner = {**siblings, **inner} + elif not remaining and field_spec is not None: + inner = value_for_field(field_spec, spec_name) + else: + inner = {} + + if ( + isinstance(inner, dict) + and discriminator is not None + and current_depth == discriminator.depth + 
): + inner[discriminator.field] = discriminator.value + + # When the terminal segment is an array and the field itself is a list, + # `value_for_field` already wrapped the value -- skip extra wrapping. + if isinstance(seg, ArraySegment): + if ( + not remaining + and field_spec is not None + and has_array_layer(field_spec.shape) + ): + return {seg.name: inner} + wrapped: Any = inner + for _ in range(seg.iter_count): + wrapped = [wrapped] + return {seg.name: wrapped} + if remaining and field_spec is not None and has_array_layer(field_spec.shape): + return {seg.name: [inner]} + return {seg.name: inner} + + +def _element_discriminator(check: Check) -> _ElementDiscriminator | None: + """Return the element-level discriminator for a Check, or None. + + Bundles the discriminator field, the value to seed, and the depth at + which to seed it (the innermost array segment in the target path). + The check_ir invariant is that nested-union gating composes at most + one `ElementGuard` per Check; more than one would mean the gate + composition rule changed without updating the scaffold, so raise to + surface the gap rather than silently dropping guards. + """ + element_guards = [g for g in check.guards if isinstance(g, ElementGuard)] + if len(element_guards) > 1: + raise NotImplementedError( + f"Check carries {len(element_guards)} ElementGuards " + f"({element_guards!r}); the scaffold only seeds one. Update " + "the scaffold builder when the gate composition rule changes." 
+ ) + if not element_guards or not element_guards[0].values: + return None + guard = element_guards[0] + segments = check.target.segments + for i in range(len(segments) - 1, -1, -1): + if isinstance(segments[i], ArraySegment): + return _ElementDiscriminator( + field=guard.discriminator, value=guard.values[0], depth=i + ) + return None + + +def generate_scaffold(check: Check, spec: FeatureSpec) -> dict[str, Any]: + """Build a sparse dict from null to the target field of a Check.""" + segments = check.target.segments + if not segments: + return {} + + if len(segments) == 1: + seg0 = segments[0] + field_spec = _find_field_spec(spec.fields, seg0.name) + if field_spec is None or field_spec.is_required: + return {} + return {seg0.name: value_for_field(field_spec, spec.name)} + + return _walk_to_target( + segments, + spec.fields, + spec.name, + discriminator=_element_discriminator(check), + ) + + +def generate_model_scaffold(check: ModelCheck, spec: FeatureSpec) -> dict[str, Any]: + """Build a sparse dict for a model-level check's nesting structure. + + Only top-level array columns are supported -- a `ScalarPath` target + returns `{}` (no scaffold needed at row root) and an `ArrayPath` + whose column lives inside a struct raises `NotImplementedError`. + No schema today places a list of model-constrained models inside a + struct field, so the case has no test coverage. 
+ """ + match check.target: + case ArrayPath() as target: + pass + case _: + return {} + column_prefix = target.column_prefix + if column_prefix.segments: + raise NotImplementedError( + "Multi-segment column paths (struct fields containing arrays) " + "require walking the parent tree from the root to the array " + f"column; got {target!r}" + ) + + field_spec = _find_field_spec(spec.fields, target.column_path) + if field_spec is None: + return {} + + inner_levels = target.iter_struct_paths + leaf_path = target.leaf + + inner: dict[str, Any] = {} + root_model = terminal_model_ref(field_spec.shape) + current_fields: list[FieldSpec] = root_model.model.fields if root_model else [] + nested = inner + + for level in inner_levels: + for part in level: + child_spec = _find_field_spec(current_fields, part) + child_is_list = child_spec is not None and has_array_layer(child_spec.shape) + child_model = ( + terminal_model_ref(child_spec.shape) if child_spec is not None else None + ) + if child_is_list: + nested[part] = [{}] + nested = nested[part][0] + else: + nested[part] = {} + nested = nested[part] + current_fields = child_model.model.fields if child_model else [] + + for part in leaf_path: + nested[part] = {} + nested = nested[part] + + if has_array_layer(field_spec.shape): + return {target.column_path: [inner]} + return {target.column_path: inner} if inner else {} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py new file mode 100644 index 000000000..bd933fb20 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/test_renderer.py @@ -0,0 +1,423 @@ +"""Render Check / ModelCheck IR into generated conformance test modules.""" + +from __future__ import annotations + +from typing import Any, NamedTuple + +from typing_extensions import assert_never + +from overture.schema.system.field_path import ArrayPath + 
+from ..extraction.field import FieldShape +from ..extraction.field_walk import has_array_layer +from ..extraction.specs import FeatureSpec +from ._render_common import ( + check_name, + compute_label_suffixes, + disambiguate, + field_label, + jinja_env, + model_constraint_field_label, + parse_field_eq, + py_literal, +) +from .check_ir import ( + Check, + ColumnGuard, + ModelCheck, +) +from .constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + ModelConstraintDescriptor, + RadioGroup, + RequireAnyOf, + RequireIf, + model_constraint_function, + model_mutation_function, +) +from .test_data.invalid_value import invalid_value +from .test_data.scaffold import ( + generate_model_scaffold, + generate_scaffold, + leaf_list_depth, +) + +__all__ = ["render_test_module"] + + +def _check_belongs_to_arm(check: Check, arm: str) -> bool: + """Return True when a Check applies to a given union arm. + + The outermost union's discriminator surfaces as `ColumnGuard`s; inner + unions use `ElementGuard`s on a different discriminator field and are + irrelevant to arm filtering. A check belongs to *arm* when every + `ColumnGuard` admits it (guards are AND-composed). + """ + column_guards = [g for g in check.guards if isinstance(g, ColumnGuard)] + return all(arm in g.values for g in column_guards) + + +def _model_check_belongs_to_arm(check: ModelCheck, arm: str) -> bool: + """Return True when a ModelCheck applies to a given union arm. + + `ModelCheck.arm` is `None` for union-level constraints (which apply + regardless of discriminator) and set to a discriminator value for + constraints contributed by one specific member class. 
+ """ + return check.arm is None or check.arm == arm + + +def render_test_module( + feature_name: str, + field_checks: list[Check], + model_checks: list[ModelCheck], + *, + expression_import: str, + support_prefix: str, + base_row_sparse: dict[str, Any] | None = None, + base_row_populated: dict[str, Any] | None = None, + arm: str | None = None, + spec: FeatureSpec | None = None, +) -> str: + """Render a complete pytest test file for a feature's validation checks. + + Arm filtering uses two complementary signals. A field check's + `ColumnGuard`s identify the arms it belongs to. A model check's `arm` + attribute is set for member-specific constraints and `None` for + union-level constraints (which apply to every arm). + """ + if arm is not None: + field_checks = [c for c in field_checks if _check_belongs_to_arm(c, arm)] + model_checks = [c for c in model_checks if _model_check_belongs_to_arm(c, arm)] + + model_scenarios, used_mutation_fns = _render_model_scenarios( + feature_name, model_checks, spec + ) + field_scenarios, field_helpers = _render_field_check_scenarios( + feature_name, field_checks, spec + ) + used_mutation_fns |= field_helpers - {"set_at_path"} + + sparse_repr = py_literal(base_row_sparse) if base_row_sparse is not None else "{}" + populated_repr = ( + py_literal(base_row_populated) if base_row_populated is not None else "{}" + ) + + all_scenarios = field_scenarios + model_scenarios + + template = jinja_env().get_template("test_module.py.jinja2") + return template.render( + feature_name=feature_name, + schema_name=f"{feature_name.upper()}_SCHEMA", + mutation_imports=sorted(used_mutation_fns), + needs_set_at_path="set_at_path" in field_helpers, + base_row_sparse=sparse_repr, + base_row_populated=populated_repr, + scenarios=all_scenarios, + expression_import=expression_import, + support_prefix=support_prefix, + ) + + +def _scenario_entry( + *, + scenario_id: str, + scaffold: dict[str, Any], + mutate_expr: str, + expected_field: str, + expected_check: 
str, +) -> list[tuple[str, str]]: + """Build a rendered Scenario kwargs list for the test_module template.""" + return [ + ("id", py_literal(scenario_id)), + ("scaffold", py_literal(scaffold)), + ("mutate", mutate_expr), + ("expected_field", py_literal(expected_field)), + ("expected_check", py_literal(expected_check)), + ] + + +class _MutateExpr(NamedTuple): + """One rendered `mutate=` expression and the helper it imports. + + `helper` is `None` when the expression is a literal `set_at_path` + call (the default), and otherwise names a `mutate_*` helper from + `tests/_support/mutations.py` to import. + """ + + expr: str + helper: str | None + + +def _field_mutate_expr( + check: Check, desc: ExpressionDescriptor, spec: FeatureSpec | None +) -> _MutateExpr: + """Render the `mutate=` expression for one field-check descriptor. + + `check_struct_unique` calls the `mutate_unique_items` helper at the + target path; every other descriptor injects a constraint-violating + literal via `set_at_path`. + """ + target_repr = py_literal(str(check.target)) + if desc.function == "check_struct_unique": + return _MutateExpr( + f"lambda row: mutate_unique_items(row, {target_repr})", + "mutate_unique_items", + ) + iv_val = _wrap_for_list_leaf(invalid_value(desc), check, spec) + return _MutateExpr(f"set_at_path({target_repr}, {py_literal(iv_val)})", None) + + +def _render_field_check_scenarios( + feature_name: str, + field_checks: list[Check], + spec: FeatureSpec | None, +) -> tuple[list[list[tuple[str, str]]], set[str]]: + """Render Scenario entries for field-level checks. + + Returns the entries and the set of mutation helper names referenced + by them, mirroring `_render_model_scenarios`. 
+ """ + rows: list[tuple[Check, ExpressionDescriptor, str, str]] = [] + for check in field_checks: + label = field_label(check) + for desc in check.descriptors: + name = check_name(desc.function, desc.check_name) + rows.append((check, desc, label, name)) + + scenario_ids = disambiguate( + [f"{feature_name}::{label}:{name}" for _check, _desc, label, name in rows] + ) + + entries: list[list[tuple[str, str]]] = [] + used_helpers: set[str] = set() + for (check, desc, label, name), scenario_id in zip(rows, scenario_ids, strict=True): + scaffold = generate_scaffold(check, spec) if spec is not None else {} + try: + mutate = _field_mutate_expr(check, desc, spec) + except ValueError as exc: + raise ValueError( + f"Cannot render mutate expression for {scenario_id}: {exc}" + ) from exc + used_helpers.add(mutate.helper or "set_at_path") + entries.append( + _scenario_entry( + scenario_id=scenario_id, + scaffold=scaffold, + mutate_expr=mutate.expr, + expected_field=label, + expected_check=name, + ) + ) + + return entries, used_helpers + + +def _checks_array_element(check: Check) -> bool: + """True when the check fires on each element of an `ArrayPath` directly. + + The check target ends at the array (`leaf=()`), so the mutation + replaces an array element rather than a struct field on one. For + these checks, a `None` invalid value still needs list wrapping; for + nested struct fields, `None` already sits at the right level. 
+ """ + return isinstance(check.target, ArrayPath) and not check.target.leaf + + +def _wrap_for_list_leaf( + value: object, + check: Check, + spec: FeatureSpec | None, +) -> object: + """Wrap a scalar invalid value to match the field's list nesting depth.""" + if spec is None or isinstance(value, list): + return value + if value is None and not _checks_array_element(check): + return value + depth = leaf_list_depth(check.target, spec) + for _ in range(depth): + value = [value] + return value + + +def _render_model_scenarios( + feature_name: str, + model_checks: list[ModelCheck], + spec: FeatureSpec | None, +) -> tuple[list[list[tuple[str, str]]], set[str]]: + """Render Scenario entries for model-level checks. + + Returns the entries and the set of mutation helper names referenced + by them, so the caller can scope the test module's imports. + """ + entries: list[list[tuple[str, str]]] = [] + used_mutation_fns: set[str] = set() + label_suffixes = compute_label_suffixes(model_checks) + + for idx, mc in enumerate(model_checks): + desc = mc.descriptor + fn = model_constraint_function(desc) + mutation_fn = model_mutation_function(desc) + name = check_name(fn) + scenario_id = f"{feature_name}::model:{name}:{idx}" + label = model_constraint_field_label(mc, label_suffixes[idx]) + scaffold = generate_model_scaffold(mc, spec) if spec is not None else {} + + try: + call = _render_mutation_call(mutation_fn, desc, mc) + except ValueError as exc: + raise ValueError( + f"Cannot render mutation call for {scenario_id}: {exc}" + ) from exc + mutate_expr = f"lambda row: {call}" + used_mutation_fns.add(mutation_fn) + entries.append( + _scenario_entry( + scenario_id=scenario_id, + scaffold=scaffold, + mutate_expr=mutate_expr, + expected_field=label, + expected_check=name, + ) + ) + + return entries, used_mutation_fns + + +def _render_mutation_call( + mutation_fn: str, + desc: ModelConstraintDescriptor, + check: ModelCheck, +) -> str: + """Render a model mutation helper function call.""" 
+ fields_repr = py_literal(list(desc.field_names)) + + match desc: + case RequireIf() | ForbidIf(): + return _render_conditional_mutation_call( + mutation_fn, desc, check, fields_repr + ) + case RadioGroup(): + if isinstance(check.target, ArrayPath): + raise ValueError( + "mutate_radio_group does not accept array_path " + f"(target={check.target!r})" + ) + return f"{mutation_fn}(row, {fields_repr})" + case RequireAnyOf() | MinFieldsSet(): + parts = _array_kwargs_leaf(check, mutation_fn) + suffix = ", " + ", ".join(parts) if parts else "" + return f"{mutation_fn}(row, {fields_repr}{suffix})" + assert_never(desc) + + +def _render_conditional_mutation_call( + mutation_fn: str, + desc: RequireIf | ForbidIf, + check: ModelCheck, + fields_repr: str, +) -> str: + """Render a mutate_require_if or mutate_forbid_if call.""" + parsed = parse_field_eq(desc.condition) + fn = model_constraint_function(desc) + if parsed is None: + raise ValueError( + f"{fn} condition {desc.condition!r} is not a " + "FieldEqCondition or Not(FieldEqCondition); cannot render " + f"{mutation_fn} call" + ) + fill = _render_fill_values(desc) if isinstance(desc, ForbidIf) else None + kwarg_parts: list[str] = [] + if parsed.negated: + kwarg_parts.append("negate=True") + if fill: + kwarg_parts.append(f"fill_values={fill}") + kwarg_parts.extend(_array_kwargs_inner(check, mutation_fn)) + suffix = ", " + ", ".join(kwarg_parts) if kwarg_parts else "" + return ( + f"{mutation_fn}(row, {fields_repr}, " + f"{py_literal(parsed.field_name)}, {py_literal(parsed.value)}{suffix})" + ) + + +def _fill_value_literal(shape: FieldShape) -> str: + """Return a Python source literal for a type-appropriate non-null fill value.""" + if has_array_layer(shape): + return "[{}]" + return "{}" + + +def _render_fill_values(desc: ForbidIf) -> str | None: + """Render a `fill_values` dict literal for non-string ForbidIf targets.""" + if not desc.field_shapes: + return None + items = [ + f"{py_literal(name)}: 
{_fill_value_literal(shape)}" + for name, shape in desc.field_shapes + ] + return "{" + ", ".join(items) + "}" + + +def _array_kwargs_leaf(check: ModelCheck, mutation_fn: str) -> list[str]: + """Array kwargs for mutations accepting `struct_path` (a trailing leaf). + + Yields `array_path=...` and optionally `struct_path=...`. Inner array + iteration is rejected -- these mutations consume only the outermost + array level. + """ + if not isinstance(check.target, ArrayPath): + return [] + inner_struct_paths = check.target.iter_struct_paths + leaf_path = check.target.leaf + + if inner_struct_paths: + raise ValueError( + f"{mutation_fn} does not accept inner_array_path " + f"(inner struct paths={inner_struct_paths!r})" + ) + + kwargs = [f'array_path="{check.target.column_path}"'] + if leaf_path: + if len(leaf_path) > 1: + raise ValueError( + f"multi-segment leaf_path {leaf_path!r} not supported by " + f"{mutation_fn} (struct_path must be a single segment)" + ) + kwargs.append(f'struct_path="{leaf_path[0]}"') + return kwargs + + +def _array_kwargs_inner(check: ModelCheck, mutation_fn: str) -> list[str]: + """Array kwargs for mutations accepting `inner_array_path`. + + Yields `array_path=...` and optionally `inner_array_path=...`. A + trailing leaf path is rejected -- these mutations target an inner + array directly, not a struct field on its elements. 
+ """ + if not isinstance(check.target, ArrayPath): + return [] + inner_struct_paths = check.target.iter_struct_paths + leaf_path = check.target.leaf + + if leaf_path: + raise ValueError( + f"{mutation_fn} does not accept struct_path (leaf_path={leaf_path!r})" + ) + + kwargs = [f'array_path="{check.target.column_path}"'] + if inner_struct_paths: + if len(inner_struct_paths) > 1: + raise ValueError( + f"multi-level inner struct paths {inner_struct_paths!r} not supported by " + f"{mutation_fn} (inner_array_path consumes one iteration)" + ) + if not inner_struct_paths[0]: + raise ValueError( + f"empty inner struct path not supported by {mutation_fn} " + f"(target={check.target!r}); nested-iteration arrays without " + f"intermediate struct fields cannot be addressed via inner_array_path" + ) + kwargs.append(f'inner_array_path="{".".join(inner_struct_paths[0])}"') + return kwargs diff --git a/packages/overture-schema-codegen/tests/codegen_test_support.py b/packages/overture-schema-codegen/tests/codegen_test_support.py index 2a18faf13..4bdea9f62 100644 --- a/packages/overture-schema-codegen/tests/codegen_test_support.py +++ b/packages/overture-schema-codegen/tests/codegen_test_support.py @@ -12,19 +12,27 @@ from typing import Annotated, Generic, Literal, NewType, TypeVar import pytest +from annotated_types import MinLen +from overture.schema.codegen.extraction.field import LiteralScalar, Primitive +from overture.schema.codegen.extraction.field_walk import terminal_of from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type from overture.schema.codegen.extraction.specs import ( AnnotatedField, EnumMemberSpec, EnumSpec, + FeatureSpec, FieldSpec, + MemberSpec, ModelSpec, TypeIdentity, UnionSpec, is_model_class, + is_union_alias, + partitions_from_tags, ) -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from 
overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.system.discovery import ( TagSelector, discover_models, @@ -33,7 +41,7 @@ from overture.schema.system.discovery.tag import get_values_for_key from overture.schema.system.doc import DocumentedEnum from overture.schema.system.field_constraint import UniqueItemsConstraint -from overture.schema.system.model_constraint import require_any_of +from overture.schema.system.model_constraint import radio_group, require_any_of from overture.schema.system.primitive import ( Geometry, GeometryType, @@ -45,7 +53,7 @@ from overture.schema.system.string import HexColor, LanguageTag, StrippedString from pydantic import BaseModel, EmailStr, Field, HttpUrl -STR_TYPE = TypeInfo(base_type="str", kind=TypeKind.PRIMITIVE) +STR_TYPE = Primitive(base_type="str") ThemeT = TypeVar("ThemeT") TypeT = TypeVar("TypeT") @@ -210,6 +218,20 @@ class FeatureWithUrl(FeatureBase[Literal["test"], Literal["linked"]]): emails: list[EmailStr] | None = None +class DatasetEntry(BaseModel): + """A dataset with required URL fields.""" + + name: str = Field(description="Dataset name") + url: HttpUrl + download_urls: list[HttpUrl] | None = None + + +class FeatureWithRequiredUrl(FeatureBase[Literal["test"], Literal["urlreq"]]): + """A feature with required URL fields at multiple nesting levels.""" + + datasets: list[DatasetEntry] + + HTTP_URL_SPEC = extract_pydantic_type(HttpUrl) EMAIL_STR_SPEC = extract_pydantic_type(EmailStr) @@ -243,6 +265,49 @@ class WaterSegment(SegmentBase): ] +class ShortNamesSegment(SegmentBase): + """Segment variant whose `aliases` requires at least one entry.""" + + subtype: Literal["short"] + aliases: Annotated[list[str], Field(min_length=1)] | None = None + + +class LongNamesSegment(SegmentBase): + """Segment variant whose `aliases` requires at least five entries.""" + + subtype: Literal["long"] + aliases: 
Annotated[list[str], Field(min_length=5)] | None = None + + +TestSegmentDivergingConstraints = Annotated[ + ShortNamesSegment | LongNamesSegment, + Field(description="Union whose members declare diverging field constraints"), +] + + +class VehicleKind(str, Enum): + """Vehicle classification.""" + + CAR = "car" + BIKE = "bike" + + +class CarVariant(SegmentBase): + subtype: Literal[VehicleKind.CAR] + doors: int | None = None + + +class BikeVariant(SegmentBase): + subtype: Literal[VehicleKind.BIKE] + has_basket: bool | None = None + + +TestEnumDiscriminatorUnion = Annotated[ + CarVariant | BikeVariant, + Field(description="Union with enum-valued discriminator", discriminator="subtype"), +] + + class ContactInfo(BaseModel): """Contact information for a venue.""" @@ -273,16 +338,23 @@ def make_union_spec( common_base: type[BaseModel] | None = None, entry_point: str | None = None, ) -> UnionSpec: - """Build a UnionSpec with sensible defaults for tests.""" + """Build a UnionSpec with sensible defaults for tests. + + `member_specs` is derived from `members` via `extract_model`, matching + what `extract_union` produces, so specs built here behave the same + through `_model_checks_for_union` and the base-row generators. 
+ """ + members = members or [] return UnionSpec( name=name, description=description, annotated_fields=annotated_fields or [], - members=members or [], + members=members, discriminator_field=None, discriminator_mapping=None, source_annotation=source_annotation, common_base=common_base or BaseModel, + member_specs=[MemberSpec(m, extract_model(m)) for m in members], entry_point=entry_point, ) @@ -333,8 +405,9 @@ def assert_literal_field( ) -> None: """Assert a field is a single-value Literal with the expected value.""" field = find_field(spec, field_name) - assert field.type_info.kind == TypeKind.LITERAL - assert field.type_info.literal_values == (expected_value,) + terminal = terminal_of(field.shape) + assert isinstance(terminal, LiteralScalar) + assert terminal.values == (expected_value,) def flat_specs_from_discovery( @@ -354,6 +427,67 @@ def flat_specs_from_discovery( return result +class LiteralSubtypeModel(BaseModel): + """Model with a required Literal field and an optional string.""" + + subtype: Literal["a", "b", "c"] + name: str | None = None + + +class TripleInnerModel(BaseModel): + tag: Annotated[str, MinLen(1)] + + +class TripleNestedArrayModel(BaseModel): + deep: list[list[list[TripleInnerModel]]] + + +@radio_group("a", "b") +class RadioModel(BaseModel): + a: bool = False + b: bool = False + + +@require_any_of("x", "y") +class RequireAnyModel(BaseModel): + x: str | None = None + y: str | None = None + + +def discover_feature(class_name: str) -> FeatureSpec: + """Discover and extract a feature spec by class name.""" + models = discover_models() + for key, entry in models.items(): + partitions = partitions_from_tags(key.tags) + if is_model_class(entry) and entry.__name__ == class_name: + return extract_model( + entry, entry_point=key.entry_point, partitions=partitions + ) + if is_union_alias(entry) and entry_point_class(key.entry_point) == class_name: + return extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + 
partitions=partitions, + ) + raise LookupError(f"{class_name} not found in discovered models") + + +def feature_spec_for_model( + cls: type[BaseModel], + *, + entry_point: str | None = None, + partitions: Mapping[str, str] | None = None, +) -> ModelSpec: + """Extract a model class for tests; sub-specs are populated by extract_model.""" + return extract_model(cls, entry_point=entry_point, partitions=partitions) + + +def union_spec_for(name: str, union_type: object) -> UnionSpec: + """Extract a discriminated-union annotation for tests.""" + return extract_union(name, union_type) + + def assert_golden(actual: str, golden_path: Path, *, update: bool) -> None: """Compare rendered output against a golden file. diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py index 606e1837f..d81843027 100644 --- a/packages/overture-schema-codegen/tests/test_cli.py +++ b/packages/overture-schema-codegen/tests/test_cli.py @@ -438,6 +438,62 @@ def test_segment_appears_in_markdown_output( assert "subtype" in content +class TestCliGeneratePyspark: + def test_pyspark_format_accepted(self, cli_runner: CliRunner) -> None: + """pyspark format should be a valid --format choice.""" + result = cli_runner.invoke(cli, ["generate", "--format", "pyspark"]) + assert "Invalid value" not in (result.output or "") + assert result.exit_code == 0 + + def test_pyspark_to_output_dir(self, cli_runner: CliRunner, tmp_path: Path) -> None: + """pyspark format with --output-dir should create expression files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "pyspark", + "--tag", + "overture:theme=divisions", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + py_files = list(tmp_path.rglob("*.py")) + assert len(py_files) > 0 + names = {f.stem for f in py_files} + assert "division_area" in names + + def test_pyspark_writes_under_entry_point_namespace( + self, cli_runner: CliRunner, tmp_path: Path + ) 
-> None: + """Expression modules land under the entry-point namespace, no extra `expressions/` wrapper.""" + output_dir = tmp_path / "expressions" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "pyspark", + "--tag", + "overture:theme=divisions", + "--output-dir", + str(output_dir), + ], + ) + assert result.exit_code == 0 + + # Modules land under the entry-point namespace. + assert (output_dir / "overture" / "schema" / "divisions").is_dir() + + # No nested expressions/ subdirectory. + nested = output_dir / "expressions" + assert not nested.exists(), ( + f"Nested expressions/ directory found: {list(nested.iterdir())}" + ) + + class TestReverseReferences: """Integration test: Reverse references appear in generated markdown.""" diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py index cd31f2554..4ae9f2dff 100644 --- a/packages/overture-schema-codegen/tests/test_constraint_description.py +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -1,10 +1,16 @@ """Tests for constraint description (model-level and field-level).""" -from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from annotated_types import Ge, Gt, Interval, Le, Lt from overture.schema.codegen.extraction.field_constraints import ( constraint_display_text, describe_field_constraint, ) +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) from overture.schema.codegen.extraction.model_constraints import ( analyze_model_constraints, ) @@ -339,11 +345,27 @@ def test_gt(self) -> None: def test_lt(self) -> None: assert describe_field_constraint(Lt(lt=100)) == "`< 100`" - def test_min_len(self) -> None: - assert describe_field_constraint(MinLen(min_length=1)) == "Minimum length: 1" + def test_scalar_min_len(self) -> None: + assert ( + 
describe_field_constraint(ScalarMinLen(min_length=1)) == "Minimum length: 1" + ) + + def test_array_min_len(self) -> None: + assert ( + describe_field_constraint(ArrayMinLen(min_length=1)) == "Minimum length: 1" + ) - def test_max_len(self) -> None: - assert describe_field_constraint(MaxLen(max_length=10)) == "Maximum length: 10" + def test_scalar_max_len(self) -> None: + assert ( + describe_field_constraint(ScalarMaxLen(max_length=10)) + == "Maximum length: 10" + ) + + def test_array_max_len(self) -> None: + assert ( + describe_field_constraint(ArrayMaxLen(max_length=10)) + == "Maximum length: 10" + ) def test_interval_closed(self) -> None: assert describe_field_constraint(Interval(ge=0, le=100)) == "`0 ≤ x ≤ 100`" diff --git a/packages/overture-schema-codegen/tests/test_example_loader.py b/packages/overture-schema-codegen/tests/test_example_loader.py index 1f94bc06d..541b81282 100644 --- a/packages/overture-schema-codegen/tests/test_example_loader.py +++ b/packages/overture-schema-codegen/tests/test_example_loader.py @@ -9,6 +9,7 @@ from typing import Annotated, Literal import pytest +from overture.schema.buildings.building import Building from overture.schema.codegen.extraction.examples import ( ExampleRecord, _inject_literal_fields, @@ -20,7 +21,14 @@ resolve_pyproject_path, validate_example, ) +from overture.schema.system.primitive import BBox, Geometry +from overture.schema.transportation import Segment +from overture.schema.transportation.segment.models import ( + RoadSegment, + TransportationSegment, +) from pydantic import BaseModel, ConfigDict, Field, Tag, ValidationError +from shapely.geometry import Point class TestOrderExampleRows: @@ -639,10 +647,6 @@ class TestIntegration: def test_real_building_examples_validate(self) -> None: """Validate real Building examples from the schema package.""" - pytest.importorskip("overture.schema.buildings.building") - - from overture.schema.buildings.building import Building # noqa: PLC0415 - pyproject_path = 
resolve_pyproject_path(Building) assert pyproject_path is not None, "Could not find pyproject.toml for Building" @@ -657,14 +661,6 @@ def test_real_building_examples_validate(self) -> None: def test_real_segment_examples_validate(self) -> None: """Validate real Segment examples (discriminated union with cross-arm fields).""" - pytest.importorskip("overture.schema.transportation") - - from overture.schema.transportation import Segment # noqa: PLC0415 - from overture.schema.transportation.segment.models import ( # noqa: PLC0415 - RoadSegment, - TransportationSegment, - ) - pyproject_path = resolve_pyproject_path(RoadSegment) assert pyproject_path is not None @@ -889,7 +885,6 @@ class Aliased(BaseModel): def test_slots_based_field_flattened(self) -> None: """Non-BaseModel types with __slots__ and properties are flattened.""" - from overture.schema.system.primitive import BBox # noqa: PLC0415 class WithBBox(BaseModel): id: str @@ -907,7 +902,6 @@ class WithBBox(BaseModel): def test_none_slots_based_field_is_leaf(self) -> None: """A slots-based field with None value is a leaf.""" - from overture.schema.system.primitive import BBox # noqa: PLC0415 class WithBBox(BaseModel): id: str @@ -919,8 +913,6 @@ class WithBBox(BaseModel): def test_single_slot_wrapper_is_leaf(self) -> None: """Single-slot types (wrappers like Geometry) are leaf values.""" - from overture.schema.system.primitive import Geometry # noqa: PLC0415 - from shapely.geometry import Point # noqa: PLC0415 class WithGeom(BaseModel): id: str diff --git a/packages/overture-schema-codegen/tests/test_field_walk.py b/packages/overture-schema-codegen/tests/test_field_walk.py new file mode 100644 index 000000000..d0d493cf9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_field_walk.py @@ -0,0 +1,164 @@ +"""Tests for the `FieldShape` walker and structural helpers.""" + +import pytest +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + LiteralScalar, + MapOf, + ModelRef, + 
NewTypeShape, + Primitive, + UnionRef, +) +from overture.schema.codegen.extraction.field_walk import ( + shape_children, + terminal_model_ref, + terminal_of, + terminal_primitive, + terminal_scalar, + walk_shape, +) + + +class TestShapeChildren: + """Direct child enumeration over `FieldShape`.""" + + def test_scalar_has_no_children(self) -> None: + assert list(shape_children(Primitive(base_type="str"))) == [] + + def test_array_yields_element(self) -> None: + inner = Primitive(base_type="int32") + assert list(shape_children(ArrayOf(element=inner))) == [inner] + + def test_map_yields_key_then_value(self) -> None: + k = Primitive(base_type="str") + v = Primitive(base_type="int32") + assert list(shape_children(MapOf(key=k, value=v))) == [k, v] + + def test_model_ref_has_no_children(self) -> None: + sentinel = object() + assert list(shape_children(ModelRef(model=sentinel))) == [] # type: ignore[arg-type] + + def test_union_ref_has_no_children(self) -> None: + sentinel = object() + assert list(shape_children(UnionRef(union=sentinel))) == [] # type: ignore[arg-type] + + def test_newtype_shape_yields_inner(self) -> None: + inner = Primitive(base_type="int32") + nt = NewTypeShape(name="N", ref=object(), inner=inner) + assert list(shape_children(nt)) == [inner] + + +class TestWalkShape: + """Pre-order traversal over `FieldShape` trees.""" + + @staticmethod + def _collect(root: object) -> list[object]: + seen: list[object] = [] + walk_shape(root, seen.append) # type: ignore[arg-type] + return seen + + def test_scalar_visits_once(self) -> None: + root = Primitive(base_type="str") + assert self._collect(root) == [root] + + def test_nested_arrays(self) -> None: + leaf = Primitive(base_type="int32") + middle = ArrayOf(element=leaf) + root = ArrayOf(element=middle) + assert self._collect(root) == [root, middle, leaf] + + def test_map_visits_self_key_value(self) -> None: + k = Primitive(base_type="str") + v = Primitive(base_type="int32") + root = MapOf(key=k, value=v) + assert 
self._collect(root) == [root, k, v] + + def test_model_ref_is_boundary(self) -> None: + sentinel = object() + root = ModelRef(model=sentinel) # type: ignore[arg-type] + assert self._collect(root) == [root] + + def test_union_ref_is_boundary(self) -> None: + sentinel = object() + root = UnionRef(union=sentinel) # type: ignore[arg-type] + assert self._collect(root) == [root] + + def test_array_of_newtype_walks_through(self) -> None: + leaf = Primitive(base_type="str") + nt = NewTypeShape(name="N", ref=object(), inner=leaf) + root = ArrayOf(element=nt) + assert self._collect(root) == [root, nt, leaf] + + +_STR = Primitive(base_type="str") +_INT = Primitive(base_type="int32") +_LITERAL = LiteralScalar(values=("a",)) +_ANY = AnyScalar() +_MODEL = ModelRef(model=object()) # type: ignore[arg-type] +_MAP = MapOf(key=_STR, value=_INT) +_NEWTYPE_STR = NewTypeShape(name="N", ref=object(), inner=_STR) +_ARRAY_NEWTYPE_STR = ArrayOf(element=_NEWTYPE_STR) + + +class TestTerminalFilters: + """`terminal_of` and the three typed `terminal_*` narrowing helpers.""" + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (ArrayOf(element=ArrayOf(element=_INT)), _INT), + (_NEWTYPE_STR, _STR), + (_ARRAY_NEWTYPE_STR, _STR), + (ArrayOf(element=_MODEL), _MODEL), + (_MAP, _MAP), + ], + ) + def test_terminal_of_unwraps_to_innermost( + self, shape: object, expected: object + ) -> None: + assert terminal_of(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (ArrayOf(element=_INT), _INT), + (_NEWTYPE_STR, _STR), + (_LITERAL, None), + (_ANY, None), + (_MODEL, None), + ], + ) + def test_terminal_primitive(self, shape: object, expected: object) -> None: + assert terminal_primitive(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_STR, _STR), + (_LITERAL, _LITERAL), + (_ANY, _ANY), + (ArrayOf(element=_LITERAL), _LITERAL), + (_MODEL, None), + (_MAP, None), 
+ ], + ) + def test_terminal_scalar(self, shape: object, expected: object) -> None: + assert terminal_scalar(shape) is expected # type: ignore[arg-type] + + @pytest.mark.parametrize( + ("shape", "expected"), + [ + (_MODEL, _MODEL), + (ArrayOf(element=_MODEL), _MODEL), + (NewTypeShape(name="N", ref=object(), inner=_MODEL), _MODEL), + (_STR, None), + (_LITERAL, None), + (_ANY, None), + ], + ) + def test_terminal_model_ref(self, shape: object, expected: object) -> None: + assert terminal_model_ref(shape) is expected # type: ignore[arg-type] diff --git a/packages/overture-schema-codegen/tests/test_golden_markdown.py b/packages/overture-schema-codegen/tests/test_golden_markdown.py index 42320ee69..e75eddcc5 100644 --- a/packages/overture-schema-codegen/tests/test_golden_markdown.py +++ b/packages/overture-schema-codegen/tests/test_golden_markdown.py @@ -18,14 +18,11 @@ Venue, Widget, assert_golden, + feature_spec_for_model, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.newtype_extraction import extract_newtype -from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.extraction.specs import FeatureSpec, TypeIdentity from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) @@ -67,12 +64,10 @@ @pytest.fixture(scope="module") def reverse_refs() -> dict[TypeIdentity, list[UsedByEntry]]: """Compute reverse references for all test models.""" - feature_specs = [] + feature_specs: list[FeatureSpec] = [] for model_class, _ in FEATURE_CASES: assert isinstance(model_class, type) and issubclass(model_class, BaseModel) - spec = extract_model(model_class) - expand_model_tree(spec) - feature_specs.append(spec) + feature_specs.append(feature_spec_for_model(model_class)) all_specs = collect_all_supplementary_types(feature_specs) 
return compute_reverse_references(feature_specs, all_specs) @@ -89,8 +84,7 @@ def test_feature_golden( update_golden: bool, reverse_refs: dict[TypeIdentity, list[UsedByEntry]], ) -> None: - spec = extract_model(model_class) - expand_model_tree(spec) + spec = feature_spec_for_model(model_class) used_by = reverse_refs.get(spec.identity) actual = render_feature(spec, used_by=used_by) assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_integration_real_models.py b/packages/overture-schema-codegen/tests/test_integration_real_models.py index 9ed20d112..b0f90e266 100644 --- a/packages/overture-schema-codegen/tests/test_integration_real_models.py +++ b/packages/overture-schema-codegen/tests/test_integration_real_models.py @@ -5,7 +5,7 @@ """ import pytest -from codegen_test_support import assert_literal_field +from codegen_test_support import assert_literal_field, feature_spec_for_model from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.specs import ( FeatureSpec, @@ -15,7 +15,6 @@ is_model_class, is_union_alias, ) -from overture.schema.codegen.extraction.type_analyzer import TypeKind from overture.schema.codegen.extraction.union_extraction import extract_union from overture.schema.codegen.layout.module_layout import entry_point_class from overture.schema.codegen.markdown.pipeline import generate_markdown_pages @@ -29,21 +28,6 @@ class TestDiscoverModels: """Tests for model discovery.""" - def test_discover_models_returns_dict(self) -> None: - """discover_models() should return a dictionary.""" - models = discover_models() - assert isinstance(models, dict) - - def test_discover_models_finds_building( - self, building_class: type[BaseModel] - ) -> None: - """Should discover the Building model.""" - assert issubclass(building_class, BaseModel) - - def test_discover_models_finds_place(self, place_class: type[BaseModel]) -> None: - 
"""Should discover the Place model.""" - assert issubclass(place_class, BaseModel) - def test_discover_models_returns_multiple_themes(self) -> None: """Should discover models from multiple themes.""" models = discover_models() @@ -68,11 +52,10 @@ def test_extract_building_has_fields(self, building_spec: ModelSpec) -> None: field_names = {f.name for f in building_spec.fields} assert "id" in field_names - def test_building_field_types_are_valid(self, building_spec: ModelSpec) -> None: - """All Building fields should have valid TypeInfo.""" + def test_building_field_shapes_are_present(self, building_spec: ModelSpec) -> None: + """Every Building field has a `FieldShape`.""" for field in building_spec.fields: - assert field.type_info is not None - assert field.type_info.kind in TypeKind + assert field.shape is not None class TestExtractPlaceModel: @@ -109,17 +92,12 @@ def test_no_analyze_type_crashes(self, all_discovered_models: dict) -> None: spec = extract_model(model_class) assert spec.name == model_class.__name__ - def test_all_field_types_resolved(self, all_discovered_models: dict) -> None: - """All fields should have resolved TypeInfo.""" + def test_all_field_shapes_resolved(self, all_discovered_models: dict) -> None: + """Every field of every discovered model carries a `FieldShape`.""" for model_class in filter_model_classes(all_discovered_models): spec = extract_model(model_class) for field in spec.fields: - assert field.type_info.base_type, ( - f"No base_type for {spec.name}.{field.name}" - ) - assert field.type_info.kind in TypeKind, ( - f"Invalid kind for {spec.name}.{field.name}" - ) + assert field.shape is not None, f"No shape for {spec.name}.{field.name}" class TestMarkdownRenderingRealModels: @@ -127,7 +105,7 @@ class TestMarkdownRenderingRealModels: def test_render_building_content(self, building_class: type[BaseModel]) -> None: """Building renders with title, field table, and expected fields.""" - markdown = render_feature(extract_model(building_class)) 
+ markdown = render_feature(feature_spec_for_model(building_class)) assert "# Building" in markdown assert "| Name |" in markdown @@ -138,9 +116,7 @@ def test_render_building_content(self, building_class: type[BaseModel]) -> None: def test_render_all_models_without_crash(self, all_discovered_models: dict) -> None: """render_feature should not crash on any discovered model.""" for model_class in filter_model_classes(all_discovered_models): - markdown = render_feature(extract_model(model_class)) - assert isinstance(markdown, str) - assert len(markdown) > 0 + render_feature(feature_spec_for_model(model_class)) class TestDiscriminatedUnions: @@ -221,9 +197,8 @@ def test_segment_discriminator_extracted_from_callable( assert segment_spec.discriminator_field == "subtype" assert segment_spec.discriminator_mapping is not None assert len(segment_spec.discriminator_mapping) == 3 - # Keys are str(enum_member), e.g. "Subtype.ROAD" - road_key = next(k for k in segment_spec.discriminator_mapping if "ROAD" in k) - assert segment_spec.discriminator_mapping[road_key] is RoadSegment + # Keys are runtime string values, e.g. 
"road" + assert segment_spec.discriminator_mapping["road"] is RoadSegment def test_segment_common_base_is_base_model(self, segment_spec: UnionSpec) -> None: """Segment common_base is the shared base class.""" diff --git a/packages/overture-schema-codegen/tests/test_markdown_renderer.py b/packages/overture-schema-codegen/tests/test_markdown_renderer.py index 698f9d70a..92f1d0ac1 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_renderer.py +++ b/packages/overture-schema-codegen/tests/test_markdown_renderer.py @@ -21,13 +21,11 @@ Sources, TreeNode, Venue, + feature_spec_for_model, make_union_spec, ) from overture.schema.codegen.extraction.examples import ExampleRecord -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import ( AnnotatedField, @@ -365,9 +363,7 @@ class ModelWithSources(BaseModel): sources: TestSources | None = None - spec = extract_model(ModelWithSources) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(ModelWithSources)) assert "`TestSources`" in result assert "(list, optional)" in result @@ -441,9 +437,7 @@ class Outer(BaseModel): inner: Inner - spec = extract_model(Outer) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Outer)) assert "| `Inner` |" in result @@ -453,9 +447,7 @@ class TestRenderFeatureInlineExpansion: def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: """Direct model field expands sub-fields with dot notation.""" - spec = extract_model(FeatureWithAddress) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithAddress)) assert "| `address.street` |" in result assert 
"| `address.city` |" in result @@ -463,18 +455,14 @@ def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: def test_list_of_model_fields_expanded_with_bracket_dot_prefix(self) -> None: """List-of-model field expands sub-fields with []. notation.""" - spec = extract_model(FeatureWithSources) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithSources)) assert "| `sources[]` |" in result assert "| `sources[].dataset` |" in result def test_cycle_detection_prevents_infinite_recursion(self) -> None: """Recursive model emits parent row but does not recurse.""" - spec = extract_model(TreeNode) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(TreeNode)) # The parent field row appears assert "| `parent` |" in result @@ -491,9 +479,7 @@ def test_primitive_field_unchanged(self) -> None: def test_parent_row_preserved_before_expansion(self) -> None: """The parent field row still appears before expanded sub-fields.""" - spec = extract_model(FeatureWithAddress) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(FeatureWithAddress)) # Parent row for 'address' itself appears assert "| `address` |" in result @@ -589,13 +575,11 @@ def test_field_with_no_description_gets_constraint_note(self) -> None: class TestRenderFeatureFieldConstraints: - """Tests for field-level constraint annotation from TypeInfo.""" + """Tests for field-level constraint annotation from the field's shape.""" def test_venue_geometry_shows_allowed_types(self) -> None: """Venue's geometry field shows GeometryTypeConstraint as a note.""" - spec = extract_model(Venue) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Venue)) lines = result.splitlines() geo_line = next(line for line in lines if "| `geometry` |" in line) @@ -603,8 +587,6 @@ def 
test_venue_geometry_shows_allowed_types(self) -> None: def test_venue_reference_links_when_context_available(self) -> None: """Reference constraint links the target type when LinkContext has the page.""" - spec = extract_model(Venue) - expand_model_tree(spec) ctx = LinkContext( page_path=PurePosixPath("music/venue.md"), registry={ @@ -613,7 +595,7 @@ def test_venue_reference_links_when_context_available(self) -> None: ) }, ) - result = render_feature(spec, link_ctx=ctx) + result = render_feature(feature_spec_for_model(Venue), link_ctx=ctx) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -622,9 +604,7 @@ def test_venue_reference_links_when_context_available(self) -> None: def test_venue_reference_unlinked_without_context(self) -> None: """Reference constraint renders as plain code when no LinkContext.""" - spec = extract_model(Venue) - expand_model_tree(spec) - result = render_feature(spec) + result = render_feature(feature_spec_for_model(Venue)) lines = result.splitlines() ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) @@ -1203,7 +1183,7 @@ def test_shared_fields_have_no_variant_tag(self) -> None: AnnotatedField( field_spec=FieldSpec( name="id", - type_info=STR_TYPE, + shape=STR_TYPE, description="ID", is_required=True, ), @@ -1217,17 +1197,21 @@ def test_shared_fields_have_no_variant_tag(self) -> None: def test_variant_fields_have_inline_tag(self) -> None: """Variant-specific fields get *(Variant)* tag.""" + + class RoadSegment(BaseModel): + pass + spec = make_union_spec( name="Segment", annotated_fields=[ AnnotatedField( field_spec=FieldSpec( name="speed_limit", - type_info=STR_TYPE, + shape=STR_TYPE, description=None, is_required=False, ), - variant_sources=("RoadSegment",), + variant_sources=(RoadSegment,), ), ], ) diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py index 
e54426f5f..fc1b946a2 100644 --- a/packages/overture-schema-codegen/tests/test_markdown_type_format.py +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -4,15 +4,18 @@ from pathlib import PurePosixPath from typing import Literal, NewType -from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity -from overture.schema.codegen.extraction.type_analyzer import ( - TypeInfo, - TypeKind, - analyze_type, +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + LiteralScalar, + Scalar, + UnionRef, ) +from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import analyze_type from overture.schema.codegen.markdown.link_computation import LinkContext from overture.schema.codegen.markdown.type_format import ( - format_dict_type, + _registry_name, format_type, format_underlying_type, ) @@ -32,34 +35,34 @@ class TestFormatType: """Tests for format_type.""" def test_plain_str_renders_as_string(self) -> None: - ti = analyze_type(str) - assert format_type(_make_field(ti)) == "`string`" + assert format_type(_make_field(str)) == "`string`" def test_optional_adds_qualifier(self) -> None: - ti = analyze_type(str | None) - assert format_type(_make_field(ti, is_required=False)) == "`string` (optional)" + assert ( + format_type(_make_field(str | None, is_required=False)) + == "`string` (optional)" + ) def test_literal_renders_as_quoted_value(self) -> None: - ti = analyze_type(Literal["places"]) - assert format_type(_make_field(ti)) == '`"places"`' + assert format_type(_make_field(Literal["places"])) == '`"places"`' def test_multi_value_literal_renders_comma_separated(self) -> None: - ti = analyze_type(Literal["a", "b", "c"]) - assert format_type(_make_field(ti)) == '`"a"` \\| `"b"` \\| `"c"`' + assert ( + format_type(_make_field(Literal["a", "b", "c"])) + == '`"a"` \\| `"b"` \\| `"c"`' + ) def test_enum_without_context_renders_as_code(self) -> 
None: class Color(str, Enum): RED = "red" - ti = analyze_type(Color) - assert format_type(_make_field(ti)) == "`Color`" + assert format_type(_make_field(Color)) == "`Color`" def test_enum_with_link_context(self) -> None: class Color(str, Enum): RED = "red" - ti = analyze_type(Color) - field = _make_field(ti) + field = _make_field(Color) ctx = LinkContext( page_path=PurePosixPath("buildings/building/building.md"), registry={ @@ -69,55 +72,70 @@ class Color(str, Enum): assert format_type(field, ctx) == "[`Color`](../../types/enums/color.md)" def test_list_of_primitives(self) -> None: - ti = analyze_type(list[str]) - assert format_type(_make_field(ti)) == "`list`" + assert format_type(_make_field(list[str])) == "`list`" def test_nested_list_of_primitives(self) -> None: - ti = analyze_type(list[list[str]]) - assert format_type(_make_field(ti)) == "`list>`" + assert format_type(_make_field(list[list[str]])) == "`list>`" def test_registered_primitive_not_linked(self) -> None: - ti = analyze_type(int32) - result = format_type(_make_field(ti)) + result = format_type(_make_field(int32)) assert result == "`int32`" assert "](int32.md)" not in result -class TestFormatDictType: - """Tests for format_dict_type.""" - - def test_simple_dict_renders_as_map(self) -> None: - ti = analyze_type(dict[str, int]) - result = format_dict_type(ti) - assert result == "map" - - def test_dict_with_newtype_shows_semantic_name(self) -> None: - MyKey = NewType("MyKey", str) - ti = analyze_type(dict[MyKey, int]) - result = format_dict_type(ti) - assert result == "map" - - def _make_field( - ti: TypeInfo, *, name: str = "x", is_required: bool = True + annotation: object, + *, + name: str = "x", + is_required: bool = True, + is_optional: bool = False, ) -> FieldSpec: - """Build a FieldSpec for test convenience.""" - return FieldSpec(name=name, type_info=ti, description=None, is_required=is_required) + """Build a FieldSpec from an annotation for test convenience.""" + from 
overture.schema.codegen.extraction.field import FieldShape + + if isinstance(annotation, (Scalar, ArrayOf, UnionRef)): + shape: FieldShape = annotation # type: ignore[assignment] + else: + shape, resolved_optional, _ = analyze_type(annotation) + is_optional = is_optional or resolved_optional + return FieldSpec( + name=name, + shape=shape, + description=None, + is_required=is_required, + is_optional=is_optional, + ) + + +def _union_ref(members: list[type]) -> UnionRef: + """Build a UnionRef for tests without running through extract_union.""" + from overture.schema.codegen.extraction.specs import UnionSpec + from pydantic import BaseModel + + union_spec = UnionSpec( + name=members[0].__name__, + description=None, + annotated_fields=[], + members=members, # type: ignore[arg-type] + discriminator_field=None, + discriminator_mapping=None, + source_annotation=object(), + common_base=BaseModel, + ) + return UnionRef(union=union_spec) class TestFormatUnionType: - """Tests for UNION-kind TypeInfo in format_type.""" + """Tests for union FieldShape in format_type.""" def test_union_renders_all_members(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_type(_make_field(ti)) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB]))) assert "`_ModelA`" in result assert "`_ModelB`" in result # Pipe separator escaped for table cells assert r"\|" in result def test_union_with_link_context_links_each_member(self) -> None: - ti = analyze_type(_ModelA | _ModelB) ctx = LinkContext( page_path=PurePosixPath("theme/feature/feature.md"), registry={ @@ -129,39 +147,36 @@ def test_union_with_link_context_links_each_member(self) -> None: ), }, ) - result = format_type(_make_field(ti), ctx) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB])), ctx) assert "[`_ModelA`](types/model_a.md)" in result assert "[`_ModelB`](types/model_b.md)" in result def test_optional_union_adds_qualifier(self) -> None: - ti = analyze_type(_ModelA | _ModelB | None) - 
result = format_type(_make_field(ti, is_required=False)) + result = format_type( + _make_field( + _union_ref([_ModelA, _ModelB]), is_required=False, is_optional=True + ) + ) assert "(optional)" in result assert "`_ModelA`" in result assert "`_ModelB`" in result def test_list_of_union_adds_qualifier(self) -> None: - ti = TypeInfo( - base_type="_ModelA", - kind=TypeKind.UNION, - list_depth=1, - union_members=(_ModelA, _ModelB), - ) - result = format_type(_make_field(ti)) + """list[union] renders with (list) qualifier.""" + shape = ArrayOf(element=_union_ref([_ModelA, _ModelB])) + result = format_type(_make_field(shape)) assert "(list)" in result assert "`_ModelA`" in result assert "`_ModelB`" in result def test_union_members_unlinked_without_context(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_type(_make_field(ti)) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB]))) # No markdown links without context assert "]()" not in result assert "[`" not in result def test_union_partial_links(self) -> None: """Members with pages get linked; members without don't.""" - ti = analyze_type(_ModelA | _ModelB) ctx = LinkContext( page_path=PurePosixPath("theme/feature/feature.md"), registry={ @@ -170,19 +185,45 @@ def test_union_partial_links(self) -> None: ) }, ) - result = format_type(_make_field(ti), ctx) + result = format_type(_make_field(_union_ref([_ModelA, _ModelB])), ctx) assert "[`_ModelA`](types/model_a.md)" in result assert "`_ModelB`" in result # _ModelB should NOT be linked assert "[`_ModelB`]" not in result +class TestScalarVariantRendering: + """format_type and _registry_name handle all three Scalar variants correctly.""" + + def test_registry_name_any_scalar(self) -> None: + assert _registry_name(AnyScalar()) == "Any" + + def test_registry_name_literal_scalar(self) -> None: + assert _registry_name(LiteralScalar(values=("road",))) == "Literal" + + def test_any_scalar_renders_as_Any(self) -> None: + assert 
format_type(_make_field(AnyScalar())) == "`Any`" + + def test_literal_scalar_renders_first_value_quoted(self) -> None: + # LiteralScalar goes through the Literal path in format_type, not _registry_name + assert format_type(_make_field(LiteralScalar(values=("road",)))) == '`"road"`' + + def test_literal_scalar_multi_value(self) -> None: + result = format_type(_make_field(LiteralScalar(values=("a", "b")))) + assert '`"a"`' in result + assert '`"b"`' in result + + def test_list_of_literal_single_value(self) -> None: + assert format_type(_make_field(list[Literal["road"]])) == '`list<"road">`' + + def test_list_of_literal_multi_value(self) -> None: + assert format_type(_make_field(list[Literal["a", "b"]])) == '`list<"a" | "b">`' + + class TestPydanticTypeLinking: """Tests for PRIMITIVE types with pages getting linked.""" def test_pydantic_type_linked_when_in_registry(self) -> None: - ti = analyze_type(HttpUrl) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -191,24 +232,20 @@ def test_pydantic_type_linked_when_in_registry(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(HttpUrl), ctx) assert "[`HttpUrl`]" in result assert "pydantic/networks/http_url.md" in result def test_pydantic_type_unlinked_without_registry_entry(self) -> None: - ti = analyze_type(HttpUrl) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={}, ) - result = format_type(field, ctx) + result = format_type(_make_field(HttpUrl), ctx) assert result == "`HttpUrl`" assert "[" not in result def test_list_of_pydantic_type_linked(self) -> None: - ti = analyze_type(list[HttpUrl]) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -217,14 +254,12 @@ def test_list_of_pydantic_type_linked(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(list[HttpUrl]), ctx) 
assert "HttpUrl" in result assert "pydantic/networks/http_url.md" in result def test_registered_primitive_links_to_aggregate_page(self) -> None: """int32 links to the primitives aggregate page when in registry.""" - ti = analyze_type(int32) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ @@ -233,7 +268,7 @@ def test_registered_primitive_links_to_aggregate_page(self) -> None: ) }, ) - result = format_type(field, ctx) + result = format_type(_make_field(int32), ctx) assert "[`int32`]" in result assert "system/primitive/primitives.md" in result @@ -249,8 +284,7 @@ class TestListOfSemanticNewtype: def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: """list[ScalarNewType] renders as list, not Name (list).""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[ScalarNT]) - result = format_type(_make_field(ti)) + result = format_type(_make_field(list[ScalarNT])) assert "list<" in result assert "ScalarNT" in result assert "(list)" not in result @@ -258,23 +292,20 @@ def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: def test_newtype_wrapping_list_renders_qualifier(self) -> None: """NewType wrapping list[X] renders as Name (list).""" ListNT = NewType("ListNT", list[str]) - ti = analyze_type(ListNT) - result = format_type(_make_field(ti)) + result = format_type(_make_field(ListNT)) assert "(list)" in result assert "ListNT" in result def test_list_of_scalar_newtype_with_link(self) -> None: """list[ScalarNewType] with link context renders linked list.""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[ScalarNT]) - field = _make_field(ti) ctx = LinkContext( page_path=PurePosixPath("places/place/place.md"), registry={ TypeIdentity(ScalarNT, "ScalarNT"): PurePosixPath("system/scalar_nt.md") }, ) - result = format_type(field, ctx) + result = format_type(_make_field(list[ScalarNT]), ctx) assert "list<" in result assert "ScalarNT" in result assert "system/scalar_nt.md" in 
result @@ -283,8 +314,7 @@ def test_list_of_scalar_newtype_with_link(self) -> None: def test_nested_list_of_scalar_newtype_renders_nested_list_syntax(self) -> None: """list[list[ScalarNewType]] renders as list>.""" ScalarNT = NewType("ScalarNT", str) - ti = analyze_type(list[list[ScalarNT]]) - result = format_type(_make_field(ti)) + result = format_type(_make_field(list[list[ScalarNT]])) assert "list<" in result assert "list<`" in result or "`list None: class TestFormatUnderlyingUnionType: - """Tests for UNION-kind TypeInfo in format_underlying_type.""" + """Tests for union FieldShape in format_underlying_type.""" def test_union_renders_all_members(self) -> None: - ti = analyze_type(_ModelA | _ModelB) - result = format_underlying_type(ti) + shape = _union_ref([_ModelA, _ModelB]) + result = format_underlying_type(shape) assert result == "`_ModelA` | `_ModelB`" def test_union_with_link_context(self) -> None: - ti = analyze_type(_ModelA | _ModelB) + shape = _union_ref([_ModelA, _ModelB]) ctx = LinkContext( page_path=PurePosixPath("types/my_union.md"), registry={ @@ -312,6 +342,6 @@ def test_union_with_link_context(self) -> None: ), }, ) - result = format_underlying_type(ti, ctx) + result = format_underlying_type(shape, ctx) assert "[`_ModelA`](../theme/feature/types/model_a.md)" in result assert "[`_ModelB`](../theme/feature/types/model_b.md)" in result diff --git a/packages/overture-schema-codegen/tests/test_model_extraction.py b/packages/overture-schema-codegen/tests/test_model_extraction.py new file mode 100644 index 000000000..d5791ee61 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_model_extraction.py @@ -0,0 +1,43 @@ +"""Tests for `extract_model`.""" + +from overture.schema.codegen.extraction.field import ArrayOf, UnionRef +from overture.schema.codegen.extraction.field_walk import terminal_of +from overture.schema.codegen.extraction.length_constraints import ArrayMinLen +from overture.schema.codegen.extraction.model_extraction import 
extract_model +from overture.schema.common.scoping.vehicle import VehicleSelector +from pydantic import BaseModel, Field + + +def test_extract_model_populates_union_terminal() -> None: + """`extract_model` resolves UNION terminals to a `UnionRef` carrying a `UnionSpec`.""" + + class Container(BaseModel): + items: list[VehicleSelector] + + spec = extract_model(Container) + items_field = next(f for f in spec.fields if f.name == "items") + + terminal = terminal_of(items_field.shape) + assert isinstance(terminal, UnionRef) + assert terminal.union.discriminator_field == "dimension" + + +def test_field_metadata_minlen_wrapped_as_array_min_len() -> None: + """MinLen in field_info.metadata is wrapped to ArrayMinLen, not left as raw MinLen. + + Pydantic strips the outermost Annotated wrapper from non-optional, + non-union list fields and moves MinLen to field_info.metadata. Without + routing through attach_constraints, the raw MinLen would survive into + the constraint table untyped, causing dispatch to raise TypeError at + codegen time. 
+ """ + + class M(BaseModel): + items: list[str] = Field(min_length=2) + + spec = extract_model(M) + items_field = next(f for f in spec.fields if f.name == "items") + + assert isinstance(items_field.shape, ArrayOf) + constraints = [cs.constraint for cs in items_field.shape.constraints] + assert ArrayMinLen(min_length=2) in constraints diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py index f2b2bd257..c033a19cf 100644 --- a/packages/overture-schema-codegen/tests/test_model_extractor.py +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -12,11 +12,9 @@ assert_literal_field, find_field, ) -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) -from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.extraction.field import ModelRef, Primitive +from overture.schema.codegen.extraction.field_walk import has_array_layer, terminal_of +from overture.schema.codegen.extraction.model_extraction import extract_model from overture.schema.system.field_constraint import UniqueItemsConstraint from overture.schema.system.model_constraint import ( FieldEqCondition, @@ -90,7 +88,9 @@ class SimpleModel(BaseModel): assert result.description == "A simple test model." 
assert len(result.fields) == 1 assert result.fields[0].name == "name" - assert result.fields[0].type_info.base_type == "str" + scalar = terminal_of(result.fields[0].shape) + assert isinstance(scalar, Primitive) + assert scalar.base_type == "str" assert result.fields[0].is_required is True def test_extract_model_does_not_set_entry_point(self) -> None: @@ -118,7 +118,7 @@ class ModelWithOptional(BaseModel): nickname_field = find_field(result, "nickname") assert nickname_field.is_required is False - assert nickname_field.type_info.is_optional is True + assert nickname_field.is_optional is True def test_extract_model_with_field_description(self) -> None: """Should extract field descriptions from Field().""" @@ -144,8 +144,10 @@ class ModelWithList(BaseModel): tags_field = result.fields[0] assert tags_field.name == "tags" - assert tags_field.type_info.is_list is True - assert tags_field.type_info.base_type == "str" + assert has_array_layer(tags_field.shape) + scalar = terminal_of(tags_field.shape) + assert isinstance(scalar, Primitive) + assert scalar.base_type == "str" class TestExtractModelWithThemeType: @@ -365,115 +367,115 @@ class Child(Parent, ChildMixin): assert field_names == ["core", "p", "pm", "own", "cm"] -class TestExpandModelTree: - """Tests for expand_model_tree.""" +class TestSubModelExpansion: + """Sub-model resolution at extract_model time.""" def test_model_without_sub_models_unchanged(self) -> None: - """Fields without MODEL kind get model=None.""" + """Fields without MODEL kind have no ModelRef in their shape.""" class Simple(BaseModel): name: str count: int spec = extract_model(Simple) - expand_model_tree(spec) for f in spec.fields: - assert f.model is None - assert f.starts_cycle is False + assert not isinstance(terminal_of(f.shape), ModelRef) def test_nested_model_gets_expanded(self) -> None: - """MODEL-kind fields get their model populated.""" + """MODEL-kind fields resolve to a ModelRef in the shape.""" spec = extract_model(FeatureWithAddress) - 
expand_model_tree(spec) addr_field = find_field(spec, "address") - assert addr_field.model is not None - assert addr_field.model.name == "Address" - assert addr_field.starts_cycle is False + terminal = terminal_of(addr_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model.name == "Address" + assert terminal.starts_cycle is False # Sub-model fields should exist - sub_names = [f.name for f in addr_field.model.fields] + sub_names = [f.name for f in terminal.model.fields] assert "street" in sub_names assert "city" in sub_names def test_cycle_detected_and_marked(self) -> None: - """Self-referential model gets starts_cycle=True.""" + """Self-referential model gets starts_cycle=True on the ModelRef.""" spec = extract_model(TreeNode) - expand_model_tree(spec) parent_field = find_field(spec, "parent") - assert parent_field.model is not None - assert parent_field.model is spec # Same object -- cycle - assert parent_field.starts_cycle is True + terminal = terminal_of(parent_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model is spec # Same object -- cycle + assert terminal.starts_cycle is True - def test_shared_reference_not_marked_as_cycle(self) -> None: - """Two models referencing the same sub-model share it without cycle.""" + def test_shared_reference_within_one_extraction(self) -> None: + """Two fields referencing the same sub-model share the ModelSpec.""" class Shared(BaseModel): value: str - class ModelA(BaseModel): - ref: Shared - - class ModelB(BaseModel): - ref: Shared - - cache: dict[type, ModelSpec] = {} - spec_a = extract_model(ModelA) - expand_model_tree(spec_a, cache) - - spec_b = extract_model(ModelB) - expand_model_tree(spec_b, cache) + class Container(BaseModel): + first: Shared + second: Shared - ref_a = find_field(spec_a, "ref") - ref_b = find_field(spec_b, "ref") + spec = extract_model(Container) + first = find_field(spec, "first") + second = find_field(spec, "second") - # Same ModelSpec object, neither is a 
cycle - assert ref_a.model is ref_b.model - assert ref_a.starts_cycle is False - assert ref_b.starts_cycle is False + first_ref = terminal_of(first.shape) + second_ref = terminal_of(second.shape) + assert isinstance(first_ref, ModelRef) + assert isinstance(second_ref, ModelRef) + # Within one extract_model call, the cache ensures the same + # ModelSpec is reused for both references; neither is a cycle. + assert first_ref.model is second_ref.model + assert first_ref.starts_cycle is False + assert second_ref.starts_cycle is False def test_list_of_model_gets_expanded(self) -> None: - """list[Model] fields also get their model populated.""" + """list[Model] fields also get their model populated via ModelRef.""" class HasList(BaseModel): items: list[SourceItem] spec = extract_model(HasList) - expand_model_tree(spec) items_field = find_field(spec, "items") - assert items_field.model is not None - assert items_field.model.name == "SourceItem" + terminal = terminal_of(items_field.shape) + assert isinstance(terminal, ModelRef) + assert terminal.model.name == "SourceItem" class TestFieldInfoMetadataConstraints: - """Constraints from field_info.metadata are merged into TypeInfo. + """Constraints from `field_info.metadata` attach to the field's shape. Pydantic strips the Annotated wrapper from some fields and moves the - metadata to field_info.metadata. extract_model merges these back into - TypeInfo.constraints so they aren't silently dropped. + metadata to `field_info.metadata`. `extract_model` attaches these + constraints to the appropriate `FieldShape` layer so they aren't + silently dropped. 
""" def test_geometry_type_constraint_extracted(self) -> None: """GeometryTypeConstraint on geometry field should appear in constraints.""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") constraint_types = [ - type(cs.constraint) for cs in geometry_field.type_info.constraints + type(cs.constraint) for cs in all_constraints(geometry_field.shape) ] assert GeometryTypeConstraint in constraint_types def test_geometry_type_constraint_has_null_source(self) -> None: """Constraints from field_info.metadata have source_ref=None (not from a NewType).""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Venue) geometry_field = find_field(spec, "geometry") geo_constraints = [ cs - for cs in geometry_field.type_info.constraints + for cs in all_constraints(geometry_field.shape) if isinstance(cs.constraint, GeometryTypeConstraint) ] assert len(geo_constraints) == 1 @@ -485,12 +487,14 @@ def test_metadata_constraints_not_duplicated(self) -> None: When field_info.metadata is empty (Pydantic kept the Annotated wrapper), no extra constraints are added. 
""" + from overture.schema.codegen.extraction.field_walk import all_constraints + spec = extract_model(Instrument) tags_field = find_field(spec, "tags") unique_constraints = [ cs - for cs in tags_field.type_info.constraints + for cs in all_constraints(tags_field.shape) if isinstance(cs.constraint, UniqueItemsConstraint) ] assert len(unique_constraints) == 1 @@ -498,6 +502,7 @@ def test_metadata_constraints_not_duplicated(self) -> None: def test_standalone_annotated_field_extracts_metadata(self) -> None: """Direct Annotated[Type, constraint] fields (non-optional, non-union) get their constraints from field_info.metadata.""" + from overture.schema.codegen.extraction.field_walk import all_constraints class Model(BaseModel): geo: Annotated[ @@ -509,7 +514,7 @@ class Model(BaseModel): geo_field = find_field(spec, "geo") constraint_types = [ - type(cs.constraint) for cs in geo_field.type_info.constraints + type(cs.constraint) for cs in all_constraints(geo_field.shape) ] assert GeometryTypeConstraint in constraint_types diff --git a/packages/overture-schema-codegen/tests/test_newtype_extraction.py b/packages/overture-schema-codegen/tests/test_newtype_extraction.py index 6cd73c5c2..150198668 100644 --- a/packages/overture-schema-codegen/tests/test_newtype_extraction.py +++ b/packages/overture-schema-codegen/tests/test_newtype_extraction.py @@ -3,6 +3,7 @@ from typing import Annotated, NewType from codegen_test_support import STR_TYPE +from overture.schema.codegen.extraction.field import ArrayOf from overture.schema.codegen.extraction.newtype_extraction import extract_newtype from overture.schema.codegen.extraction.specs import NewTypeSpec from overture.schema.system.field_constraint import UniqueItemsConstraint @@ -19,15 +20,23 @@ def test_extract_hex_color(self) -> None: spec = extract_newtype(HexColor) assert spec.name == "HexColor" - assert spec.type_info.newtype_name == "HexColor" + # Outermost NewTypeShape stripped; shape is the underlying scalar. 
+ from overture.schema.codegen.extraction.field_walk import terminal_scalar + + assert terminal_scalar(spec.shape) is not None def test_extract_id(self) -> None: """Should extract Id NewType with nested chain.""" spec = extract_newtype(Id) assert spec.name == "Id" - assert spec.type_info.newtype_name == "Id" - assert spec.type_info.base_type == "NoWhitespaceString" + # Id wraps NoWhitespaceString, which is a registered semantic newtype + # resolving to a Scalar. After stripping "Id", shape is Scalar with + # base_type "NoWhitespaceString". + from overture.schema.codegen.extraction.field import Primitive + + assert isinstance(spec.shape, Primitive) + assert spec.shape.base_type == "NoWhitespaceString" def test_extract_newtype_wrapping_list(self) -> None: """Should extract a list-wrapping NewType.""" @@ -41,8 +50,8 @@ class Item(BaseModel): spec = extract_newtype(TestSources) assert spec.name == "TestSources" - assert spec.type_info.is_list is True - assert spec.type_info.newtype_name == "TestSources" + # After stripping the outer NewTypeShape("TestSources"), shape is ArrayOf. + assert isinstance(spec.shape, ArrayOf) def test_extract_newtype_without_doc_uses_field_description(self) -> None: """NewType with Field(description=...) 
but no __doc__ uses Field description.""" @@ -66,7 +75,7 @@ class TestNewTypeSpecSourceType: """Tests for source_type on NewTypeSpec.""" def test_newtype_spec_source_type_defaults_to_none(self) -> None: - spec = NewTypeSpec(name="Test", description=None, type_info=STR_TYPE) + spec = NewTypeSpec(name="Test", description=None, shape=STR_TYPE) assert spec.source_type is None def test_extract_newtype_sets_source_type(self) -> None: diff --git a/packages/overture-schema-codegen/tests/test_numeric_extraction.py b/packages/overture-schema-codegen/tests/test_numeric_extraction.py index ee604ba75..6f3a5767f 100644 --- a/packages/overture-schema-codegen/tests/test_numeric_extraction.py +++ b/packages/overture-schema-codegen/tests/test_numeric_extraction.py @@ -55,7 +55,7 @@ class TestExtractNumericBounds: def test_signed_integer_bounds(self) -> None: """Should extract ge/le from a constrained integer NewType.""" spec = extract_newtype(int32) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == -(2**31) assert bounds.le == 2**31 - 1 @@ -63,7 +63,7 @@ def test_signed_integer_bounds(self) -> None: def test_unsigned_integer_bounds(self) -> None: """Should extract 0-based bounds from unsigned NewType.""" spec = extract_newtype(uint8) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == 0 assert bounds.le == 255 @@ -71,7 +71,7 @@ def test_unsigned_integer_bounds(self) -> None: def test_int64_bounds(self) -> None: """Should extract large bounds from int64.""" spec = extract_newtype(int64) - bounds = extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge == -(2**63) assert bounds.le == 2**63 - 1 @@ -79,7 +79,7 @@ def test_int64_bounds(self) -> None: def test_unconstrained_type(self) -> None: """Should return empty Interval for types without numeric constraints.""" spec = extract_newtype(float32) - bounds = 
extract_numeric_bounds(spec.type_info) + bounds = extract_numeric_bounds(spec.shape) assert bounds.ge is None assert bounds.gt is None @@ -91,8 +91,8 @@ def test_exclusive_bounds(self) -> None: ExclusiveBounded = NewType( "ExclusiveBounded", Annotated[int, Field(gt=0, lt=100)] ) - type_info = analyze_type(ExclusiveBounded) - bounds = extract_numeric_bounds(type_info) + shape, _, _ = analyze_type(ExclusiveBounded) + bounds = extract_numeric_bounds(shape) assert bounds.gt == 0 assert bounds.lt == 100 @@ -102,8 +102,8 @@ def test_exclusive_bounds(self) -> None: def test_mixed_bounds(self) -> None: """Should extract a mix of inclusive and exclusive bounds.""" MixedBounded = NewType("MixedBounded", Annotated[int, Field(ge=0, lt=256)]) - type_info = analyze_type(MixedBounded) - bounds = extract_numeric_bounds(type_info) + shape, _, _ = analyze_type(MixedBounded) + bounds = extract_numeric_bounds(shape) assert bounds.ge == 0 assert bounds.lt == 256 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_base_row.py b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py new file mode 100644 index 000000000..fadcd94fe --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_base_row.py @@ -0,0 +1,319 @@ +"""Tests for valid-row generation from FeatureSpecs.""" + +import uuid + +import pytest +from codegen_test_support import ( + FeatureWithRequiredUrl, + discover_feature, + feature_spec_for_model, +) +from overture.schema.codegen.extraction.field import AnyScalar, LiteralScalar, ModelRef +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + FieldSpec, + UnionSpec, +) +from overture.schema.codegen.pyspark.test_data.base_row import ( + _primitive_default, + generate_arm_rows, + generate_base_row, + generate_populated_arm_rows, + generate_populated_row, + value_for_field, +) +from pydantic import HttpUrl, TypeAdapter + + +@pytest.fixture(scope="module") +def connector_spec() -> FeatureSpec: + return 
discover_feature("Connector") + + +@pytest.fixture(scope="module") +def segment_spec() -> FeatureSpec: + return discover_feature("Segment") + + +@pytest.fixture(scope="module") +def segment_union(segment_spec: FeatureSpec) -> UnionSpec: + assert isinstance(segment_spec, UnionSpec) + return segment_spec + + +class TestPrimitiveDefault: + """Primitive defaults for string-like types that need valid placeholders.""" + + def test_http_url_is_valid(self) -> None: + val = _primitive_default("HttpUrl") + TypeAdapter(HttpUrl).validate_python(val) + + def test_email_str_contains_at(self) -> None: + val = _primitive_default("EmailStr") + assert isinstance(val, str) + assert "@" in val + + +class TestBaseRowUrlFields: + """Base rows with URL-typed fields produce Pydantic-valid values.""" + + def test_required_url_field_passes_validation(self) -> None: + spec = feature_spec_for_model(FeatureWithRequiredUrl) + row = generate_base_row(spec) + TypeAdapter(FeatureWithRequiredUrl).validate_python(row) + + +class TestGenerateBaseRow: + def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert connector_spec.source_type is not None + TypeAdapter(connector_spec.source_type).validate_python(row) + + def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + required_names = {f.name for f in connector_spec.fields if f.is_required} + assert required_names <= set(row.keys()) + + def test_optional_fields_absent(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + optional_names = {f.name for f in connector_spec.fields if not f.is_required} + assert optional_names.isdisjoint(set(row.keys())) + + def test_id_is_deterministic_uuid(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert "id" in row + parsed = uuid.UUID(row["id"]) + assert parsed.version == 5 + + def 
test_geometry_is_valid_wkt(self, connector_spec: FeatureSpec) -> None: + row = generate_base_row(connector_spec) + assert "geometry" in row + assert row["geometry"].startswith("POINT") + + +class TestGenerateArmRows: + def test_returns_dict_per_arm( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + assert segment_union.discriminator_mapping is not None + assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) + + def test_each_row_passes_validation( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) + for _arm_val, row in rows.items(): + adapter.validate_python(row) + + def test_discriminator_field_set( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_arm_rows(segment_spec) + assert segment_union.discriminator_field is not None + for arm_val, row in rows.items(): + assert row[segment_union.discriminator_field] == arm_val + + def test_arm_specific_required_fields_present( + self, segment_spec: FeatureSpec + ) -> None: + """Road arm requires 'class' field; water arm does not.""" + rows = generate_arm_rows(segment_spec) + assert "class" in rows["road"] + assert "class" not in rows["water"] + + +class TestPopulateOptionalFlag: + """populate_optional flag controls recursion depth.""" + + def test_value_for_field_default_skips_optional_children( + self, connector_spec: FeatureSpec + ) -> None: + """Default (`populate_optional=False`) yields sparse sub-models.""" + field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(field.shape) + val = value_for_field(field, "Connector") + assert isinstance(val, list) + elem = val[0] + assert isinstance(elem, dict) + optional_names = {f.name for f in model_ref.model.fields if not f.is_required} + assert not (optional_names & 
set(elem.keys())) + + def test_value_for_field_populate_includes_optional_children( + self, connector_spec: FeatureSpec + ) -> None: + """`populate_optional=True` yields sub-models that include optional fields.""" + field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(field.shape) + val = value_for_field(field, "Connector", populate_optional=True) + assert isinstance(val, list) + elem = val[0] + assert isinstance(elem, dict) + optional_names = {f.name for f in model_ref.model.fields if not f.is_required} + assert optional_names & set(elem.keys()) == optional_names + + +def _list_of_model(shape: object) -> ModelRef: + """Peel `ArrayOf` / `NewTypeShape` layers to reach the inner `ModelRef`.""" + from overture.schema.codegen.extraction.field_walk import terminal_of + + terminal = terminal_of(shape) # type: ignore[arg-type] + assert isinstance(terminal, ModelRef), ( + f"Expected ModelRef terminal, got {type(terminal).__name__}" + ) + return terminal + + +class TestGeneratePopulatedRow: + def test_passes_pydantic_validation(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + assert connector_spec.source_type is not None + TypeAdapter(connector_spec.source_type).validate_python(row) + + def test_required_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + required_names = {f.name for f in connector_spec.fields if f.is_required} + assert required_names <= set(row.keys()) + + def test_optional_fields_present(self, connector_spec: FeatureSpec) -> None: + row = generate_populated_row(connector_spec) + optional_names = {f.name for f in connector_spec.fields if not f.is_required} + assert optional_names <= set(row.keys()) + + def test_id_matches_sparse_row(self, connector_spec: FeatureSpec) -> None: + sparse = generate_base_row(connector_spec) + populated = generate_populated_row(connector_spec) + assert populated["id"] == sparse["id"] 
+ + def test_nested_structs_populated(self, connector_spec: FeatureSpec) -> None: + """Optional struct fields contain populated sub-dicts, not empty.""" + row = generate_populated_row(connector_spec) + assert "sources" in row + elem = row["sources"][0] + sources_field = next(f for f in connector_spec.fields if f.name == "sources") + model_ref = _list_of_model(sources_field.shape) + optional_source_fields = { + f.name for f in model_ref.model.fields if not f.is_required + } + present = optional_source_fields & set(elem.keys()) + assert present == optional_source_fields + + +class TestGeneratePopulatedArmRows: + def test_returns_dict_per_arm( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + assert segment_union.discriminator_mapping is not None + assert set(rows.keys()) == set(segment_union.discriminator_mapping.keys()) + + def test_each_row_passes_validation( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + adapter: TypeAdapter[object] = TypeAdapter(segment_union.source_annotation) + for _arm_val, row in rows.items(): + adapter.validate_python(row) + + def test_discriminator_field_set( + self, segment_spec: FeatureSpec, segment_union: UnionSpec + ) -> None: + rows = generate_populated_arm_rows(segment_spec) + assert segment_union.discriminator_field is not None + for arm_val, row in rows.items(): + assert row[segment_union.discriminator_field] == arm_val + + def test_optional_fields_present(self, segment_spec: FeatureSpec) -> None: + """Populated arm rows include optional fields.""" + rows = generate_populated_arm_rows(segment_spec) + # Road arm has optional speed_limits + road_row = rows["road"] + assert "speed_limits" in road_row + + +class TestValueForShapeScalarVariants: + """_value_for_shape handles the Scalar variants it can reach.""" + + def test_any_scalar_raises(self) -> None: + # `AnyScalar` only appears 
as a `MapOf` value type in feature + # models; `_value_for_shape` returns `{}` for `MapOf` without + # descending, so reaching `AnyScalar` directly is a bug. + field = FieldSpec(name="x", shape=AnyScalar()) + with pytest.raises(TypeError, match="AnyScalar reached base-row generation"): + value_for_field(field, "Foo") + + def test_literal_scalar_returns_first_value(self) -> None: + field = FieldSpec(name="x", shape=LiteralScalar(values=("road",))) + assert value_for_field(field, "Foo") == "road" + + +class TestMinFieldsSetSatisfied: + """`_satisfy_model_constraints` populates optional fields for `min_fields_set`.""" + + def test_min_fields_set_populates_optional_fields(self) -> None: + from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class MinTwoModel(BaseModel): + a: str | None = None + b: str | None = None + c: str | None = None + + spec = extract_model(MinTwoModel) + row = generate_base_row(spec) + present = [name for name in ("a", "b", "c") if name in row] + assert len(present) >= 2 + + def test_min_fields_set_counts_required_fields(self) -> None: + # Required fields are always present in the sparse base row, and they + # count against `min_fields_set(N)` -- matching Pydantic's + # `model_fields_set` semantics. With one required + three optional + # and `min_fields_set(2)`, the required field plus one optional + # already satisfy the constraint, so the sparse row only needs + # one additional optional fill. 
+ from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class MixedMinModel(BaseModel): + required_field: str + opt_a: str | None = None + opt_b: str | None = None + opt_c: str | None = None + + spec = extract_model(MixedMinModel) + row = generate_base_row(spec) + assert "required_field" in row + present_optional = [n for n in ("opt_a", "opt_b", "opt_c") if n in row] + assert len(present_optional) >= 1 + assert ( + sum( + 1 + for name in row + if name in {"required_field", "opt_a", "opt_b", "opt_c"} + ) + >= 2 + ) + + def test_min_fields_set_all_required_needs_no_optional_fill(self) -> None: + # When required fields alone satisfy `count`, no optional fills are + # needed -- matching Pydantic, which counts required fields toward + # `model_fields_set`. + from overture.schema.codegen.extraction.model_extraction import extract_model + from overture.schema.system.model_constraint import min_fields_set + from pydantic import BaseModel + + @min_fields_set(2) + class AllRequiredModel(BaseModel): + req_a: str + req_b: str + opt_a: str | None = None + + spec = extract_model(AllRequiredModel) + row = generate_base_row(spec) + assert "req_a" in row and "req_b" in row + assert "opt_a" not in row diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py new file mode 100644 index 000000000..0c89a0367 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -0,0 +1,1877 @@ +"""Tests for check_builder -- scalar fields, struct recursion, and model constraints.""" + +from dataclasses import replace +from enum import Enum +from typing import Annotated, Literal, NewType, Union + +import pytest +from annotated_types import Ge, Le, MinLen +from codegen_test_support import ( + LiteralSubtypeModel, + RadioModel, 
+ RequireAnyModel, + TripleNestedArrayModel, + discover_feature, + feature_spec_for_model, + union_spec_for, +) +from overture.schema.codegen.extraction.field import ConstraintSource, Primitive +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + FieldSpec, + ModelSpec, +) +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.pyspark._render_common import column_level_suffix +from overture.schema.codegen.pyspark.check_builder import ( + build_checks, +) +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + RequireIf, + model_constraint_function, +) +from overture.schema.common.scoping.lr import LinearlyReferencedRange +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + ScalarPath, + parse, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + Not, + forbid_if, + require_any_of, +) +from overture.schema.system.string import CountryCodeAlpha2 +from pydantic import BaseModel, Field +from pydantic.fields import FieldInfo +from pydantic.networks import HttpUrl + +_path = parse + + +def _column_guard(check: Check) -> ColumnGuard | None: + """Return the first ColumnGuard, or None.""" + for g in check.guards: + if isinstance(g, ColumnGuard): + return g + return None + + +def _element_guard(check: Check) -> ElementGuard | None: + """Return the first ElementGuard, or None.""" + for g in check.guards: + if isinstance(g, ElementGuard): + return g + return None + + +def _checks_for( + model_cls: type[BaseModel], +) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(feature_spec_for_model(model_cls)) + + +def _condition_of(check: ModelCheck) -> object: + """Return the condition of a 
RequireIf or ForbidIf descriptor.""" + desc = check.descriptor + assert isinstance(desc, (RequireIf, ForbidIf)), ( + f"Expected RequireIf or ForbidIf, got {type(desc).__name__}" + ) + return desc.condition + + +def _filter_nodes( + nodes: list[ModelCheck], + function: str | tuple[str, ...], + field_names: tuple[str, ...] | None = None, +) -> list[ModelCheck]: + functions = (function,) if isinstance(function, str) else function + return [ + n + for n in nodes + if model_constraint_function(n.descriptor) in functions + and (field_names is None or n.descriptor.field_names == field_names) + ] + + +def _union_checks( + name: str, union_type: object +) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(union_spec_for(name, union_type)) + + +def _union_model_nodes(name: str, union_type: object) -> list[ModelCheck]: + _, model_nodes = _union_checks(name, union_type) + return model_nodes + + +class TestScalarChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(LiteralSubtypeModel) + return nodes + + def test_literal_produces_enum_check(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + assert len(enum_nodes) == 1 + node = enum_nodes[0] + descriptors = node.descriptors + funcs = [d.function for d in descriptors] + assert "check_required" in funcs + assert "check_enum" in funcs + + def test_optional_field_no_required_check(self, nodes: list[Check]) -> None: + name_nodes = [n for n in nodes if n.target == _path("name")] + for node in name_nodes: + funcs = [d.function for d in node.descriptors] + assert "check_required" not in funcs + + def test_required_comes_first_in_coalesce(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + node = enum_nodes[0] + funcs = [d.function for d in node.descriptors] + req_idx = funcs.index("check_required") + enum_idx = funcs.index("check_enum") + assert req_idx < enum_idx + + def 
test_enum_args_contain_literal_values(self, nodes: list[Check]) -> None: + enum_nodes = [n for n in nodes if n.target == _path("subtype")] + node = enum_nodes[0] + enum_desc = next(d for d in node.descriptors if d.function == "check_enum") + assert enum_desc.args == (("a", "b", "c"),) + + def test_optional_str_field_no_checks(self, nodes: list[Check]) -> None: + # name: str | None = None has no constraints, so no check node + name_nodes = [n for n in nodes if n.target == _path("name")] + assert len(name_nodes) == 0 + + +class _RequiredNewtypeModel(BaseModel): + country: CountryCodeAlpha2 + + +class TestRequiredNewtypeChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_RequiredNewtypeModel) + return nodes + + def test_required_newtype_includes_check_required(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + assert len(country_nodes) == 1 + funcs = [d.function for d in country_nodes[0].descriptors] + assert "check_required" in funcs + + def test_required_newtype_includes_newtype_function( + self, nodes: list[Check] + ) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + funcs = [d.function for d in country_nodes[0].descriptors] + assert "check_pattern" in funcs + + def test_required_precedes_newtype_function(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("country")] + funcs = [d.function for d in country_nodes[0].descriptors] + assert funcs.index("check_required") < funcs.index("check_pattern") + + +class _Color(str, Enum): + RED = "red" + GREEN = "green" + BLUE = "blue" + + +class EnumFieldModel(BaseModel): + color: _Color + + +class TestEnumKindChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(EnumFieldModel) + return nodes + + def test_enum_field_produces_check_enum(self, nodes: list[Check]) -> None: + enum_descs = [ + d for n in nodes for d in n.descriptors if 
d.function == "check_enum" + ] + assert len(enum_descs) == 1 + + def test_enum_field_uses_member_values(self, nodes: list[Check]) -> None: + enum_descs = [ + d for n in nodes for d in n.descriptors if d.function == "check_enum" + ] + assert enum_descs[0].args == (("red", "green", "blue"),) + + +class InnerModel(BaseModel): + value: str + count: int = Field(ge=0) + + +class OuterModel(BaseModel): + inner: InnerModel | None = None + + +class _ArrayElement(BaseModel): + tag: str + + +class _NullableWithArray(BaseModel): + items: list[_ArrayElement] | None = None + + +class _NullableArrayGrandparent(BaseModel): + parent: _NullableWithArray | None = None + + +class TestNullableParentGating: + """Required fields within optional struct parents get gated check_required.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(OuterModel) + return nodes + + def test_required_field_has_gated_check_required(self, nodes: list[Check]) -> None: + value_nodes = [n for n in nodes if n.target == _path("inner.value")] + req_descs = [ + d + for n in value_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate == _path("inner") + + def test_non_check_required_descriptors_have_no_gate( + self, nodes: list[Check] + ) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + for node in count_nodes: + for desc in node.descriptors: + if desc.function != "check_required": + assert desc.gate is None + + def test_other_checks_still_present(self, nodes: list[Check]) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + assert len(count_nodes) >= 1 + funcs = [d.function for d in count_nodes[0].descriptors] + assert "check_bounds" in funcs + + +class TestArrayBoundaryResetsNullable: + """nullable_gate resets at array boundaries.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_NullableArrayGrandparent) + return 
nodes + + def test_required_field_in_array_element_has_check_required( + self, nodes: list[Check] + ) -> None: + tag_nodes = [n for n in nodes if n.target == _path("parent.items[].tag")] + assert len(tag_nodes) >= 1 + funcs = [d.function for n in tag_nodes for d in n.descriptors] + assert "check_required" in funcs + + def test_array_element_required_has_no_gate(self, nodes: list[Check]) -> None: + tag_nodes = [n for n in nodes if n.target == _path("parent.items[].tag")] + req_descs = [ + d + for n in tag_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate is None + + +class _OptionalNested(BaseModel): + mode: str + + +class _ElementWithOptional(BaseModel): + nested: _OptionalNested | None = None + + +class _ArrayWithOptionalNested(BaseModel): + items: list[_ElementWithOptional] + + +class TestArrayElementConditionalGate: + """Optional structs within array elements get gated check_required.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ArrayWithOptionalNested) + return nodes + + def test_required_in_optional_element_struct_has_gate( + self, nodes: list[Check] + ) -> None: + mode_nodes = [n for n in nodes if n.target == _path("items[].nested.mode")] + req_descs = [ + d + for n in mode_nodes + for d in n.descriptors + if d.function == "check_required" + ] + assert len(req_descs) == 1 + assert req_descs[0].gate == _path("items[].nested") + + +class TestStructRecursion: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(OuterModel) + return nodes + + def test_recurses_into_model_fields(self, nodes: list[Check]) -> None: + paths = {n.target for n in nodes} + assert _path("inner.count") in paths + + def test_nested_field_uses_dot_path(self, nodes: list[Check]) -> None: + count_nodes = [n for n in nodes if n.target == _path("inner.count")] + assert len(count_nodes) == 1 + + +class ItemModel(BaseModel): + value: str + + +class 
ArrayModel(BaseModel): + items: Annotated[list[ItemModel], MinLen(1)] + + +class TestArrayChecks: + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(ArrayModel) + return nodes + + def test_array_min_length_is_scalar_shape(self, nodes: list[Check]) -> None: + length_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert len(length_nodes) == 1 + assert isinstance(length_nodes[0].target, ScalarPath) + + def test_array_element_field_uses_bracket_notation( + self, nodes: list[Check] + ) -> None: + paths = {n.target for n in nodes} + assert any(isinstance(p, ArrayPath) for p in paths) + + def test_array_element_subfield_path(self, nodes: list[Check]) -> None: + # ItemModel.value is required, so a check node for items[].value must exist + paths = {n.target for n in nodes} + assert _path("items[].value") in paths + + def test_array_level_check_has_no_inner_levels(self, nodes: list[Check]) -> None: + length_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert length_nodes[0].target == _path("items") + + def test_required_array_field_has_required_check(self, nodes: list[Check]) -> None: + # check_required on an array field is a column-level null check; its + # target is the scalar `items` column, not an element path. 
+ required_nodes = [ + n + for n in nodes + if n.target == _path("items") + and any(d.function == "check_required" for d in n.descriptors) + ] + assert len(required_nodes) == 1 + + def test_array_element_subfield_has_single_check(self, nodes: list[Check]) -> None: + value_nodes = [n for n in nodes if n.target == _path("items[].value")] + assert len(value_nodes) == 1 + + +class _StringListModel(BaseModel): + tags: Annotated[list[str], MinLen(1)] + + +class _NestedListModel(BaseModel): + """list[list[ItemModel]] — both layers contribute MinLen + UniqueItems.""" + + items: Annotated[ + list[Annotated[list[InnerModel], MinLen(1), UniqueItemsConstraint()]], + MinLen(1), + UniqueItemsConstraint(), + ] + + +class _StringInListModel(BaseModel): + """list[Annotated[str, MinLen]] with outer list MinLen — inner is string MinLen.""" + + tags: Annotated[list[Annotated[str, MinLen(1)]], MinLen(1)] + + +_HierarchyItemList = NewType( + "_HierarchyItemList", + Annotated[list[InnerModel], MinLen(1), UniqueItemsConstraint()], +) + + +class _HierarchyLikeModel(BaseModel): + """Mirror of Division.hierarchies: inner list lives inside a NewType.""" + + hierarchies: Annotated[ + list[_HierarchyItemList], + MinLen(1), + UniqueItemsConstraint(), + ] + + +class TestListFieldNameSplitting: + """Column-level and element-level checks for list fields get distinct field names.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_StringListModel) + return nodes + + def test_unique_labels_for_different_shapes(self, nodes: list[Check]) -> None: + labels = [(n.target, column_level_suffix(n)) for n in nodes] + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + + def test_min_length_check_carries_min_length_suffix( + self, nodes: list[Check] + ) -> None: + min_len_nodes = [ + n + for n in nodes + if any(d.function == "check_array_min_length" for d in n.descriptors) + ] + assert len(min_len_nodes) == 1 + assert min_len_nodes[0].target == _path("tags") + 
assert column_level_suffix(min_len_nodes[0]) == "_min_length" + + +def _node_for(nodes: list[Check], field: str, function: str) -> Check: + field_path = _path(field) + matching = [ + n + for n in nodes + if n.target == field_path and any(d.function == function for d in n.descriptors) + ] + assert len(matching) == 1, ( + f"expected exactly one node for field={field!r} function={function!r}, " + f"got {len(matching)}" + ) + return matching[0] + + +@pytest.mark.parametrize( + ("model_cls", "field"), + [ + (_NestedListModel, "items"), + (_HierarchyLikeModel, "hierarchies"), + ], + ids=["nested_list", "hierarchy_newtype"], +) +class TestPerLevelListConstraints: + """Each layer of `list[list[X]]` emits its own column-level check. + + Covers both raw nested lists (`_NestedListModel`) and the + NewType-wrapped variant (`_HierarchyLikeModel`, mirroring + `Division.hierarchies`). + """ + + def test_no_duplicate_labels(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + labels = [(n.target, column_level_suffix(n)) for n in nodes] + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + + def test_outer_min_length_check( + self, model_cls: type[BaseModel], field: str + ) -> None: + nodes, _ = _checks_for(model_cls) + outer = _node_for(nodes, field, "check_array_min_length") + assert outer.target == _path(field) + + def test_inner_min_length_check( + self, model_cls: type[BaseModel], field: str + ) -> None: + nodes, _ = _checks_for(model_cls) + inner = _node_for(nodes, f"{field}[]", "check_array_min_length") + assert inner.target == _path(f"{field}[]") + + def test_outer_unique_check(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + outer = _node_for(nodes, field, "check_struct_unique") + assert outer.target == _path(field) + + def test_inner_unique_check(self, model_cls: type[BaseModel], field: str) -> None: + nodes, _ = _checks_for(model_cls) + inner = _node_for(nodes, 
f"{field}[]", "check_struct_unique") + assert inner.target == _path(f"{field}[]") + + +class TestPerLevelScalarMinLen: + """list[Annotated[str, MinLen]] with outer list MinLen splits cleanly.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_StringInListModel) + return nodes + + def test_outer_array_min_length(self, nodes: list[Check]) -> None: + outer = _node_for(nodes, "tags", "check_array_min_length") + assert outer.target == _path("tags") + + def test_inner_string_min_length(self, nodes: list[Check]) -> None: + inner = _node_for(nodes, "tags[]", "check_string_min_length") + assert inner.target == _path("tags[]") + + +class TestDescriptorDedupKey: + """Descriptor equality drives layer-level dedup via `dict.fromkeys`.""" + + def test_identical_descriptors_collapse(self) -> None: + desc = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + assert list(dict.fromkeys([desc, desc])) == [desc] + + def test_distinct_descriptors_preserve_order(self) -> None: + first = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + second = ExpressionDescriptor(function="check_struct_unique") + assert list(dict.fromkeys([first, second, first])) == [first, second] + + def test_different_args_are_distinct(self) -> None: + one = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + two = ExpressionDescriptor(function="check_array_min_length", args=(2,)) + assert list(dict.fromkeys([one, two])) == [one, two] + + def test_different_gates_are_distinct(self) -> None: + ungated = ExpressionDescriptor(function="check_required") + gated = ExpressionDescriptor(function="check_required", gate=_path("parent")) + assert list(dict.fromkeys([ungated, gated])) == [ungated, gated] + + +class TestListOfNewtypeConstraintDispatch: + """Element-level MinLen from NewType inside a list dispatches as string check.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + MyId = NewType("MyId", Annotated[str, MinLen(1)]) + + 
class ListOfIdModel(BaseModel): + ids: list[MyId] + + nodes, _ = _checks_for(ListOfIdModel) + return nodes + + def test_element_min_length_dispatches_as_string_check( + self, nodes: list[Check] + ) -> None: + """MinLen from the element NewType should produce check_string_min_length, not check_array_min_length.""" + all_funcs = [d.function for n in nodes for d in n.descriptors] + assert "check_string_min_length" in all_funcs + # check_array_min_length should NOT appear — there's no list-level MinLen + assert "check_array_min_length" not in all_funcs + + +class _InternalListNewtypeModel(BaseModel): + """Model with a NewType that wraps list[float] (list is inside the NewType).""" + + between: list[CountryCodeAlpha2] | None = None # outer list wrapping + + +class TestNewtypeWithInternalList: + """When a NewType IS a list, the check function handles the whole array.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + class InternalListModel(BaseModel): + between: LinearlyReferencedRange | None = None + + nodes, _ = _checks_for(InternalListModel) + return nodes + + def test_internal_list_newtype_has_single_check(self, nodes: list[Check]) -> None: + between_nodes = [n for n in nodes if n.target == _path("between")] + assert len(between_nodes) == 1 + + def test_internal_list_newtype_has_three_descriptors( + self, nodes: list[Check] + ) -> None: + between_nodes = [n for n in nodes if n.target == _path("between")] + fns = [d.function for d in between_nodes[0].descriptors] + assert "check_linear_range_length" in fns + assert "check_linear_range_bounds" in fns + assert "check_linear_range_order" in fns + + +class TestBaseTypeDispatchInCheckBuilder: + """Base type dispatch generates element-level checks for HttpUrl/EmailStr.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + class HttpUrlListModel(BaseModel): + websites: list[HttpUrl] | None = None + + nodes, _ = _checks_for(HttpUrlListModel) + return nodes + + def test_http_url_produces_check_url_format(self, 
nodes: list[Check]) -> None: + url_nodes = [ + n + for n in nodes + if any(d.function == "check_url_format" for d in n.descriptors) + ] + assert len(url_nodes) == 1 + + def test_http_url_element_check_is_array_shape(self, nodes: list[Check]) -> None: + url_nodes = [ + n + for n in nodes + if any(d.function == "check_url_format" for d in n.descriptors) + ] + assert isinstance(url_nodes[0].target, ArrayPath) + + +class _DeepInner(BaseModel): + field: str + + +class _ArrayElementWithNestedStruct(BaseModel): + nested: _DeepInner + + +class _DeepNestedArrayModel(BaseModel): + items: list[_ArrayElementWithNestedStruct] + + +class TestArrayElementNestedStructChecks: + """Struct fields inside array elements produce array-shaped checks.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_DeepNestedArrayModel) + return nodes + + def test_nested_struct_field_path(self, nodes: list[Check]) -> None: + paths = {n.target for n in nodes} + assert _path("items[].nested.field") in paths + + +class _ArrayElementWithList(BaseModel): + tags: list[CountryCodeAlpha2] + + +class _ListInArrayModel(BaseModel): + items: list[_ArrayElementWithList] + + +class TestArrayElementListChecks: + """List fields inside array elements need nested iteration.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ListInArrayModel) + return nodes + + def test_list_subfield_element_checks_have_inner_levels( + self, nodes: list[Check] + ) -> None: + # Element-level check on a list field inside an outer array: target + # encodes both iterations explicitly as `items[].tags[]`. 
+ element_nodes = [n for n in nodes if n.target == _path("items[].tags[]")] + assert len(element_nodes) >= 1 + + def test_list_subfield_column_path_is_enclosing_array( + self, nodes: list[Check] + ) -> None: + tag_nodes = [n for n in nodes if str(n.target).startswith("items[].tags")] + for node in tag_nodes: + assert isinstance(node.target, ArrayPath) + # the outermost iterated column is `items`, not the inner `tags` list + assert node.target.array_chunks[0] == ((), "items", 1) + + +class _ArrayElementWithNewtype(BaseModel): + country: CountryCodeAlpha2 + + +class _NewtypeInArrayModel(BaseModel): + items: list[_ArrayElementWithNewtype] + + +class TestArrayElementNewtypeChecks: + """Newtype fields inside array elements: shape=ARRAY, no inner_levels.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_NewtypeInArrayModel) + return nodes + + def test_newtype_subfield_has_single_check(self, nodes: list[Check]) -> None: + country_nodes = [n for n in nodes if n.target == _path("items[].country")] + assert len(country_nodes) == 1 + + +class TestModelLevelConstraints: + @pytest.fixture + def radio_model_nodes(self) -> list[ModelCheck]: + _, model_nodes = _checks_for(RadioModel) + return model_nodes + + @pytest.fixture + def require_any_model_nodes(self) -> list[ModelCheck]: + _, model_nodes = _checks_for(RequireAnyModel) + return model_nodes + + def test_radio_group_produces_model_check( + self, radio_model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(radio_model_nodes, "check_radio_group")) == 1 + + def test_radio_group_field_names(self, radio_model_nodes: list[ModelCheck]) -> None: + radio = _filter_nodes(radio_model_nodes, "check_radio_group")[0] + assert set(radio.descriptor.field_names) == {"a", "b"} + + def test_require_any_of_produces_model_check( + self, require_any_model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(require_any_model_nodes, "check_require_any_of")) == 1 + + def 
test_require_any_of_field_names( + self, require_any_model_nodes: list[ModelCheck] + ) -> None: + node = _filter_nodes(require_any_model_nodes, "check_require_any_of")[0] + assert set(node.descriptor.field_names) == {"x", "y"} + + def test_no_constraints_returns_empty_model_nodes(self) -> None: + _, model_nodes = _checks_for(LiteralSubtypeModel) + assert model_nodes == [] + + +class _SpeedStruct(BaseModel): + value: int + unit: str + + +@require_any_of("fast", "slow") +class _RequireAnyOfStructFields(BaseModel): + fast: _SpeedStruct | None = None + slow: _SpeedStruct | None = None + + +class TestRequireAnyOfStructUnwrapping: + """require_any_of on struct fields must reference the leaf scalar, not the struct.""" + + @pytest.fixture + def node(self) -> ModelCheck: + _, model_nodes = _checks_for(_RequireAnyOfStructFields) + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + return nodes[0] + + def test_field_names_use_leaf_path(self, node: ModelCheck) -> None: + assert set(node.descriptor.field_names) == {"fast.value", "slow.value"} + + +class _SyntheticUnionFixtures: + """Discriminated-union models exercising union check generation.""" + + class Base(BaseModel): + kind: str + + class TypeA(Base): + kind: Literal["a"] = "a" + a_field: Literal["x", "y"] | None = None + + class TypeB(Base): + kind: Literal["b"] = "b" + b_field: Literal["p", "q"] | None = None + + SyntheticUnion = Annotated[ + Union[TypeA, TypeB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + @require_any_of("p", "q") + class ConstrainedMember(Base): + kind: Literal["c"] = "c" + p: str | None = None + q: str | None = None + + ConstrainedUnion = Annotated[ + Union[TypeA, ConstrainedMember], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class MemberX(Base): + kind: Literal["x"] = "x" + shared_name: Literal["x1", "x2"] + + class MemberY(Base): + kind: Literal["y"] = "y" + shared_name: Literal["y1", "y2"] + + class MemberZ(Base): + kind: Literal["z"] = 
"z" + + ThreeWayUnion = Annotated[ + Union[MemberX, MemberY, MemberZ], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class MixedRequired(Base): + kind: Literal["r"] = "r" + mixed_field: str + + class MixedOptional(Base): + kind: Literal["o"] = "o" + mixed_field: str | None = None + + class MixedAbsent(Base): + kind: Literal["a"] = "a" + + MixedRequirednessUnion = Annotated[ + Union[MixedRequired, MixedOptional, MixedAbsent], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class AllVarA(Base): + kind: Literal["a"] = "a" + everywhere: str | None = None + + class AllVarB(Base): + kind: Literal["b"] = "b" + everywhere: str | None = None + + AllVariantsUnion = Annotated[ + Union[AllVarA, AllVarB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + @require_any_of("fast", "slow") + @forbid_if(["restrictions"], FieldEqCondition("gated", True)) + class MemberWithModelConstraints(Base): + """Union member carrying model constraints over struct/compound fields.""" + + kind: Literal["m"] = "m" + gated: bool = False + fast: _SpeedStruct | None = None + slow: _SpeedStruct | None = None + restrictions: list[str] | None = None + + MemberConstraintUnion = Annotated[ + Union[TypeA, MemberWithModelConstraints], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class PlainMember(Base): + kind: Literal["p"] = "p" + + +class TestSyntheticUnionChecks: + @pytest.fixture + def field_nodes(self) -> list[Check]: + nodes, _ = _union_checks("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + return nodes + + def test_variant_field_gets_variant_values(self, field_nodes: list[Check]) -> None: + a_nodes = [n for n in field_nodes if n.target == _path("a_field")] + assert len(a_nodes) > 0 + for node in a_nodes: + assert node.guards == (ColumnGuard(discriminator="kind", values=("a",)),) + + def test_shared_field_has_no_variant_values(self, field_nodes: list[Check]) -> None: + kind_nodes = [n for n in field_nodes if n.target == _path("kind")] + for node in 
kind_nodes: + assert node.guards == () + + def test_b_field_gets_b_variant_value(self, field_nodes: list[Check]) -> None: + b_nodes = [n for n in field_nodes if n.target == _path("b_field")] + assert len(b_nodes) > 0 + for node in b_nodes: + assert node.guards == (ColumnGuard(discriminator="kind", values=("b",)),) + + def test_variant_nodes_carry_discriminator_field( + self, field_nodes: list[Check] + ) -> None: + variant_nodes = [n for n in field_nodes if n.guards] + for node in variant_nodes: + for guard in node.guards: + assert guard.discriminator == "kind" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + + @pytest.mark.parametrize( + ("field_name", "expected_value"), + [("a_field", "b"), ("b_field", "a")], + ) + def test_variant_field_gets_forbid_if( + self, + model_nodes: list[ModelCheck], + field_name: str, + expected_value: str, + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", (field_name,)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "kind" + assert condition.value == expected_value + + def test_forbid_if_nodes_are_top_level(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) == 2 + for node in forbid_nodes: + assert node.target == ScalarPath() + + +class TestUnionMemberModelConstraints: + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "Constrained", _SyntheticUnionFixtures.ConstrainedUnion + ) + + def test_member_model_constraints_collected( + self, model_nodes: list[ModelCheck] + ) -> None: + assert len(_filter_nodes(model_nodes, "check_require_any_of")) == 1 + + def test_member_constraint_tagged_with_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + """Constraint from 
ConstrainedMember carries that member's discriminator value.""" + require_any_of_nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(require_any_of_nodes) == 1 + assert require_any_of_nodes[0].arm == "c" + + def test_exclusivity_checks_have_no_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + """Synthesized forbid_if/require_if checks apply to every arm.""" + exclusivity_nodes = _filter_nodes( + model_nodes, ("check_forbid_if", "check_require_if") + ) + assert exclusivity_nodes + for node in exclusivity_nodes: + assert node.arm is None + + +class TestUnionMemberStructAndCompoundConstraints: + """Member-level constraints on struct/compound fields dispatch with real shapes. + + `@require_any_of` over struct fields must unwrap to the first required + leaf scalar; `@forbid_if` over a compound field must populate + `field_shapes`. Both depend on the member being run through real + extraction rather than stubbed proxies -- a latent gap, since no real + schema member currently carries a model-level constraint decorator. + """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "MemberConstraint", _SyntheticUnionFixtures.MemberConstraintUnion + ) + + def test_require_any_of_unwraps_struct_leaf( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + assert set(nodes[0].descriptor.field_names) == {"fast.value", "slow.value"} + + def test_forbid_if_populates_compound_field_shapes( + self, model_nodes: list[ModelCheck] + ) -> None: + # Exclusivity logic also emits a forbid_if for `restrictions`, but + # gated on the discriminator; the member-level constraint is the + # one whose condition references `gated`. 
+ forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("restrictions",)) + member_level = [ + n + for n in forbid_nodes + if isinstance((cond := _condition_of(n)), FieldEqCondition) + and cond.field_name == "gated" + ] + assert len(member_level) == 1 + descriptor = member_level[0].descriptor + assert isinstance(descriptor, ForbidIf) + assert "restrictions" in dict(descriptor.field_shapes) + + +@require_any_of("max_speed", "min_speed") +class _SpeedLimitElement(BaseModel): + """Element model with its own @require_any_of constraint.""" + + max_speed: int | None = None + min_speed: int | None = None + + +class _VariantWithConstrainedList(_SyntheticUnionFixtures.Base): + """Union member with a variant-specific list of constrained sub-models.""" + + kind: Literal["v"] = "v" + speed_limits: list[_SpeedLimitElement] | None = None + + +_VariantFieldConstraintUnion = Annotated[ + Union[_VariantWithConstrainedList, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestVariantSpecificFieldDiscoveredModelConstraints: + """Model constraints discovered through a variant-specific field carry the contributing arm. + + A `@require_any_of` declared on an element model of a list field that + appears only in one union arm must be tagged with that arm, not + propagated to every arm. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "VariantFieldConstraint", _VariantFieldConstraintUnion + ) + + def test_field_discovered_constraint_tagged_with_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[]") + ] + assert len(nodes) == 1 + assert nodes[0].arm == "v" + + +class _VariantWithConstrainedModelRef(_SyntheticUnionFixtures.Base): + """Variant-specific direct (non-list) model ref with model-level constraint.""" + + kind: Literal["d"] = "d" + speed: _SpeedLimitElement | None = None + + +_DirectModelRefConstraintUnion = Annotated[ + Union[_VariantWithConstrainedModelRef, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestVariantSpecificDirectModelRefConstraint: + """Variant-specific non-list `ModelRef` with a constrained sub-model is unsupported. + + The direct-ref path routes through `_recurse_into_model` rather + than the array branch of `_walk_field_shape`, and pure struct + nesting can't anchor a real model constraint -- the dispatch + raises `NotImplementedError`. Distinct from the `list[Model]` + case in `TestVariantSpecificFieldDiscoveredModelConstraints`, + which is supported. + """ + + def test_direct_modelref_constraint_raises(self) -> None: + # Pure struct nesting can't anchor a real model constraint; today + # the only constraint kind that survives struct nesting raises. 
+ with pytest.raises( + NotImplementedError, match="Model constraint on struct-nested" + ): + _union_model_nodes( + "DirectModelRefConstraint", _DirectModelRefConstraintUnion + ) + + +class _OuterWithStructNestedUnion(BaseModel): + """Non-list `UnionRef` field reaches a union with a constrained member.""" + + nested: _SyntheticUnionFixtures.ConstrainedUnion + + +class TestStructNestedUnionWithConstraint: + """Non-list `UnionRef` reaching a union with model checks is unsupported. + + `_recurse_into_union` mirrors `_recurse_into_model`'s guard: when + the prefix is struct-nested (no `ArrayPath` segment) and the union + would emit either union-level constraints or synthesized + exclusivity checks (`check_forbid_if`/`check_require_if`), the + dispatch raises because `_model_constraint_target` would collapse + the anchor to the row root with field names that don't exist + there. This fixture exercises the union-level branch; the + exclusivity branch isn't covered by a synthetic fixture today + because the dual-trigger raise body is one statement. 
+ """ + + def test_struct_nested_union_constraint_raises(self) -> None: + with pytest.raises( + NotImplementedError, match="Model constraint on struct-nested" + ): + build_checks(feature_spec_for_model(_OuterWithStructNestedUnion)) + + +class _NestedInnerBase(BaseModel): + inner_kind: str + + +class _NestedInnerArmA(_NestedInnerBase): + inner_kind: Literal["i_a"] = "i_a" + a_only: str | None = None + + +@require_any_of("first", "second") +class _NestedInnerArmB(_NestedInnerBase): + """Inner-union arm with its own model-level constraint.""" + + inner_kind: Literal["i_b"] = "i_b" + first: str | None = None + second: str | None = None + + +_NestedInnerUnion = Annotated[ + Union[_NestedInnerArmA, _NestedInnerArmB], # noqa: UP007 + FieldInfo(discriminator="inner_kind"), +] + + +class _OuterArmWithInnerUnion(_SyntheticUnionFixtures.Base): + """Outer-union arm that wraps a nested union via a list field.""" + + kind: Literal["n"] = "n" + inners: list[_NestedInnerUnion] | None = None + + +_NestedUnionViaVariantField = Annotated[ + Union[_OuterArmWithInnerUnion, _SyntheticUnionFixtures.PlainMember], # noqa: UP007 + FieldInfo(discriminator="kind"), +] + + +class TestNestedUnionThroughVariantField: + """Inner-union member constraints inherit the outer-union arm. + + Reached through a variant-specific field carrying a nested union, + the inner member's `@require_any_of` must be tagged with the outer + arm ('n'), not the inner discriminator value ('i_b'). The outermost + union's discriminator is the only one per-arm test filtering keys + on. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "NestedUnionViaVariantField", _NestedUnionViaVariantField + ) + + def test_inner_member_constraint_tagged_with_outer_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + require_any_of_nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(require_any_of_nodes) == 1 + assert require_any_of_nodes[0].arm == "n" + + +class _MultiArmContributorA(_SyntheticUnionFixtures.Base): + kind: Literal["a"] = "a" + shared_limits: list[_SpeedLimitElement] | None = None + + +class _MultiArmContributorB(_SyntheticUnionFixtures.Base): + kind: Literal["b"] = "b" + shared_limits: list[_SpeedLimitElement] | None = None + + +class _MultiArmThirdMember(_SyntheticUnionFixtures.Base): + """Third arm that does NOT contribute the shared field.""" + + kind: Literal["c"] = "c" + + +_MultiArmVariantSourcesUnion = Annotated[ + Union[ # noqa: UP007 + _MultiArmContributorA, _MultiArmContributorB, _MultiArmThirdMember + ], + FieldInfo(discriminator="kind"), +] + + +class TestMultiArmVariantSourcesPolicy: + """Tombstone: a 2-of-N variant-specific field collapses to `arm=None`. + + No real schema today declares a variant-specific field on a proper + subset of arms (2-of-N). When/if that pattern surfaces with a + sub-model carrying its own model constraint, the current policy + routes the constraint to every arm rather than the intersection -- + including arms the field doesn't belong to. This pins the + behaviour explicitly so the gap surfaces if anyone treats it as + correct or relies on it. See `_singleton_arm` in `check_builder.py`. 
+ """ + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + "MultiArmVariantSources", _MultiArmVariantSourcesUnion + ) + + def test_multi_arm_field_discovered_constraint_has_no_arm( + self, model_nodes: list[ModelCheck] + ) -> None: + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("shared_limits[]") + ] + assert len(nodes) == 1 + # The 2-of-N case can't pick a single arm, so the constraint + # carries arm=None -- broadcasting to every arm, including the + # third member that doesn't declare shared_limits at all. + # Tracked for resolution if/when a real schema surfaces this. + assert nodes[0].arm is None + + +class TestGroupedExclusivityChecks: + """A required field with the same name in 2 of 3 variants (different types) groups correctly.""" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("ThreeWay", _SyntheticUnionFixtures.ThreeWayUnion) + + def test_grouped_field_forbid_if_for_excluded_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("shared_name",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "kind" + assert condition.value == "z" + + def test_grouped_field_require_if_per_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("shared_name",)) + assert len(require_nodes) == 2 + conditions = set() + for node in require_nodes: + cond = _condition_of(node) + assert isinstance(cond, FieldEqCondition) + conditions.add(cond.value) + assert conditions == {"x", "y"} + + +class TestMixedRequirednessExclusivity: + """Same-named field required in one variant, optional in another.""" + + @pytest.fixture + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes( + 
"Mixed", _SyntheticUnionFixtures.MixedRequirednessUnion + ) + + def test_require_if_only_for_required_variant( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("mixed_field",)) + assert len(require_nodes) == 1 + condition = _condition_of(require_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.value == "r" + + def test_forbid_if_for_absent_variant(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("mixed_field",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.value == "a" + + +class TestExclusivityEdgeCases: + def test_no_discriminator_produces_zero_exclusivity_nodes(self) -> None: + """Union without discriminator_mapping produces no exclusivity checks.""" + spec = replace( + extract_union("Synthetic", _SyntheticUnionFixtures.SyntheticUnion), + discriminator_mapping=None, + discriminator_field=None, + ) + _, model_nodes = build_checks(spec) + forbid = _filter_nodes(model_nodes, "check_forbid_if") + require = _filter_nodes(model_nodes, "check_require_if") + assert len(forbid) + len(require) == 0 + + def test_field_in_all_variants_no_exclusivity(self) -> None: + """Field present in every variant via variant_sources produces no exclusivity checks.""" + _, model_nodes = _union_checks( + "AllVariants", _SyntheticUnionFixtures.AllVariantsUnion + ) + forbid = _filter_nodes(model_nodes, "check_forbid_if") + require = _filter_nodes(model_nodes, "check_require_if") + assert len(forbid) + len(require) == 0 + + +@require_any_of("x", "y") +class _ArrayElementWithConstraint(BaseModel): + x: str | None = None + y: str | None = None + + +class _ArrayOfConstrainedModel(BaseModel): + items: list[_ArrayElementWithConstraint] + + +@require_any_of("a", "b") +class _NestedConstrainedStruct(BaseModel): + a: str | None = None + b: str | 
None = None + + +class _ArrayElementWithConstrainedNested(BaseModel): + nested: _NestedConstrainedStruct + + +class _ArrayOfNestedConstrained(BaseModel): + items: list[_ArrayElementWithConstrainedNested] + + +@require_any_of("a", "b") +class _InnerConstrainedElement(BaseModel): + a: str | None = None + b: str | None = None + + +class _OuterElementWithConstrainedList(BaseModel): + things: list[_InnerConstrainedElement] + + +class _DoubleNestedConstrained(BaseModel): + items: list[_OuterElementWithConstrainedList] + + +def _require_any_node_for(model_cls: type[BaseModel]) -> ModelCheck: + _, model_nodes = _checks_for(model_cls) + nodes = _filter_nodes(model_nodes, "check_require_any_of") + assert len(nodes) == 1 + return nodes[0] + + +@pytest.mark.parametrize( + ("model_cls", "expected_target"), + [ + pytest.param(_ArrayOfConstrainedModel, _path("items[]"), id="direct_element"), + pytest.param( + _ArrayOfNestedConstrained, _path("items[].nested"), id="nested_struct" + ), + ], +) +class TestArrayContextModelConstraints: + """Model constraints on array-element (or nested struct) models produce array-context ModelChecks.""" + + def test_produces_model_check_node( + self, model_cls: type[BaseModel], expected_target: FieldPath + ) -> None: + node = _require_any_node_for(model_cls) + assert model_constraint_function(node.descriptor) == "check_require_any_of" + + def test_target( + self, model_cls: type[BaseModel], expected_target: FieldPath + ) -> None: + node = _require_any_node_for(model_cls) + assert node.target == expected_target + + +class TestDoubleNestedArrayModelConstraints: + """Model constraints on list[] elements nested inside another array use nested geometry.""" + + def test_target_is_nested_inner_array(self) -> None: + # `things` is itself an ArraySegment, so the constraint's target + # iterates items[] then things[] with no struct nav between. 
+ node = _require_any_node_for(_DoubleNestedConstrained) + assert node.target == _path("items[].things[]") + + +class TestSegmentUnionChecks: + @pytest.fixture(scope="class") + def segment_spec(self) -> FeatureSpec: + return discover_feature("Segment") + + @pytest.fixture(scope="class") + def segment_checks( + self, segment_spec: FeatureSpec + ) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(segment_spec) + + @pytest.fixture(scope="class") + def field_nodes( + self, segment_checks: tuple[list[Check], list[ModelCheck]] + ) -> list[Check]: + return segment_checks[0] + + @pytest.fixture(scope="class") + def model_nodes( + self, segment_checks: tuple[list[Check], list[ModelCheck]] + ) -> list[ModelCheck]: + return segment_checks[1] + + def test_produces_variant_gated_checks(self, field_nodes: list[Check]) -> None: + variant_nodes = [n for n in field_nodes if n.guards] + assert len(variant_nodes) > 0 + + def test_shared_fields_have_no_variant_values( + self, field_nodes: list[Check] + ) -> None: + subtype_nodes = [n for n in field_nodes if n.target == _path("subtype")] + for node in subtype_nodes: + assert node.guards == () + + def test_speed_limits_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + speed_limit_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[]") + ] + assert len(speed_limit_nodes) >= 1 + + def test_destinations_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + dest_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("destinations[]") + ] + assert len(dest_nodes) >= 1 + + def test_speed_limits_when_require_any_of_in_model_nodes( + self, model_nodes: list[ModelCheck] + ) -> None: + when_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[].when") + ] + assert len(when_nodes) >= 1 + + 
@pytest.mark.parametrize( + ("field_name", "expected_subtype"), + [("road_surface", "road"), ("rail_flags", "rail")], + ) + def test_single_variant_field_forbid_if( + self, + model_nodes: list[ModelCheck], + field_name: str, + expected_subtype: str, + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", (field_name,)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, Not) + assert isinstance(condition.inner, FieldEqCondition) + assert condition.inner.field_name == "subtype" + assert condition.inner.value == expected_subtype + + def test_class_forbid_if_for_water(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if", ("class",)) + assert len(forbid_nodes) == 1 + condition = _condition_of(forbid_nodes[0]) + assert isinstance(condition, FieldEqCondition) + assert condition.field_name == "subtype" + assert condition.value == "water" + + def test_class_require_if_for_road_and_rail( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if", ("class",)) + assert len(require_nodes) == 2 + conditions = [_condition_of(n) for n in require_nodes] + assert all(isinstance(c, FieldEqCondition) for c in conditions) + values = {c.value for c in conditions if isinstance(c, FieldEqCondition)} + assert values == {"road", "rail"} + + def test_nested_union_discriminator_preserved( + self, field_nodes: list[Check] + ) -> None: + """Inner VehicleSelector discriminator survives outer Segment annotation. + + Vehicle unit checks inside variant-specific fields (speed_limits, + prohibited_transitions) need both the outer subtype guard and the + inner dimension discriminator. The outer annotation must not + clobber the inner one. 
+ """ + unit_nodes = [ + n for n in field_nodes if "vehicle[].unit" in str(n.target) and n.guards + ] + assert len(unit_nodes) > 0, "Expected variant-gated vehicle unit nodes" + + for node in unit_nodes: + inner = _element_guard(node) + assert inner is not None, ( + f"{node.target}: inner discriminator should be element-level, " + f"got guards {node.guards}" + ) + assert inner.discriminator == "dimension", ( + f"{node.target}: inner discriminator should be 'dimension', " + f"got {inner.discriminator!r}" + ) + + # Variant-specific fields (speed_limits, prohibited_transitions) + # also need the outer subtype guard. + speed_unit_nodes = [n for n in unit_nodes if "speed_limits" in str(n.target)] + for node in speed_unit_nodes: + outer = _column_guard(node) + assert outer is not None, f"{node.target}: missing outer subtype guard" + assert outer.discriminator == "subtype", ( + f"{node.target}: outer discriminator should be 'subtype', " + f"got {outer.discriminator!r}" + ) + + def test_segment_vehicle_selector_field_checks( + self, field_nodes: list[Check] + ) -> None: + """VehicleSelector fields appear with correct nesting.""" + vehicle_nodes = [n for n in field_nodes if "vehicle[]" in str(n.target)] + assert len(vehicle_nodes) > 0 + + dim_nodes = [n for n in vehicle_nodes if "dimension" in str(n.target)] + assert any("speed_limits" in str(n.target) for n in dim_nodes) + assert any("access_restrictions" in str(n.target) for n in dim_nodes) + + for node in dim_nodes: + assert isinstance(node.target, ArrayPath) + # vehicle[] is nested inside an outer array (speed_limits, etc.), + # so the struct nav to `dimension` lands in the target's leaf. 
+ assert len(node.target.leaf) >= 1 + + def test_segment_vehicle_selector_exclusivity( + self, model_nodes: list[ModelCheck] + ) -> None: + """VehicleSelector produces forbid_if/require_if for unit field.""" + vehicle_forbid = [ + n + for n in _filter_nodes(model_nodes, "check_forbid_if") + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + assert len(vehicle_forbid) > 0 + + vehicle_require = [ + n + for n in _filter_nodes(model_nodes, "check_require_if") + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + assert len(vehicle_require) > 0 + + def test_segment_vehicle_selector_exclusivity_has_inner_levels( + self, model_nodes: list[ModelCheck] + ) -> None: + """VehicleSelector exclusivity checks use nested geometry to reach vehicle[].""" + vehicle_constraint_nodes = [ + n + for n in _filter_nodes(model_nodes, ("check_forbid_if", "check_require_if")) + if "unit" in n.descriptor.field_names and isinstance(n.target, ArrayPath) + ] + for node in vehicle_constraint_nodes: + assert isinstance(node.target, ArrayPath) + # The target reaches the inner vehicle[] via a second iteration: + # one inner level navigating `when` to the `vehicle` array. 
+ iter_paths = node.target.iter_struct_paths + assert len(iter_paths) == 1 + assert "when" in iter_paths[0] + assert "vehicle" in iter_paths[0] + + +class _InnerBase(BaseModel): + kind: str + + +class _InnerA(_InnerBase): + kind: Literal["a"] = "a" + a_field: str + + +class _InnerB(_InnerBase): + kind: Literal["b"] = "b" + b_field: int = Field(ge=0) + + +_InnerUnion = Annotated[ + _InnerA | _InnerB, + Field(discriminator="kind"), +] + + +class _Wrapper(BaseModel): + items: list[_InnerUnion] + + +class TestUnionInsideArray: + """UNION-kind fields nested inside list[] produce variant-gated checks.""" + + @pytest.fixture(scope="class") + def results(self) -> tuple[list[Check], list[ModelCheck]]: + return build_checks(feature_spec_for_model(_Wrapper)) + + @pytest.fixture(scope="class") + def field_nodes(self, results: tuple[list[Check], list[ModelCheck]]) -> list[Check]: + return results[0] + + @pytest.fixture(scope="class") + def model_nodes( + self, results: tuple[list[Check], list[ModelCheck]] + ) -> list[ModelCheck]: + return results[1] + + @pytest.fixture(scope="class") + def a_nodes(self, field_nodes: list[Check]) -> list[Check]: + return [n for n in field_nodes if n.target == _path("items[].a_field")] + + @pytest.fixture(scope="class") + def b_nodes(self, field_nodes: list[Check]) -> list[Check]: + return [n for n in field_nodes if n.target == _path("items[].b_field")] + + def test_a_field_check_produced(self, a_nodes: list[Check]) -> None: + assert len(a_nodes) >= 1 + + def test_a_field_is_array_shape(self, a_nodes: list[Check]) -> None: + assert isinstance(a_nodes[0].target, ArrayPath) + + def test_a_field_target_is_items(self, a_nodes: list[Check]) -> None: + assert a_nodes[0].target == _path("items[].a_field") + + def test_a_field_guard(self, a_nodes: list[Check]) -> None: + assert a_nodes[0].guards == (ElementGuard(discriminator="kind", values=("a",)),) + + def test_a_nodes_have_array_shape(self, a_nodes: list[Check]) -> None: + assert 
all(isinstance(n.target, ArrayPath) for n in a_nodes) + + def test_b_field_check_produced(self, b_nodes: list[Check]) -> None: + assert len(b_nodes) >= 1 + + def test_b_field_guard(self, b_nodes: list[Check]) -> None: + assert b_nodes[0].guards == (ElementGuard(discriminator="kind", values=("b",)),) + + def test_b_nodes_have_array_shape(self, b_nodes: list[Check]) -> None: + assert all(isinstance(n.target, ArrayPath) for n in b_nodes) + + def test_forbid_nodes_produced(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) > 0 + + def test_forbid_nodes_have_array_column_path( + self, model_nodes: list[ModelCheck] + ) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + for node in forbid_nodes: + assert node.target == _path("items[]") + + def test_require_if_model_nodes_have_array_column_path( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if") + for node in require_nodes: + assert node.target == _path("items[]") + + +class TestTopLevelUnionColumnPath: + """Top-level union (not inside array) exclusivity nodes have column_path=None.""" + + @pytest.fixture(scope="class") + def model_nodes(self) -> list[ModelCheck]: + return _union_model_nodes("Synthetic", _SyntheticUnionFixtures.SyntheticUnion) + + def test_forbid_if_column_path_is_none(self, model_nodes: list[ModelCheck]) -> None: + forbid_nodes = _filter_nodes(model_nodes, "check_forbid_if") + assert len(forbid_nodes) > 0 + for node in forbid_nodes: + assert node.target == ScalarPath() + + def test_require_if_column_path_is_none( + self, model_nodes: list[ModelCheck] + ) -> None: + require_nodes = _filter_nodes(model_nodes, "check_require_if") + for node in require_nodes: + assert node.target == ScalarPath() + + +class _ListUnionContainer(BaseModel): + """Top-level list of a discriminated union. 
+ + The variant fields live inside each list element, so variant gating + must reference the element-level discriminator (`el["kind"]`), not a + top-level column (`F.col("kind")`). + """ + + items: list[_SyntheticUnionFixtures.SyntheticUnion] + + +class TestTopLevelListUnion: + """Field-level checks for `list[DiscriminatedUnion]` at the feature root. + + Regression test: the discriminator must be flagged as element-level so + the renderer accesses `el["kind"]` rather than `F.col("kind")`. + """ + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_ListUnionContainer) + return nodes + + def test_variant_field_uses_element_level_discriminator( + self, nodes: list[Check] + ) -> None: + for variant_field in ("a_field", "b_field"): + variant_nodes = [n for n in nodes if variant_field in str(n.target)] + assert variant_nodes, f"Expected variant-gated {variant_field} nodes" + for node in variant_nodes: + guard = _element_guard(node) + assert guard is not None, ( + f"{node.target}: list[Union] descendants must use the " + "element-level discriminator" + ) + assert guard.discriminator == "kind", ( + f"{node.target}: discriminator should be 'kind'" + ) + + +class _NestedListUnionContainer(BaseModel): + """Top-level `list[list[DiscriminatedUnion]]` with a constrained member. + + A union nested under multiple list layers would need the union + target to record `list_depth` iterations, but the rebase in + `_recurse_into_union` records only one. No real schema exercises + this path; `build_checks` raises rather than emit a target that + silently drops iterations. 
+ """ + + nested: list[list[_SyntheticUnionFixtures.ConstrainedUnion]] + + +class TestNestedListUnionModelConstraints: + """`list[list[Union]]` raises rather than emit a collapsed target.""" + + def test_build_checks_raises_not_implemented(self) -> None: + with pytest.raises(NotImplementedError, match="multiple list layers"): + _checks_for(_NestedListUnionContainer) + + +class _DeepInnerModel(BaseModel): + value: Annotated[str, Field(min_length=1)] + + +class _DoubleNestedArrayModel(BaseModel): + items: list[list[_DeepInnerModel]] + + +class TestDoubleNestedArrayFieldChecks: + """Sub-field validation for list[list[Model]] (list_depth=2).""" + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(_DoubleNestedArrayModel) + return nodes + + def test_subfield_target_encodes_both_array_levels( + self, nodes: list[Check] + ) -> None: + # A `list[list[Model]]` sub-field reaches `value` through a single + # ArraySegment with iter_count=2; the target pins the full geometry. + assert any(n.target == _path("items[][].value") for n in nodes) + + +class TestTripleNestedArrayFieldChecks: + """Verify depth=3 nesting generates correct geometry.""" + + @pytest.fixture() + def nodes(self) -> list[Check]: + nodes, _ = _checks_for(TripleNestedArrayModel) + return nodes + + def test_subfield_target_shows_three_brackets(self, nodes: list[Check]) -> None: + assert any(n.target == _path("deep[][][].tag") for n in nodes) + + +class _NestedScalarListModel(BaseModel): + """list[list[scalar]] terminating directly in a constrained scalar. + + Exercises the one nested-array geometry the other tests miss: an + element-level check whose target's terminal ArraySegment carries + iter_count > 1 with no struct leaf after it (`grid[][]`, not + `grid[][].field`). 
+ """ + + grid: list[list[Annotated[str, MinLen(1)]]] + + +class TestNestedScalarListTarget: + """Element-level check on list[list[scalar]] targets a bare `field[][]`.""" + + def test_terminal_target_carries_iter_count_two(self) -> None: + nodes, _ = _checks_for(_NestedScalarListModel) + node = _node_for(nodes, "grid[][]", "check_string_min_length") + target = node.target + assert isinstance(target, ArrayPath) + last = target.segments[-1] + assert isinstance(last, ArraySegment) + assert last.name == "grid" + assert last.iter_count == 2 + + +class TestPrimitiveBoundsFiltered: + """Constraints inherent to primitive numeric types are filtered out.""" + + @pytest.fixture + def nodes(self) -> list[Check]: + """Field with int32-inherent and layered bounds.""" + shape = Primitive( + base_type="int32", + constraints=( + # Layered by schema author + ConstraintSource( + source_ref=None, source_name="FeatureVersion", constraint=Ge(ge=0) + ), + # Inherent to int32 + ConstraintSource( + source_ref=None, source_name="int32", constraint=Ge(ge=-(2**31)) + ), + ConstraintSource( + source_ref=None, source_name="int32", constraint=Le(le=2**31 - 1) + ), + ), + ) + field = FieldSpec( + name="version", shape=shape, description=None, is_required=True + ) + spec = ModelSpec(name="Test", description=None, fields=[field]) + nodes, _ = build_checks(spec) + return nodes + + def test_layered_bound_survives(self, nodes: list[Check]) -> None: + descs = nodes[0].descriptors + bounds = [d for d in descs if d.function == "check_bounds"] + assert len(bounds) == 1 + assert dict(bounds[0].kwargs) == {"ge": 0} + + def test_primitive_bounds_excluded(self, nodes: list[Check]) -> None: + descs = nodes[0].descriptors + bounds = [d for d in descs if d.function == "check_bounds"] + for b in bounds: + d = dict(b.kwargs) + assert d.get("ge") != -(2**31) + assert d.get("le") != 2**31 - 1 diff --git a/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py 
b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py new file mode 100644 index 000000000..2cfd6a676 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_constraint_dispatch.py @@ -0,0 +1,385 @@ +"""Tests for pyspark constraint dispatch.""" + +import pytest +from annotated_types import Ge, Gt, Interval, Le, Lt +from overture.schema.codegen.extraction.field import Primitive +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMaxLen, + ArrayMinLen, + ScalarMaxLen, + ScalarMinLen, +) +from overture.schema.codegen.extraction.specs import FieldSpec +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, + dispatch_base_type, + dispatch_constraint, + dispatch_model_constraint, + dispatch_newtype, + model_constraint_function, +) +from overture.schema.system.field_constraint.collection import UniqueItemsConstraint +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + JsonPointerConstraint, + PatternConstraint, + SnakeCaseConstraint, + StrippedConstraint, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryType, GeometryTypeConstraint +from overture.schema.system.ref import Identified, Reference, Relationship +from pydantic import Strict + + +class _Stub(Identified): + pass + + +class TestBoundsDispatch: + @pytest.mark.parametrize( + ("constraint", "expected_kwargs"), + [ + (Ge(ge=0), (("ge", 0),)), + (Gt(gt=0), (("gt", 0),)), + (Le(le=100), (("le", 100),)), + (Lt(lt=100), (("lt", 100),)), + (Interval(ge=0, le=1), (("ge", 0), ("le", 1))), + (Interval(ge=0), (("ge", 0),)), + ], + ) + def test_bound_dispatches_to_check_bounds( + 
self, constraint: object, expected_kwargs: tuple[tuple[str, object], ...] + ) -> None: + desc = dispatch_constraint(constraint) + assert desc is not None + assert desc.function == "check_bounds" + assert desc.kwargs == expected_kwargs + + def test_int_bounds_coerced_to_float_for_float_type(self) -> None: + """Integer bound values become float when the field is a float type.""" + desc = dispatch_constraint(Ge(ge=0), base_type="float64") + assert desc is not None + assert desc.kwargs == (("ge", 0.0),) + assert isinstance(dict(desc.kwargs)["ge"], float) + + def test_int_bounds_preserved_for_int_type(self) -> None: + desc = dispatch_constraint(Ge(ge=0), base_type="int32") + assert desc is not None + assert desc.kwargs == (("ge", 0),) + assert isinstance(dict(desc.kwargs)["ge"], int) + + def test_float_bounds_unchanged_for_float_type(self) -> None: + desc = dispatch_constraint(Ge(ge=0.0), base_type="float64") + assert desc is not None + assert desc.kwargs == (("ge", 0.0),) + assert isinstance(dict(desc.kwargs)["ge"], float) + + +class TestLengthDispatch: + def test_min_len_on_array(self) -> None: + desc = dispatch_constraint(ArrayMinLen(min_length=2)) + assert desc == ExpressionDescriptor( + function="check_array_min_length", args=(2,) + ) + + def test_min_len_on_scalar(self) -> None: + desc = dispatch_constraint(ScalarMinLen(min_length=1)) + assert desc == ExpressionDescriptor( + function="check_string_min_length", args=(1,) + ) + + def test_max_len_on_array(self) -> None: + desc = dispatch_constraint(ArrayMaxLen(max_length=10)) + assert desc == ExpressionDescriptor( + function="check_array_max_length", args=(10,) + ) + + def test_max_len_on_scalar(self) -> None: + desc = dispatch_constraint(ScalarMaxLen(max_length=10)) + assert desc == ExpressionDescriptor( + function="check_string_max_length", args=(10,) + ) + + +class TestStringConstraintDispatch: + def test_stripped(self) -> None: + desc = dispatch_constraint(StrippedConstraint()) + assert desc is not None + assert 
desc.function == "check_stripped" + + def test_json_pointer(self) -> None: + desc = dispatch_constraint(JsonPointerConstraint()) + assert desc is not None + assert desc.function == "check_json_pointer" + + def test_pattern_constraint_base(self) -> None: + c = PatternConstraint(r"^[A-Z]{2}$", "test error") + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[A-Z]{2}\z",) # anchor normalized + + def test_country_code_dispatches_as_pattern(self) -> None: + c = CountryCodeAlpha2Constraint() + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[A-Z]{2}\z",) # anchor normalized + assert desc.label == "ISO 3166-1 alpha-2 country code" + assert desc.check_name == "country_code_alpha2" + + def test_snake_case_dispatches_as_pattern(self) -> None: + c = SnakeCaseConstraint() + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_pattern" + assert desc.args == (r"^[a-z0-9]+(_[a-z0-9]+)*\z",) # anchor normalized + assert desc.label == "Category in snake_case format" + assert desc.check_name == "snake_case" + + +class TestPatternConstraintDispatch: + def test_pattern_constraint_label_fallback_to_docstring(self) -> None: + """PatternConstraint with no description falls back to docstring, period stripped.""" + c = PatternConstraint(r"^test$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + # Base PatternConstraint has docstring "Generic pattern-based string constraint." 
+ assert desc.label == "Generic pattern-based string constraint" + + def test_pattern_constraint_check_name_base_class(self) -> None: + c = PatternConstraint(r"^test$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + assert desc.check_name == "pattern" + + def test_anchor_normalized_dollar_to_backslash_z(self) -> None: + c = CountryCodeAlpha2Constraint() # pattern ends with $ + desc = dispatch_constraint(c) + assert desc is not None + pattern = str(desc.args[0]) + assert pattern.endswith(r"\z") + assert not pattern.endswith("$") + + def test_anchor_normalization_replaces_only_trailing_dollar(self) -> None: + """Dollar signs inside character classes are not end-anchors.""" + c = PatternConstraint(r"^[\$]+$", "error: {value}") + desc = dispatch_constraint(c) + assert desc is not None + pattern = str(desc.args[0]) + # The trailing $ is replaced; the \$ inside the class is preserved + assert pattern == r"^[\$]+\z" + + +class TestStructuralConstraintDispatch: + def test_unique_items(self) -> None: + desc = dispatch_constraint(UniqueItemsConstraint()) + assert desc is not None + assert desc.function == "check_struct_unique" + + def test_geometry_type(self) -> None: + c = GeometryTypeConstraint(GeometryType.POINT) + desc = dispatch_constraint(c) + assert desc is not None + assert desc.function == "check_geometry_type" + assert GeometryType.POINT in desc.args + + +class TestSkippedConstraints: + def test_reference_returns_none(self) -> None: + r = Reference(Relationship.AGGREGATION, _Stub) + desc = dispatch_constraint(r) + assert desc is None + + def test_strict_returns_none(self) -> None: + desc = dispatch_constraint(Strict()) + assert desc is None + + +class TestBaseTypeDispatch: + def test_http_url_dispatches_to_check_url_format_and_length(self) -> None: + descs = dispatch_base_type("HttpUrl") + assert descs is not None + assert len(descs) == 2 + assert descs[0].function == "check_url_format" + assert descs[1].function == "check_url_length" 
+ + def test_email_str_dispatches_to_check_email(self) -> None: + descs = dispatch_base_type("EmailStr") + assert descs is not None + assert len(descs) == 1 + assert descs[0].function == "check_email" + + def test_bbox_dispatches_to_three_checks(self) -> None: + descs = dispatch_base_type("BBox") + assert descs is not None + assert len(descs) == 3 + assert descs[0].function == "check_bbox_completeness" + assert descs[1].function == "check_bbox_lat_ordering" + assert descs[2].function == "check_bbox_lat_range" + + def test_unknown_base_type_returns_none(self) -> None: + descs = dispatch_base_type("str") + assert descs is None + + +class TestNewtypeDispatch: + def test_linear_range(self) -> None: + descs = dispatch_newtype("LinearlyReferencedRange") + assert descs is not None + assert len(descs) == 3 + assert descs[0].function == "check_linear_range_length" + assert descs[1].function == "check_linear_range_bounds" + assert descs[2].function == "check_linear_range_order" + + def test_country_code_alpha2_returns_none(self) -> None: + descs = dispatch_newtype("CountryCodeAlpha2") + assert descs is None + + def test_region_code_returns_none(self) -> None: + descs = dispatch_newtype("RegionCode") + assert descs is None + + def test_unknown_newtype_returns_none(self) -> None: + desc = dispatch_newtype("FeatureVersion") + assert desc is None + + +class TestUnknownConstraintFails: + def test_unknown_constraint_raises(self) -> None: + with pytest.raises(TypeError, match="Unhandled constraint"): + dispatch_constraint(object()) + + +class TestModelConstraintDispatch: + def test_require_any_of(self) -> None: + c = RequireAnyOfConstraint("a", "b") + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, RequireAnyOf) + assert model_constraint_function(desc) == "check_require_any_of" + assert desc.field_names == ("a", "b") + + def test_radio_group(self) -> None: + c = RadioGroupConstraint("is_land", "is_territorial") + (desc,) = dispatch_model_constraint(c, []) + 
assert isinstance(desc, RadioGroup) + assert model_constraint_function(desc) == "check_radio_group" + assert desc.field_names == ("is_land", "is_territorial") + + def test_require_if(self) -> None: + c = RequireIfConstraint( + field_names=("class",), + condition=FieldEqCondition(field_name="subtype", value="road"), + ) + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, RequireIf) + assert model_constraint_function(desc) == "check_require_if" + assert desc.field_names == ("class",) + assert desc.condition is c.condition + + def test_require_if_multi_field_splits(self) -> None: + """Multi-field `@require_if(["a", "b"], cond)` splits into one descriptor per field. + + Each runtime `check_require_if` call takes a single target + column, so the descriptor mirrors that: one per field, sharing + the same condition. + """ + condition = FieldEqCondition(field_name="subtype", value="road") + c = RequireIfConstraint(field_names=("a", "b"), condition=condition) + descs = dispatch_model_constraint(c, []) + assert len(descs) == 2 + assert all(isinstance(d, RequireIf) for d in descs) + assert [d.field_names for d in descs] == [("a",), ("b",)] + assert all(d.condition is condition for d in descs) # type: ignore[union-attr] + + def test_forbid_if(self) -> None: + c = ForbidIfConstraint( + field_names=("class",), + condition=FieldEqCondition(field_name="subtype", value="water"), + ) + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, ForbidIf) + assert model_constraint_function(desc) == "check_forbid_if" + assert desc.field_names == ("class",) + assert desc.field_shapes == () + + def test_forbid_if_negated(self) -> None: + c = ForbidIfConstraint( + field_names=("parent_division_id",), + condition=Not(FieldEqCondition(field_name="subtype", value="country")), + ) + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, ForbidIf) + assert model_constraint_function(desc) == "check_forbid_if" + assert desc.condition is c.condition + + 
def test_forbid_if_multi_field_splits(self) -> None: + """Multi-field `@forbid_if` splits into one descriptor per field, each with its own shape.""" + condition = FieldEqCondition(field_name="subtype", value="road") + c = ForbidIfConstraint(field_names=("a", "b"), condition=condition) + descs = dispatch_model_constraint(c, []) + assert len(descs) == 2 + assert all(isinstance(d, ForbidIf) for d in descs) + assert [d.field_names for d in descs] == [("a",), ("b",)] + + def test_min_fields_set(self) -> None: + c = MinFieldsSetConstraint(count=1) + (desc,) = dispatch_model_constraint(c, []) + assert isinstance(desc, MinFieldsSet) + assert model_constraint_function(desc) == "check_min_fields_set" + assert desc.count == 1 + assert desc.field_names == () + + def test_min_fields_set_enumerates_all_fields(self) -> None: + """`field_names` holds every field -- required and optional alike. + + Matches Pydantic's `model_fields_set` semantics, where required + fields are always set by the constructor and contribute to the + count alongside any explicitly-set optional fields. 
+ """ + fields = [ + FieldSpec(name="required_a", shape=Primitive(base_type="str")), + FieldSpec( + name="optional_b", + shape=Primitive(base_type="str"), + is_required=False, + ), + FieldSpec(name="required_c", shape=Primitive(base_type="str")), + FieldSpec( + name="optional_d", + shape=Primitive(base_type="str"), + is_required=False, + ), + ] + c = MinFieldsSetConstraint(count=1) + (desc,) = dispatch_model_constraint(c, fields) + assert isinstance(desc, MinFieldsSet) + assert desc.field_names == ( + "required_a", + "optional_b", + "required_c", + "optional_d", + ) + + def test_no_extra_fields_skipped(self) -> None: + c = NoExtraFieldsConstraint() + assert dispatch_model_constraint(c, []) == () + + def test_unknown_model_constraint_raises(self) -> None: + with pytest.raises(TypeError, match="Unhandled model constraint"): + dispatch_model_constraint(object(), []) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_e2e.py b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py new file mode 100644 index 000000000..6b7629ea1 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_e2e.py @@ -0,0 +1,206 @@ +"""End-to-end generation tests: verify generated modules match hand-written references.""" + +import ast +from pathlib import Path +from typing import Annotated, Literal + +import pytest +from annotated_types import Ge +from codegen_test_support import discover_feature +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.pyspark.pipeline import ( + GeneratedModule, + generate_pyspark_module, +) +from pydantic import BaseModel + + +class SimpleModel(BaseModel): + subtype: Literal["a", "b"] + score: Annotated[float, Ge(0.0)] | None = None + + +class TestDivisionAreaGeneration: + @pytest.fixture + def generated(self) -> GeneratedModule: + spec = discover_feature("DivisionArea") + return generate_pyspark_module(spec) + + def test_generates_valid_python(self, generated: 
GeneratedModule) -> None: + ast.parse(generated.content) + + def test_has_builder_function(self, generated: GeneratedModule) -> None: + assert "def division_area_checks()" in generated.content + + def test_has_schema_constant(self, generated: GeneratedModule) -> None: + assert "DIVISION_AREA_SCHEMA" in generated.content + + def test_output_path(self, generated: GeneratedModule) -> None: + assert generated.path.name == "division_area.py" + + def test_checks_cover_expected_fields(self, generated: GeneratedModule) -> None: + """Generated checks should cover the fields from the hand-written module.""" + content = generated.content + # Hand-written checks: subtype, class, country, region, radio_group (is_land, is_territorial), admin_level + for field in ["subtype", "class", "country", "region"]: + assert f'field="{field}"' in content, f"Missing check for {field}" + + def test_schema_has_expected_fields(self, generated: GeneratedModule) -> None: + """Schema should contain all expected DivisionArea fields.""" + content = generated.content + expected_fields = [ + "id", + "geometry", + "bbox", + "country", + "version", + "subtype", + "class", + "names", + "is_land", + "is_territorial", + "region", + "admin_level", + "division_id", + "theme", + "type", + ] + for field in expected_fields: + assert f'"{field}"' in content, f"Missing schema field: {field}" + + def test_uses_bbox_shared_struct(self, generated: GeneratedModule) -> None: + """Should reference BBOX_STRUCT from _schema_structs (BBox is not a BaseModel).""" + assert "BBOX_STRUCT" in generated.content + + def test_imports_constraint_expressions(self, generated: GeneratedModule) -> None: + """Should import constraint expression functions.""" + content = generated.content + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in content + ) + + def test_radio_group_constraint(self, generated: GeneratedModule) -> None: + """Should have a radio_group check for is_land/is_territorial.""" + 
content = generated.content + assert "check_radio_group" in content + assert "is_land" in content + assert "is_territorial" in content + + def test_subtype_has_check_enum(self, generated: GeneratedModule) -> None: + """Subtype (ENUM-kind field) should produce a check_enum with member values.""" + assert "check_enum" in generated.content + + def test_country_uses_check_pattern(self, generated: GeneratedModule) -> None: + """Country field (required newtype) produces both check_required and check_pattern.""" + assert "check_pattern" in generated.content + # Bug #1 regression: check_required must not be skipped for required newtype fields. + # With split checks, each descriptor produces its own function; both must appear. + assert "check_required" in generated.content + + def test_region_uses_check_pattern(self, generated: GeneratedModule) -> None: + """Region field produces check_pattern with the region-code label.""" + assert "ISO 3166-2 subdivision code" in generated.content + + +@pytest.mark.parametrize( + "class_name,builder_name,schema_name", + [ + ("DivisionArea", "division_area_checks", "DIVISION_AREA_SCHEMA"), + ("Division", "division_checks", "DIVISION_SCHEMA"), + ("DivisionBoundary", "division_boundary_checks", "DIVISION_BOUNDARY_SCHEMA"), + ("Place", "place_checks", "PLACE_SCHEMA"), + ], +) +class TestModelFeatureGeneration: + @pytest.fixture + def generated(self, class_name: str) -> GeneratedModule: + spec = discover_feature(class_name) + return generate_pyspark_module(spec) + + def test_generates_valid_python( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + ast.parse(generated.content) + + def test_has_builder_function( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + assert f"def {builder_name}()" in generated.content + + def test_has_schema_constant( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, 
+ schema_name: str, + ) -> None: + assert schema_name in generated.content + + def test_has_shared_bbox_struct( + self, + generated: GeneratedModule, + class_name: str, + builder_name: str, + schema_name: str, + ) -> None: + assert "BBOX_STRUCT" in generated.content + + +class TestSegmentGeneration: + @pytest.fixture + def generated(self) -> GeneratedModule: + spec = discover_feature("Segment") + return generate_pyspark_module(spec) + + def test_generates_valid_python(self, generated: GeneratedModule) -> None: + ast.parse(generated.content) + + def test_has_builder_and_schema(self, generated: GeneratedModule) -> None: + assert "def segment_checks()" in generated.content + assert "SEGMENT_SCHEMA" in generated.content + + def test_has_shared_bbox_struct(self, generated: GeneratedModule) -> None: + assert "BBOX_STRUCT" in generated.content + + def test_has_variant_conditional_checks(self, generated: GeneratedModule) -> None: + """Segment has subtype-gated fields using runtime values like 'road'.""" + assert "F.when" in generated.content + assert "isin" in generated.content + # Variant values must use the runtime string value, not the enum repr + assert '"road"' in generated.content or "'road'" in generated.content + assert "Subtype.ROAD" not in generated.content + + def test_array_discriminator_outside_lambda( + self, generated: GeneratedModule + ) -> None: + """Top-level discriminator must wrap array_check, not appear inside the lambda.""" + # el["subtype"] must never appear — subtype is a top-level column, not an element field + assert 'el["subtype"]' not in generated.content, ( + 'el["subtype"] found — top-level discriminator placed inside array lambda' + ) + # F.col("subtype") must appear as the discriminator reference + assert 'F.col("subtype")' in generated.content + + +def test_cli_writes_init_modules(tmp_path: Path) -> None: + from overture.schema.codegen.cli import _generate_pyspark + + spec = extract_model(SimpleModel, 
entry_point="overture.schema.simple:SimpleModel") + out = tmp_path / "src" + test_out = tmp_path / "tests" + _generate_pyspark([spec], out, test_out) + assert (out / "overture" / "schema" / "simple" / "__init__.py").exists() + assert (out / "overture" / "schema" / "simple" / "simple_model.py").exists() + assert (test_out / "overture" / "schema" / "simple" / "__init__.py").exists() + assert ( + test_out / "overture" / "schema" / "simple" / "test_simple_model.py" + ).exists() diff --git a/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py new file mode 100644 index 000000000..d2a7811d1 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_invalid_value.py @@ -0,0 +1,175 @@ +"""Tests for constraint-violating value generation.""" + +import pytest +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor +from overture.schema.codegen.pyspark.test_data.invalid_value import invalid_value +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + NoWhitespaceConstraint, + RegionCodeConstraint, +) +from overture.schema.system.primitive.geom import GeometryType + + +class TestInvalidValueRequired: + def test_returns_none(self) -> None: + desc = ExpressionDescriptor(function="check_required") + assert invalid_value(desc) is None + + +class TestInvalidValueEnum: + def test_returns_invalid_sentinel(self) -> None: + desc = ExpressionDescriptor(function="check_enum", args=(["a", "b"],)) + assert invalid_value(desc) == "__INVALID__" + + +class TestInvalidValueBounds: + def test_ge(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)) + assert invalid_value(desc) == -1 + + def test_ge_float(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0.0),)) + assert invalid_value(desc) == -1.0 + assert isinstance(invalid_value(desc), float) + + def 
test_gt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("gt", 0),)) + assert invalid_value(desc) == 0 + + def test_le(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("le", 100),)) + assert invalid_value(desc) == 101 + + def test_lt(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("lt", 100),)) + assert invalid_value(desc) == 100 + + def test_unknown_bound_raises(self) -> None: + desc = ExpressionDescriptor(function="check_bounds", kwargs=(("unknown", 5),)) + with pytest.raises(ValueError): + invalid_value(desc) + + +class TestInvalidValuePattern: + def test_default_pattern(self) -> None: + desc = ExpressionDescriptor(function="check_pattern", args=(r"^[A-Z]+$",)) + assert invalid_value(desc) == "!!!INVALID!!!" + + def test_no_whitespace_pattern(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + args=(r"^\S+$",), + constraint_type=NoWhitespaceConstraint, + ) + assert invalid_value(desc) == "has whitespace" + + +class TestInvalidValueStringTypes: + def test_country_code(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + constraint_type=CountryCodeAlpha2Constraint, + ) + assert invalid_value(desc) == "99" + + def test_region_code(self) -> None: + desc = ExpressionDescriptor( + function="check_pattern", + constraint_type=RegionCodeConstraint, + ) + assert invalid_value(desc) == "99-999" + + def test_url_format(self) -> None: + desc = ExpressionDescriptor(function="check_url_format") + assert invalid_value(desc) == "not-a-url" + + def test_url_length(self) -> None: + desc = ExpressionDescriptor(function="check_url_length") + assert invalid_value(desc) == "https://" + "x" * 2076 + + def test_email(self) -> None: + desc = ExpressionDescriptor(function="check_email") + assert invalid_value(desc) == "not-an-email" + + def test_stripped(self) -> None: + desc = ExpressionDescriptor(function="check_stripped") + assert invalid_value(desc) 
== " has spaces " + + def test_json_pointer(self) -> None: + desc = ExpressionDescriptor(function="check_json_pointer") + assert invalid_value(desc) == "no-slash" + + +class TestInvalidValueCollections: + def test_min_length_empty_list(self) -> None: + desc = ExpressionDescriptor(function="check_array_min_length", args=(1,)) + assert invalid_value(desc) == [] + + def test_max_length_oversized(self) -> None: + desc = ExpressionDescriptor(function="check_array_max_length", args=(3,)) + assert invalid_value(desc) == [{}] * 4 + + def test_string_min_length_empty_string(self) -> None: + desc = ExpressionDescriptor(function="check_string_min_length", args=(1,)) + assert invalid_value(desc) == "" + + def test_string_max_length_oversized_string(self) -> None: + desc = ExpressionDescriptor(function="check_string_max_length", args=(3,)) + assert invalid_value(desc) == "x" * 4 + + +class TestInvalidValueLinearRange: + def test_linear_range_length(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_length") + assert invalid_value(desc) == [0.5] + + def test_linear_range_bounds(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_bounds") + assert invalid_value(desc) == [1.5, 2.0] + + def test_linear_range_order(self) -> None: + desc = ExpressionDescriptor(function="check_linear_range_order") + assert invalid_value(desc) == [0.8, 0.2] + + +class TestInvalidValueGeometry: + def test_point_not_allowed_picks_point(self) -> None: + # Allowed: polygon only → first candidate (POINT) not in allowed set + desc = ExpressionDescriptor( + function="check_geometry_type", args=(GeometryType.POLYGON,) + ) + assert invalid_value(desc) == "POINT (0 0)" + + def test_point_allowed_picks_linestring(self) -> None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT, GeometryType.POLYGON), + ) + assert invalid_value(desc) == "LINESTRING (0 0, 1 1)" + + def test_point_and_linestring_allowed_picks_collection(self) -> 
None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT, GeometryType.LINE_STRING), + ) + assert invalid_value(desc) == "GEOMETRYCOLLECTION EMPTY" + + def test_all_candidates_allowed_raises(self) -> None: + desc = ExpressionDescriptor( + function="check_geometry_type", + args=( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.GEOMETRY_COLLECTION, + ), + ) + with pytest.raises(ValueError): + invalid_value(desc) + + +class TestInvalidValueUnknown: + def test_unknown_function_raises(self) -> None: + desc = ExpressionDescriptor(function="check_something_unknown") + with pytest.raises(ValueError): + invalid_value(desc) diff --git a/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py new file mode 100644 index 000000000..95201a09b --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_pipeline.py @@ -0,0 +1,391 @@ +"""Tests for the PySpark generation pipeline.""" + +import ast +from pathlib import PurePosixPath +from typing import Annotated, Literal + +import pytest +from annotated_types import Ge +from codegen_test_support import find_theme, partitions_from_tags +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + is_model_class, + is_union_alias, +) +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.pyspark.check_ir import Check +from overture.schema.codegen.pyspark.constraint_dispatch import ExpressionDescriptor +from overture.schema.codegen.pyspark.pipeline import ( + GeneratedModule, + PipelineOutput, + _extract_geometry_types, + generate_pyspark_module, + generate_pyspark_modules, +) +from overture.schema.system.field_path import ScalarPath +from overture.schema.system.primitive import 
GeometryType +from pydantic import BaseModel + + +class SimpleModel(BaseModel): + subtype: Literal["a", "b"] + score: Annotated[float, Ge(0.0)] | None = None + + +class BoundsModel(BaseModel): + value: Annotated[float, Ge(0.0)] + + +class TestGeneratePysparkModule: + @pytest.fixture + def simple_module(self) -> GeneratedModule: + return generate_pyspark_module( + extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel") + ) + + def test_returns_generated_module(self, simple_module: GeneratedModule) -> None: + assert isinstance(simple_module, GeneratedModule) + + def test_content_is_nonempty(self, simple_module: GeneratedModule) -> None: + assert simple_module.content + + def test_content_is_valid_python(self, simple_module: GeneratedModule) -> None: + ast.parse(simple_module.content) + + def test_path_uses_snake_case_feature_name( + self, simple_module: GeneratedModule + ) -> None: + assert simple_module.path == PurePosixPath( + "overture/schema/simple/simple_model.py" + ) + + def test_path_for_bounds_model(self) -> None: + result = generate_pyspark_module( + extract_model(BoundsModel, entry_point="overture.schema.bounds:BoundsModel") + ) + assert result.path == PurePosixPath("overture/schema/bounds/bounds_model.py") + + def test_content_contains_checks_function( + self, simple_module: GeneratedModule + ) -> None: + assert "simple_model_checks" in simple_module.content + + def test_content_contains_schema_constant( + self, simple_module: GeneratedModule + ) -> None: + assert "SIMPLE_MODEL_SCHEMA" in simple_module.content + + +def _two_specs() -> list[FeatureSpec]: + return [ + extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel"), + extract_model(BoundsModel, entry_point="overture.schema.bounds:BoundsModel"), + ] + + +def _features(modules: list[GeneratedModule]) -> list[GeneratedModule]: + return [m for m in modules if m.path.name != "__init__.py"] + + +class TestGeneratePysparkModules: + @pytest.fixture + def 
two_spec_modules(self) -> PipelineOutput: + return generate_pyspark_modules(_two_specs()) + + def test_empty_specs_returns_no_modules(self) -> None: + result = generate_pyspark_modules([]) + assert result.source == [] + assert result.test == [] + + def test_one_module_per_spec(self, two_spec_modules: PipelineOutput) -> None: + assert len(_features(two_spec_modules.source)) == 2 + + def test_paths_unique_per_tree(self, two_spec_modules: PipelineOutput) -> None: + # source and test trees mirror the same dirs; uniqueness is + # only required within each tree, not across them. + for tree in (two_spec_modules.source, two_spec_modules.test): + paths = [m.path for m in tree] + assert len(paths) == len(set(paths)) + + def test_all_content_is_valid_python( + self, two_spec_modules: PipelineOutput + ) -> None: + for mod in (*two_spec_modules.source, *two_spec_modules.test): + ast.parse(mod.content) + + def test_divisions_theme_produces_division_area( + self, all_discovered_models: dict + ) -> None: + """divisions theme should produce a division_area.py module.""" + division_specs: list[FeatureSpec] = [] + for key, entry in all_discovered_models.items(): + if find_theme(key.tags) != "divisions": + continue + partitions = partitions_from_tags(key.tags) + if is_model_class(entry): + division_specs.append( + extract_model( + entry, entry_point=key.entry_point, partitions=partitions + ) + ) + elif is_union_alias(entry): + division_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + partitions=partitions, + ) + ) + + results = generate_pyspark_modules(division_specs) + names = {r.path.stem for r in results.source} + assert "division_area" in names + + +class TestTestModuleGeneration: + @pytest.fixture + def all_modules(self) -> PipelineOutput: + return generate_pyspark_modules(_two_specs()) + + def test_generates_test_modules(self, all_modules: PipelineOutput) -> None: + assert len(_features(all_modules.test)) == 2 # one 
per feature spec + + def test_test_module_paths(self, all_modules: PipelineOutput) -> None: + paths = {m.path.name for m in _features(all_modules.test)} + assert "test_simple_model.py" in paths + assert "test_bounds_model.py" in paths + + def test_test_modules_are_valid_python(self, all_modules: PipelineOutput) -> None: + for mod in all_modules.test: + ast.parse(mod.content) + + def test_test_module_contains_imports(self, all_modules: PipelineOutput) -> None: + for mod in _features(all_modules.test): + assert "_support.harness import" in mod.content + assert "_support.scenarios import" in mod.content + + +def _extract_scenarios_block(content: str) -> str: + """Extract the SCENARIOS list literal from generated test source.""" + start = content.index("SCENARIOS:") + end = content.index("]", start) + 1 + return content[start:end] + + +class TestPerArmTestGeneration: + """Union features with multiple examples produce per-arm test modules.""" + + @pytest.fixture + def segment_modules(self, all_discovered_models: dict) -> PipelineOutput: + specs: list[FeatureSpec] = [] + for key, entry in all_discovered_models.items(): + if key.name != "segment": + continue + if is_union_alias(entry): + specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + partitions=partitions_from_tags(key.tags), + ) + ) + return generate_pyspark_modules(specs) + + def test_produces_per_arm_test_files(self, segment_modules: PipelineOutput) -> None: + paths = {m.path.name for m in _features(segment_modules.test)} + assert "test_segment_road.py" in paths + assert "test_segment_rail.py" in paths + + def test_no_monolithic_test_file(self, segment_modules: PipelineOutput) -> None: + """When per-arm tests exist, no undifferentiated test_segment.py.""" + paths = {m.path.name for m in _features(segment_modules.test)} + assert "test_segment.py" not in paths + + def test_per_arm_modules_are_valid_python( + self, segment_modules: PipelineOutput + ) -> None: 
+ for mod in segment_modules.test: + ast.parse(mod.content) + + def test_road_module_has_road_checks(self, segment_modules: PipelineOutput) -> None: + road = next( + m for m in segment_modules.test if m.path.name == "test_segment_road.py" + ) + assert "road_surface" in road.content + + def test_rail_module_has_rail_checks(self, segment_modules: PipelineOutput) -> None: + rail = next( + m for m in segment_modules.test if m.path.name == "test_segment_rail.py" + ) + assert "rail_flags" in rail.content + + def test_road_module_no_rail_field_scenarios( + self, segment_modules: PipelineOutput + ) -> None: + road = next( + m for m in segment_modules.test if m.path.name == "test_segment_road.py" + ) + scenarios = _extract_scenarios_block(road.content) + assert "rail_flags[].values" not in scenarios + + def test_rail_module_no_road_field_scenarios( + self, segment_modules: PipelineOutput + ) -> None: + rail = next( + m for m in segment_modules.test if m.path.name == "test_segment_rail.py" + ) + scenarios = _extract_scenarios_block(rail.content) + assert "road_surface" not in scenarios + + def test_non_union_still_gets_single_test(self) -> None: + """Non-union features produce a single test module (unchanged).""" + modules = generate_pyspark_modules( + [ + extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + ] + ) + tests = _features(modules.test) + assert len(tests) == 1 + assert tests[0].path.name == "test_simple_model.py" + + +class TestNestedSourcePaths: + def test_module_path_mirrors_entry_point(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + features = _features(modules.source) + assert len(features) == 1 + assert features[0].path == PurePosixPath( + "overture/schema/simple/simple_model.py" + ) + + def test_two_packages_no_collision(self) -> None: + a = extract_model(SimpleModel, entry_point="overture.schema.places:Place") + b = 
extract_model(SimpleModel, entry_point="annex.schema.places:Place") + modules = generate_pyspark_modules([a, b]) + paths = {m.path for m in _features(modules.source)} + assert PurePosixPath("overture/schema/places/place.py") in paths + assert PurePosixPath("annex/schema/places/place.py") in paths + + +_EXPECTED_INIT_PATHS = { + PurePosixPath("__init__.py"), + PurePosixPath("overture/__init__.py"), + PurePosixPath("overture/schema/__init__.py"), + PurePosixPath("overture/schema/simple/__init__.py"), +} + + +def _init_paths(modules: list[GeneratedModule]) -> set[PurePosixPath]: + return {m.path for m in modules if m.path.name == "__init__.py"} + + +class TestInitModuleEmission: + def test_intermediate_dirs_get_init_modules(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + assert _init_paths(modules.source) == _EXPECTED_INIT_PATHS + + def test_init_modules_are_empty(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + init = next(m for m in modules.source if m.path.name == "__init__.py") + assert init.content == "" + + def test_shared_dirs_emitted_once(self) -> None: + a = extract_model(SimpleModel, entry_point="overture.schema.simple:SimpleModel") + b = extract_model(BoundsModel, entry_point="overture.schema.simple:BoundsModel") + modules = generate_pyspark_modules([a, b]) + init_paths = [m.path for m in modules.source if m.path.name == "__init__.py"] + assert len(init_paths) == len(set(init_paths)) + + +class TestNoRegistryEmitted: + def test_registry_module_is_no_longer_generated(self) -> None: + # The runtime builds the registry via entry-point discovery; codegen + # must not emit `_registry.py`. 
+ spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + for tree in (modules.source, modules.test): + assert all(m.path.name != "_registry.py" for m in tree) + + +class TestNestedTestPaths: + def test_test_module_path_mirrors_source(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + tests = _features(modules.test) + assert len(tests) == 1 + assert tests[0].path == PurePosixPath( + "overture/schema/simple/test_simple_model.py" + ) + + def test_test_module_imports_nested_expression(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + test_mod = next(iter(_features(modules.test))) + assert ( + "from overture.schema.pyspark.expressions.generated.overture.schema.simple.simple_model import" + in test_mod.content + ) + + def test_test_dirs_get_init_modules(self) -> None: + spec = extract_model( + SimpleModel, entry_point="overture.schema.simple:SimpleModel" + ) + modules = generate_pyspark_modules([spec]) + # Source-tree init modules already covered in TestInitModuleEmission. + # The test tree must mirror the same package layout. 
+ assert _init_paths(modules.test) == _EXPECTED_INIT_PATHS + + +class TestExtractGeometryTypes: + """`_extract_geometry_types` aggregates across descriptors and checks.""" + + def test_aggregates_across_descriptors(self) -> None: + checks = [ + Check( + descriptors=( + ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POINT,), + ), + ), + target=ScalarPath(), + ), + Check( + descriptors=( + ExpressionDescriptor( + function="check_geometry_type", + args=(GeometryType.POLYGON, GeometryType.LINE_STRING), + ), + ), + target=ScalarPath(), + ), + ] + assert _extract_geometry_types(checks) == ( + GeometryType.LINE_STRING, + GeometryType.POINT, + GeometryType.POLYGON, + ) + + def test_returns_empty_when_absent(self) -> None: + assert _extract_geometry_types([]) == () diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py new file mode 100644 index 000000000..42775be41 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -0,0 +1,1097 @@ +"""Tests for pyspark feature module renderer.""" + +import ast +import re +from enum import Enum +from typing import Annotated, Literal, Union + +import pytest +from annotated_types import Ge, MinLen +from codegen_test_support import ( + LiteralSubtypeModel, + RadioModel, + RequireAnyModel, + TripleNestedArrayModel, + feature_spec_for_model, +) +from overture.schema.codegen.pyspark._render_common import jinja_env +from overture.schema.codegen.pyspark.check_builder import build_checks +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + RequireAnyOf, +) +from overture.schema.codegen.pyspark.renderer import ( + _render_check_function_context, + _render_model_constraint_function_context, + render_feature_module, +) +from 
overture.schema.codegen.pyspark.schema_builder import build_schema +from overture.schema.system.field_path import ( + parse, +) +from overture.schema.system.model_constraint import ( + FieldEqCondition, + Not, + forbid_if, + require_any_of, + require_if, +) +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) +from overture.schema.system.string import CountryCodeAlpha2 +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +_path = parse + + +class BoundsModel(BaseModel): + score: Annotated[float, Ge(0.0)] + + +class ArrayModel(BaseModel): + tags: Annotated[list[str], MinLen(1)] + + +class InnerModel(BaseModel): + value: str + + +class NestedArrayModel(BaseModel): + items: list[InnerModel] | None = None + + +# list[Annotated[float, Ge(0.0)]] produces ARRAY-shape nodes because +# check_bounds is an element-level function (not in _COLUMN_LEVEL_FUNCTIONS). +class FloatListModel(BaseModel): + scores: list[Annotated[float, Ge(0.0)]] | None = None + + +def _render(model_cls: type[BaseModel], name: str = "simple") -> str: + spec = feature_spec_for_model(model_cls) + field_checks, model_checks = build_checks(spec) + schema_fields = build_schema(spec) + return render_feature_module(name, field_checks, model_checks, schema_fields) + + +def _render_check_function_string(ctx: dict[str, object]) -> str: + """Render a single check function context to source via the Jinja macro.""" + template = jinja_env().get_template("_check_function.py.jinja2") + return str(template.module.check_function(c=ctx)) # type: ignore[attr-defined] + + +def _render_check_function( + check: Check, func_name: str, descriptor_idx: int = 0 +) -> str: + """Render a per-field check function source from a Check.""" + ctx = _render_check_function_context(check, func_name, descriptor_idx) + return _render_check_function_string(ctx) + + +def _render_node(check: Check) -> str: + """Render a single Check to its function source.""" + return 
_render_check_function(check, "_test_check", descriptor_idx=0) + + +def _render_model_node(check: ModelCheck) -> str: + """Render a single ModelCheck to its function source.""" + ctx = _render_model_constraint_function_context(check, 0, "") + return _render_check_function_string(ctx) + + +@pytest.fixture(scope="module") +def literal_subtype_source() -> str: + """Rendered `LiteralSubtypeModel` source (default `simple` feature name). + + Module-scoped so the extraction+render cost is paid once for all + consumers in this file. + """ + return _render(LiteralSubtypeModel) + + +class TestParseable: + def test_renders_parseable_python(self, literal_subtype_source: str) -> None: + ast.parse(literal_subtype_source) + + def test_bounds_model_parseable(self) -> None: + source = _render(BoundsModel) + ast.parse(source) + + def test_array_model_parseable(self) -> None: + source = _render(ArrayModel) + ast.parse(source) + + def test_nested_array_model_parseable(self) -> None: + source = _render(NestedArrayModel) + ast.parse(source) + + def test_radio_model_parseable(self) -> None: + source = _render(RadioModel, "radio") + ast.parse(source) + + def test_require_any_model_parseable(self) -> None: + source = _render(RequireAnyModel, "require_any") + ast.parse(source) + + def test_depth_3_renders_valid_python(self) -> None: + source = _render(TripleNestedArrayModel, "triple") + ast.parse(source) + assert "nested_array_check(" in source + + +class TestBuilderFunction: + def test_contains_builder_function(self, literal_subtype_source: str) -> None: + assert "def simple_checks()" in literal_subtype_source + + def test_builder_returns_list_check(self, literal_subtype_source: str) -> None: + assert "list[Check]" in literal_subtype_source + + def test_builder_name_uses_feature_name(self) -> None: + source = _render(LiteralSubtypeModel, "my_feature") + assert "def my_feature_checks()" in source + + +class TestSchemaConstant: + def test_contains_schema_constant(self, 
literal_subtype_source: str) -> None: + assert "SIMPLE_SCHEMA" in literal_subtype_source + + def test_schema_constant_name_uppercased(self) -> None: + source = _render(LiteralSubtypeModel, "my_feature") + assert "MY_FEATURE_SCHEMA" in source + + def test_contains_struct_type(self, literal_subtype_source: str) -> None: + assert "StructType" in literal_subtype_source + + def test_contains_struct_field(self, literal_subtype_source: str) -> None: + assert "StructField" in literal_subtype_source + + def test_shared_struct_ref_emits_struct_field(self) -> None: + """Shared struct refs (BBOX_STRUCT) render as the type of a StructField.""" + from overture.schema.codegen.pyspark.schema_builder import SchemaField + + schema_fields = [SchemaField(name="bbox", type_expr="BBOX_STRUCT")] + source = render_feature_module("simple", [], [], schema_fields) + assert 'StructField("bbox", BBOX_STRUCT, True)' in source + + +class TestGeometryTypes: + """`GEOMETRY_TYPES` constant emission for runtime discovery.""" + + def test_omitted_when_empty(self, literal_subtype_source: str) -> None: + assert "GEOMETRY_TYPES" not in literal_subtype_source + + def test_emitted_when_provided(self) -> None: + spec = feature_spec_for_model(LiteralSubtypeModel) + field_nodes, model_nodes = build_checks(spec) + schema_fields = build_schema(spec) + source = render_feature_module( + "simple", + field_nodes, + model_nodes, + schema_fields, + geometry_types=(GeometryType.POINT,), + ) + assert ( + "GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,)" in source + ) + + def test_geometry_type_imported_when_only_constant_needs_it(self) -> None: + # LiteralSubtypeModel has no check_geometry_type constraint, so the + # import is only required because GEOMETRY_TYPES references it. 
+ spec = feature_spec_for_model(LiteralSubtypeModel) + field_nodes, model_nodes = build_checks(spec) + schema_fields = build_schema(spec) + source = render_feature_module( + "simple", + field_nodes, + model_nodes, + schema_fields, + geometry_types=(GeometryType.POINT,), + ) + assert "from overture.schema.system.primitive import GeometryType" in source + + +class TestImports: + def test_imports_pyspark_functions(self, literal_subtype_source: str) -> None: + assert "from pyspark.sql import functions as F" in literal_subtype_source + + def test_imports_check_classes(self, literal_subtype_source: str) -> None: + assert ( + "from overture.schema.pyspark.check import Check, CheckShape" + in literal_subtype_source + ) + + def test_imports_constraint_expressions(self, literal_subtype_source: str) -> None: + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in literal_subtype_source + ) + + def test_imports_schema_types(self, literal_subtype_source: str) -> None: + # StructType and StructField must appear in the import section (before first def) + first_def = literal_subtype_source.index("\ndef ") + import_section = literal_subtype_source[:first_def] + assert "pyspark.sql.types" in import_section + assert "StructType" in import_section + assert "StructField" in import_section + + def test_imports_array_check_when_needed(self) -> None: + source = _render(FloatListModel, "float_list") + assert "array_check" in source + + def test_no_unused_column_patterns_import_for_simple( + self, literal_subtype_source: str + ) -> None: + # LiteralSubtypeModel has no array fields -- column_patterns import not needed + assert "column_patterns" not in literal_subtype_source + + +class TestPerFieldFunctions: + def test_per_field_function_exists(self, literal_subtype_source: str) -> None: + # With split checks, compound fields produce suffixed names + assert ( + "_subtype_required_check" in literal_subtype_source + or "_subtype_enum_check" in 
literal_subtype_source + ) + + def test_check_has_name_field(self, literal_subtype_source: str) -> None: + """Rendered Check includes name= derived from constraint function.""" + assert 'name="required"' in literal_subtype_source + assert 'name="enum"' in literal_subtype_source + + def test_no_field_in_check_calls(self, literal_subtype_source: str) -> None: + """check_* calls should not include field string as second arg.""" + # Match pattern: check_xxx(F.col("yyy"), "yyy", ...) — field as 2nd arg + field_arg_pattern = re.compile(r'check_\w+\(F\.col\("[^"]+"\),\s*"[^"]+"') + assert not field_arg_pattern.search(literal_subtype_source) + + def test_scalar_single_descriptor_no_coalesce(self) -> None: + class OptionalBounds(BaseModel): + value: Annotated[float, Ge(0.0)] | None = None + + source = _render(OptionalBounds, "opt") + assert "check_bounds" in source + assert "F.coalesce" not in source + + def test_scalar_multi_descriptor_produces_separate_checks( + self, literal_subtype_source: str + ) -> None: + """SimpleModel.subtype has check_required + check_enum -> two separate functions.""" + assert "F.coalesce" not in literal_subtype_source + assert 'name="required"' in literal_subtype_source + assert 'name="enum"' in literal_subtype_source + + def test_compound_checks_split(self, literal_subtype_source: str) -> None: + """A field with required + enum produces two Check functions, not one coalesced.""" + assert "F.coalesce" not in literal_subtype_source + + def test_array_shape_uses_array_check(self) -> None: + source = _render(FloatListModel, "float_list") + assert "array_check" in source + + def test_field_function_name_sanitized(self) -> None: + # nested field like "items[].value" -> _items_value_check + source = _render(NestedArrayModel) + assert "_items_value_check" in source + + def test_builder_collects_all_checks(self, literal_subtype_source: str) -> None: + # With split checks, both descriptors appear in the builder + assert "_subtype_required_check()" in 
literal_subtype_source + assert "_subtype_enum_check()" in literal_subtype_source + + +class TestModelConstraintFunctions: + def test_radio_group_check_rendered(self) -> None: + source = _render(RadioModel, "radio") + assert "check_radio_group" in source + + def test_require_any_of_rendered(self) -> None: + source = _render(RequireAnyModel, "require_any") + assert "check_require_any_of" in source + + def test_radio_group_no_context_arg(self) -> None: + """check_radio_group must not receive a context string argument.""" + source = _render(RadioModel, "radio") + # Context arg was the model name, e.g. "RadioModel" — must not appear + assert "'RadioModel'" not in source + + def test_require_any_of_no_context_arg(self) -> None: + """check_require_any_of must not receive a context string argument.""" + source = _render(RequireAnyModel, "require_any") + assert "'RequireAnyModel'" not in source + + def test_model_constraint_imports_function(self) -> None: + source = _render(RadioModel, "radio") + assert "check_radio_group" in source + # imported from constraint_expressions + assert ( + "from overture.schema.pyspark.expressions.constraint_expressions import" + in source + ) + + def test_model_constraint_included_in_builder(self) -> None: + source = _render(RadioModel, "radio") + # some check function for radio_group should appear in builder return + lines = source.splitlines() + builder_lines = [] + in_builder = False + for line in lines: + if "def radio_checks()" in line: + in_builder = True + if in_builder: + builder_lines.append(line) + builder_src = "\n".join(builder_lines) + assert "check" in builder_src.lower() + + +class TestEnumConstants: + def test_enum_values_appear_as_list(self, literal_subtype_source: str) -> None: + for value in ("a", "b", "c"): + assert f"'{value}'" in literal_subtype_source + + def test_check_enum_called_with_values(self, literal_subtype_source: str) -> None: + assert "check_enum" in literal_subtype_source + + +class GeomModel(BaseModel): + 
geometry: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POLYGON, GeometryType.MULTI_POLYGON), + ] + + +class TestGeometryTypeRendering: + def test_geometry_type_renders_valid_python(self) -> None: + source = _render(GeomModel, "geom") + ast.parse(source) + + def test_geometry_type_uses_qualified_name(self) -> None: + source = _render(GeomModel, "geom") + assert "GeometryType.POLYGON" in source + assert "GeometryType.MULTI_POLYGON" in source + + def test_geometry_type_import_present(self) -> None: + source = _render(GeomModel, "geom") + assert "from overture.schema.system.primitive import GeometryType" in source + + def test_no_geometry_type_import_without_geometry_field( + self, literal_subtype_source: str + ) -> None: + assert "GeometryType" not in literal_subtype_source + + +class _DeepInner(BaseModel): + field: str + + +class _ArrayElementWithNestedStruct(BaseModel): + nested: _DeepInner + + +class DeepNestedArrayModel(BaseModel): + items: list[_ArrayElementWithNestedStruct] + + +class _ArrayElementWithList(BaseModel): + countries: list[CountryCodeAlpha2] + + +class ListInArrayModel(BaseModel): + items: list[_ArrayElementWithList] + + +class _ArrayElementWithNewtype(BaseModel): + country: Annotated[str, Ge(0)] # stand-in for a constrained field + + +class TestArrayElementSubfieldRendering: + """Scalar sub-fields of array elements render as array_check with el[...] 
accessors.""" + + def test_scalar_subfield_uses_array_check(self) -> None: + source = _render(NestedArrayModel, "nested") + assert "array_check(" in source + + def test_scalar_subfield_uses_element_accessor(self) -> None: + source = _render(NestedArrayModel, "nested") + assert 'el["value"]' in source + + def test_scalar_subfield_no_f_col_with_brackets(self) -> None: + source = _render(NestedArrayModel, "nested") + assert 'F.col("items[].value")' not in source + + def test_nested_struct_subfield_chained_brackets(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + assert 'el["nested"]["field"]' in source + + def test_nested_struct_subfield_no_dot_in_brackets(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + assert 'el["nested.field"]' not in source + + def test_list_subfield_uses_nested_array_check(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + assert "nested_array_check(" in source + + def test_list_subfield_has_inner_array_check(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + # nested_array_check outer + array_check inner + assert "nested_array_check(" in source + assert "array_check(" in source + + def test_list_subfield_parseable(self) -> None: + source = _render(ListInArrayModel, "list_in_array") + ast.parse(source) + + def test_deep_nested_parseable(self) -> None: + source = _render(DeepNestedArrayModel, "deep") + ast.parse(source) + + +class TestNoFunctionNameCollisions: + def test_list_field_produces_unique_function_names(self) -> None: + source = _render(ArrayModel, "arr") + # Each "def _" function name should appear exactly once + func_defs = re.findall(r"^def (_\w+_check)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + def test_list_field_renders_parseable(self) -> None: + source = _render(ArrayModel, "arr") + ast.parse(source) + + +class PlaceSubtype(str): + COUNTRY = "country" + REGION = "region" + + 
def __new__(cls, value: str) -> "PlaceSubtype": + return str.__new__(cls, value) + + +class _SubtypeEnum(str, Enum): + COUNTRY = "country" + REGION = "region" + + +@require_if(["admin_level"], FieldEqCondition("subtype", _SubtypeEnum.COUNTRY)) +class RequireIfEnumModel(BaseModel): + subtype: str + admin_level: int | None = None + + +class TestModelConstraintNoRedundantArgs: + """Model constraints must not embed context or target_name strings.""" + + def test_require_if_no_target_name_arg(self) -> None: + """check_require_if must not pass the field name as a string arg.""" + source = _render(RequireIfEnumModel, "require_if_enum") + # Was: check_require_if(F.col("admin_level"), "admin_level", condition, desc) + # Now: check_require_if(F.col("admin_level"), condition, desc) + # Pattern: check_require_if(col_expr, "field_name", ... + pattern = re.compile(r'check_require_if\([^,]+,\s*"[^"]+",\s*F\.') + assert not pattern.search(source), ( + "check_require_if still passes field name as string arg" + ) + + def test_forbid_if_no_target_name_arg(self) -> None: + """check_forbid_if must not pass the field name as a string arg.""" + source = _render(RequireForbidModel, "rf") + pattern = re.compile(r'check_forbid_if\([^,]+,\s*"[^"]+",\s*F\.') + assert not pattern.search(source), ( + "check_forbid_if still passes field name as string arg" + ) + + +class TestEnumValueInCondition: + def test_renders_valid_python(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + ast.parse(source) + + def test_enum_value_rendered_as_string_literal_in_column_expr(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # The column expression (F.col == ...) must use the plain string value, + # not the non-parseable enum repr <_SubtypeEnum.COUNTRY: 'country'>. + # The condition description string may still contain the enum repr since + # it's only displayed in error messages (inside a quoted string literal). 
+ assert "'country'" in source + + +class TestConditionDescriptionRendering: + """Model constraint condition descriptions are human-readable, not Python repr.""" + + def test_condition_desc_no_enum_repr(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # The condition_desc string (4th arg to check_require_if) must not contain + # the non-parseable enum repr like <_SubtypeEnum.COUNTRY: 'country'> + assert "<_SubtypeEnum" not in source + + def test_condition_desc_uses_field_eq_format(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + # Should render as "subtype = 'country'" style (value quoted) + assert "subtype = 'country'" in source + + def test_condition_desc_with_double_quote_in_value_parseable(self) -> None: + """Condition values containing double-quotes must produce parseable output.""" + + @require_if(["admin_level"], FieldEqCondition("subtype", 'say "hi"')) + class DoubleQuoteCondModel(BaseModel): + subtype: str + admin_level: int | None = None + + source = _render(DoubleQuoteCondModel, "dq_cond") + ast.parse(source) + + +@forbid_if(["admin_level"], FieldEqCondition("subtype", "country")) +@require_if(["admin_level"], Not(FieldEqCondition("subtype", "country"))) +class RequireForbidModel(BaseModel): + subtype: str + admin_level: int | None = None + + +class TestModelConstraintFieldLabels: + """require_if/forbid_if field labels: no suffix when unique, per-field counter on collision.""" + + def test_require_if_single_constraint_no_suffix(self) -> None: + source = _render(RequireIfEnumModel, "require_if_enum") + assert 'field="admin_level_required"' in source + + def test_forbid_if_single_constraint_no_suffix(self) -> None: + source = _render(RequireForbidModel, "rf") + assert 'field="admin_level_forbidden"' in source + + def test_require_and_forbid_have_distinct_labels(self) -> None: + source = _render(RequireForbidModel, "rf") + assert 'field="admin_level_required"' in source + assert 
'field="admin_level_forbidden"' in source + + def test_multiple_require_if_same_target_disambiguated(self) -> None: + """Multiple require_if on the same target get per-field numeric suffixes.""" + + @require_if(["level"], FieldEqCondition("kind", "a")) + @require_if(["level"], FieldEqCondition("kind", "b")) + class MultiRequireModel(BaseModel): + kind: str + level: int | None = None + + source = _render(MultiRequireModel, "multi_req") + labels = re.findall(r'field="(level_required[^"]*)"', source) + assert len(labels) >= 2, f"Expected >=2 unique labels, got {labels}" + assert len(labels) == len(set(labels)), f"Duplicate labels: {labels}" + assert all(re.search(r"_\d+$", lbl) for lbl in labels), ( + f"Expected numeric suffixes on collision labels: {labels}" + ) + + +class TestDuplicateFunctionNames: + def test_column_and_element_level_get_unique_names(self) -> None: + """division_ids and division_ids[] should produce distinct function names.""" + col_check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items"), + ) + elem_check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[]"), + ) + source = render_feature_module("dup", [col_check, elem_check], [], []) + ast.parse(source) + func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + def test_same_field_different_variants_get_unique_names(self) -> None: + """class for road and class for rail should produce distinct function names.""" + road_check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["a", "b"],)), + ), + target=_path("class"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ) + rail_check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["x", "y"],)), + ), + target=_path("class"), + 
guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), + ) + source = render_feature_module("dup", [road_check, rail_check], [], []) + ast.parse(source) + func_defs = re.findall(r"^def (_\w+_check\w*)\(", source, re.MULTILINE) + assert len(func_defs) == len(set(func_defs)), ( + f"Duplicate function names: {func_defs}" + ) + + +@require_any_of("x", "y") +class _ArrayElementConstrained(BaseModel): + x: str | None = None + y: str | None = None + + +class ArrayOfConstrained(BaseModel): + items: list[_ArrayElementConstrained] + + +class TestArrayModelConstraintRendering: + """Model constraints on array elements render inside array_check.""" + + def test_renders_parseable_python(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + ast.parse(source) + + def test_renders_array_check(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "array_check(" in source + + def test_renders_el_field_refs(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert 'el["x"]' in source + assert 'el["y"]' in source + + def test_no_f_col_for_array_element_constraint(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + # The array-element model constraint should not use F.col for its field refs + assert 'F.col("x")' not in source + assert 'F.col("y")' not in source + + def test_shape_is_array(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "CheckShape.ARRAY" in source + + def test_field_label_uses_prefix(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert 'field="items[]' in source + + def test_imports_array_check(self) -> None: + source = _render(ArrayOfConstrained, "arr_constrained") + assert "array_check" in source + + +class TestVariantDiscriminatorField: + def test_variant_uses_check_discriminator_field(self) -> None: + """Variant gating should use the Guard's discriminator field, not hardcoded 'subtype'.""" + check = 
Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["x", "y"],)), + ), + target=_path("a_field"), + guards=(ColumnGuard(discriminator="kind", values=("a",)),), + ) + source = render_feature_module("test_variant", [check], [], []) + ast.parse(source) + assert 'F.col("kind")' in source + assert 'F.col("subtype")' not in source + + +@require_any_of("a", "b") +class _NestedConstrainedStruct(BaseModel): + a: str | None = None + b: str | None = None + + +class _ArrayElementWithNestedConstraint(BaseModel): + nested: _NestedConstrainedStruct + + +class ArrayOfNestedConstrained(BaseModel): + items: list[_ArrayElementWithNestedConstraint] + + +class TestVariantGatedArrayLambdaScope: + """Variant gating for ARRAY-shaped nodes must be inside the lambda, not wrapping it.""" + + @pytest.fixture(scope="class") + def rendered_source(self) -> str: + class _Base(BaseModel): + kind: str + + class _TypeA(_Base): + kind: Literal["a"] = "a" + a_field: str + + class _TypeB(_Base): + kind: Literal["b"] = "b" + + _Union = Annotated[ + Union[_TypeA, _TypeB], # noqa: UP007 + FieldInfo(discriminator="kind"), + ] + + class _Wrapper(BaseModel): + items: list[_Union] + + return _render(_Wrapper, "wrapper") + + def test_parseable(self, rendered_source: str) -> None: + ast.parse(rendered_source) + + def test_variant_gating_inside_lambda(self, rendered_source: str) -> None: + """el['kind'] must appear inside the lambda body, not outside array_check.""" + lines = rendered_source.splitlines() + for i, line in enumerate(lines): + if "array_check(" in line and i > 0: + preceding = lines[i - 1].strip() + assert not preceding.startswith("F.when("), ( + f"array_check wrapped by F.when at line {i}: {lines[i - 1]!r}" + ) + + lambda_found = False + el_kind_inside_lambda = False + for line in lines: + if "lambda el:" in line: + lambda_found = True + if lambda_found and 'el["kind"]' in line: + el_kind_inside_lambda = True + break + + assert lambda_found, "No lambda el: found in 
generated source" + assert el_kind_inside_lambda, ( + 'el["kind"] never appears after lambda el: — variant gating is outside lambda scope' + ) + + +class TestTopLevelVariantGatedArray: + """When the array column itself is variant-conditional, discriminator wraps array_check.""" + + @pytest.fixture(scope="class") + def surface_check(self) -> Check: + """ARRAY check with top-level discriminator -- surface only exists for subtype='a'.""" + return Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("surface[]"), + guards=(ColumnGuard(discriminator="subtype", values=("a",)),), + ) + + @pytest.fixture(scope="class") + def surface_value_check(self) -> Check: + """ARRAY check with leaf path and top-level discriminator.""" + return Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("surface[].value"), + guards=(ColumnGuard(discriminator="subtype", values=("a",)),), + ) + + def test_parseable(self, surface_check: Check) -> None: + source = render_feature_module("test", [surface_check], [], []) + ast.parse(source) + + def test_discriminator_uses_f_col(self, surface_check: Check) -> None: + """Top-level discriminator must reference F.col, not el[...].""" + source = render_feature_module("test", [surface_check], [], []) + assert 'F.col("subtype")' in source, ( + "Top-level discriminator must use F.col, not el[...]" + ) + assert 'el["subtype"]' not in source, ( + 'el["subtype"] found -- discriminator placed inside lambda' + ) + + def test_f_when_wraps_array_check(self, surface_check: Check) -> None: + """F.when must wrap the array_check call, not the lambda body.""" + source = _render_check_function(surface_check, "_surface_check") + # F.when must appear before array_check in the expression. 
+ f_when_pos = source.find("F.when(") + array_check_pos = source.find("array_check(") + assert f_when_pos != -1, "F.when not found in output" + assert array_check_pos != -1, "array_check not found in output" + assert f_when_pos < array_check_pos, ( + f"F.when (pos {f_when_pos}) must appear before array_check (pos {array_check_pos})" + ) + + def test_no_el_discriminator_in_lambda(self, surface_value_check: Check) -> None: + """el['subtype'] must not appear even with leaf path -- subtype is top-level.""" + source = render_feature_module("test", [surface_value_check], [], []) + assert 'el["subtype"]' not in source, ( + 'el["subtype"] found -- top-level discriminator must not appear inside lambda' + ) + + def test_leaf_path_check_parseable(self, surface_value_check: Check) -> None: + source = render_feature_module("test", [surface_value_check], [], []) + ast.parse(source) + + +class TestNestedStructModelConstraintRendering: + """Nested struct model constraints inside array elements use chained el accessors.""" + + def test_renders_parseable_python(self) -> None: + source = _render(ArrayOfNestedConstrained, "nested_constrained") + ast.parse(source) + + def test_chained_struct_accessor(self) -> None: + source = _render(ArrayOfNestedConstrained, "nested_constrained") + assert 'el["nested"]["a"]' in source + assert 'el["nested"]["b"]' in source + + def test_no_direct_el_access(self) -> None: + """Should NOT produce el["a"] — must go through nested struct.""" + source = _render(ArrayOfNestedConstrained, "nested_constrained") + # el["a"] without ["nested"] prefix should not appear + lines = source.split("\n") + for line in lines: + if 'el["a"]' in line and '["nested"]' not in line: + pytest.fail(f'Found bare el["a"] without struct prefix: {line}') + + +class TestRenderNestedArrayCheckStructure: + """_render_check_function emits correct nested_array_check / lambda structure.""" + + def test_render_nested_array_check(self) -> None: + check = Check( + descriptors=( + 
ExpressionDescriptor(function="check_bounds", kwargs=(("ge", 0),)), + ), + target=_path("items[].things[].value"), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert "lambda el" in source + assert "lambda inner" in source + assert 'el["things"]' in source + assert 'check_bounds(inner["value"],' in source + + def test_render_variant_expr_in_nested_array_top_level_disc(self) -> None: + """Top-level discriminator wraps nested_array_check in F.when(F.col(...)).""" + check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["m", "km"],)), + ), + target=_path("items[].things[].unit"), + guards=(ColumnGuard(discriminator="kind", values=("a", "b")),), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert 'F.col("kind").isin(' in source + + def test_render_variant_expr_in_nested_array_element_disc(self) -> None: + """Element-level discriminator gates inside the inner lambda.""" + check = Check( + descriptors=( + ExpressionDescriptor(function="check_enum", args=(["m", "km"],)), + ), + target=_path("items[].things[].unit"), + guards=(ElementGuard(discriminator="kind", values=("a", "b")),), + ) + source = _render_check_function(check, "_test_check") + assert "nested_array_check" in source + assert 'F.col("kind")' not in source + assert 'inner["kind"]' in source + + +@require_any_of("a", "b") +class _DoubleNestedConstrainedElement(BaseModel): + a: str | None = None + b: str | None = None + + +class _OuterArrayElement(BaseModel): + things: list[_DoubleNestedConstrainedElement] + + +class _DoubleNestedModel(BaseModel): + items: list[_OuterArrayElement] + + +class TestDoubleNestedArrayModelConstraintRendering: + """Model constraints on list[] inside another array render nested_array_check.""" + + def test_renders_parseable_python(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + ast.parse(source) + + def 
test_uses_nested_array_check(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert "nested_array_check" in source + + def test_inner_lambda_uses_inner_variable(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert 'inner["a"]' in source + assert 'inner["b"]' in source + + def test_outer_lambda_navigates_to_inner_array(self) -> None: + source = _render(_DoubleNestedModel, "double_nested") + assert 'el["things"]' in source + + +class TestMultiLevelNestedArrayRendering: + """Rendering of deeply nested array checks (2+ inner levels).""" + + def test_two_inner_levels_produces_double_nesting(self) -> None: + """list[list[list[Struct]]].field -> nested(nested(array_check)).""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[][][].value"), + ) + source = _render_node(check) + # Three IterateArrays -> 1 outer nested_array_check + 1 intermediate + # nested_array_check + 1 innermost array_check = 2 nested_array_check calls. 
+ assert source.count("nested_array_check(") == 2 + assert "lambda el:" in source + assert "lambda el2:" in source # intermediate level + assert "lambda inner:" in source # innermost + assert 'check_required(inner["value"])' in source + + def test_two_inner_levels_with_struct_path(self) -> None: + """Intermediate level with struct navigation.""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("outer[].mid[][].leaf"), + ) + source = _render_node(check) + assert 'el["mid"]' in source + assert source.count("nested_array_check(") == 2 + + def test_model_constraint_with_two_inner_levels(self) -> None: + """Model constraint at depth 3 uses double-nested wrapping.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[][][]"), + ) + source = _render_model_node(check) + assert source.count("nested_array_check(") == 2 + assert "lambda el:" in source + assert "lambda el2:" in source + assert "array_check(" in source + + def test_variant_gating_only_at_innermost_level(self) -> None: + """Variant values on a multi-level check with element guard apply at innermost.""" + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("items[][][].value"), + guards=(ElementGuard(discriminator="kind", values=("type_a",)),), + ) + source = _render_node(check) + # Variant gating appears at the innermost level. 
+ assert 'inner["kind"]' in source + + +class TestGatedScalarRendering: + """Gated check_required wraps expression in F.when(gate.isNotNull(), ...).""" + + @pytest.fixture + def gated_check(self) -> Check: + return Check( + descriptors=( + ExpressionDescriptor(function="check_required", gate=_path("inner")), + ), + target=_path("inner.value"), + ) + + def test_gated_scalar_has_when_wrapping(self, gated_check: Check) -> None: + source = _render_node(gated_check) + assert 'F.col("inner").isNotNull()' in source + assert "check_required" in source + assert "F.when(" in source + + def test_gated_scalar_is_parseable(self, gated_check: Check) -> None: + source = _render_node(gated_check) + ast.parse(source) + + def test_ungated_scalar_unchanged(self) -> None: + check = Check( + descriptors=(ExpressionDescriptor(function="check_required"),), + target=_path("value"), + ) + source = _render_node(check) + assert "isNotNull" not in source + assert "check_required" in source + + +class _NullableNestedElement(BaseModel): + value: str + + +class _ElementWithNullableStruct(BaseModel): + nested: _NullableNestedElement | None = None + + +class _ArrayWithNullableStruct(BaseModel): + items: list[_ElementWithNullableStruct] + + +class TestGatedFullModelRendering: + def test_gated_array_descriptor_is_parseable(self) -> None: + source = _render(_ArrayWithNullableStruct, "arr") + ast.parse(source) + + def test_gated_array_descriptor_has_element_gate(self) -> None: + source = _render(_ArrayWithNullableStruct, "arr") + assert 'el["nested"].isNotNull()' in source + assert "check_required" in source + + def test_model_with_nullable_parent_is_parseable(self) -> None: + class Inner(BaseModel): + value: str + + class Outer(BaseModel): + inner: Inner | None = None + + source = _render(Outer, "outer") + ast.parse(source) + assert "isNotNull" in source + assert "check_required" in source + + +class TestGatedArrayRendering: + """Gated check_required in array context uses element accessor for 
gate.""" + + @pytest.fixture + def element_gated_check(self) -> Check: + return Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("items[].nested") + ), + ), + target=_path("items[].nested.mode"), + ) + + def test_gated_array_has_element_gate(self, element_gated_check: Check) -> None: + source = _render_node(element_gated_check) + assert 'el["nested"].isNotNull()' in source + assert "check_required" in source + assert "F.when(" in source + + def test_gated_array_is_parseable(self, element_gated_check: Check) -> None: + source = _render_node(element_gated_check) + ast.parse(source) + + def test_column_level_gate_on_array_target_raises(self) -> None: + """A column-level gate on an ArrayPath target is not produced by check_builder.""" + check = Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("perspectives") + ), + ), + target=_path("perspectives.countries[]"), + ) + with pytest.raises(AssertionError, match="column-level gate"): + _render_node(check) + + def test_nested_array_gate_applied_at_outermost_lambda(self) -> None: + """Gate on a nested_array_check wraps the el lambda body, not inner.""" + check = Check( + descriptors=( + ExpressionDescriptor( + function="check_required", gate=_path("rules[].perspectives") + ), + ), + target=_path("rules[].perspectives.countries[]"), + ) + source = _render_node(check) + ast.parse(source) + assert "nested_array_check(" in source + # Gate must be on el (the rule struct), not inner (the country string). 
+ assert 'el["perspectives"].isNotNull()' in source + assert "inner[" not in source diff --git a/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py new file mode 100644 index 000000000..aba025cda --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_scaffold.py @@ -0,0 +1,245 @@ +"""Tests for sparse path scaffold generation.""" + +from dataclasses import replace + +import pytest +from codegen_test_support import ( + FeatureWithRequiredUrl, + discover_feature, + feature_spec_for_model, +) +from overture.schema.codegen.extraction.specs import FeatureSpec +from overture.schema.codegen.pyspark.check_builder import build_checks +from overture.schema.codegen.pyspark.check_ir import ElementGuard +from overture.schema.codegen.pyspark.test_data.scaffold import ( + generate_model_scaffold, + generate_scaffold, + leaf_list_depth, +) +from overture.schema.system.field_path import ArrayPath, parse + +_path = parse + + +@pytest.fixture(scope="module") +def connector_spec() -> FeatureSpec: + return discover_feature("Connector") + + +@pytest.fixture(scope="module") +def division_area_spec() -> FeatureSpec: + return discover_feature("DivisionArea") + + +@pytest.fixture(scope="module") +def segment_spec() -> FeatureSpec: + return discover_feature("Segment") + + +class TestLeafListDepth: + def test_leaf_list_depth(self) -> None: + """leaf_list_depth returns unaccounted-for list depth.""" + spec = feature_spec_for_model(FeatureWithRequiredUrl) + # Scalar field inside array struct — no extra wrapping + assert leaf_list_depth(_path("datasets[].url"), spec) == 0 + # List field without trailing array marker — needs wrapping + assert leaf_list_depth(_path("datasets[].download_urls"), spec) == 1 + # List field with array marker means element-level access — no wrapping + assert leaf_list_depth(_path("datasets[].download_urls[]"), spec) == 0 + + +class TestNestedListUrlField: + """Scaffold for 
FeatureWithRequiredUrl handles nested list[HttpUrl] fields.""" + + def test_nested_list_url_field_single_depth(self) -> None: + """list[HttpUrl] scaffold should be single-depth, not double-wrapped.""" + spec = feature_spec_for_model(FeatureWithRequiredUrl) + field_nodes, _ = build_checks(spec) + url_nodes = [n for n in field_nodes if "download_urls" in str(n.target)] + assert url_nodes, "Expected check nodes for download_urls" + for node in url_nodes: + scaffold = generate_scaffold(node, spec) + if "datasets" in scaffold: + entry = scaffold["datasets"][0] + if "download_urls" in entry: + val = entry["download_urls"] + assert isinstance(val, list) + assert all(isinstance(v, str) for v in val), ( + f"Expected list[str], got nested structure: {val!r}" + ) + + +class TestGenerateScaffoldConnector: + """Scaffold for Connector — simple top-level and one-level-nested fields.""" + + def test_required_top_level_field_produces_empty_scaffold( + self, connector_spec: FeatureSpec + ) -> None: + """Required top-level fields exist in base row; scaffold adds nothing.""" + field_nodes, _ = build_checks(connector_spec) + id_node = next(n for n in field_nodes if n.target == _path("id")) + scaffold = generate_scaffold(id_node, connector_spec) + assert scaffold == {} + + def test_optional_top_level_field_produces_scaffold( + self, connector_spec: FeatureSpec + ) -> None: + """Optional fields absent from base row get a valid scaffold value.""" + field_nodes, _ = build_checks(connector_spec) + node = next( + n + for n in field_nodes + if n.target == _path("sources") + and any(d.function == "check_array_min_length" for d in n.descriptors) + ) + scaffold = generate_scaffold(node, connector_spec) + assert "sources" in scaffold + assert isinstance(scaffold["sources"], list) + assert len(scaffold["sources"]) >= 1 + + def test_array_nested_field_builds_path(self, connector_spec: FeatureSpec) -> None: + """sources[].property needs a sources array with one element.""" + field_nodes, _ = 
build_checks(connector_spec) + node = next(n for n in field_nodes if n.target == _path("sources[].property")) + scaffold = generate_scaffold(node, connector_spec) + assert "sources" in scaffold + assert isinstance(scaffold["sources"], list) + assert len(scaffold["sources"]) == 1 + elem = scaffold["sources"][0] + # Required sibling 'dataset' populated + assert "dataset" in elem + + def test_scaffold_is_dict(self, connector_spec: FeatureSpec) -> None: + field_nodes, _ = build_checks(connector_spec) + for node in field_nodes: + scaffold = generate_scaffold(node, connector_spec) + assert isinstance(scaffold, dict) + + +class TestGenerateScaffoldSegment: + """Scaffold for Segment — deeply nested arrays and discriminators.""" + + def test_suffixed_nested_leaf_uses_actual_field_name( + self, segment_spec: FeatureSpec + ) -> None: + """Column-level checks share the structural path with the real field.""" + field_nodes, _ = build_checks(segment_spec) + node = next( + n + for n in field_nodes + if n.target == _path("access_restrictions[].when.mode") + and any(d.function == "check_array_min_length" for d in n.descriptors) + ) + scaffold = generate_scaffold(node, segment_spec) + assert "access_restrictions" in scaffold + when = scaffold["access_restrictions"][0]["when"] + assert "mode" in when, f"Expected 'mode', got keys: {list(when.keys())}" + assert "mode_min_length" not in when + + def test_deeply_nested_array_path(self, segment_spec: FeatureSpec) -> None: + """speed_limits[].when.vehicle[].dimension builds full nesting.""" + field_nodes, _ = build_checks(segment_spec) + node = next( + n + for n in field_nodes + if n.target == _path("speed_limits[].when.vehicle[].dimension") + ) + scaffold = generate_scaffold(node, segment_spec) + assert "speed_limits" in scaffold + sl_elem = scaffold["speed_limits"][0] + assert "when" in sl_elem + when = sl_elem["when"] + assert "vehicle" in when + assert isinstance(when["vehicle"], list) + assert len(when["vehicle"]) == 1 + + def 
test_element_guard_discriminator_set(self, segment_spec: FeatureSpec) -> None: + """Checks with an `ElementGuard` set the discriminator value in the scaffold.""" + field_checks, _ = build_checks(segment_spec) + # Find a speed_limits check with an ElementGuard. + check = next( + c + for c in field_checks + if any(isinstance(g, ElementGuard) for g in c.guards) + and "speed_limits" in str(c.target) + ) + scaffold = generate_scaffold(check, segment_spec) + # Walk to the innermost array element where the discriminator lives. + assert "speed_limits" in scaffold + sl_elem = scaffold["speed_limits"][0] + when = sl_elem["when"] + vehicle_elem = when["vehicle"][0] + element_guard = next(g for g in check.guards if isinstance(g, ElementGuard)) + assert element_guard.discriminator in vehicle_elem + assert vehicle_elem[element_guard.discriminator] == element_guard.values[0] + + def test_column_variant_does_not_appear_inside_scaffold( + self, segment_spec: FeatureSpec + ) -> None: + """`ColumnGuard`s don't set discriminator inside the scaffold dict.""" + field_checks, _ = build_checks(segment_spec) + # Find a check whose only guard is a ColumnGuard (no ElementGuard). + check = next( + c + for c in field_checks + if c.guards + and not any(isinstance(g, ElementGuard) for g in c.guards) + and "speed_limits[]." in str(c.target) + ) + scaffold = generate_scaffold(check, segment_spec) + # The column-level discriminator is NOT set in the scaffold -- + # it belongs at the row level, which the base row handles. + assert isinstance(scaffold, dict) + + def test_multiple_element_guards_raises(self, segment_spec: FeatureSpec) -> None: + """The check_ir invariant allows at most one `ElementGuard` per Check. + + Multiple guards would indicate the gate composition rule changed + without updating the scaffold, so the scaffold raises rather than + silently dropping all but the first. 
+ """ + field_checks, _ = build_checks(segment_spec) + check = next( + c + for c in field_checks + if any(isinstance(g, ElementGuard) for g in c.guards) + ) + bogus = replace( + check, + guards=( + *check.guards, + ElementGuard(discriminator="other_field", values=("other_value",)), + ), + ) + with pytest.raises(NotImplementedError, match="ElementGuards"): + generate_scaffold(bogus, segment_spec) + + +class TestGenerateModelScaffold: + def test_top_level_model_constraint_produces_empty_scaffold( + self, division_area_spec: FeatureSpec + ) -> None: + """Model constraints at the top level need no nesting.""" + _, model_nodes = build_checks(division_area_spec) + assert model_nodes, "DivisionArea should have model constraints" + node = model_nodes[0] + scaffold = generate_model_scaffold(node, division_area_spec) + assert isinstance(scaffold, dict) + + def test_array_nested_model_constraint_builds_path( + self, segment_spec: FeatureSpec + ) -> None: + """Model constraints inside arrays build the array path.""" + _, model_checks = build_checks(segment_spec) + if not model_checks: + pytest.skip("Segment has no model constraints") + # Find one with an array target. + nested = [c for c in model_checks if isinstance(c.target, ArrayPath)] + if not nested: + pytest.skip("No nested model constraints found") + check = nested[0] + scaffold = generate_model_scaffold(check, segment_spec) + assert isinstance(scaffold, dict) + # The scaffold should contain the column root (top-level column name). 
+ assert isinstance(check.target, ArrayPath) + assert check.target.array_chunks[0][1] in scaffold diff --git a/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py new file mode 100644 index 000000000..26dcdff30 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_schema_builder.py @@ -0,0 +1,213 @@ +"""Tests for schema_builder.""" + +from enum import Enum + +import pytest +from codegen_test_support import feature_spec_for_model +from overture.schema.codegen.extraction.field import Primitive +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + FieldSpec, + UnionSpec, +) +from overture.schema.codegen.pyspark.schema_builder import SchemaField, build_schema +from overture.schema.divisions import DivisionArea +from pydantic import BaseModel, Field + + +class SimpleModel(BaseModel): + name: str + count: int = Field(ge=0) + + +class TestPrimitiveFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(SimpleModel)) + + def test_string_field_maps_to_string_type(self, fields: list[SchemaField]) -> None: + name_field = next(f for f in fields if f.name == "name") + assert name_field.type_expr == "StringType()" + + def test_int_field_maps_to_long_type(self, fields: list[SchemaField]) -> None: + count_field = next(f for f in fields if f.name == "count") + assert count_field.type_expr == "LongType()" + + +class NestedModel(BaseModel): + value: str + count: int + + +class ContainerModel(BaseModel): + item: NestedModel | None = None + + +class TestNestedModel: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(ContainerModel)) + + def test_nested_model_emits_struct_type(self, fields: list[SchemaField]) -> None: + item_field = next(f for f in fields if f.name == "item") + assert item_field.type_expr.startswith("StructType([") + + def 
test_nested_struct_contains_subfields(self, fields: list[SchemaField]) -> None: + item_field = next(f for f in fields if f.name == "item") + assert 'StructField("value"' in item_field.type_expr + assert 'StructField("count"' in item_field.type_expr + + +class ListModel(BaseModel): + tags: list[str] + counts: list[int] | None = None + + +class TestListFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(ListModel)) + + def test_list_str_maps_to_array_string(self, fields: list[SchemaField]) -> None: + tags_field = next(f for f in fields if f.name == "tags") + assert tags_field.type_expr == "ArrayType(StringType(), True)" + + def test_optional_list_int_maps_to_array_long( + self, fields: list[SchemaField] + ) -> None: + counts_field = next(f for f in fields if f.name == "counts") + assert counts_field.type_expr == "ArrayType(LongType(), True)" + + +class DictModel(BaseModel): + labels: dict[str, str] | None = None + + +class TestDictFields: + @pytest.fixture + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(DictModel)) + + def test_dict_str_str_maps_to_map_type(self, fields: list[SchemaField]) -> None: + labels_field = next(f for f in fields if f.name == "labels") + assert labels_field.type_expr == "MapType(StringType(), StringType(), True)" + + +class TestDivisionAreaSchema: + @pytest.fixture(scope="class") + def fields(self) -> list[SchemaField]: + return build_schema(feature_spec_for_model(DivisionArea)) + + def test_id_field_is_string_type(self, fields: list[SchemaField]) -> None: + id_field = next(f for f in fields if f.name == "id") + assert id_field.type_expr == "StringType()" + + def test_geometry_field_is_binary_type(self, fields: list[SchemaField]) -> None: + geom_field = next(f for f in fields if f.name == "geometry") + assert geom_field.type_expr == "BinaryType()" + + def test_bbox_emits_shared_struct_ref(self, fields: list[SchemaField]) -> None: + bbox_field = 
next(f for f in fields if f.name == "bbox") + assert bbox_field.type_expr == "BBOX_STRUCT" + + def test_version_is_integer_type(self, fields: list[SchemaField]) -> None: + ver_field = next(f for f in fields if f.name == "version") + assert ver_field.type_expr == "IntegerType()" + + def test_is_land_is_boolean_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "is_land") + assert field.type_expr == "BooleanType()" + + def test_country_is_string_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "country") + assert field.type_expr == "StringType()" + + def test_admin_level_is_integer_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "admin_level") + assert field.type_expr == "IntegerType()" + + def test_subtype_enum_is_string_type(self, fields: list[SchemaField]) -> None: + field = next(f for f in fields if f.name == "subtype") + assert field.type_expr == "StringType()" + + def test_theme_appears_once_at_model_position( + self, fields: list[SchemaField] + ) -> None: + theme_fields = [f for f in fields if f.name == "theme"] + assert len(theme_fields) == 1 + + def test_theme_and_type_present(self, fields: list[SchemaField]) -> None: + names = [f.name for f in fields] + assert "theme" in names + assert "type" in names + + +class _ColorA(Enum): + RED = "red" + GREEN = "green" + + +class _ColorB(Enum): + BLUE = "blue" + YELLOW = "yellow" + + +class _VariantA(BaseModel): + pass + + +class _VariantB(BaseModel): + pass + + +class TestUnionSchemaDeduplicate: + """build_schema deduplicates same-name fields from different union variants.""" + + @pytest.fixture + def fields(self) -> list[SchemaField]: + af_shared = AnnotatedField( + field_spec=FieldSpec( + name="id", + shape=Primitive(base_type="str"), + description=None, + is_required=True, + ), + variant_sources=None, + ) + af_color_a = AnnotatedField( + field_spec=FieldSpec( + name="color", + 
shape=Primitive(base_type="ColorA", source_type=_ColorA), + description=None, + is_required=True, + ), + variant_sources=(_VariantA,), + ) + af_color_b = AnnotatedField( + field_spec=FieldSpec( + name="color", + shape=Primitive(base_type="ColorB", source_type=_ColorB), + description=None, + is_required=True, + ), + variant_sources=(_VariantB,), + ) + spec = UnionSpec( + name="TestUnion", + description=None, + annotated_fields=[af_shared, af_color_a, af_color_b], + members=[], + discriminator_field=None, + discriminator_mapping=None, + source_annotation=object(), + common_base=BaseModel, + ) + return build_schema(spec) + + def test_one_schema_field_per_name(self, fields: list[SchemaField]) -> None: + color_fields = [f for f in fields if f.name == "color"] + assert len(color_fields) == 1 + + def test_color_field_is_string_type(self, fields: list[SchemaField]) -> None: + color_field = next(f for f in fields if f.name == "color") + assert color_field.type_expr == "StringType()" diff --git a/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py new file mode 100644 index 000000000..64537c9b5 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pyspark_test_renderer.py @@ -0,0 +1,880 @@ +"""Tests for the generated conformance test module renderer.""" + +import ast +import re +from enum import Enum + +import pytest +from overture.schema.codegen.extraction.field import ArrayOf, Primitive +from overture.schema.codegen.pyspark.check_ir import ( + Check, + ColumnGuard, + ElementGuard, + ModelCheck, +) +from overture.schema.codegen.pyspark.constraint_dispatch import ( + ExpressionDescriptor, + ForbidIf, + MinFieldsSet, + RadioGroup, + RequireAnyOf, + RequireIf, +) +from overture.schema.codegen.pyspark.test_renderer import ( + render_test_module as _real_render_test_module, +) +from overture.schema.system.field_constraint.string import ( + CountryCodeAlpha2Constraint, + 
NoWhitespaceConstraint, +) +from overture.schema.system.field_path import ArrayPath, ScalarPath, parse +from overture.schema.system.model_constraint import FieldEqCondition, Not +from overture.schema.system.primitive.geom import GeometryType + +_path = parse + +# Placeholder expression import path -- tests parse the rendered source +# rather than executing it, so the import target need not be real. +_TEST_EXPRESSION_IMPORT = "_placeholder.expression_module" + + +def render_test_module(*args: object, **kwargs: object) -> str: + """Invoke the renderer with placeholder `expression_import`/`support_prefix`. + + Tests parse the rendered source rather than executing it, so neither + the expression import target nor the relative `_support` package depth + needs to match a real layout. Defining this as a free function (rather + than a fixture) keeps test bodies terse. + """ + kwargs.setdefault("expression_import", _TEST_EXPRESSION_IMPORT) + kwargs.setdefault("support_prefix", "..") + return _real_render_test_module(*args, **kwargs) # type: ignore[arg-type] + + +def make_check( + function: str, + target: object, + *, + args: tuple[object, ...] = (), + kwargs: tuple[tuple[str, object], ...] = (), + constraint_type: object = None, + label: str | None = None, + check_name: str | None = None, + guards: tuple[object, ...] 
= (), +) -> Check: + """Build a single-descriptor Check; defaults match Check/ExpressionDescriptor.""" + descriptor_kwargs: dict[str, object] = {"function": function} + if args: + descriptor_kwargs["args"] = args + if kwargs: + descriptor_kwargs["kwargs"] = kwargs + if constraint_type is not None: + descriptor_kwargs["constraint_type"] = constraint_type + if label is not None: + descriptor_kwargs["label"] = label + if check_name is not None: + descriptor_kwargs["check_name"] = check_name + return Check( + descriptors=(ExpressionDescriptor(**descriptor_kwargs),), # type: ignore[arg-type] + target=target, # type: ignore[arg-type] + guards=guards, # type: ignore[arg-type] + ) + + +def _array( + column: str, + inner_struct_paths: tuple[tuple[str, ...], ...] = (), + leaf_path: tuple[str, ...] = (), +) -> ArrayPath: + """Build an ArrayPath from a column name, inner struct paths, and a leaf path. + + Each entry in `inner_struct_paths` is `(prefix_structs..., inner_array_name)`: + the prefix names become struct segments and the last name becomes an + inner ArraySegment. 
+ """ + column_path = _path(column) + if isinstance(column_path, ScalarPath): + prefix_structs = column_path.segments[:-1] + outer_name = column_path.segments[-1].name + prefix = ScalarPath(segments=prefix_structs) + path = prefix.append_array(outer_name, iter_count=1) + else: + path = column_path + for sp in inner_struct_paths: + for n in sp[:-1]: + path = path.append_struct(n) + path = path.append_array(sp[-1], iter_count=1) + for n in leaf_path: + path = path.append_struct(n) + return path + + +class TestRenderTestModuleParseable: + def test_renders_valid_python_with_nodes(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("division_area", nodes, []) + ast.parse(source) + + def test_empty_nodes_renders_valid_python(self) -> None: + source = render_test_module("empty", [], []) + ast.parse(source) + + +class TestBaseRow: + def test_default_base_rows_are_empty(self) -> None: + source = render_test_module("test", [], []) + assert "BASE_ROW_SPARSE: dict = {}" in source + assert "BASE_ROW_POPULATED: dict = {}" in source + + def test_provided_sparse_row_rendered(self) -> None: + source = render_test_module("test", [], [], base_row_sparse={"id": "abc"}) + assert "BASE_ROW_SPARSE: dict = " in source + assert "'id': 'abc'" in source + + def test_provided_populated_row_rendered(self) -> None: + source = render_test_module( + "test", + [], + [], + base_row_sparse={"id": "abc"}, + base_row_populated={"id": "abc", "names": {"primary": ""}}, + ) + assert "BASE_ROW_POPULATED: dict = " in source + assert "'names'" in source + + +class TestFieldScenarios: + def test_required_produces_none_value(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "Scenario(" in source + assert "set_at_path('country', None)" in source + assert "'country'" in source + assert "'required'" in source + + def test_enum_produces_invalid_string(self) -> None: + nodes = [ + 
make_check("check_enum", _path("subtype"), args=(["a", "b", "c"],)), + ] + source = render_test_module("test", nodes, []) + assert "__INVALID__" in source + assert "'enum'" in source + + def test_bounds_produces_out_of_range(self) -> None: + nodes = [ + make_check("check_bounds", _path("score"), kwargs=(("ge", 0.0),)), + ] + source = render_test_module("test", nodes, []) + assert "-1" in source or "-1.0" in source + assert "'bounds'" in source + + def test_bounds_preserves_int_type(self) -> None: + """Integer bound kwargs emit integer literals for IntegerType fields.""" + nodes = [ + make_check("check_bounds", _path("version"), kwargs=(("ge", 0),)), + ] + source = render_test_module("test", nodes, []) + assert "set_at_path('version', -1)" in source + + def test_bounds_preserves_float_type(self) -> None: + """Float bound kwargs emit float literals for DoubleType fields.""" + nodes = [ + make_check("check_bounds", _path("height"), kwargs=(("ge", 0.0),)), + ] + source = render_test_module("test", nodes, []) + assert "-1.0" in source + + def test_unknown_constraint_raises(self) -> None: + nodes = [make_check("check_something_unknown", _path("geom"))] + with pytest.raises(ValueError, match="Cannot render mutate expression"): + render_test_module("test", nodes, []) + + def test_pattern_produces_invalid_string(self) -> None: + nodes = [ + make_check("check_pattern", _path("wikidata.value"), args=(r"^Q\d+$",)), + ] + source = render_test_module("test", nodes, []) + assert "'pattern'" in source + + def test_no_whitespace_pattern_mutation_contains_whitespace(self) -> None: + """Mutation for NoWhitespaceConstraint must contain whitespace to violate ^\\S+$.""" + nodes = [ + make_check( + "check_pattern", + _path("id"), + args=(r"^\S+$",), + constraint_type=NoWhitespaceConstraint, + ), + ] + source = render_test_module("test", nodes, []) + match = re.search( + r"set_at_path\('id',\s*(.+?)\)", + source, + re.DOTALL, + ) + assert match, f"no id:pattern set_at_path found 
in:\n{source}" + mutation_value = match.group(1).strip() + assert re.search(r"\\s|\s", mutation_value.strip("'")), ( + f"mutation {mutation_value} does not contain whitespace" + ) + + def test_country_code_uses_invalid_value(self) -> None: + nodes = [ + make_check( + "check_pattern", + _path("country.value"), + constraint_type=CountryCodeAlpha2Constraint, + label="ISO 3166-1 alpha-2 country code", + check_name="country_code_alpha2", + ), + ] + source = render_test_module("test", nodes, []) + assert "'99'" in source + + def test_multiple_descriptors_produce_multiple_entries(self) -> None: + """A field with required + enum produces two scenario entries.""" + nodes = [ + Check( + descriptors=( + ExpressionDescriptor(function="check_required"), + ExpressionDescriptor(function="check_enum", args=(["a"],)), + ), + target=_path("subtype"), + ), + ] + source = render_test_module("test", nodes, []) + assert "'required'" in source + assert "'enum'" in source + + def test_min_length_produces_empty_list(self) -> None: + nodes = [ + make_check("check_array_min_length", _path("sources"), args=(1,)), + ] + source = render_test_module("test", nodes, []) + assert "set_at_path('sources', [])" in source + assert "expected_field='sources_min_length'" in source + + def test_max_length_produces_oversized_list(self) -> None: + nodes = [ + make_check("check_array_max_length", _path("connectors"), args=(3,)), + ] + source = render_test_module("test", nodes, []) + assert "[{}, {}, {}, {}]" in source or "[{}] * 4" in source + assert "expected_field='connectors_max_length'" in source + + def test_scenario_id_includes_feature_name(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("division_area", nodes, []) + assert "division_area::country:required" in source + + def test_scenario_has_scaffold(self) -> None: + """Scenario includes a scaffold dict (empty when spec is None).""" + nodes = [make_check("check_required", _path("country"))] + 
source = render_test_module("test", nodes, []) + assert "scaffold={}" in source + + +class TestModelScenarios: + def test_radio_group_imports_mutation(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("is_land", "is_territorial")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_radio_group" in source + assert "radio_group" in source + + def test_require_any_of_imports_mutation(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_require_any_of" in source + + def test_require_if_includes_condition(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_require_if" in source + assert "'country'" in source + + def test_model_scenario_uses_contains_assertion(self) -> None: + """Model-level tests use 'in' not '==' to check violation membership.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "assert expected in invalid_violations" in source + + def test_renders_valid_python(self) -> None: + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + + def test_enum_condition_value_renders_valid_python(self) -> None: + """Enum condition values must render as their string payload, not repr.""" + + class PlaceType(str, Enum): + COUNTY = "county" + + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", PlaceType.COUNTY), + ), + ), + ] + source = 
render_test_module("test", [], model_nodes) + ast.parse(source) + assert "'county'" in source + + def test_forbid_if_array_field_generates_fill_values(self) -> None: + """forbid_if targeting an array field emits fill_values with [{}].""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("destinations",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=( + ( + "destinations", + ArrayOf(element=Primitive(base_type="Destination")), + ), + ), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "[{}]" in source + + def test_forbid_if_struct_field_generates_fill_values(self) -> None: + """forbid_if targeting a struct field emits fill_values with {}.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("road_surface",), + condition=FieldEqCondition("subtype", "road"), + field_shapes=( + ("road_surface", Primitive(base_type="RoadSurface")), + ), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" in source + assert "'road_surface': {}" in source + + def test_forbid_if_string_field_no_fill_values(self) -> None: + """forbid_if targeting a string field does not emit fill_values.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("class",), + condition=FieldEqCondition("subtype", "water"), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "fill_values" not in source + + def test_forbid_if_not_condition_uses_negate(self) -> None: + """forbid_if with Not(FieldEqCondition) passes negate=True to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=ForbidIf( + field_names=("destinations",), + condition=Not(FieldEqCondition("subtype", "road")), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "negate=True" in source + assert 
"'road'" in source + + def test_require_any_of_nested_uses_array_path(self) -> None: + """require_any_of in an array element passes array_path to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("labels", "symbols")), + target=_array("destinations"), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'array_path="destinations"' in source + + def test_require_any_of_nested_with_leaf_path(self) -> None: + """require_any_of nested in struct within array passes struct_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("heading", "during")), + target=_array("access_restrictions", leaf_path=("when",)), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert 'array_path="access_restrictions"' in source + assert 'struct_path="when"' in source + + def test_require_any_of_top_level_no_array_path(self) -> None: + """Top-level require_any_of does not emit array_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "array_path" not in source + + def test_require_if_not_condition_uses_negate(self) -> None: + """require_if with Not(FieldEqCondition) passes negate=True to mutation.""" + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("class",), + condition=Not(FieldEqCondition("subtype", "road")), + ), + ), + ] + source = render_test_module("test", [], model_nodes) + ast.parse(source) + assert "negate=True" in source + + def test_model_scenario_uses_inline_lambda(self) -> None: + """Model scenarios emit mutate=lambda row: ... 
directly.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate=lambda row:" in source + assert "mutate_radio_group(" in source + + def test_model_scenario_has_scaffold(self) -> None: + """Scenario includes a scaffold dict (empty when spec is None).""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "Scenario(" in source + assert "scaffold={}" in source + + def test_min_fields_set_renders_mutation_call(self) -> None: + """MinFieldsSet dispatches to `mutate_min_fields_set`.""" + model_nodes = [ + ModelCheck( + descriptor=MinFieldsSet(field_names=("x", "y"), count=1), + ), + ] + source = render_test_module("test", [], model_nodes) + assert "mutate_min_fields_set(row, ['x', 'y'])" in source + import_match = re.search( + r"from \.\._support\.mutations\s+import\s+(.+?)(?:\n\n|\Z)", + source, + re.DOTALL, + ) + assert import_match is not None + assert "mutate_min_fields_set" in import_match.group(1) + + def test_require_any_of_with_inner_levels_raises(self) -> None: + """require_any_of does not accept inner_array_path.""" + model_nodes = [ + ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_array("outer", inner_struct_paths=(("inner",),)), + ), + ] + with pytest.raises(ValueError, match="inner_array_path"): + render_test_module("test", [], model_nodes) + + def test_radio_group_with_array_path_raises(self) -> None: + """radio_group takes no array kwargs; nodes with column_path raise.""" + model_nodes = [ + ModelCheck( + descriptor=RadioGroup(field_names=("a", "b")), + target=_array("outer"), + ), + ] + with pytest.raises(ValueError, match="array_path"): + render_test_module("test", [], model_nodes) + + def test_require_if_with_leaf_path_raises(self) -> None: + """require_if does not accept struct_path; nodes with leaf_path raise.""" + 
model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + target=_array("outer", leaf_path=("when",)), + ), + ] + with pytest.raises(ValueError, match="struct_path"): + render_test_module("test", [], model_nodes) + + def test_require_if_with_multi_inner_levels_raises(self) -> None: + """require_if only consumes one inner iteration; multi-level is rejected.""" + model_nodes = [ + ModelCheck( + descriptor=RequireIf( + field_names=("admin_level",), + condition=FieldEqCondition("subtype", "country"), + ), + target=_array("outer", inner_struct_paths=(("middle",), ("inner",))), + ), + ] + with pytest.raises(ValueError, match="multi-level inner struct paths"): + render_test_module("test", [], model_nodes) + + +class TestTestLayer: + @pytest.fixture(scope="class") + def empty_source(self) -> str: + return render_test_module("test", [], []) + + def test_test_scenario_sparse_present(self, empty_source: str) -> None: + assert "def test_scenario_sparse(" in empty_source + + def test_test_scenario_populated_present(self, empty_source: str) -> None: + assert "def test_scenario_populated(" in empty_source + + def test_test_baseline_sparse_present(self, empty_source: str) -> None: + assert "def test_baseline_sparse(" in empty_source + + def test_test_baseline_populated_present(self, empty_source: str) -> None: + assert "def test_baseline_populated(" in empty_source + + def test_sparse_results_fixture_present(self, empty_source: str) -> None: + assert "def sparse_results(" in empty_source + + def test_populated_results_fixture_present(self, empty_source: str) -> None: + assert "def populated_results(" in empty_source + + def test_assert_scenario_helper_present(self, empty_source: str) -> None: + assert "def _assert_scenario(" in empty_source + + def test_imports_scenario(self, empty_source: str) -> None: + assert "Scenario" in empty_source + + def test_uses_harness_imports(self, empty_source: 
str) -> None: + assert "from .._support.harness import" in empty_source + + def test_imports_set_at_path_only_when_field_scenarios_present(self) -> None: + # No field checks -> no set_at_path scenarios -> no import + empty = render_test_module("test", [], []) + assert "from .._support.helpers import set_at_path" not in empty + + # Field check -> set_at_path used -> import emitted + with_field = render_test_module( + "test", + [make_check("check_required", _path("country"))], + [], + ) + assert "from .._support.helpers import set_at_path" in with_field + + def test_scenario_checks_valid_and_invalid(self, empty_source: str) -> None: + assert "::valid" in empty_source + assert "::invalid" in empty_source + + def test_scenarios_list_type_annotation(self, empty_source: str) -> None: + assert "list[Scenario]" in empty_source + + def test_populated_tests_not_marked_skip(self, empty_source: str) -> None: + assert "pytest.mark.skip" not in empty_source + + +class TestStructUniqueCheckScenarios: + @pytest.fixture() + def sources_unique_output(self) -> str: + nodes = [make_check("check_struct_unique", _path("sources"))] + return render_test_module("test", nodes, []) + + def test_struct_unique_emits_scenario(self, sources_unique_output: str) -> None: + """struct_unique_check produces Scenario with scaffold and inline lambda.""" + assert "Scenario(" in sources_unique_output + assert "expected_field='sources_unique'" in sources_unique_output + assert "expected_check='struct_unique'" in sources_unique_output + + def test_struct_unique_imports_mutate_unique_items( + self, sources_unique_output: str + ) -> None: + assert ( + "from .._support.mutations import mutate_unique_items" + in sources_unique_output + ) + + def test_no_struct_unique_does_not_import_mutate_unique_items(self) -> None: + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "mutate_unique_items" not in source + + def 
test_struct_unique_inline_lambda(self, sources_unique_output: str) -> None: + """struct_unique_check emits mutate=lambda row: mutate_unique_items(...).""" + assert "mutate=lambda row: mutate_unique_items(" in sources_unique_output + assert "'sources'" in sources_unique_output + + def test_struct_unique_nested_path_strips_suffix(self) -> None: + """Nested bracket path uses the structural field for mutation.""" + nodes = [ + make_check("check_struct_unique", _path("access_restrictions[].when.mode")), + ] + source = render_test_module("test", nodes, []) + # Black may wrap the long lambda — check parts separately + assert "mutate_unique_items(" in source + assert "'access_restrictions[].when.mode'" in source + assert "expected_field='access_restrictions[].when.mode_unique'" in source + + def test_struct_unique_renders_valid_python( + self, sources_unique_output: str + ) -> None: + ast.parse(sources_unique_output) + + def test_struct_unique_mixed_with_field_scenarios(self) -> None: + """struct_unique_check alongside normal field checks renders valid Python.""" + nodes = [ + make_check("check_required", _path("sources")), + make_check("check_struct_unique", _path("sources")), + ] + source = render_test_module("test", nodes, []) + ast.parse(source) + assert source.count("Scenario(") == 2 + + def test_struct_unique_has_scaffold(self, sources_unique_output: str) -> None: + """struct_unique_check Scenario includes scaffold dict.""" + assert "scaffold={}" in sources_unique_output + + +class TestArmFiltering: + """Per-arm test generation filters field checks by discriminator value.""" + + def _common_node(self) -> Check: + return make_check("check_required", _path("id")) + + def _road_node(self) -> Check: + return make_check( + "check_required", + _array("road_surface"), + guards=(ColumnGuard(discriminator="subtype", values=("road",)),), + ) + + def _rail_node(self) -> Check: + return make_check( + "check_required", + _array("rail_flags"), + 
guards=(ColumnGuard(discriminator="subtype", values=("rail",)),), + ) + + def _inner_disc_node(self) -> Check: + """Road-arm check with in-element discriminator (vehicle dimension).""" + return make_check( + "check_required", + _path("speed_limits[].when.vehicle[].value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + ElementGuard(discriminator="dimension", values=("height", "length")), + ), + ) + + def test_arm_road_includes_common_and_road_checks(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="road") + assert "set_at_path('id'" in source + assert "road_surface" in source + assert "rail_flags" not in source + + def test_arm_rail_includes_common_and_rail_checks(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="rail") + assert "set_at_path('id'" in source + assert "rail_flags" in source + assert "road_surface" not in source + + def test_arm_includes_inner_disc_by_outer_variant(self) -> None: + """In-element discriminator checks emit when the outer Guard matches the arm.""" + nodes = [self._inner_disc_node()] + source = render_test_module("test", nodes, [], arm="road") + assert "vehicle" in source + + def test_arm_excludes_inner_disc_wrong_outer(self) -> None: + nodes = [self._inner_disc_node()] + source = render_test_module("test", nodes, [], arm="rail") + assert "vehicle" not in source + + def test_no_arm_includes_all_checks(self) -> None: + """Without arm filtering, all checks are included.""" + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, []) + assert "set_at_path('id'" in source + assert "road_surface" in source + assert "rail_flags" in source + + def test_arm_includes_model_checks(self) -> None: + """Arm-agnostic ModelChecks (arm=None) reach every arm test.""" + model_nodes = [ + ModelCheck( + 
descriptor=ForbidIf( + field_names=("rail_flags",), + condition=Not(FieldEqCondition("subtype", "rail")), + field_shapes=(), + ), + ), + ] + source = render_test_module("test", [], model_nodes, arm="road") + assert "mutate_forbid_if" in source + + def test_arm_excludes_other_arms_model_checks(self) -> None: + """A ModelCheck tagged for one arm does not appear in another arm's tests.""" + road_only = ModelCheck( + descriptor=RadioGroup(field_names=("road_flag_a", "road_flag_b")), + arm="road", + ) + road_source = render_test_module("test", [], [road_only], arm="road") + assert "mutate_radio_group" in road_source + rail_source = render_test_module("test", [], [road_only], arm="rail") + assert "mutate_radio_group" not in rail_source + + def test_arm_renders_valid_python(self) -> None: + nodes = [self._common_node(), self._road_node(), self._rail_node()] + source = render_test_module("test", nodes, [], arm="road") + ast.parse(source) + + def test_arm_filtering_ignores_inner_element_discriminator(self) -> None: + """Element guards on inner-union discriminators don't gate arm filtering. + + The inner `ElementGuard` discriminator (`dimension`) is unrelated + to the outer union arm (`subtype`). When an `ElementGuard` value + happens to coincide with an arm name, an `any(...)` filter would + wrongly include the check in that arm; the correct filter + consults only `ColumnGuard`s. + """ + check = make_check( + "check_required", + _path("speed_limits[].when.vehicle[].value"), + guards=( + ColumnGuard(discriminator="subtype", values=("road",)), + # ElementGuard values include "rail" by coincidence -- it's + # a vehicle dimension, not a segment subtype. Filtering by + # `any(...)` would let arm="rail" include the check. 
+ ElementGuard(discriminator="dimension", values=("rail",)), + ), + ) + rail = render_test_module("test", [check], [], arm="rail") + assert "speed_limits" not in rail + road = render_test_module("test", [check], [], arm="road") + assert "speed_limits" in road + + +class TestLinearRangeMutations: + @pytest.mark.parametrize( + ("function", "expected_value"), + [ + ("check_linear_range_length", "[0.5]"), + ("check_linear_range_bounds", "[1.5, 2.0]"), + ("check_linear_range_order", "[0.8, 0.2]"), + ], + ) + def test_mutation_renders(self, function: str, expected_value: str) -> None: + nodes = [make_check(function, _path("between"))] + source = render_test_module("test", nodes, []) + assert expected_value in source + + +class TestGeometryTypeMutations: + def test_point_allowed_emits_linestring(self) -> None: + """When Point is allowed, inject LineString as the wrong type.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + assert "LineString" in source or "LINESTRING" in source + + def test_polygon_allowed_emits_point(self) -> None: + """When Point is not allowed, inject Point as the wrong type.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POLYGON, GeometryType.MULTI_POLYGON), + ), + ] + source = render_test_module("test", nodes, []) + assert "POINT" in source or "Point" in source + + def test_geometry_type_renders_valid_python(self) -> None: + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + ast.parse(source) + + def test_geometry_type_uses_wkt_strings(self) -> None: + """Geometry scenarios use WKT strings, not shapely constructor calls.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=(GeometryType.POINT,), + ), + ] + source = render_test_module("test", nodes, []) + 
assert "shapely" not in source + assert "LINESTRING" in source or "LineString" in source + + def test_all_candidates_allowed_raises(self) -> None: + """When all geometry candidates are allowed, scenario generation raises.""" + nodes = [ + make_check( + "check_geometry_type", + _path("geometry"), + args=( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.GEOMETRY_COLLECTION, + ), + ), + ] + with pytest.raises(ValueError, match="Cannot render mutate expression"): + render_test_module("test", nodes, []) + + def test_no_geometry_type_no_shapely_imports(self) -> None: + """Shapely imports are absent when no geometry type scenario exists.""" + nodes = [make_check("check_required", _path("country"))] + source = render_test_module("test", nodes, []) + assert "shapely" not in source diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py index fb8e1e41a..7897a8256 100644 --- a/packages/overture-schema-codegen/tests/test_reverse_references.py +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -11,17 +11,18 @@ RoadSegment, TreeNode, Venue, + feature_spec_for_model, has_name, lookup_by_name, make_union_spec, ) from overture.schema.codegen.extraction.enum_extraction import extract_enum -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.newtype_extraction import extract_newtype -from overture.schema.codegen.extraction.specs import PydanticTypeSpec, TypeIdentity +from overture.schema.codegen.extraction.specs import ( + ModelSpec, + PydanticTypeSpec, + TypeIdentity, +) from overture.schema.codegen.layout.type_collection import ( collect_all_supplementary_types, ) @@ -49,13 +50,12 @@ def test_model_referencing_type_produces_used_by_entry( target_name: str, ) -> None: """Model referencing a type produces a 'used by' entry on that type.""" - model_spec = 
extract_model(model_class, entry_point=model_name) - expand_model_tree(model_spec) - all_specs = collect_all_supplementary_types([model_spec]) + expanded = feature_spec_for_model(model_class, entry_point=model_name) + all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, target_name) - result = compute_reverse_references([model_spec], all_specs) + result = compute_reverse_references([expanded], all_specs) entries = lookup_by_name(result, target_name) assert len(entries) == 1 @@ -95,8 +95,8 @@ def test_union_members_have_used_by_entries() -> None: ) # Extract the member - road_spec = extract_model(RoadSegment) - expand_model_tree(road_spec) + road_spec = feature_spec_for_model(RoadSegment) + assert isinstance(road_spec, ModelSpec) all_specs = {TypeIdentity(RoadSegment, "RoadSegment"): road_spec} result = compute_reverse_references([union_spec], all_specs) @@ -109,8 +109,8 @@ def test_union_members_have_used_by_entries() -> None: def test_self_references_filtered_out() -> None: """Self-references are filtered out (handles recursive types).""" - tree_spec = extract_model(TreeNode, entry_point="TreeNode") - expand_model_tree(tree_spec) + tree_spec = feature_spec_for_model(TreeNode, entry_point="TreeNode") + assert isinstance(tree_spec, ModelSpec) # Manually add TreeNode to all_specs to test self-reference filtering all_specs = {TypeIdentity(TreeNode, "TreeNode"): tree_spec} @@ -124,10 +124,8 @@ def test_self_references_filtered_out() -> None: def test_deduplication_same_type_multiple_fields() -> None: """Deduplication works when same type is referenced via multiple fields.""" - instrument_spec = extract_model(Instrument, entry_point="Instrument") - venue_spec = extract_model(Venue, entry_point="Venue") - expand_model_tree(instrument_spec) - expand_model_tree(venue_spec) + instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") + venue_spec = feature_spec_for_model(Venue, entry_point="Venue") all_specs = 
collect_all_supplementary_types([instrument_spec, venue_spec]) assert has_name(all_specs, "Id") @@ -145,14 +143,13 @@ def test_deduplication_same_type_multiple_fields() -> None: def test_pydantic_type_has_used_by_from_feature() -> None: """Pydantic type in all_specs gets used-by entries from features referencing it.""" - model_spec = extract_model(FeatureWithUrl, entry_point="FeatureWithUrl") - expand_model_tree(model_spec) - all_specs = collect_all_supplementary_types([model_spec]) + expanded = feature_spec_for_model(FeatureWithUrl, entry_point="FeatureWithUrl") + all_specs = collect_all_supplementary_types([expanded]) assert has_name(all_specs, "HttpUrl") assert isinstance(lookup_by_name(all_specs, "HttpUrl"), PydanticTypeSpec) - result = compute_reverse_references([model_spec], all_specs) + result = compute_reverse_references([expanded], all_specs) entries = lookup_by_name(result, "HttpUrl") assert any(e.identity.name == "FeatureWithUrl" for e in entries) @@ -176,10 +173,8 @@ class FeatureBeta(BaseModel): FeatureBeta.__name__ = "Feature" FeatureBeta.__module__ = "beta.models" - spec_a = extract_model(FeatureAlpha, entry_point="Feature") - spec_b = extract_model(FeatureBeta, entry_point="Feature") - expand_model_tree(spec_a) - expand_model_tree(spec_b) + spec_a = feature_spec_for_model(FeatureAlpha, entry_point="Feature") + spec_b = feature_spec_for_model(FeatureBeta, entry_point="Feature") enum_id = TypeIdentity(SharedEnum, "SharedEnum") all_specs = {enum_id: extract_enum(SharedEnum)} @@ -201,10 +196,8 @@ def test_sorting_models_before_newtypes() -> None: # Create a synthetic NewType that wraps Id CustomId = NewType("CustomId", Id) - instrument_spec = extract_model(Instrument, entry_point="Instrument") - venue_spec = extract_model(Venue, entry_point="Venue") - expand_model_tree(instrument_spec) - expand_model_tree(venue_spec) + instrument_spec = feature_spec_for_model(Instrument, entry_point="Instrument") + venue_spec = feature_spec_for_model(Venue, 
entry_point="Venue") all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) # Add the CustomId NewType which references Id diff --git a/packages/overture-schema-codegen/tests/test_specs.py b/packages/overture-schema-codegen/tests/test_specs.py index 0780e2fda..550af18b7 100644 --- a/packages/overture-schema-codegen/tests/test_specs.py +++ b/packages/overture-schema-codegen/tests/test_specs.py @@ -20,215 +20,86 @@ TypeIdentity, is_union_alias, ) -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.system.primitive import int32 from pydantic import BaseModel, Field -class TestFeatureSpecProtocol: - """Tests for FeatureSpec protocol compliance.""" - - def test_model_spec_satisfies_feature_spec(self) -> None: - """ModelSpec satisfies the FeatureSpec protocol.""" - +class TestFeatureSpec: + def test_model_spec_is_feature_spec(self) -> None: class Simple(BaseModel): name: str - spec = extract_model(Simple) - # Protocol compliance check - assert isinstance(spec, FeatureSpec) - # Verify protocol attributes + spec: FeatureSpec = extract_model(Simple) assert spec.name == "Simple" assert isinstance(spec.fields, list) assert spec.source_type is Simple class TestFieldSpec: - """Tests for FieldSpec dataclass.""" - - def test_fieldspec_stores_basic_attributes(self) -> None: - """FieldSpec should store name, type_info, description, is_required.""" - field_spec = FieldSpec( - name="test_field", - type_info=STR_TYPE, - description="A test field", - is_required=True, - ) - - assert field_spec.name == "test_field" - assert field_spec.type_info == STR_TYPE - assert field_spec.description == "A test field" - assert field_spec.is_required is True - - def test_fieldspec_optional_field(self) -> None: - """FieldSpec should handle optional fields.""" - optional_str = TypeInfo( - base_type="str", kind=TypeKind.PRIMITIVE, is_optional=True - ) - - field_spec = FieldSpec( + def test_carries_shape_and_optional_flag(self) -> 
None: + fs = FieldSpec( name="optional_field", - type_info=optional_str, + shape=STR_TYPE, description=None, is_required=False, + is_optional=True, ) - - assert field_spec.is_required is False - assert field_spec.description is None - - -class TestModelSpec: - """Tests for ModelSpec dataclass.""" - - def test_modelspec_stores_basic_attributes(self) -> None: - """ModelSpec should store name, description, fields.""" - field = FieldSpec( - name="id", - type_info=STR_TYPE, - description="Unique identifier", - is_required=True, - ) - - model_spec = ModelSpec( - name="TestModel", - description="A test model", - fields=[field], - ) - - assert model_spec.name == "TestModel" - assert model_spec.description == "A test model" - assert len(model_spec.fields) == 1 - assert model_spec.fields[0].name == "id" - - def test_entry_point_defaults_to_none(self) -> None: - spec = ModelSpec(name="M", description=None) - assert spec.entry_point is None + assert fs.name == "optional_field" + assert fs.shape is STR_TYPE + assert fs.is_required is False + assert fs.is_optional is True class TestAnnotatedField: - """Tests for AnnotatedField wrapper.""" - def test_stores_field_and_variant_sources(self) -> None: - """AnnotatedField pairs a FieldSpec with variant provenance.""" - fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) - af = AnnotatedField(field_spec=fs, variant_sources=("RoadSegment",)) + class RoadSegment(BaseModel): + pass + + fs = FieldSpec(name="x", shape=STR_TYPE) + af = AnnotatedField(field_spec=fs, variant_sources=(RoadSegment,)) assert af.field_spec is fs - assert af.variant_sources == ("RoadSegment",) + assert af.variant_sources == (RoadSegment,) def test_none_variant_sources_means_shared(self) -> None: - """variant_sources=None indicates a shared field.""" - fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) + fs = FieldSpec(name="x", shape=STR_TYPE) af = AnnotatedField(field_spec=fs, variant_sources=None) assert 
af.variant_sources is None -class TestFieldSpecModelTree: - """Tests for FieldSpec model and starts_cycle fields.""" - - def test_model_defaults_to_none(self) -> None: - field_spec = FieldSpec( - name="test", type_info=STR_TYPE, description=None, is_required=True - ) - assert field_spec.model is None - - def test_starts_cycle_defaults_to_false(self) -> None: - field_spec = FieldSpec( - name="test", type_info=STR_TYPE, description=None, is_required=True - ) - assert field_spec.starts_cycle is False - - def test_model_can_hold_model_spec(self) -> None: - type_info = TypeInfo(base_type="Address", kind=TypeKind.MODEL) - sub = ModelSpec(name="Address", description=None) - field_spec = FieldSpec( - name="address", - type_info=type_info, - description=None, - is_required=True, - model=sub, - ) - assert field_spec.model is sub - - def test_starts_cycle_can_be_set(self) -> None: - type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) - sub = ModelSpec(name="Node", description=None) - field_spec = FieldSpec( - name="parent", - type_info=type_info, - description=None, - is_required=False, - model=sub, - starts_cycle=True, - ) - assert field_spec.starts_cycle is True - assert field_spec.model is sub - - def test_starts_cycle_without_model_is_nonsensical(self) -> None: - """starts_cycle=True with model=None is expressible but invalid. - - expand_model_tree never produces this combination -- starts_cycle - is only set when model points to the cycle-causing ModelSpec. - Document the invariant so violations stand out. 
- """ - type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) - field_spec = FieldSpec( - name="parent", - type_info=type_info, - description=None, - is_required=False, - starts_cycle=True, - ) - # Expressible but meaningless: cycle to nowhere - assert field_spec.starts_cycle is True - assert field_spec.model is None - - class TestIsUnionAlias: - """Tests for is_union_alias predicate.""" - def test_annotated_union_of_models_returns_true(self) -> None: - """Annotated[Union of BaseModels] is a union alias.""" - class A(BaseModel): x: int class B(BaseModel): y: str - union_type = Annotated[A | B, Field(description="test")] - assert is_union_alias(union_type) is True + assert is_union_alias(Annotated[A | B, Field(description="test")]) is True def test_model_class_returns_false(self) -> None: - """A concrete BaseModel class is not a union alias.""" - class A(BaseModel): x: int assert is_union_alias(A) is False def test_plain_string_returns_false(self) -> None: - """A plain string is not a union alias.""" assert is_union_alias("not a type") is False def test_non_model_union_returns_false(self) -> None: - """A union of non-model types is not a union alias.""" assert is_union_alias(str | int) is False class TestUnionSpec: - """Tests for UnionSpec data structure.""" - def test_fields_property_returns_plain_field_specs(self) -> None: - """UnionSpec.fields property returns list[FieldSpec] from annotated_fields.""" - fs1 = FieldSpec( - name="a", type_info=STR_TYPE, description=None, is_required=True - ) - fs2 = FieldSpec( - name="b", type_info=STR_TYPE, description=None, is_required=False - ) + class X(BaseModel): + pass + + fs1 = FieldSpec(name="a", shape=STR_TYPE) + fs2 = FieldSpec(name="b", shape=STR_TYPE, is_required=False) spec = make_union_spec( annotated_fields=[ AnnotatedField(field_spec=fs1, variant_sources=None), - AnnotatedField(field_spec=fs2, variant_sources=("X",)), + AnnotatedField(field_spec=fs2, variant_sources=(X,)), ], ) assert spec.fields == [fs1, 
fs2] @@ -240,20 +111,13 @@ def test_frozen(self) -> None: with pytest.raises(AttributeError): ti.obj = str # type: ignore[misc] - def test_same_obj_equal(self) -> None: + def test_equality_by_obj_identity(self) -> None: a = TypeIdentity(obj=int, name="int") b = TypeIdentity(obj=int, name="integer") + c = TypeIdentity(obj=str, name="int") assert a == b - - def test_same_obj_same_hash(self) -> None: - a = TypeIdentity(obj=int, name="int") - b = TypeIdentity(obj=int, name="integer") assert hash(a) == hash(b) - - def test_different_obj_not_equal(self) -> None: - a = TypeIdentity(obj=int, name="int") - b = TypeIdentity(obj=str, name="int") - assert a != b + assert a != c def test_works_as_dict_key(self) -> None: ti = TypeIdentity(obj=int, name="int") @@ -271,35 +135,21 @@ def test_not_equal_to_non_identity(self) -> None: class TestSpecIdentity: def test_model_spec_identity(self) -> None: spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) - ident = spec.identity - assert isinstance(ident, TypeIdentity) - assert ident.obj is SimpleModel - assert ident.name == "Foo" + assert spec.identity.obj is SimpleModel + assert spec.identity.name == "Foo" def test_enum_spec_identity(self) -> None: spec = EnumSpec(name="Color", description=None, source_type=InstrumentFamily) - ident = spec.identity - assert ident.obj is InstrumentFamily - assert ident.name == "Color" + assert spec.identity.obj is InstrumentFamily def test_newtype_spec_identity(self) -> None: - from overture.schema.system.primitive import int32 - spec = NewTypeSpec( - name="int32", description=None, type_info=STR_TYPE, source_type=int32 + name="int32", description=None, shape=STR_TYPE, source_type=int32 ) - ident = spec.identity - assert ident.obj is int32 - assert ident.name == "int32" + assert spec.identity.obj is int32 def test_union_spec_identity(self) -> None: sentinel = object() spec = make_union_spec("TestUnion", source_annotation=sentinel) - ident = spec.identity - assert ident.obj is 
sentinel - assert ident.name == "TestUnion" - - def test_model_spec_satisfies_feature_protocol_with_identity(self) -> None: - spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) - feature: FeatureSpec = spec - assert feature.identity.obj is SimpleModel + assert spec.identity.obj is sentinel + assert spec.identity.name == "TestUnion" diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py index bbf8373fd..f8ccf88f0 100644 --- a/packages/overture-schema-codegen/tests/test_type_analyzer.py +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -1,18 +1,34 @@ -"""Tests for type analysis.""" +"""Tests for `analyze_type`: annotation -> `FieldShape` analysis.""" from enum import Enum from typing import Annotated, Any, Literal, NewType, Optional import pytest -from annotated_types import Ge +from annotated_types import Ge, MaxLen, MinLen +from overture.schema.codegen.extraction.field import ( + AnyScalar, + ArrayOf, + FieldShape, + LiteralScalar, + MapOf, + NewTypeShape, + Primitive, +) +from overture.schema.codegen.extraction.field_walk import ( + all_constraints, + list_depth, +) +from overture.schema.codegen.extraction.length_constraints import ( + ArrayMinLen, + ScalarMinLen, +) from overture.schema.codegen.extraction.type_analyzer import ( - TypeInfo, - TypeKind, UnsupportedUnionError, analyze_type, single_literal_value, + unwrap_list, ) -from overture.schema.system.primitive import float64, int32 +from overture.schema.system.primitive import int32 from overture.schema.system.ref import Id from overture.schema.system.string import ( HexColor, @@ -24,568 +40,290 @@ from typing_extensions import Sentinel -@pytest.fixture() -def id_type_info() -> TypeInfo: - return analyze_type(Id) +def _shape(annotation: object) -> FieldShape: + shape, _, _ = analyze_type(annotation) + return shape -@pytest.fixture() -def hex_color_type_info() -> TypeInfo: - return 
analyze_type(HexColor) +def _is_optional(annotation: object) -> bool: + _, is_optional, _ = analyze_type(annotation) + return is_optional -class TestAnalyzeTypePrimitives: - """Tests for primitive type analysis.""" +def _description(annotation: object) -> str | None: + _, _, description = analyze_type(annotation) + return description + +class TestPrimitives: @pytest.mark.parametrize("annotation", [str, int, float, bool]) - def test_builtin_returns_primitive_type_info(self, annotation: type) -> None: - """Builtin type annotations return PRIMITIVE TypeInfo with matching base_type.""" - result = analyze_type(annotation) + def test_builtin_emits_primitive(self, annotation: type) -> None: + shape = _shape(annotation) + assert isinstance(shape, Primitive) + assert shape.base_type == annotation.__name__ + assert shape.source_type is annotation - assert result.base_type == annotation.__name__ - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - assert result.is_list is False + def test_any_emits_any_scalar(self) -> None: + shape = _shape(Any) + assert isinstance(shape, AnyScalar) -class TestAnalyzeTypeSentinel: - """Tests for Sentinel type filtering in unions. - - Pydantic uses `typing_extensions.Sentinel` instances (like ``) - in union types for optional fields. The type analyzer filters these out - alongside `None` when processing unions. 
- """ +class TestSentinel: + """`Sentinel` arms in unions are filtered alongside `None`.""" @pytest.fixture() - def missing_sentinel(self) -> object: + def missing(self) -> object: return Sentinel("MISSING") - def test_sentinel_filtered_from_union(self, missing_sentinel: object) -> None: - """Sentinel is filtered out, leaving the concrete type.""" - result = analyze_type(str | missing_sentinel) # type: ignore[arg-type] - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - - def test_sentinel_with_none_sets_optional(self, missing_sentinel: object) -> None: - """Sentinel + None both filtered; None triggers is_optional.""" - result = analyze_type(str | missing_sentinel | None) # type: ignore[arg-type] - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - - -class TestAnalyzeTypeOptional: - """Tests for Optional type analysis.""" - - def test_pipe_none_sets_is_optional(self) -> None: - """str | None returns TypeInfo with is_optional=True.""" - result = analyze_type(str | None) - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - assert result.is_list is False - - def test_type_with_literal_and_none(self) -> None: - """str | Literal[""] | None filters Literal and marks optional.""" - result = analyze_type(str | Literal[""] | None) - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - - def test_typing_optional_sets_is_optional(self) -> None: - """Optional[str] from typing module returns TypeInfo with is_optional=True.""" - result = analyze_type(Optional[str]) # noqa: UP045 - - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is True - assert result.is_list is False + def test_filtered_leaves_concrete_type(self, missing: object) -> None: + shape = _shape(str | 
missing) # type: ignore[arg-type] + assert isinstance(shape, Primitive) + assert shape.base_type == "str" + assert _is_optional(str | missing) is False # type: ignore[arg-type] + def test_with_none_sets_optional(self, missing: object) -> None: + assert _is_optional(str | missing | None) is True # type: ignore[arg-type] -class TestAnalyzeTypeUnionLiteralFiltering: - """Tests for filtering Literal arms out of unions.""" - def test_type_with_literal_alternative(self) -> None: - """str | Literal[""] filters out the Literal and analyzes the concrete type.""" - result = analyze_type(str | Literal[""]) +class TestOptional: + def test_pipe_none(self) -> None: + assert _is_optional(str | None) is True - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False + def test_typing_optional(self) -> None: + assert _is_optional(Optional[str]) is True # noqa: UP045 + def test_literal_arm_filtered_with_concrete(self) -> None: + shape, optional, _ = analyze_type(str | Literal[""] | None) + assert isinstance(shape, Primitive) and shape.base_type == "str" + assert optional is True -class TestAnalyzeTypeList: - """Tests for list type analysis.""" - def test_list_str_sets_is_list(self) -> None: - """list[str] returns TypeInfo with is_list=True.""" - result = analyze_type(list[str]) +class TestList: + def test_simple_list(self) -> None: + shape = _shape(list[str]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, Primitive) + assert shape.element.base_type == "str" - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE - assert result.is_optional is False - assert result.is_list is True + def test_nested_list_records_depth(self) -> None: + shape = _shape(list[list[str]]) + assert list_depth(shape) == 2 - def test_nested_list_sets_depth_2(self) -> None: - """list[list[str]] records two levels of nesting.""" - result = analyze_type(list[list[str]]) + def test_optional_list(self) -> None: + 
shape, optional, _ = analyze_type(list[str] | None) + assert isinstance(shape, ArrayOf) + assert optional is True - assert result.list_depth == 2 - assert result.base_type == "str" - assert result.kind == TypeKind.PRIMITIVE + def test_list_optional_element(self) -> None: + shape, optional, _ = analyze_type(list[str | None]) + assert isinstance(shape, ArrayOf) + # `is_optional` reflects the field accepting None; element-level + # `| None` propagates the same way. + assert optional is True -class TestAnalyzeTypeComposite: - """Tests for composite/nested type analysis.""" - - def test_list_optional_str(self) -> None: - """list[str | None] sets both is_list and is_optional.""" - result = analyze_type(list[str | None]) - - assert result.base_type == "str" - assert result.is_list is True - assert result.is_optional is True - - def test_optional_list_str(self) -> None: - """list[str] | None sets both is_list and is_optional.""" - result = analyze_type(list[str] | None) - - assert result.base_type == "str" - assert result.is_list is True - assert result.is_optional is True - - def test_annotated_optional_str(self) -> None: - """Annotated[str | None, ...] extracts constraints and sets is_optional.""" - result = analyze_type(Annotated[str | None, "description"]) - - assert result.base_type == "str" - assert result.is_optional is True - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "description" - - def test_annotated_list_str(self) -> None: - """Annotated[list[str], ...] 
extracts constraints and sets is_list.""" - result = analyze_type(Annotated[list[str], Field(min_length=1)]) - - assert result.base_type == "str" - assert result.is_list is True - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - - -class TestAnalyzeTypeAnnotated: - """Tests for Annotated type analysis.""" - - def test_annotated_int_with_ge_extracts_constraint(self) -> None: - """Annotated[int, Field(ge=0)] unpacks FieldInfo to extract Ge constraint.""" - result = analyze_type(Annotated[int, Field(ge=0)]) - - assert result.base_type == "int" - assert result.kind == TypeKind.PRIMITIVE - assert len(result.constraints) == 1 - cs = result.constraints[0] - assert cs.source_ref is None +class TestAnnotated: + def test_ge_collected_on_terminal(self) -> None: + shape = _shape(Annotated[int, Field(ge=0)]) + assert isinstance(shape, Primitive) + assert len(shape.constraints) == 1 + cs = shape.constraints[0] assert isinstance(cs.constraint, Ge) - assert cs.constraint.ge == 0 - - def test_annotated_without_constraints(self) -> None: - """Annotated[str, 'description'] extracts non-Field metadata.""" - result = analyze_type(Annotated[str, "just a description"]) - - assert result.base_type == "str" - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "just a description" - - -class TestAnalyzeTypeLiteral: - """Tests for Literal type analysis.""" - - def test_literal_string_extracts_values(self) -> None: - """Literal["active"] stores the value in literal_values tuple.""" - result = analyze_type(Literal["active"]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("active",) - - def test_literal_int_extracts_values(self) -> None: - """Literal[42] stores the value in literal_values tuple.""" - result = analyze_type(Literal[42]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == (42,) - - def 
test_multi_value_literal_stores_all_args(self) -> None: - """Literal["a", "b"] stores all args in literal_values tuple.""" - result = analyze_type(Literal["a", "b"]) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("a", "b") - - def test_optional_literal_extracts_values(self) -> None: - """Optional[Literal["x"]] unwraps to Literal with is_optional set.""" - result = analyze_type(Literal["x"] | None) - - assert result.kind == TypeKind.LITERAL - assert result.literal_values == ("x",) - assert result.is_optional is True - - -class TestAnalyzeTypeEnum: - """Tests for Enum type analysis.""" - - def test_enum_subclass_returns_kind_enum(self) -> None: - """Enum subclass returns TypeInfo with kind=ENUM.""" + assert cs.source_ref is None + def test_non_field_metadata_collected(self) -> None: + shape = _shape(Annotated[str, "just a description"]) + assert isinstance(shape, Primitive) + assert shape.constraints[0].constraint == "just a description" + + def test_list_level_minlen_lands_on_arrayof(self) -> None: + shape = _shape(Annotated[list[str], Field(min_length=1)]) + assert isinstance(shape, ArrayOf) + assert len(shape.constraints) == 1 + assert isinstance(shape.element, Primitive) + assert shape.element.constraints == () + + def test_layered_constraints_anchor_separately(self) -> None: + shape = _shape(Annotated[list[Annotated[str, MinLen(2)]], MinLen(3)]) + assert isinstance(shape, ArrayOf) + outer = shape.constraints + assert len(outer) == 1 + assert outer[0].constraint == ArrayMinLen(min_length=3) + assert isinstance(shape.element, Primitive) + inner = shape.element.constraints + assert len(inner) == 1 + assert inner[0].constraint == ScalarMinLen(min_length=2) + + +class TestLiteral: + def test_single_value(self) -> None: + shape = _shape(Literal["active"]) + assert isinstance(shape, LiteralScalar) + assert shape.values == ("active",) + + def test_multi_value(self) -> None: + shape = _shape(Literal["a", "b"]) + assert isinstance(shape, 
LiteralScalar) + assert shape.values == ("a", "b") + + def test_optional_literal(self) -> None: + shape, optional, _ = analyze_type(Literal["x"] | None) + assert isinstance(shape, LiteralScalar) + assert shape.values == ("x",) + assert optional is True + + +class TestEnumAndModel: + def test_enum_emits_primitive_with_source(self) -> None: class Color(Enum): RED = "red" - GREEN = "green" - - result = analyze_type(Color) - - assert result.base_type == "Color" - assert result.kind == TypeKind.ENUM - - -class TestAnalyzeTypeModel: - """Tests for BaseModel type analysis.""" - def test_basemodel_subclass_returns_kind_model(self) -> None: - """BaseModel subclass returns TypeInfo with kind=MODEL.""" + shape = _shape(Color) + assert isinstance(shape, Primitive) + assert shape.source_type is Color + def test_model_without_resolver_falls_back_to_primitive(self) -> None: class Person(BaseModel): name: str - result = analyze_type(Person) - - assert result.base_type == "Person" - assert result.kind == TypeKind.MODEL - - -class TestAnalyzeTypeNewType: - """Tests for NewType primitive analysis.""" - - def test_int32_returns_newtype_name(self) -> None: - """int32 NewType returns TypeInfo with base_type='int32'.""" - result = analyze_type(int32) - - assert result.base_type == "int32" - assert result.kind == TypeKind.PRIMITIVE - - def test_float64_returns_newtype_name(self) -> None: - """float64 NewType returns TypeInfo with base_type='float64'.""" - result = analyze_type(float64) - - assert result.base_type == "float64" - assert result.kind == TypeKind.PRIMITIVE - - def test_optional_int32(self) -> None: - """int32 | None sets is_optional and preserves base_type.""" - result = analyze_type(int32 | None) - - assert result.base_type == "int32" - assert result.is_optional is True - - -class TestNewtypeName: - """Tests for outermost NewType name tracking.""" - - def test_single_layer_newtype(self) -> None: - """Single NewType like int32 sets newtype_name to its name.""" - result = 
analyze_type(int32) - - assert result.newtype_name == "int32" - assert result.base_type == "int32" - - def test_nested_newtype_preserves_outermost(self, id_type_info: TypeInfo) -> None: - """Nested NewType chain uses outermost name for newtype_name.""" - assert id_type_info.newtype_name == "Id" - assert id_type_info.base_type == "NoWhitespaceString" - - def test_plain_type_has_no_newtype_name(self) -> None: - """Plain types without NewType wrapping have newtype_name=None.""" - result = analyze_type(str) - - assert result.newtype_name is None + shape = _shape(Person) + assert isinstance(shape, Primitive) + assert shape.source_type is Person + assert shape.base_type == "Person" - def test_newtype_ref_set_for_newtype(self, id_type_info: TypeInfo) -> None: - """newtype_ref points to the outermost NewType callable.""" - assert id_type_info.newtype_ref is Id - def test_newtype_ref_none_for_plain_type(self) -> None: - """Plain types have newtype_ref=None.""" - result = analyze_type(str) +class TestNewType: + def test_simple_newtype(self) -> None: + shape = _shape(int32) + assert isinstance(shape, NewTypeShape) + assert shape.name == "int32" + assert isinstance(shape.inner, Primitive) + assert shape.inner.base_type == "int32" - assert result.newtype_ref is None + def test_outermost_newtype_is_outer_wrapper(self) -> None: + shape = _shape(Id) + assert isinstance(shape, NewTypeShape) + assert shape.name == "Id" + def test_optional_newtype(self) -> None: + assert _is_optional(int32 | None) is True -class TestNewtypeWrappingList: - """Tests for NewType wrapping a list type.""" - def test_newtype_wrapping_list(self) -> None: - """NewType wrapping a list sets is_list and preserves newtype_name.""" +class TestNewTypeWrappingList: + def test_newtype_around_list(self) -> None: TestSources = NewType("TestSources", Annotated[list[str], Field(min_length=1)]) - result = analyze_type(TestSources) + shape = _shape(TestSources) + assert isinstance(shape, NewTypeShape) and shape.name == 
"TestSources" + assert isinstance(shape.inner, ArrayOf) - assert result.is_list is True - assert result.newtype_name == "TestSources" - - def test_scalar_newtype_is_not_list(self) -> None: - """Scalar NewType like int32 has is_list=False.""" - result = analyze_type(int32) - - assert result.is_list is False - - def test_plain_list_has_no_newtype_name(self) -> None: - """Plain list[str] without NewType has newtype_name=None.""" - result = analyze_type(list[str]) - - assert result.newtype_name is None - assert result.is_list is True - - def test_newtype_wrapping_list_of_models(self) -> None: - """list[NewType wrapping list[Model]] records depth 2, outer depth 1.""" - - class _Item(BaseModel): - name: str - - Inner = NewType("Inner", Annotated[list[_Item], Field(min_length=1)]) - result = analyze_type(list[Inner]) - - assert result.list_depth == 2 - assert result.newtype_outer_list_depth == 1 - assert result.base_type == "Inner" - assert result.kind == TypeKind.MODEL - assert result.source_type is _Item + def test_list_around_scalar_newtype(self) -> None: + ScalarNT = NewType("ScalarNT", str) + shape = _shape(list[ScalarNT]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, NewTypeShape) -class TestNewtypeOuterListDepth: - """Tests for newtype_outer_list_depth tracking.""" +class TestConstraintProvenance: + """Constraints carry the NewType that contributed them.""" - def test_list_of_scalar_newtype_has_outer_depth(self) -> None: - """list[ScalarNewType] records the list layer as outside the NewType.""" - ScalarNT = NewType("ScalarNT", str) - result = analyze_type(list[ScalarNT]) - - assert result.newtype_outer_list_depth == 1 - assert result.list_depth == 1 - - def test_newtype_wrapping_list_has_zero_outer_depth(self) -> None: - """NewType wrapping list[X] records no list layers outside the NewType.""" - ListNT = NewType("ListNT", Annotated[list[str], Field(min_length=1)]) - result = analyze_type(ListNT) - - assert result.newtype_outer_list_depth 
== 0 - assert result.list_depth == 1 - - @pytest.mark.parametrize( - "annotation", - [ - list[str], # list without NewType - int32, # scalar NewType - str, # plain type - ], - ids=["plain_list", "scalar_newtype", "plain_type"], - ) - def test_zero_outer_depth_without_newtype_boundary( - self, annotation: object - ) -> None: - """Types without a NewType inside a list have newtype_outer_list_depth=0.""" - result = analyze_type(annotation) - - assert result.newtype_outer_list_depth == 0 - - def test_nested_list_of_scalar_newtype_has_outer_depth_2(self) -> None: - """list[list[ScalarNewType]] records two outer list layers.""" - ScalarNT = NewType("ScalarNT", str) - result = analyze_type(list[list[ScalarNT]]) + @pytest.fixture() + def id_shape(self) -> FieldShape: + return _shape(Id) - assert result.newtype_outer_list_depth == 2 - assert result.list_depth == 2 + @pytest.fixture() + def hex_shape(self) -> FieldShape: + return _shape(HexColor) + def test_nested_newtype_flattens_with_sources(self, id_shape: FieldShape) -> None: + sources = {cs.source_name for cs in all_constraints(id_shape)} + assert "Id" in sources + assert "NoWhitespaceString" in sources -class TestConstraintProvenance: - """Tests for flattened constraints with provenance tracking.""" - - def test_nested_newtype_flattens_constraints(self, id_type_info: TypeInfo) -> None: - """Id -> NoWhitespaceString -> str flattens all constraints with sources.""" - source_names = { - cs.source_name for cs in id_type_info.constraints if cs.source_name - } - assert "Id" in source_names - assert "NoWhitespaceString" in source_names - - def test_nested_newtype_includes_inner_constraints( - self, id_type_info: TypeInfo - ) -> None: - """Inner NewType constraints are collected with provenance.""" - nws_constraints = [ - cs for cs in id_type_info.constraints if cs.source_ref is NoWhitespaceString + def test_inner_newtype_constraints_preserved(self, id_shape: FieldShape) -> None: + nws = [ + cs + for cs in 
all_constraints(id_shape) + if cs.source_ref is NoWhitespaceString ] - constraint_types = {type(cs.constraint) for cs in nws_constraints} - assert NoWhitespaceConstraint in constraint_types + assert NoWhitespaceConstraint in {type(cs.constraint) for cs in nws} def test_direct_annotation_has_none_source(self) -> None: - """Constraints from direct Annotated (no NewType) have source_ref=None.""" - result = analyze_type(Annotated[str, "direct"]) - - assert len(result.constraints) == 1 - assert result.constraints[0].source_ref is None - assert result.constraints[0].constraint == "direct" - - def test_single_newtype_constraints_attributed( - self, hex_color_type_info: TypeInfo - ) -> None: - """HexColor constraints are attributed to the HexColor callable.""" - assert all(cs.source_ref is HexColor for cs in hex_color_type_info.constraints) - assert len(hex_color_type_info.constraints) > 0 - - def test_source_ref_is_newtype_callable( - self, hex_color_type_info: TypeInfo - ) -> None: - """source_ref is the actual NewType callable, not a string.""" - cs = hex_color_type_info.constraints[0] - assert cs.source_ref is HexColor - - def test_constraint_preserves_original_object( - self, hex_color_type_info: TypeInfo - ) -> None: - """ConstraintSource.constraint holds the original constraint object.""" - hcc = next( - cs - for cs in hex_color_type_info.constraints - if type(cs.constraint).__name__ == "HexColorConstraint" - ) - assert hcc.constraint.__class__.__name__ == "HexColorConstraint" + shape = _shape(Annotated[str, "direct"]) + cs = all_constraints(shape) + assert len(cs) == 1 + assert cs[0].source_ref is None + def test_single_newtype_attributed_to_itself(self, hex_shape: FieldShape) -> None: + cs = all_constraints(hex_shape) + assert cs and all(c.source_ref is HexColor for c in cs) -class TestTypeInfoDescription: - """Tests for TypeInfo.description from Field(description=...) 
metadata.""" - def test_newtype_with_field_description( - self, hex_color_type_info: TypeInfo - ) -> None: - """Should extract Field description from HexColor.""" - assert hex_color_type_info.description is not None - assert "color" in hex_color_type_info.description.lower() +class TestDescription: + def test_newtype_field_description(self) -> None: + desc = _description(HexColor) + assert desc is not None and "color" in desc.lower() - def test_newtype_without_field_description(self) -> None: - """Should have None description for types without Field(description=...).""" - result = analyze_type(int) - assert result.description is None + def test_plain_type_has_no_description(self) -> None: + assert _description(int) is None - def test_plain_annotated_with_field_description(self) -> None: - """Should extract description from Annotated with Field(description=...).""" + def test_annotated_field_description(self) -> None: MyType = Annotated[str, Field(description="A test description")] - result = analyze_type(MyType) - assert result.description == "A test description" - - def test_outermost_description_wins(self, id_type_info: TypeInfo) -> None: - """Outermost FieldInfo.description takes precedence in nested NewTypes.""" - assert id_type_info.description is not None - assert "unique identifier" in id_type_info.description.lower() - - def test_newtype_without_field_has_none_description(self) -> None: - """NewType with constraints but no Field(description=...) 
has None.""" - result = analyze_type(SnakeCaseString) - assert result.description is None - - -class TestAnalyzeTypeAny: - """Tests for typing.Any analysis.""" - - def test_any_returns_primitive(self) -> None: - """Any annotation returns TypeInfo with base_type='Any' and kind=PRIMITIVE.""" - result = analyze_type(Any) + assert _description(MyType) == "A test description" - assert result.base_type == "Any" - assert result.kind == TypeKind.PRIMITIVE - - def test_dict_with_any_value(self) -> None: - """dict[str, Any] analyzes without error.""" - result = analyze_type(dict[str, Any]) - - assert result.is_dict is True - assert result.dict_value_type is not None - assert result.dict_value_type.base_type == "Any" + def test_outermost_description_wins(self) -> None: + desc = _description(Id) + assert desc is not None and "unique identifier" in desc.lower() + def test_newtype_without_field_description(self) -> None: + assert _description(SnakeCaseString) is None -class TestAnalyzeTypeDict: - """Tests for dict type analysis.""" - @pytest.fixture() - def dict_str_int(self) -> TypeInfo: - return analyze_type(dict[str, int]) - - def test_dict_str_int_sets_is_dict(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] returns TypeInfo with is_dict=True.""" - assert dict_str_int.is_dict is True - assert dict_str_int.is_optional is False - assert dict_str_int.is_list is False - - def test_dict_key_type_analyzed(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] has dict_key_type describing the key.""" - assert dict_str_int.dict_key_type is not None - assert dict_str_int.dict_key_type.base_type == "str" - assert dict_str_int.dict_key_type.kind == TypeKind.PRIMITIVE - - def test_dict_value_type_analyzed(self, dict_str_int: TypeInfo) -> None: - """dict[str, int] has dict_value_type describing the value.""" - assert dict_str_int.dict_value_type is not None - assert dict_str_int.dict_value_type.base_type == "int" - assert dict_str_int.dict_value_type.kind == 
TypeKind.PRIMITIVE +class TestDict: + def test_simple_dict(self) -> None: + shape = _shape(dict[str, int]) + assert isinstance(shape, MapOf) + assert isinstance(shape.key, Primitive) and shape.key.base_type == "str" + assert isinstance(shape.value, Primitive) and shape.value.base_type == "int" def test_optional_dict(self) -> None: - """dict[str, str] | None sets is_dict and is_optional.""" - result = analyze_type(dict[str, str] | None) - - assert result.is_dict is True - assert result.is_optional is True + shape, optional, _ = analyze_type(dict[str, str] | None) + assert isinstance(shape, MapOf) + assert optional is True - def test_newtype_wrapping_dict(self) -> None: - """NewType wrapping dict preserves newtype_name and sets is_dict.""" + def test_newtype_around_dict(self) -> None: TestMapping = NewType("TestMapping", dict[str, str]) - result = analyze_type(TestMapping) + shape = _shape(TestMapping) + assert isinstance(shape, NewTypeShape) and shape.name == "TestMapping" + assert isinstance(shape.inner, MapOf) - assert result.is_dict is True - assert result.newtype_name == "TestMapping" + def test_dict_with_any_value(self) -> None: + shape = _shape(dict[str, Any]) + assert isinstance(shape, MapOf) + assert isinstance(shape.value, AnyScalar) - def test_bare_dict_raises_type_error(self) -> None: - """Bare dict without type arguments raises TypeError.""" + def test_bare_dict_raises(self) -> None: with pytest.raises(TypeError, match="Bare dict"): analyze_type(dict) + def test_minlen_on_map_raises(self) -> None: + with pytest.raises(NotImplementedError, match="MinLen on a Map"): + _shape(Annotated[dict[str, int], MinLen(1)]) + + def test_maxlen_on_map_raises(self) -> None: + with pytest.raises(NotImplementedError, match="MaxLen on a Map"): + _shape(Annotated[dict[str, int], MaxLen(10)]) -class TestAnalyzeTypeErrors: - """Tests for error handling.""" - def test_unsupported_annotation_raises_type_error(self) -> None: - """Unsupported annotation type raises TypeError.""" 
+class TestErrors: + def test_unsupported_annotation(self) -> None: with pytest.raises(TypeError, match="Unsupported annotation type"): analyze_type("not a type") - def test_multi_type_union_raises_clear_error(self) -> None: - """Multi-type unions like str | int raise UnsupportedUnionError.""" - with pytest.raises( - UnsupportedUnionError, match="Multi-type unions not supported" - ): + def test_multi_type_union_without_resolver(self) -> None: + with pytest.raises(UnsupportedUnionError): analyze_type(str | int) - def test_multi_type_union_with_none_raises_clear_error(self) -> None: - """Multi-type optional unions like str | int | None raise UnsupportedUnionError.""" - with pytest.raises( - UnsupportedUnionError, match="Multi-type unions not supported" - ): - analyze_type(str | int | None) - - def test_bare_list_raises_type_error(self) -> None: - """Bare list without type argument raises TypeError.""" + def test_bare_list(self) -> None: with pytest.raises(TypeError, match="Bare list without type argument"): analyze_type(list) @@ -598,79 +336,184 @@ class UnionModelB(BaseModel): y: str -class TestAnalyzeTypeUnion: - """Tests for discriminated union analysis.""" +class TestUnionResolver: + """Multi-arm unions of models go through the resolver callback.""" + + def test_resolver_receives_annotation_members_and_description(self) -> None: + captured: list[tuple[object, tuple[type[BaseModel], ...], str | None]] = [] - def test_all_model_union_returns_union_kind(self) -> None: - """Annotated[Union of BaseModel subclasses] returns TypeKind.UNION.""" - union_type = Annotated[UnionModelA | UnionModelB, Field(description="test")] - result = analyze_type(union_type) + def resolver( + annotation: object, + members: tuple[type[BaseModel], ...], + description: str | None, + ) -> Primitive: + captured.append((annotation, members, description)) + return Primitive(base_type="__captured__") - assert result.kind == TypeKind.UNION - assert result.union_members is not None - assert 
len(result.union_members) == 2 - assert UnionModelA in result.union_members - assert UnionModelB in result.union_members + union_type = Annotated[UnionModelA | UnionModelB, Field(description="x")] + shape, _, _ = analyze_type(union_type, union_resolver=resolver) + + assert isinstance(shape, Primitive) + assert shape.base_type == "__captured__" + _ann, members, description = captured[0] + expected: set[type[BaseModel]] = {UnionModelA, UnionModelB} + assert set(members) == expected + assert description == "x" + + def test_no_resolver_raises_on_multi_arm(self) -> None: + union_type = Annotated[UnionModelA | UnionModelB, Field(description="x")] + with pytest.raises(UnsupportedUnionError): + analyze_type(union_type) def test_annotated_wrapped_members_unwrapped(self) -> None: - """Union members wrapped in Annotated[X, Tag(...)] are unwrapped.""" + from overture.schema.codegen.extraction.type_analyzer import analyze_type as at + + captured_members: list[tuple[type[BaseModel], ...]] = [] + + def resolver( + _ann: object, + members: tuple[type[BaseModel], ...], + _description: str | None, + ) -> Primitive: + captured_members.append(members) + return Primitive(base_type="x") + union_type = Annotated[ Annotated[UnionModelA, Tag("a")] | Annotated[UnionModelB, Tag("b")], Field(description="disc"), ] - result = analyze_type(union_type) + at(union_type, union_resolver=resolver) + expected: set[type[BaseModel]] = {UnionModelA, UnionModelB} + assert set(captured_members[0]) == expected - assert result.kind == TypeKind.UNION - assert result.union_members is not None - assert len(result.union_members) == 2 - assert UnionModelA in result.union_members - assert UnionModelB in result.union_members - - def test_mixed_model_nonmodel_union_still_raises(self) -> None: - """Union of model + non-model types still raises UnsupportedUnionError.""" + def test_mixed_model_nonmodel_raises(self) -> None: with pytest.raises(UnsupportedUnionError): analyze_type(UnionModelA | str) - def 
test_non_model_multi_union_still_raises(self) -> None: - """Multi-type union of non-models still raises UnsupportedUnionError.""" - with pytest.raises(UnsupportedUnionError): - analyze_type(str | int) - - def test_union_base_type_is_first_member_name(self) -> None: - """UNION TypeInfo base_type is the first member's class name.""" - result = analyze_type( - Annotated[UnionModelA | UnionModelB, Field(description="test")] - ) - assert result.base_type == "UnionModelA" - - def test_optional_union_sets_is_optional(self) -> None: - """Union with None among model members sets is_optional.""" - result = analyze_type( - Annotated[UnionModelA | UnionModelB, Field(description="test")] | None - ) - assert result.kind == TypeKind.UNION - assert result.is_optional is True - class TestSingleLiteralValue: - """Tests for single_literal_value convenience accessor.""" - - def test_single_value_literal(self) -> None: - """Literal["x"] returns the literal value.""" + def test_single_string(self) -> None: assert single_literal_value(Literal["x"]) == "x" - def test_single_int_literal(self) -> None: - """Literal[42] returns the integer value.""" + def test_single_int(self) -> None: assert single_literal_value(Literal[42]) == 42 - def test_multi_value_literal_returns_none(self) -> None: - """Multi-value Literal returns None (no single default).""" + def test_multi_value_returns_none(self) -> None: assert single_literal_value(Literal["a", "b"]) is None def test_non_literal_returns_none(self) -> None: - """Non-Literal types return None.""" assert single_literal_value(str) is None - def test_unsupported_type_returns_none(self) -> None: - """Types that raise during analysis return None.""" + def test_unsupported_returns_none(self) -> None: assert single_literal_value("not a type") is None + + +class TestUnwrapList: + def test_plain_list(self) -> None: + assert unwrap_list(list[int]) is int + + def test_nested_list(self) -> None: + assert unwrap_list(list[list[str]]) is str + + def 
test_non_list_passthrough(self) -> None: + assert unwrap_list(int) is int + + def test_optional_list(self) -> None: + assert unwrap_list(list[int] | None) is int + + def test_optional_list_preserves_annotated(self) -> None: + from overture.schema.common.scoping.vehicle import VehicleSelector + + assert unwrap_list(list[VehicleSelector] | None) is VehicleSelector + + +class TestNestedArrayCharacterization: + """Pin analyze_type behavior on consecutive-list and NewType-chain shapes. + + The schema has no genuine `list[list[X]]` field, so these are the only + coverage of the path the recursive _unwrap rewrite must preserve. + """ + + def test_list_of_list_nests_two_arrayofs(self) -> None: + shape = _shape(list[list[str]]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, ArrayOf) + assert isinstance(shape.element.element, Primitive) + assert shape.element.element.base_type == "str" + + def test_list_of_list_constraints_anchor_to_their_layer(self) -> None: + # Each MinLen lands on the ArrayOf layer it annotates, not flattened. + # Outer Annotated[..., Field(min_length=3)] targets the outer list. + # Inner Annotated[list[str], Field(min_length=2)] targets the inner list. 
+ shape = _shape( + Annotated[ + list[Annotated[list[str], Field(min_length=2)]], Field(min_length=3) + ] + ) + assert isinstance(shape, ArrayOf) + inner = shape.element + assert isinstance(inner, ArrayOf) + outer_min_lens = [ + cs.constraint.min_length + for cs in shape.constraints + if isinstance(cs.constraint, ArrayMinLen) + ] + inner_min_lens = [ + cs.constraint.min_length + for cs in inner.constraints + if isinstance(cs.constraint, ArrayMinLen) + ] + assert outer_min_lens == [3] + assert inner_min_lens == [2] + + def test_nested_newtype_chain_flattens_to_one_wrapper(self) -> None: + # Id = NewType("Id", Annotated[NoWhitespaceString, Field(min_length=1)]) + shape = _shape(Id) + assert isinstance(shape, NewTypeShape) + assert shape.name == "Id" + # exactly one NewTypeShape -- the inner NoWhitespaceString does not nest + assert not isinstance(shape.inner, NewTypeShape) + assert isinstance(shape.inner, Primitive) + assert shape.inner.base_type == "NoWhitespaceString" + + def test_nested_newtype_constraint_order_outer_first(self) -> None: + shape = _shape(Id) + names = [cs.source_name for cs in all_constraints(shape)] + # Id's own constraint precedes NoWhitespaceString's + assert names == ["Id", "NoWhitespaceString"] + + def test_newtype_nested_as_list_element_flattens_under_outer_newtype(self) -> None: + # A NewType chain collapses to one NewTypeShape (the outermost) even + # when an inner NewType is nested across a list boundary -- the inner + # name survives only as the terminal `base_type`. 
+ InnerElem = NewType("InnerElem", str) + OuterList = NewType("OuterList", list[InnerElem]) + shape = _shape(OuterList) + assert isinstance(shape, NewTypeShape) + assert shape.name == "OuterList" + assert isinstance(shape.inner, ArrayOf) + # the InnerElem NewType does NOT produce its own NewTypeShape + assert isinstance(shape.inner.element, Primitive) + assert shape.inner.element.base_type == "InnerElem" + + def test_sole_list_element_newtype_keeps_its_wrapper(self) -> None: + # With no outer NewType, a list-element NewType IS the outermost -- + # it keeps its NewTypeShape (guards against over-erasing). + ElemOnly = NewType("ElemOnly", str) + shape = _shape(list[ElemOnly]) + assert isinstance(shape, ArrayOf) + assert isinstance(shape.element, NewTypeShape) + assert shape.element.name == "ElemOnly" + + def test_newtype_inside_dict_value_is_an_independent_spine(self) -> None: + # `dict` key/value are independent spines: a NewType in the value + # keeps its wrapper even under an outer NewType, because erasure + # stops at MapOf. 
+ DictValue = NewType("DictValue", str) + DictWrap = NewType("DictWrap", dict[str, DictValue]) + shape = _shape(DictWrap) + assert isinstance(shape, NewTypeShape) + assert shape.name == "DictWrap" + assert isinstance(shape.inner, MapOf) + assert isinstance(shape.inner.value, NewTypeShape) + assert shape.inner.value.name == "DictValue" diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py index 154b39e2c..2df73cf2f 100644 --- a/packages/overture-schema-codegen/tests/test_type_collection.py +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -6,13 +6,10 @@ FeatureWithUrl, Instrument, TestSegmentWithSubModel, + feature_spec_for_model, has_name, lookup_by_name, ) -from overture.schema.codegen.extraction.model_extraction import ( - expand_model_tree, - extract_model, -) from overture.schema.codegen.extraction.specs import ( EnumSpec, ModelSpec, @@ -37,9 +34,7 @@ def _make_feature_with_sub_model(sub_model: type) -> type[BaseModel]: def _expanded_supplementary(model_class: type) -> dict[TypeIdentity, SupplementarySpec]: - spec = extract_model(model_class) - expand_model_tree(spec) - return collect_all_supplementary_types([spec]) + return collect_all_supplementary_types([feature_spec_for_model(model_class)]) class TestCollectAllSupplementarySpecs: @@ -77,11 +72,8 @@ def test_same_name_different_types_both_collected(self) -> None: ModelA = type("Address", (BaseModel,), {"__annotations__": {"x": str}}) ModelB = type("Address", (BaseModel,), {"__annotations__": {"y": int}}) - outer_a = extract_model(_make_feature_with_sub_model(ModelA)) - expand_model_tree(outer_a) - - outer_b = extract_model(_make_feature_with_sub_model(ModelB)) - expand_model_tree(outer_b) + outer_a = feature_spec_for_model(_make_feature_with_sub_model(ModelA)) + outer_b = feature_spec_for_model(_make_feature_with_sub_model(ModelB)) result = collect_all_supplementary_types([outer_a, outer_b]) diff 
--git a/packages/overture-schema-codegen/tests/test_type_placement.py b/packages/overture-schema-codegen/tests/test_type_placement.py index 63e26457c..8550a7319 100644 --- a/packages/overture-schema-codegen/tests/test_type_placement.py +++ b/packages/overture-schema-codegen/tests/test_type_placement.py @@ -11,7 +11,6 @@ lookup_by_name, make_union_spec, ) -from overture.schema.codegen.extraction.model_extraction import expand_model_tree from overture.schema.codegen.extraction.specs import ( AnnotatedField, FeatureSpec, @@ -45,9 +44,6 @@ def _build_registry( feature_specs: list[ModelSpec], ) -> tuple[dict[TypeIdentity, PurePosixPath], dict[TypeIdentity, SupplementarySpec]]: """Build placement registry with standard aggregate names.""" - cache: dict[type, ModelSpec] = {} - for spec in feature_specs: - expand_model_tree(spec, cache) all_specs = collect_all_supplementary_types(feature_specs) registry = build_placement_registry( feature_specs, all_specs, _NUMERIC_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT @@ -162,7 +158,7 @@ class A(Base): AnnotatedField( field_spec=FieldSpec( name="name", - type_info=STR_TYPE, + shape=STR_TYPE, description=None, is_required=True, ), diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py index b9d02d2ac..b2a4b45dc 100644 --- a/packages/overture-schema-codegen/tests/test_type_registry.py +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -1,7 +1,10 @@ """Tests for type registry.""" -import pytest -from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.codegen.extraction.field import ( + ArrayOf, + NewTypeShape, + Primitive, +) from overture.schema.codegen.extraction.type_registry import ( PRIMITIVE_TYPES, TypeMapping, @@ -11,33 +14,32 @@ class TestTypeMapping: - """Tests for TypeMapping dataclass.""" - - def test_typemapping_accepts_markdown(self) -> None: - """TypeMapping should construct with 
markdown field.""" - mapping = TypeMapping(markdown="int32") - - assert mapping.markdown == "int32" - - def test_for_target_returns_markdown(self) -> None: - """for_target should return markdown representation for markdown target.""" - mapping = TypeMapping(markdown="int32") - - assert mapping.for_target("markdown") == "int32" - - def test_for_target_rejects_unknown_target(self) -> None: - """for_target should raise ValueError for unknown targets.""" - mapping = TypeMapping(markdown="int32") - - with pytest.raises(ValueError, match="Unknown target 'scala'"): - mapping.for_target("scala") + def test_markdown_field(self) -> None: + assert TypeMapping(markdown="int32").markdown == "int32" + + def test_spark_type_mapping(self) -> None: + cases = [ + ("str", "StringType()"), + ("int32", "IntegerType()"), + ("int64", "LongType()"), + ("float64", "DoubleType()"), + ("bool", "BooleanType()"), + ("Geometry", "BinaryType()"), + ("float32", "FloatType()"), + ] + for type_name, expected in cases: + mapping = get_type_mapping(type_name) + assert mapping is not None, f"No mapping for {type_name!r}" + assert mapping.spark == expected + + def test_bbox_has_no_spark_mapping(self) -> None: + mapping = get_type_mapping("BBox") + assert mapping is not None + assert mapping.spark is None class TestPrimitiveTypes: - """Tests for PRIMITIVE_TYPES registry.""" - def test_registry_contains_expected_types(self) -> None: - """Registry should contain all expected primitive types.""" expected_types = { "int8", "int16", @@ -55,89 +57,48 @@ def test_registry_contains_expected_types(self) -> None: "Geometry", "BBox", } - assert set(PRIMITIVE_TYPES.keys()) == expected_types def test_bbox_mapping(self) -> None: - """BBox should map to bbox.""" bbox = PRIMITIVE_TYPES["BBox"] - assert bbox.markdown == "bbox" + assert bbox.spark is None class TestGetTypeMapping: - """Tests for get_type_mapping function.""" - def test_returns_mapping_for_known_type(self) -> None: - """Should return TypeMapping for known 
primitive type.""" - result = get_type_mapping("int32") - - assert result is not None - assert result.markdown == "int32" + assert get_type_mapping("int32").markdown == "int32" # type: ignore[union-attr] def test_returns_none_for_unknown_type(self) -> None: - """Should return None for unknown type names.""" - result = get_type_mapping("unknown_type") - - assert result is None + assert get_type_mapping("unknown_type") is None def test_returns_mapping_for_builtin_int(self) -> None: - """Should map Python int to int64.""" - result = get_type_mapping("int") - - assert result is not None - assert result.markdown == "int64" - - def test_returns_mapping_for_builtin_float(self) -> None: - """Should map Python float to float64.""" - result = get_type_mapping("float") - - assert result is not None - assert result.markdown == "float64" - + assert get_type_mapping("int").markdown == "int64" # type: ignore[union-attr] -class TestResolveTypeNameNewTypeFallback: - """Tests for resolve_type_name with unregistered NewTypes.""" +class TestResolveTypeName: def test_unregistered_newtype_falls_back_to_source_type(self) -> None: - """Unregistered NewType resolves to source_type name.""" - ti = TypeInfo( - base_type="Sources", - kind=TypeKind.MODEL, - newtype_name="Sources", - source_type=type("SourceItem", (), {}), + cls = type("SourceItem", (), {}) + shape = NewTypeShape( + name="Sources", + ref=object(), + inner=Primitive(base_type="Sources", source_type=cls), ) - result = resolve_type_name(ti, "markdown") - - assert result == "SourceItem" + assert resolve_type_name(shape) == "SourceItem" - def test_registered_newtype_unaffected(self) -> None: - """Registered NewType (int32) still resolves through the registry.""" - ti = TypeInfo( - base_type="int32", - kind=TypeKind.PRIMITIVE, - newtype_name="int32", - source_type=int, + def test_registered_newtype_resolves_via_registry(self) -> None: + shape = NewTypeShape( + name="int32", + ref=object(), + inner=Primitive(base_type="int32", 
source_type=int), ) - result = resolve_type_name(ti, "markdown") - - assert result == "int32" + assert resolve_type_name(shape) == "int32" + def test_plain_scalar(self) -> None: + assert ( + resolve_type_name(Primitive(base_type="str", source_type=str)) == "string" + ) -class TestResolveTypeName: - """Tests for resolve_type_name with list/optional flags.""" - - def _make_type_info(self, **kwargs: object) -> TypeInfo: - defaults = {"base_type": "str", "kind": TypeKind.PRIMITIVE} - defaults.update(kwargs) - return TypeInfo(**defaults) # type: ignore[arg-type] - - def test_ignores_list_depth(self) -> None: - """resolve_type_name returns the base type regardless of list_depth.""" - ti = self._make_type_info(list_depth=1) - assert resolve_type_name(ti, "markdown") == "string" - - def test_ignores_is_optional(self) -> None: - """resolve_type_name returns the base type regardless of is_optional.""" - ti = self._make_type_info(is_optional=True) - assert resolve_type_name(ti, "markdown") == "string" + def test_array_of_scalar_resolves_terminal(self) -> None: + shape = ArrayOf(element=Primitive(base_type="str", source_type=str)) + assert resolve_type_name(shape) == "string" diff --git a/packages/overture-schema-codegen/tests/test_union_extraction.py b/packages/overture-schema-codegen/tests/test_union_extraction.py index a8b685c48..42b5e0c43 100644 --- a/packages/overture-schema-codegen/tests/test_union_extraction.py +++ b/packages/overture-schema-codegen/tests/test_union_extraction.py @@ -5,11 +5,14 @@ RailSegment, RoadSegment, SegmentBase, + TestEnumDiscriminatorUnion, TestSegment, + TestSegmentDivergingConstraints, WaterSegment, ) from overture.schema.codegen.extraction.specs import FieldSpec, UnionSpec from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.common.scoping.vehicle import VehicleSelector class TestExtractUnion: @@ -51,19 +54,19 @@ def test_shared_fields_first(self, segment_spec: UnionSpec) -> None: def 
test_variant_specific_fields_have_sources( self, segment_spec: UnionSpec ) -> None: - """Variant-only fields carry their source class names.""" + """Variant-only fields carry their source classes.""" speed = next( af for af in segment_spec.annotated_fields if af.field_spec.name == "speed_limit" ) - assert speed.variant_sources == ("RoadSegment",) + assert speed.variant_sources == (RoadSegment,) gauge = next( af for af in segment_spec.annotated_fields if af.field_spec.name == "rail_gauge" ) - assert gauge.variant_sources == ("RailSegment",) + assert gauge.variant_sources == (RailSegment,) def test_heterogeneous_same_name_produces_separate_rows( self, segment_spec: UnionSpec @@ -74,8 +77,8 @@ def test_heterogeneous_same_name_produces_separate_rows( ] assert len(class_fields) == 2 sources = {af.variant_sources for af in class_fields} - assert ("RoadSegment",) in sources - assert ("RailSegment",) in sources + assert (RoadSegment,) in sources + assert (RailSegment,) in sources def test_members_lists_all_member_classes(self, segment_spec: UnionSpec) -> None: """UnionSpec.members contains all union member classes.""" @@ -89,3 +92,43 @@ def test_fields_property_returns_plain_list(self, segment_spec: UnionSpec) -> No """spec.fields returns list[FieldSpec] without provenance.""" for f in segment_spec.fields: assert isinstance(f, FieldSpec) + + +class TestExtractDiscriminatorWithEnumLiterals: + """Discriminator mapping uses runtime string values for enum literals.""" + + @pytest.fixture + def spec(self) -> UnionSpec: + return extract_union("TestEnumDiscriminatorUnion", TestEnumDiscriminatorUnion) + + def test_discriminator_mapping_uses_enum_values(self, spec: UnionSpec) -> None: + """Mapping keys must be the Parquet-serialized string values, not enum repr.""" + assert spec.discriminator_mapping is not None + assert set(spec.discriminator_mapping.keys()) == {"car", "bike"} + + +class TestDivergingConstraints: + """Same-named fields with matching shape but diverging constraints 
fail loudly.""" + + def test_diverging_constraints_raise(self) -> None: + """A field shared by structure but not by constraints raises ValueError. + + `ShortNamesSegment` and `LongNamesSegment` both declare `aliases` + as `list[str] | None`, so the structural fingerprint collapses + them — but the `min_length` constraints differ. Dedup would + silently keep one member's `FieldSpec`, so extraction raises + instead. + """ + with pytest.raises(ValueError, match="diverging constraints"): + extract_union( + "TestSegmentDivergingConstraints", TestSegmentDivergingConstraints + ) + + +class TestUnionNameDerivation: + """Union name fallback when the caller passes a member class name.""" + + def test_name_derived_from_common_base(self) -> None: + """When name matches a member class, derive from common base minus 'Base' suffix.""" + spec = extract_union("VehicleAxleCountSelector", VehicleSelector) + assert spec.name == "VehicleSelector" diff --git a/packages/overture-schema-pyspark/README.md b/packages/overture-schema-pyspark/README.md new file mode 100644 index 000000000..ef13ce9e9 --- /dev/null +++ b/packages/overture-schema-pyspark/README.md @@ -0,0 +1,238 @@ +# overture-schema-pyspark + +PySpark validation expressions for Overture Maps data. Translates schema +constraints into composable PySpark Column expressions that validate +DataFrames and produce per-row, per-field error messages. + +Expression modules and the registry are generated by +[overture-schema-codegen](../overture-schema-codegen/). Regenerate after +schema changes rather than editing the generated output. 
+ +## Usage + +### Python API + +```python +from pyspark.sql import SparkSession + +from overture.schema.pyspark import validate_feature, explain_errors + +spark = SparkSession.builder.getOrCreate() +df = spark.read.parquet("samples/segment.parquet") + +result = validate_feature(df, "segment") + +result.evaluated.cache() +total_rows = result.evaluated.count() +error_count = result.error_rows().count() +print(f"{error_count} / {total_rows} rows with errors") + +if error_count > 0: + violations = explain_errors(result.evaluated, result.checks) + violations.select("id", "field", "check", "message").show(truncate=False) +``` + +`validate_feature()` looks up the feature type in the registry, compares +schemas, and evaluates all checks in a single pass. It returns a +`ValidationResult` with the evaluated DataFrame, the checks that ran, +any schema mismatches, and suppressed checks. + +| Function | Returns | Description | +| --- | --- | --- | +| `validate_feature(df, type)` | `ValidationResult` | Registry lookup, schema comparison, check evaluation. | +| `result.error_rows()` | `DataFrame` | Rows with at least one violation. Original columns only. | +| `explain_errors(evaluated, checks)` | `DataFrame` | One row per violation. Adds `field`, `check`, `message` columns. | +| `feature_names()` | `list[str]` | Available feature type names, sorted. | + +Lower-level helpers (`evaluate_checks`, `filter_errors`) are available +for consumers needing finer control. All public symbols are re-exported +from `overture.schema.pyspark`. 
+ +### CLI + +```bash +# Validate and show first 20 error rows (default) +overture-validate segment samples/segment.parquet + +# Custom output path, show first 50 violations +overture-validate segment samples/segment.parquet -o errors.parquet --head 50 + +# Count errors only (skip unpivot/explain) +overture-validate segment samples/segment.parquet --count-only + +# Pass Spark config +overture-validate segment samples/segment.parquet \ + --conf spark.master=local[4] + +# Continue past schema mismatches (e.g. Float vs Double on bbox) +overture-validate place s3a://overturemaps-us-west-2/release/2026-02-18.0 \ + --skip-schema-check + +# Skip checks for a column absent from the data +overture-validate segment data.parquet --skip-columns connector_ids + +# Ignore extra columns in the data that aren't in the schema +overture-validate segment data.parquet --ignore-extra-columns my_custom_col + +# Suppress all checks on a field +overture-validate segment data.parquet --suppress sources + +# Suppress a specific check (FIELD:CHECK) +overture-validate segment data.parquet --suppress version:bounds +``` + +The output Parquet contains one row per violation with the original columns +(minus geometry, if present) plus `field`, `check`, and `message`. Summary and the +first N violations print to the terminal; the full set is in the Parquet +file for further analysis. + +| Option | Description | +| --- | --- | +| `--skip-schema-check` | Warn on schema mismatches instead of aborting. | +| `--skip-columns COL` | Declare a column absent from the data; skips its checks and schema comparison. Repeatable. | +| `--ignore-extra-columns COL` | Ignore an extra data column not in the expected schema. Repeatable. | +| `--suppress SPEC` | Suppress checks. `FIELD` suppresses all checks on that root field; `FIELD:CHECK` suppresses one specific check. Repeatable. | +| `--count-only` | Report error count only; skip the explain/unpivot step. | +| `--conf KEY=VALUE` | Spark config pair. Repeatable. 
Overrides S3A defaults. | +| `-o`, `--output PATH` | Write violations to a Parquet file. | +| `--head N` | Number of error rows to display (default: 20). | + +### Path resolution + +The CLI resolves the input path to a Parquet read plan based on its +structure: + +| Path shape | Example | Behavior | +| --- | --- | --- | +| Hive partition path (contains `/theme=`) | `.../theme=transportation/type=segment/` | Reads directly; derives `basePath` so Spark discovers partition columns. | +| Individual file | `segment.parquet` | Reads directly; data already contains `theme`/`type` columns. | +| Release root | `s3a://overturemaps-us-west-2/release/2026-02-18.0` | Appends `theme={theme}/type={type}` using the schema's theme mapping; sets `basePath` to the original path. | + +This means you can point the CLI at a release root and it constructs the +full Hive path automatically: + +```bash +# These are equivalent: +overture-validate segment s3a://overturemaps-us-west-2/release/2026-02-18.0 +overture-validate segment s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ +``` + +### Reading from S3 + +Paths starting with `s3a://` are detected automatically. The CLI +configures `hadoop-aws`, the S3A filesystem implementation, and +anonymous credentials -- no setup required for public buckets like +the Overture release bucket: + +```bash +overture-validate segment \ + s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ +``` + +To use named AWS credentials instead of anonymous access: + +```bash +overture-validate segment \ + s3a://overturemaps-us-west-2/release/2026-02-18.0/theme=transportation/type=segment/ \ + --conf spark.hadoop.fs.s3a.aws.credentials.provider=software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider +``` + +Any `--conf` values override the S3A defaults. 
+ +## Architecture + +```text +validate_feature() Entry point -- registry lookup, schema check, evaluation + | + list[Check] Interface -- frozen (field, name, expr, shape, root_field) records + | + expression builders Translation -- schema constraints to Column expressions + (generated by registered in REGISTRY + overture-schema-codegen) + | + column_patterns / Reusable PySpark building blocks + constraint_expressions +``` + +**Check** is the interface between expression builders and composition. +Each `Check` carries a PySpark `Column` expression (unevaluated), a `field` +name for error grouping, a `name` identifying the check type (e.g. +`"required"`, `"bounds"`, `"enum"`), a `shape` tag (`SCALAR` or `ARRAY`) +that tells `evaluate_checks()` how to normalize the result, and a `root_field` naming the top-level column the check belongs to (`None` for model-level checks). + +Expression builders (like `connector_checks()`) are generated by +`overture-schema-codegen` from Pydantic schema models and registered in +`REGISTRY` by feature type name, paired with an expected `StructType` +schema via `FeatureValidation`. + +## Generated expression builders + +Expression builders return `list[Check]`. The generated code uses constraint +expressions for common patterns and column patterns for structural wrappers. 
+Here's what the generated output looks like, using connector as an example: + +```python +from pyspark.sql import functions as F + +from overture.schema.pyspark.check import Check, CheckShape +from overture.schema.pyspark.expressions.column_patterns import array_check +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_bounds, + check_enum, + check_array_min_length, + check_required, +) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check( + "sources", + lambda el: check_required(el["dataset"]), + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) +``` + +The registry maps feature type names to `FeatureValidation` pairs: + +```python +from overture.schema.pyspark.check import FeatureValidation +from overture.schema.pyspark._registry import REGISTRY + +# REGISTRY is auto-generated: +# REGISTRY["connector"] = FeatureValidation(schema=CONNECTOR_SCHEMA, checks=connector_checks) +``` diff --git a/packages/overture-schema-pyspark/pyproject.toml b/packages/overture-schema-pyspark/pyproject.toml new file mode 100644 index 000000000..1206ee7ad --- /dev/null +++ b/packages/overture-schema-pyspark/pyproject.toml @@ -0,0 +1,27 @@ +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling"] + +[project] +dependencies = [ + "click>=8.0", + 
"overture-schema-system", + "pyspark>=3.4", +] +description = "PySpark validation expressions for Overture Maps data" +dynamic = ["version"] +license = "MIT" +name = "overture-schema-pyspark" +requires-python = ">=3.10" + +[project.scripts] +overture-validate = "overture.schema.pyspark.cli:validate_cli" + +[tool.hatch.build.targets.wheel] +packages = ["src/overture"] + +[tool.hatch.version] +path = "src/overture/schema/pyspark/__about__.py" + +[tool.uv.sources] +overture-schema-system = { workspace = true } diff --git a/packages/overture-schema-pyspark/src/overture/__init__.py b/packages/overture-schema-pyspark/src/overture/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-pyspark/src/overture/schema/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py new file mode 100644 index 000000000..3dc1f76bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__about__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py new file mode 100644 index 000000000..cb262b3d7 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/__init__.py @@ -0,0 +1,27 @@ +"""PySpark validation expressions for Overture Maps data.""" + +from .check import Check, CheckShape +from .schema_check import 
SchemaMismatch, compare_schemas +from .validate import ( + ValidationResult, + evaluate_checks, + explain_errors, + feature_keys, + feature_names, + filter_errors, + validate_feature, +) + +__all__ = [ + "Check", + "CheckShape", + "SchemaMismatch", + "ValidationResult", + "compare_schemas", + "evaluate_checks", + "explain_errors", + "feature_keys", + "feature_names", + "filter_errors", + "validate_feature", +] diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py new file mode 100644 index 000000000..85158fb79 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/_registry.py @@ -0,0 +1,67 @@ +"""Runtime registry of feature validations. + +Built at import time by walking the generated `expressions.generated` +namespace and collecting every module that exposes the +codegen-emitted `ENTRY_POINT` and `FEATURE_VALIDATION` constants. + +The generated tree on disk is the runtime source of truth: the +registry contains exactly what was generated, regardless of which +theme packages are installed alongside the pyspark package. A missing +`expressions/generated/` subtree simply yields an empty registry -- +the package still imports cleanly. +""" + +from __future__ import annotations + +import importlib +import logging +import pkgutil + +from .check import FeatureValidation + +logger = logging.getLogger(__name__) + +_GENERATED_ROOT = "overture.schema.pyspark.expressions.generated" + + +def _walk() -> tuple[dict[str, FeatureValidation], dict[str, dict[str, str]]]: + """Walk the generated tree and collect registry + partition map. + + Returns a `(registry, partition_map)` pair: + + * `registry` keys every feature by its `ENTRY_POINT` value. + * `partition_map` keys partitioned features by entry-point, mapping + to a Hive partition dict (e.g. `{"theme": "places", "type": + "place"}`) for path construction. 
Features with no `PARTITIONS` + data (empty dict) are omitted; the codegen only sets `PARTITIONS` + when the data lake organizes the feature by Hive partitions. + `type` is appended here from the module file name so consumers + get a complete partition path without the codegen having to + duplicate the type value. + """ + registry: dict[str, FeatureValidation] = {} + partition_map: dict[str, dict[str, str]] = {} + + try: + root = importlib.import_module(_GENERATED_ROOT) + except ImportError: + return registry, partition_map + + for info in pkgutil.walk_packages(root.__path__, prefix=root.__name__ + "."): + if info.ispkg: + continue + module = importlib.import_module(info.name) + entry_point = getattr(module, "ENTRY_POINT", None) + validation = getattr(module, "FEATURE_VALIDATION", None) + if entry_point is None or validation is None: + continue + registry[entry_point] = validation + partitions = getattr(module, "PARTITIONS", None) or {} + if partitions: + feature_type = info.name.rsplit(".", 1)[-1] + partition_map[entry_point] = {**partitions, "type": feature_type} + + return registry, partition_map + + +REGISTRY, PARTITION_MAP = _walk() diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py new file mode 100644 index 000000000..de6e5f955 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/check.py @@ -0,0 +1,49 @@ +"""Check dataclass — interface between expression builders and composition.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from enum import Enum + +from pyspark.sql import Column +from pyspark.sql.types import StructType + +from overture.schema.system.primitive import GeometryType + + +class CheckShape(Enum): + """How the composition layer handles a check expression.""" + + SCALAR = "scalar" # expression returns nullable string + ARRAY = "array" # expression 
returns array + + +@dataclass(frozen=True) +class Check: + """One validation check. + + `field` identifies what the check is about (for error column naming + and report grouping), not how to access the data. The expression in + `expr` already encodes the access pattern. + + `root_field` is the top-level schema column the check belongs to, + or None for synthetic model-level checks (radio_group, require_any_of) + that don't correspond to a single column. Used by `validate_feature` + to suppress or skip checks by column name. + """ + + field: str + name: str + expr: Column + shape: CheckShape + root_field: str | None + + +@dataclass(frozen=True) +class FeatureValidation: + """Pairs an expected schema with check builders for a feature type.""" + + schema: StructType + checks: Callable[[], list[Check]] + geometry_types: tuple[GeometryType, ...] = () diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py new file mode 100644 index 000000000..1a8ada445 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py @@ -0,0 +1,239 @@ +"""CLI entry point for validation.""" + +from __future__ import annotations + +import sys +from collections.abc import Mapping +from dataclasses import dataclass + +import click +from pyspark.sql import DataFrame, SparkSession + +from overture.schema.system.discovery import resolve_entry_point_key +from overture.schema.system.primitive import GeometryType + +from ._registry import PARTITION_MAP, REGISTRY +from .validate import ( + explain_errors, + feature_names, + validate_feature, +) + + +@dataclass(frozen=True) +class ReadSpec: + """Parquet read plan. + + `data_path` selects the files to read; `base_path`, when set, tells + Spark where to start discovering Hive partition columns. 
+ """ + + data_path: str + base_path: str | None = None + + +def resolve_read(path: str, partitions: Mapping[str, str] | None) -> ReadSpec: + """Determine read strategy from path structure. + + Three cases: + + 1. **Hive partition path** (contains `/{key}=` for some key in + `partitions`) -- derive `basePath` so Spark discovers partition + columns. + 2. **Individual file** (`*.parquet`) or no partitions -- read + directly; data already contains the partition columns inline. + 3. **Release root** -- append the partition path + (`key1=v1/key2=v2/...`) and set `basePath` to the original path. + """ + stripped = path.rstrip("/") + + # Path already contains Hive partition directories + for key in partitions or (): + idx = stripped.find(f"/{key}=") + if idx >= 0: + return ReadSpec(data_path=path, base_path=stripped[:idx]) + + # Individual file or no partition mapping — data has partition columns inline + if stripped.endswith(".parquet") or not partitions: + return ReadSpec(data_path=path) + + # Release root — construct leaf path from partition map + partition_path = "/".join(f"{k}={v}" for k, v in partitions.items()) + return ReadSpec( + data_path=f"{stripped}/{partition_path}", + base_path=stripped, + ) + + +def read_feature(spark: SparkSession, spec: ReadSpec) -> DataFrame: + """Read a DataFrame according to a ReadSpec.""" + reader = spark.read + if spec.base_path: + reader = reader.option("basePath", spec.base_path) + return reader.parquet(spec.data_path) + + +_S3A_DEFAULTS: dict[str, str] = { + "spark.jars.packages": "org.apache.hadoop:hadoop-aws:3.4.1", + "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "spark.hadoop.fs.s3a.aws.credentials.provider": ( + "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" + ), +} + +_LARGE_GEOMETRY_TYPES = frozenset( + { + GeometryType.LINE_STRING, + GeometryType.MULTI_LINE_STRING, + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + GeometryType.GEOMETRY_COLLECTION, + } +) + + +def 
_may_have_large_geometry(feature_key: str) -> bool: + """Whether a registered feature's geometries may be large. + + Returns True when the registered geometry types include + (multi)linestrings, (multi)polygons, or geometry collections, + or when geometry types are unspecified (safe default). + """ + validation = REGISTRY[feature_key] + if not validation.geometry_types: + return True + return bool(set(validation.geometry_types) & _LARGE_GEOMETRY_TYPES) + + +def _spark_config(path: str, conf: tuple[str, ...], feature_key: str) -> dict[str, str]: + """Build Spark config dict with safe defaults. + + Disables the vectorized Parquet reader for features with large + geometries (polygons, linestrings) to avoid OOM on WKB binary + columns. Adds S3A credentials for `s3a://` paths. User-supplied + `--conf` values override any defaults. + """ + config: dict[str, str] = {} + if _may_have_large_geometry(feature_key): + config["spark.sql.parquet.enableVectorizedReader"] = "false" + if path.startswith("s3a://"): + config.update(_S3A_DEFAULTS) + for pair in conf: + key, _, value = pair.partition("=") + config[key] = value + return config + + +@click.command("overture-validate") +@click.argument("feature_type") +@click.argument("path") +@click.option("-o", "--output", default=None, help="Output path for validated Parquet.") +@click.option( + "--head", + "head_n", + default=20, + type=int, + show_default=True, + help="Error rows to display.", +) +@click.option("--conf", multiple=True, help="Spark config key=value pairs.") +@click.option( + "--count-only", + is_flag=True, + default=False, + help="Report error count only; skip explain/unpivot.", +) +@click.option( + "--skip-schema-check", + is_flag=True, + default=False, + help="Warn on schema mismatches instead of aborting.", +) +@click.option( + "--skip-columns", + multiple=True, + help="Columns declared absent from data; skips their checks.", +) +@click.option( + "--ignore-extra-columns", + multiple=True, + help="Extra data columns 
to ignore in schema comparison.", +) +@click.option( + "--suppress", + "suppress_specs", + multiple=True, + help="Suppress checks: FIELD (all checks) or FIELD:CHECK (specific).", +) +def validate_cli( + feature_type: str, + path: str, + output: str | None, + head_n: int, + conf: tuple[str, ...], + count_only: bool, + skip_schema_check: bool, + skip_columns: tuple[str, ...], + ignore_extra_columns: tuple[str, ...], + suppress_specs: tuple[str, ...], +) -> None: + """Validate Overture data at PATH and write annotated Parquet.""" + try: + resolved = resolve_entry_point_key(feature_type, REGISTRY) + except ValueError: + click.echo( + f"Unknown type '{feature_type}'. Known: {', '.join(feature_names())}", + err=True, + ) + sys.exit(1) + + builder = SparkSession.builder + for key, value in _spark_config(path, conf, resolved).items(): + builder = builder.config(key, value) + spark = builder.getOrCreate() + spark.sparkContext.setLogLevel("ERROR") + + spec = resolve_read(path, PARTITION_MAP.get(resolved)) + df = read_feature(spark, spec) + + suppress: list[str | tuple[str, str]] = [] + for s in suppress_specs: + if ":" in s: + field, name = s.split(":", 1) + suppress.append((field, name)) + else: + suppress.append(s) + + try: + result = validate_feature( + df, + resolved, + skip_columns=skip_columns, + ignore_extra_columns=ignore_extra_columns, + suppress=suppress, + ) + except ValueError as e: + click.echo(str(e), err=True) + sys.exit(1) + + if result.schema_mismatches: + click.echo(f"Schema mismatches for {resolved}:", err=True) + for m in result.schema_mismatches: + click.echo(f" {m.path}: expected {m.expected}, got {m.actual}", err=True) + if not skip_schema_check: + sys.exit(1) + + total_rows, error_count = result.row_counts() + click.echo(f"{error_count} / {total_rows} rows with errors", err=True) + + if error_count > 0: + if not count_only: + explained = explain_errors(result.evaluated, result.checks).drop("geometry") + if output and head_n > 0: + explained = 
explained.cache() + if output: + explained.write.mode("overwrite").parquet(output) + click.echo(f"Written to {output}", err=True) + if head_n > 0: + explained.show(head_n, truncate=False) + sys.exit(1) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py new file mode 100644 index 000000000..572e57314 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/__init__.py @@ -0,0 +1 @@ +"""Expression builders and reusable PySpark column patterns.""" diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py new file mode 100644 index 000000000..3bc8ea809 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/_schema_structs.py @@ -0,0 +1,22 @@ +"""Hand-written Spark StructType fragments for types the codegen can't generate. + +The codegen builds feature schemas by walking Pydantic `BaseModel` +subclasses. `BBox` is a plain class, not a `BaseModel`, so extraction +can't reach it -- `BBOX_STRUCT` is hand-written here to fill the gap. +Every other nested type is a `BaseModel` and gets generated directly +into each feature module, which is why this file holds only the one +struct. 
+""" + +from __future__ import annotations + +from pyspark.sql.types import DoubleType, StructField, StructType + +BBOX_STRUCT = StructType( + [ + StructField("xmin", DoubleType(), True), + StructField("xmax", DoubleType(), True), + StructField("ymin", DoubleType(), True), + StructField("ymax", DoubleType(), True), + ] +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py new file mode 100644 index 000000000..c6d274790 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/column_patterns.py @@ -0,0 +1,94 @@ +"""Structural PySpark column patterns for validation expression composition. + +These functions provide reusable wrappers for array iteration, null +guarding, and error message construction. Expression builders and +constraint translators compose them; codegen calls them rather than +reimplementing the patterns. +""" + +from __future__ import annotations + +from collections.abc import Callable + +from pyspark.sql import Column +from pyspark.sql import functions as F + + +def error_msg(prefix: str, *value_cols: Column) -> Column: + """Build an error message: literal prefix followed by interpolated values.""" + return F.concat(F.lit(prefix), *value_cols) + + +def _resolve_column(column: str | Column) -> Column: + """Resolve a string column name to a Column, passing Column through.""" + return F.col(column) if isinstance(column, str) else column + + +def _null_guarded_transform( + col: Column, + check_fn: Callable[[Column], Column], + flatten: bool = False, +) -> Column: + """Null-guard, transform, optionally flatten, and compact. + + When `flatten=True`, null inner arrays are coalesced to empty before + flattening. 
`F.flatten` returns NULL whenever any inner array is + NULL, which would silently drop sibling errors -- inner `array_check` + legitimately returns NULL when its column is null (e.g. an optional + nested array that's absent on some elements but populated on others). + """ + transformed = F.transform(col, check_fn) + if flatten: + empty = F.array().cast("array<string>") + transformed = F.flatten( + F.transform(transformed, lambda inner: F.coalesce(inner, empty)) + ) + return F.when(col.isNotNull(), F.array_compact(transformed)) + + +def array_check(column: str | Column, check_fn: Callable[[Column], Column]) -> Column: + """Null-guard a column, transform its elements, compact out nulls. + + *check_fn* receives each array element and returns a string Column + (error message) or null. + """ + return _null_guarded_transform(_resolve_column(column), check_fn) + + +def nested_array_check( + column: str | Column, check_fn: Callable[[Column], Column] +) -> Column: + """Like `array_check` but flattens nested error arrays. + + Use when *check_fn* itself returns an `array<string>` (e.g. an + inner `array_check`). The outer transform produces + `array<array<string>>`; this function flattens to `array<string>` + before compacting nulls. + """ + return _null_guarded_transform(_resolve_column(column), check_fn, flatten=True) + + +def check_struct_unique(column: str | Column) -> Column: + """Check that an array has no duplicate items by whole-element comparison. + + Compares `size(col)` against `size(array_distinct(col))`. + `array_distinct` handles struct and nested-array elements natively + in Spark 3.4+. + + For string arrays (e.g. websites, socials), this compares raw values. + Pydantic's UniqueItemsConstraint on `list[HttpUrl]` compares + *normalized* URLs (adds trailing slash, lowercases host and scheme), + so it catches duplicates that differ only in normalization. This + check catches exact duplicates only — the difference is accepted. 
+ """ + col = _resolve_column(column) + has_duplicates = F.size(col) > F.size(F.array_distinct(col)) + return F.when( + col.isNotNull(), + F.when(has_duplicates, F.lit("contains duplicate items")), + ) + + +def coalesce_errors(check: Column) -> Column: + """Wrap an array-producing check so nulls become empty arrays.""" + return F.coalesce(check, F.array().cast("array<string>")) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py new file mode 100644 index 000000000..9982b1486 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/constraint_expressions.py @@ -0,0 +1,484 @@ +"""Constraint type to PySpark Column expression translation. + +Semantic translation layer: maps constraint parameters to Column +expressions that detect violations. Analogous to +`field_constraint_description.py` in overture-schema-codegen +(which maps constraints to prose). + +Each function takes a column accessor (`F.col("x")` or +`el["field"]`) and constraint parameters. Returns a Column that +evaluates to an error string on violation or null on success. Field +identity is carried structurally by `Check.field`, not embedded in +error messages. 
+""" + +from __future__ import annotations + +from collections.abc import Callable +from functools import reduce +from typing import Literal + +from pyspark.sql import Column +from pyspark.sql import functions as F + +from overture.schema.system.primitive import GeometryType + +from .column_patterns import error_msg + +_WKB_TYPE_HEX: dict[GeometryType, str] = { + GeometryType.POINT: "01", + GeometryType.LINE_STRING: "02", + GeometryType.POLYGON: "03", + GeometryType.MULTI_POINT: "04", + GeometryType.MULTI_LINE_STRING: "05", + GeometryType.MULTI_POLYGON: "06", + GeometryType.GEOMETRY_COLLECTION: "07", +} + + +_BOUND_OPS: dict[str, tuple[str, Callable[[Column, float | int], Column]]] = { + "ge": (">=", lambda c, v: c < v), + "gt": (">", lambda c, v: c <= v), + "le": ("<=", lambda c, v: c > v), + "lt": ("<", lambda c, v: c >= v), +} + + +def check_bounds( + col: Column, + *, + ge: float | int | None = None, + gt: float | int | None = None, + le: float | int | None = None, + lt: float | int | None = None, +) -> Column: + """Numeric bounds check. Returns error string or null.""" + checks: list[Column] = [] + for key, value in (("ge", ge), ("gt", gt), ("le", le), ("lt", lt)): + if value is None: + continue + symbol, violates = _BOUND_OPS[key] + checks.append( + F.when( + violates(col, value), + error_msg( + f"must be {symbol} {value}, got ", + col.cast("string"), + ), + ) + ) + if not checks: + return F.lit(None).cast("string") + # null col -> all F.when checks return null (no false positive) + return F.coalesce(*checks) + + +def check_enum( + col: Column, + allowed: list[str], +) -> Column: + """Enum membership check. Returns error string or null.""" + return F.when( + col.isNotNull() & ~col.isin(allowed), + error_msg("invalid value '", col.cast("string"), F.lit("'")), + ) + + +def check_required(col: Column) -> Column: + """Null check for required fields. 
Returns error string or null.""" + return F.when(col.isNull(), F.lit("missing (null)")) + + +def check_pattern(col: Column, pattern: str, *, label: str) -> Column: + """Regex pattern check via rlike. Returns error string or null. + + Parameters + ---------- + col + Column to validate. + pattern + Java regex pattern (use `\\z` for absolute end-of-input). + label + Human-readable description used in error messages: + `"invalid {label}: got '...'"` + """ + msg = error_msg(f"invalid {label}: got '", col.cast("string"), F.lit("'")) + return F.when(col.isNotNull() & ~col.rlike(pattern), msg) + + +def check_url_format(col: Column) -> Column: + """HTTP/HTTPS URL format check via pattern match. Returns error string or null. + + Pydantic's `HttpUrl` additionally normalizes values (adds trailing + slash, lowercases host and scheme) before validation and comparison. + This check validates the raw string without normalization — format + acceptance is broader, and downstream uniqueness checks compare + un-normalized values. + """ + return check_pattern(col, r"^https?://[^\s]+\z", label="HTTP/HTTPS URL") + + +def check_url_length(col: Column) -> Column: + """URL length check: must not exceed 2083 characters. Returns error string or null.""" + return F.when( + col.isNotNull() & (F.length(col) > 2083), + error_msg("URL exceeds 2083 characters: length ", F.length(col).cast("string")), + ) + + +def check_email(col: Column) -> Column: + """Email address format check. Returns error string or null.""" + return check_pattern( + col, + r"^[^\s@.]+(\.[^\s@.]+)*@([^\s@.]+\.)+[^\s@.]+\z", + label="email address", + ) + + +def _check_length( + col: Column, + measure: Column, + limit: int, + *, + direction: Literal["minimum", "maximum"], +) -> Column: + """Shared length-check logic for arrays and strings. + + *measure* is the pre-computed size/length column. + *direction* is `"minimum"` or `"maximum"`, controlling the + comparison operator and error label. 
+ """ + violation = measure < limit if direction == "minimum" else measure > limit + return F.when( + col.isNotNull() & violation, + error_msg(f"{direction} length {limit}, got ", measure.cast("string")), + ) + + +def check_array_min_length(col: Column, min_len: int) -> Column: + """Array minimum length check. Returns error string or null.""" + return _check_length(col, F.size(col), min_len, direction="minimum") + + +def check_array_max_length(col: Column, max_len: int) -> Column: + """Array maximum length check. Returns error string or null.""" + return _check_length(col, F.size(col), max_len, direction="maximum") + + +def check_string_min_length(col: Column, min_len: int) -> Column: + """String minimum character length check. Returns error string or null.""" + return _check_length(col, F.length(col), min_len, direction="minimum") + + +def check_string_max_length(col: Column, max_len: int) -> Column: + """String maximum character length check. Returns error string or null.""" + return _check_length(col, F.length(col), max_len, direction="maximum") + + +_STRIPPED_PATTERN = r"(?sU)^[^\s\p{Cc}](.*[^\s\p{Cc}])?\z" +r"""Java regex: reject whitespace AND control characters at string boundaries. + +Boundary class `[^\s\p{Cc}]` rejects two categories at the first and +last character positions: + +1. **Whitespace** (`\s` with `(?U)`): Unicode `White_Space` property + — space, tab, newline, NBSP, em-space, etc. +2. **Control characters** (`\p{Cc}`): Unicode "Control" category — + C0 (U+0000-001F), DEL (U+007F), and C1 (U+0080-009F). + +Why both are needed: Python's `\s` (and `str.strip()`) treats +U+001C-001F (file/group/record/unit separators) as whitespace. Java's +`\s` with `(?U)` follows the Unicode `White_Space` property, which +excludes those four characters. Using `\S` alone in Java misses them, +allowing strings like `"Main St \x1f"` to pass. 
Adding `\p{Cc}` +closes that gap and also rejects other control characters (NUL, SOH, +DEL, C1 controls) that have no place at string boundaries. + +Interior control characters (middle of the string) are NOT rejected — +the `.*` in the middle position still matches anything. Policing +interior content is a separate concern. + +Flags: `(?s)` (DOTALL) lets `.*` cross newlines. `(?U)` +(UNICODE_CHARACTER_CLASS) gives `\s` full Unicode coverage. `\z` +(absolute end-of-input) avoids `$` matching before a trailing newline. +""" + + +def check_stripped(col: Column) -> Column: + """No leading/trailing whitespace or control characters. Returns error string or null.""" + return F.when( + col.isNotNull() & (F.length(col) > 0) & ~col.rlike(_STRIPPED_PATTERN), + error_msg("leading/trailing whitespace"), + ) + + +def check_json_pointer(col: Column) -> Column: + """JSON Pointer (RFC 6901) format check. + + Valid pointers start with `/` or are the empty string (which + references the whole document). + """ + return F.when( + col.isNotNull() & (col != "") & ~col.startswith("/"), + error_msg( + "invalid JSON pointer, must start with /, got '", + col.cast("string"), + F.lit("'"), + ), + ) + + +def check_linear_range_length(col: Column) -> Column: + """Linear reference range length check: exactly 2 elements.""" + size = F.size(col) + return F.when( + col.isNotNull() & (size != 2), + error_msg("must have exactly 2 elements, got ", size.cast("string")), + ) + + +def check_linear_range_bounds(col: Column) -> Column: + """Linear reference range bounds check: both values in [0.0, 1.0]. + + The `F.size(col) == 2` guard skips wrong-length arrays so this + check only fires when exactly two elements are present. Length + validation is `check_linear_range_length`'s responsibility. 
+ """ + size = F.size(col) + v0, v1 = F.get(col, 0), F.get(col, 1) + return F.when( + col.isNotNull() + & (size == 2) + & ((v0 < 0.0) | (v0 > 1.0) | (v1 < 0.0) | (v1 > 1.0)), + error_msg( + "values must be in [0.0, 1.0], got [", + v0.cast("string"), + F.lit(", "), + v1.cast("string"), + F.lit("]"), + ), + ) + + +def check_linear_range_order(col: Column) -> Column: + """Linear reference range ordering check: start < end. + + The `F.size(col) == 2` guard skips wrong-length arrays so this + check only fires when exactly two elements are present. Length + validation is `check_linear_range_length`'s responsibility. + """ + size = F.size(col) + return F.when( + col.isNotNull() & (size == 2) & (F.get(col, 0) >= F.get(col, 1)), + error_msg("start must be < end"), + ) + + +def check_radio_group( + cols: list[Column], + field_names: list[str], +) -> Column: + """Exactly one of the given boolean columns must be True.""" + true_count = reduce( + lambda a, b: a + b, + (F.when(c, 1).otherwise(0) for c in cols), + ) + names = ", ".join(field_names) + return F.when( + true_count != 1, + error_msg( + f"exactly one of {names} must be true, got ", + true_count.cast("string"), + F.lit(" true"), + ), + ) + + +def _count_non_null(cols: list[Column]) -> Column: + """Sum of non-null indicators across *cols*.""" + return reduce( + lambda a, b: a + b, + (F.when(c.isNotNull(), 1).otherwise(0) for c in cols), + ) + + +def check_require_any_of( + cols: list[Column], + field_names: list[str], +) -> Column: + """At least one of the given columns must be non-null.""" + all_null = reduce(lambda a, b: a & b, (c.isNull() for c in cols)) + names = ", ".join(field_names) + return F.when(all_null, F.lit(f"requires at least one of {names}")) + + +def check_min_fields_set( + cols: list[Column], + field_names: list[str], + count: int, +) -> Column: + """At least *count* of the given columns must be non-null. + + Parameters + ---------- + cols + Column expressions to test for non-null. 
+ field_names + Human-readable names for each column, used in the error message. + count + Minimum number of non-null columns required. + + Returns + ------- + Column + Error string on violation, null on success. + """ + non_null_count = _count_non_null(cols) + names = ", ".join(field_names) + return F.when( + non_null_count < count, + error_msg( + f"at least {count} of {names} required, got ", + non_null_count.cast("string"), + F.lit(" non-null"), + ), + ) + + +def _check_conditional_presence( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, + expect_present: bool, +) -> Column: + """Shared logic for require_if / forbid_if. + + *expect_present=True* means target must be non-null when condition + holds (require); *False* means target must be null (forbid). + """ + word = "required" if expect_present else "forbidden" + target_test = target.isNull() if expect_present else target.isNotNull() + prefix = f"{word} when {condition_desc}" + if condition_value_cols: + interleaved = [ + p + for vc in condition_value_cols + for p in (F.lit(", got "), vc.cast("string")) + ] + msg = error_msg(prefix, *interleaved) + else: + msg = F.lit(prefix) + return F.when(condition & target_test, msg) + + +def check_require_if( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, +) -> Column: + """Target must be non-null when condition is true.""" + return _check_conditional_presence( + target, + condition, + condition_desc, + *condition_value_cols, + expect_present=True, + ) + + +def check_forbid_if( + target: Column, + condition: Column, + condition_desc: str, + *condition_value_cols: Column, +) -> Column: + """Target must be null when condition is true.""" + return _check_conditional_presence( + target, + condition, + condition_desc, + *condition_value_cols, + expect_present=False, + ) + + +def check_geometry_type( + col: Column, + *allowed: GeometryType, +) -> Column: + """Geometry type check via 
WKB header byte parsing. + + Reads the endianness indicator and type uint32 from the WKB binary + without deserializing coordinates. O(1) per row regardless of + geometry complexity. + + Extracts only the low byte of the type uint32, which is safe for + OGC types 1-7 and immune to Z/M/ZM flag bits (those modify high + bytes only). + """ + hex_geom = F.hex(col) + byte_order = F.substring(hex_geom, 1, 2) + # LE: type LSB at hex positions 3-4 + # BE: type LSB at hex positions 9-10 + type_hex = F.when( + byte_order == "01", + F.substring(hex_geom, 3, 2), + ).otherwise( + F.substring(hex_geom, 9, 2), + ) + allowed_hex = [_WKB_TYPE_HEX[t] for t in allowed] + names = " | ".join(t.geo_json_type for t in allowed) + if len(allowed_hex) == 1: + violation = type_hex != allowed_hex[0] + else: + violation = ~type_hex.isin(allowed_hex) + return F.when( + col.isNotNull() & violation, + error_msg(f"expected {names} geometry"), + ) + + +def check_bbox_completeness(col: Column) -> Column: + """Check that all bbox sub-fields are present when bbox is non-null.""" + return F.when( + col.isNotNull() + & ( + col["xmin"].isNull() + | col["ymin"].isNull() + | col["xmax"].isNull() + | col["ymax"].isNull() + ), + error_msg("bbox sub-fields must all be present"), + ) + + +def check_bbox_lat_ordering(col: Column) -> Column: + """Check that ymin does not exceed ymax.""" + return F.when( + col.isNotNull() & (col["ymin"] > col["ymax"]), + error_msg("expected ymin <= ymax"), + ) + + +def check_bbox_lat_range(col: Column) -> Column: + """Check that latitude values fall within [-90, 90].""" + return F.when( + col.isNotNull() + & ( + (col["ymin"] < -90) + | (col["ymin"] > 90) + | (col["ymax"] < -90) + | (col["ymax"] > 90) + ), + error_msg("latitude values must be in [-90, 90]"), + ) + + +# TODO: check_bbox_lon_ordering -- deferred pending antimeridian crossing +# policy. RFC 7946 section 5.2 allows xmin > xmax for bboxes that cross +# the antimeridian. 
+ +# TODO: check_bbox_lon_range -- deferred pending decision on whether +# coordinates can wrap beyond [-180, 180]. diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed b/packages/overture-schema-pyspark/src/overture/schema/pyspark/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py new file mode 100644 index 000000000..8376ff5b0 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/schema_check.py @@ -0,0 +1,109 @@ +"""Schema comparison for structural validation. + +Recursively diffs two `StructType` objects and reports mismatches +as a flat list with dot-notation paths. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from pyspark.sql.types import ( + ArrayType, + DataType, + MapType, + StructType, +) + + +@dataclass(frozen=True) +class SchemaMismatch: + """One structural difference between actual and expected schemas. + + Parameters + ---------- + path + Dot-notation path to the field (e.g. `"bbox.xmin"`). + actual + Actual type name, or `"missing"` if the field is absent. + expected + Expected type name, or `"missing"` if the field is unexpected. + """ + + path: str + actual: str + expected: str + + +def _type_name(dt: DataType) -> str: + """Short display name for a DataType (e.g. 
`"StringType"`).""" + return type(dt).__name__ + + +def _compare( + actual: DataType, + expected: DataType, + prefix: str, + out: list[SchemaMismatch], +) -> None: + """Recursively compare two DataType trees.""" + if isinstance(expected, StructType) and isinstance(actual, StructType): + _compare_structs(actual, expected, prefix, out) + return + + if isinstance(expected, ArrayType) and isinstance(actual, ArrayType): + _compare(actual.elementType, expected.elementType, f"{prefix}[]", out) + return + + if isinstance(expected, MapType) and isinstance(actual, MapType): + _compare(actual.keyType, expected.keyType, f"{prefix}{{key}}", out) + _compare(actual.valueType, expected.valueType, f"{prefix}{{value}}", out) + return + + if type(actual) is not type(expected): + out.append(SchemaMismatch(prefix, _type_name(actual), _type_name(expected))) + + +def _compare_structs( + actual: StructType, + expected: StructType, + prefix: str, + out: list[SchemaMismatch], +) -> None: + """Compare two StructTypes field by field.""" + actual_fields = {f.name: f for f in actual.fields} + expected_fields = {f.name: f for f in expected.fields} + + # Ordered union: actual fields first, then any expected-only fields appended. + all_names = dict.fromkeys([*actual_fields, *expected_fields]) + for name in all_names: + path = f"{prefix}.{name}" if prefix else name + a = actual_fields.get(name) + e = expected_fields.get(name) + if a is None and e is not None: + out.append(SchemaMismatch(path, "missing", _type_name(e.dataType))) + elif e is None and a is not None: + out.append(SchemaMismatch(path, _type_name(a.dataType), "missing")) + elif a is not None and e is not None: + _compare(a.dataType, e.dataType, path, out) + + +def compare_schemas(actual: StructType, expected: StructType) -> list[SchemaMismatch]: + """Compare two Spark schemas and return all mismatches. + + Parameters + ---------- + actual + Schema inferred from the data (e.g. `df.schema`). 
+ expected + Declared expected schema for the feature type. + + Returns + ------- + list[SchemaMismatch] + Empty when schemas match. Each mismatch identifies the + dot-notation path and what differs. + """ + out: list[SchemaMismatch] = [] + _compare_structs(actual, expected, "", out) + return out diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py new file mode 100644 index 000000000..9b03ed34b --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py @@ -0,0 +1,334 @@ +"""Validation pipeline for Overture feature data. + +`validate_feature()` is the primary entry point: it looks up the +feature type in the registry, compares schemas, filters checks, and +evaluates them in a single pass. Returns a `ValidationResult` +carrying the evaluated DataFrame and metadata. + +Lower-level helpers (`evaluate_checks`, `filter_errors`, +`explain_errors`) are available for consumers needing finer control. +""" + +from __future__ import annotations + +import re +from collections import Counter +from collections.abc import Iterable +from dataclasses import dataclass + +from pyspark.sql import DataFrame +from pyspark.sql import functions as F +from pyspark.sql.types import StringType, StructField, StructType + +from overture.schema.system.discovery import ( + entry_point_class_alias, + resolve_entry_point_key, +) + +from ._registry import REGISTRY +from .check import Check, CheckShape +from .expressions.column_patterns import coalesce_errors +from .schema_check import SchemaMismatch, compare_schemas + + +def feature_keys() -> list[str]: + """Canonical entry-point keys registered in the validation registry.""" + return sorted(REGISTRY) + + +def feature_names() -> list[str]: + """All names `validate_feature` accepts. 
+ + Includes canonical entry-point keys and the snake-case class-name + aliases the resolver recognizes (only when an alias is unambiguous). + """ + aliases = { + name + for name, count in Counter(entry_point_class_alias(k) for k in REGISTRY).items() + if count == 1 + } + return sorted(set(REGISTRY) | aliases) + + +def _normalize_suppress( + suppress: Iterable[str | tuple[str, str] | Check], +) -> tuple[set[str], set[tuple[str, str]]]: + """Partition suppress entries into root field names and (field, name) pairs. + + Parameters + ---------- + suppress + Mix of bare field name strings, `(field, name)` tuples, and + `Check` objects. + + Returns + ------- + tuple[set[str], set[tuple[str, str]]] + `(root_fields, pairs)` where `root_fields` is bare field names + and `pairs` is `(field, name)` pairs extracted from tuples and + Check objects. + """ + root_fields: set[str] = set() + pairs: set[tuple[str, str]] = set() + for entry in suppress: + if isinstance(entry, str): + root_fields.add(entry) + elif isinstance(entry, Check): + pairs.add((entry.field, entry.name)) + else: + pairs.add(entry) + return root_fields, pairs + + +# Matches the `_err_<N>` columns `evaluate_checks` appends; ordinary +# user columns starting with `_err_` (but not followed by digits only) +# are preserved. +_ERR_COLUMN = re.compile(r"^_err_\d+$") + + +def _non_error_columns(evaluated: DataFrame) -> list[str]: + """Column names excluding `_err_N` error columns appended by `evaluate_checks`.""" + return [c for c in evaluated.columns if not _ERR_COLUMN.match(c)] + + +def evaluate_checks(df: DataFrame, checks: list[Check]) -> DataFrame: + """Append `_err_N` columns for each check. + + Returns the input DataFrame with one `array<string>` column per check, + containing error messages (non-empty) or null/empty (no error). 
+ """ + error_cols = [] + for i, chk in enumerate(checks): + if chk.shape == CheckShape.SCALAR: + col = F.array_compact(F.array(chk.expr)) + else: + col = coalesce_errors(F.filter(chk.expr, lambda x: x.isNotNull())) + error_cols.append(col.cast("array<string>").alias(f"_err_{i}")) + return df.select("*", *error_cols) + + +def _max_error_size(n: int) -> F.Column: + """Build a Column for the largest `_err_N` array size across all checks. + + Use `greatest()` instead of chaining OR across all checks. A 255-check + OR tree triggers Spark's CommutativeExpression.orderCommutative during + plan canonicalization, which is O(n²+) and OOMs the driver. `greatest()` + is not a CommutativeExpression, so the optimizer skips that path. + + Caller must guarantee `n >= 1`. + """ + err_sizes = [F.coalesce(F.size(F.col(f"_err_{i}")), F.lit(0)) for i in range(n)] + return err_sizes[0] if n == 1 else F.greatest(*err_sizes) + + +def filter_errors(evaluated: DataFrame, checks: list[Check]) -> DataFrame: + """Filter an evaluated DataFrame to rows with at least one error. + + Parameters + ---------- + evaluated + DataFrame produced by `evaluate_checks()`. + checks + Same check list passed to `evaluate_checks()`. + + Returns + ------- + DataFrame + Original columns only (`_err_N` columns stripped). + """ + return evaluated.filter(_max_error_size(len(checks)) > 0).select( + *_non_error_columns(evaluated) + ) + + +def explain_errors(evaluated: DataFrame, checks: list[Check]) -> DataFrame: + """Unpivot evaluated error columns into one row per violation. + + Parameters + ---------- + evaluated + DataFrame produced by `evaluate_checks()`. + checks + Same check list passed to `evaluate_checks()`. + + Returns + ------- + DataFrame + Schema: `<original columns>, field, check, message`. 
+ """ + orig_cols = _non_error_columns(evaluated) + n = len(checks) + if n == 0: + empty_schema = StructType( + [ + *evaluated.select(*orig_cols).schema.fields, + StructField("field", StringType(), True), + StructField("check", StringType(), True), + StructField("message", StringType(), True), + ] + ) + return evaluated.sparkSession.createDataFrame([], empty_schema) + stack_args = ", ".join(f"{i}, `_err_{i}`" for i in range(n)) + unpivoted = evaluated.select( + *orig_cols, + F.expr(f"stack({n}, {stack_args}) as (_idx, _errors)"), + ).filter(F.col("_errors").isNotNull() & (F.size("_errors") > 0)) + + exploded = unpivoted.select( + *orig_cols, + "_idx", + F.explode("_errors").alias("message"), + ) + + meta_df = evaluated.sparkSession.createDataFrame( + [(i, c.field, c.name) for i, c in enumerate(checks)], + ["_idx", "field", "check"], + ) + + return exploded.join(F.broadcast(meta_df), "_idx").select( + *orig_cols, "field", "check", "message" + ) + + +@dataclass(frozen=True) +class ValidationResult: + """Result of validate_feature(). + + Consumer owns caching of `evaluated`. Call `error_rows()` for + the filtered view; use `explain_errors(result.evaluated, + result.checks)` for the opt-in UNPIVOT. + """ + + evaluated: DataFrame + checks: list[Check] + schema_mismatches: list[SchemaMismatch] + suppressed_checks: list[Check] + + def error_rows(self) -> DataFrame: + """Rows with at least one violation. Original columns only.""" + if not self.checks: + return self.evaluated.limit(0) + return filter_errors(self.evaluated, self.checks) + + def row_counts(self) -> tuple[int, int]: + """Count total and error rows in a single pass. + + Computes both counts with one aggregation over the evaluated + DataFrame, avoiding the need to cache before counting. + + Returns + ------- + tuple[int, int] + `(total_rows, error_rows)`. 
+ """ + if not self.checks: + return self.evaluated.count(), 0 + max_err = _max_error_size(len(self.checks)) + row = self.evaluated.agg( + F.count(F.lit(1)).alias("total"), + F.coalesce(F.sum(F.when(max_err > 0, 1).otherwise(0)), F.lit(0)).alias( + "errors" + ), + ).first() + assert row is not None # aggregation on a DataFrame always produces a row + return row["total"], row["errors"] + + +def validate_feature( + df: DataFrame, + feature_type: str, + *, + skip_columns: Iterable[str] = (), + ignore_extra_columns: Iterable[str] = (), + suppress: Iterable[str | tuple[str, str] | Check] = (), +) -> ValidationResult: + """Validate a DataFrame against a registered feature type. + + Parameters + ---------- + df + Input DataFrame to validate. + feature_type + Registered feature type name (e.g. `"building"`). + skip_columns + Columns declared absent from the data. Raises `ValueError` + if any are present in `df.columns`. + ignore_extra_columns + Columns that may be present in the data but absent from the + expected schema. + suppress + Checks to remove before evaluation. Bare strings suppress by + root field; tuples by exact `(field, name)`; Check objects + by extracting `(field, name)`. Raises `ValueError` if any + entry doesn't match a registered check. + + Raises + ------ + ValueError + If `feature_type` isn't registered. Message includes the + sorted list of known types. 
+ """ + feature_type = resolve_entry_point_key(feature_type, REGISTRY) + validation = REGISTRY[feature_type] + skip = frozenset(skip_columns) + ignore_extra = frozenset(ignore_extra_columns) + suppress_roots, suppress_pairs = _normalize_suppress(suppress) + + # Validate skip_columns are actually absent + present = skip & set(df.columns) + if present: + raise ValueError( + f"skip_columns {sorted(present)} are present in the " + f"DataFrame; remove them from skip_columns or drop them " + f"from the data" + ) + + # Schema comparison with filtering + raw_mismatches = compare_schemas(df.schema, validation.schema) + mismatches = [] + for m in raw_mismatches: + root = m.path.split(".", 1)[0] + if root in skip: + continue + if m.expected == "missing" and root in ignore_extra: + continue + mismatches.append(m) + + # Validate suppress entries match real checks before filtering + all_checks = validation.checks() + valid_roots = {c.root_field for c in all_checks if c.root_field is not None} + valid_pairs = {(c.field, c.name) for c in all_checks} + unmatched_roots = suppress_roots - valid_roots + unmatched_pairs = suppress_pairs - valid_pairs + if unmatched_roots or unmatched_pairs: + parts = [] + if unmatched_roots: + parts.append(f"unknown root fields {sorted(unmatched_roots)}") + if unmatched_pairs: + parts.append(f"unknown (field, name) pairs {sorted(unmatched_pairs)}") + raise ValueError( + f"suppress entries don't match any check for {feature_type!r}: " + + "; ".join(parts) + ) + + # Check filtering + kept: list[Check] = [] + suppressed: list[Check] = [] + for chk in all_checks: + if chk.root_field is not None and chk.root_field in skip: + continue # structurally absent, not tracked in suppressed + if chk.root_field is not None and chk.root_field in suppress_roots: + suppressed.append(chk) + continue + if (chk.field, chk.name) in suppress_pairs: + suppressed.append(chk) + continue + kept.append(chk) + + evaluated = evaluate_checks(df, kept) + return ValidationResult( + 
evaluated=evaluated, + checks=kept, + schema_mismatches=mismatches, + suppressed_checks=suppressed, + ) diff --git a/packages/overture-schema-pyspark/tests/__init__.py b/packages/overture-schema-pyspark/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/_support/__init__.py b/packages/overture-schema-pyspark/tests/_support/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/_support/harness.py b/packages/overture-schema-pyspark/tests/_support/harness.py new file mode 100644 index 000000000..b21320bb3 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/_support/harness.py @@ -0,0 +1,246 @@ +"""Validation harness for generated conformance tests. + +Builds a single DataFrame per feature type from scenario mutations, +runs validation once, and indexes violations by scenario ID. +""" + +from __future__ import annotations + +import copy +import uuid +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Any + +from overture.schema.pyspark.check import Check +from overture.schema.pyspark.validate import evaluate_checks, explain_errors +from pyspark.sql import SparkSession +from pyspark.sql.types import StringType, StructField, StructType +from shapely import wkb, wkt + +from .helpers import PathTraversalError, deep_merge +from .scenarios import Scenario + +# Namespace for `_scenario_id` UUIDs. Distinct from +# `overture.schema.codegen.pyspark.test_data.base_row._BASE_ROW_NAMESPACE` +# (which synthesizes feature `id` values) so a feature `id` can never +# collide with a scenario tag and confuse the violations index. 
+_NAMESPACE = uuid.UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + + +@dataclass(frozen=True) +class ValidationResults: + """Named return type from run_validation_pipeline.""" + + violations: dict[str, set[tuple[str, str]]] + skipped: dict[str, str] + + +def scenario_uuid(scenario_id: str) -> str: + """Deterministic UUID for the harness's `_scenario_id` tag.""" + return str(uuid.uuid5(_NAMESPACE, scenario_id)) + + +def build_scenario_map( + scenarios: Sequence[Scenario], + *, + feature_name: str, +) -> dict[str, str]: + """Map _scenario_id values to human-readable scenario IDs. + + Parameters + ---------- + scenarios + All scenarios for a feature type. + feature_name + Feature name for the baseline row ID. + + Returns + ------- + dict[str, str] + Maps _scenario_id UUID string -> scenario ID. Includes baseline. + + Raises + ------ + ValueError + If two scenarios would produce the same UUID key. + """ + baseline_id = f"{feature_name}::baseline" + scenario_map: dict[str, str] = {scenario_uuid(baseline_id): baseline_id} + + for s in scenarios: + for suffix in ("::valid", "::invalid"): + label = f"{s.id}{suffix}" + key = scenario_uuid(label) + if key in scenario_map: + raise ValueError( + f"Duplicate scenario id {key!r}: {scenario_map[key]!r} and {label!r}" + ) + scenario_map[key] = label + + return scenario_map + + +def build_scenario_rows( + base_row: dict[str, Any], + scenarios: Sequence[Scenario], + *, + feature_name: str, +) -> tuple[list[dict[str, Any]], dict[str, str], dict[str, str]]: + """Build mutation rows and scenario mapping from scenarios. + + Parameters + ---------- + base_row + Valid base row dict from the example loader. + scenarios + Scenarios to apply. + feature_name + Feature name for baseline ID and UUID namespace. + + Returns + ------- + tuple + (rows, scenario_map, skipped) where rows is a list of row dicts, + scenario_map maps _scenario_id values to scenario IDs, and skipped + maps scenario IDs to skip reasons. 
+ """ + scenario_map = build_scenario_map(scenarios, feature_name=feature_name) + base_row = sanitize_row(base_row) + # Deep-copy every row so nested structures aren't aliased with base_row; + # a future in-place mutation of one row would otherwise leak across rows. + rows: list[dict[str, Any]] = [ + { + **copy.deepcopy(base_row), + "_scenario_id": scenario_uuid(f"{feature_name}::baseline"), + } + ] + skipped: dict[str, str] = {} + + for s in scenarios: + try: + invalid_row = sanitize_row(s.mutate(deep_merge(base_row, s.scaffold))) + invalid_row["_scenario_id"] = scenario_uuid(f"{s.id}::invalid") + rows.append( + { + **copy.deepcopy(base_row), + "_scenario_id": scenario_uuid(f"{s.id}::valid"), + } + ) + rows.append(invalid_row) + except PathTraversalError as e: + skipped[s.id] = str(e) + + return rows, scenario_map, skipped + + +_WKT_PREFIXES = ( + "POINT", + "LINESTRING", + "POLYGON", + "MULTIPOINT", + "MULTILINESTRING", + "MULTIPOLYGON", + "GEOMETRYCOLLECTION", +) + +# Schema field whose string value should be parsed as WKT and re-emitted as +# WKB (the storage representation Spark's BinaryType expects). +_GEOMETRY_FIELD = "geometry" + + +def sanitize_row(row: dict[str, Any]) -> dict[str, Any]: + """Return a deep copy of `row` with WKT geometry strings converted to WKB. + + Geometry values from TOML examples are WKT strings, but the schema + expects BinaryType (WKB). Walks the row recursively; any string at + the `geometry` key that looks like WKT is converted via shapely. 
+ """ + return _sanitize_in_place(copy.deepcopy(row)) + + +def _sanitize_in_place(d: dict[str, Any]) -> dict[str, Any]: + for key, value in d.items(): + if isinstance(value, dict): + d[key] = _sanitize_in_place(value) + elif isinstance(value, list): + d[key] = [ + _sanitize_in_place(item) if isinstance(item, dict) else item + for item in value + ] + elif ( + key == _GEOMETRY_FIELD + and isinstance(value, str) + and value.upper().startswith(_WKT_PREFIXES) + ): + d[key] = wkb.dumps(wkt.loads(value)) + return d + + +def assert_schema_covers_checks(schema: StructType, checks: list[Check]) -> None: + """Assert every check's root field exists in the schema. + + Synthetic model-level checks (`root_field=None`) pass + unconditionally. Otherwise the root must be a top-level schema + column. This is a fast sanity check; deeper field paths are the + codegen's responsibility and surface at Spark execution time. + """ + top_level = {f.name for f in schema.fields} + for chk in checks: + if chk.root_field is None or chk.root_field in top_level: + continue + raise AssertionError( + f"Check references root field {chk.root_field!r} " + f"not found in schema. Available: {sorted(top_level)}" + ) + + +def run_validation_pipeline( + spark: SparkSession, + schema: StructType, + checks: list[Check], + base_row: dict[str, Any], + scenarios: Sequence[Scenario], + feature_name: str, +) -> ValidationResults: + """Run the full validation pipeline. + + Returns a ValidationResults with violations indexed by scenario ID and + a skipped dict for scenarios that could not be built due to path + traversal errors. 
+ """ + assert_schema_covers_checks(schema, checks) + rows, scenario_map, skipped = build_scenario_rows( + base_row, scenarios, feature_name=feature_name + ) + augmented_schema = StructType( + schema.fields + [StructField("_scenario_id", StringType(), True)] + ) + df = spark.createDataFrame(rows, schema=augmented_schema, verifySchema=False) # type: ignore[union-attr] + violations = explain_errors(evaluate_checks(df, checks), checks) + return ValidationResults( + violations=index_violations(violations.collect(), scenario_map), + skipped=skipped, + ) + + +def index_violations( + violation_rows: list[Any], + scenario_map: dict[str, str], +) -> dict[str, set[tuple[str, str]]]: + """Index collected violation rows by human-readable scenario ID. + + Parameters + ---------- + violation_rows + Collected rows from `explain().collect()`. + scenario_map + Mapping from _scenario_id values to scenario IDs. + """ + result: dict[str, set[tuple[str, str]]] = {} + for row in violation_rows: + scenario_id = scenario_map.get(row["_scenario_id"]) + if scenario_id is None: + continue + result.setdefault(scenario_id, set()).add((row["field"], row["check"])) + return result diff --git a/packages/overture-schema-pyspark/tests/_support/helpers.py b/packages/overture-schema-pyspark/tests/_support/helpers.py new file mode 100644 index 000000000..2551e4b8b --- /dev/null +++ b/packages/overture-schema-pyspark/tests/_support/helpers.py @@ -0,0 +1,135 @@ +"""Low-level utilities for the conformance test harness. + +Internal to the harness — not imported directly by generated test files. +""" + +from __future__ import annotations + +import copy +from collections.abc import Callable +from typing import Any + +from overture.schema.system.field_path import ArraySegment, FieldPath, coerce + + +def deep_merge(base: dict, scaffold: dict) -> dict: + """Recursively merge scaffold onto a deep copy of base. + + Dict values merge recursively. 
All other values (including lists) + in scaffold replace the corresponding base values; scaffold values + are deep-copied so callers cannot accidentally share state with + the merged result. Keys present in base but absent from scaffold + are preserved. + """ + result = copy.deepcopy(base) + for key, value in scaffold.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = deep_merge(result[key], value) + else: + result[key] = copy.deepcopy(value) + return result + + +class PathTraversalError(Exception): + """Raised when set_at_path cannot traverse a path in the row dict.""" + + +def _scaffold_struct(target: dict, name: str) -> dict: + """Return target[name] as a dict, scaffolding `{}` when missing or None.""" + child = target.get(name) if isinstance(target, dict) else None + if child is None: + child = {} + target[name] = child + return child + + +def _scaffold_array(target: dict, name: str, path: FieldPath | str) -> list: + """Return target[name] as a list, scaffolding `[{}]` when None. + + Empty arrays raise — there is no element to mutate. + """ + child = target.get(name) if isinstance(target, dict) else None + if child is None: + child = [{}] + target[name] = child + if not isinstance(child, list): + raise PathTraversalError( + f"Expected list at '{name}' in path '{path}', got {type(child).__name__}" + ) + if len(child) == 0: + raise PathTraversalError(f"Empty array at '{name}' in path '{path}'") + return child + + +def _descend_through_array( + segment: ArraySegment, target: dict, path: FieldPath | str +) -> list: + """Enter an array segment and walk through its iter_count. + + Scaffolds `[{}]` at the outer level when None; deeper levels + (`iter_count > 1`) must already be lists -- scaffolding into + nested-list shapes isn't supported because no current schema + needs it. + + Returns the innermost list. For terminal use, write to `[0]`; + for intermediate use, the next segment lives in `[0]`. 
+ """ + container = _scaffold_array(target, segment.name, path) + for _ in range(segment.iter_count - 1): + if len(container) == 0 or not isinstance(container[0], list): + raise PathTraversalError( + f"Expected non-empty nested list at '{segment.name}' in path '{path}'" + ) + container = container[0] + return container + + +def set_at_path(path: FieldPath | str, value: object) -> Callable[[dict], dict]: + """Return a mutator that sets *value* at *path* in a deep copy of the row. + + `[]` always indexes element 0 — one bad element suffices to trigger + a violation since `validate()` checks are element-wise. + + None at an intermediate struct segment is scaffolded as `{}`; None at + an intermediate array segment is scaffolded as `[{}]`. Empty arrays + raise `PathTraversalError` when called — there is no element to mutate. + + Parameters + ---------- + path + A `FieldPath` or its canonical encoded form (`"rules[].tags[].v"`). + value + The value to set at the resolved path. + + Returns + ------- + Callable[[dict], dict] + A function that takes a row dict and returns a deep copy with the + value at `path` replaced. + + Raises + ------ + PathTraversalError + When the path is empty, or when an intermediate or final array + segment is empty (raised at call time, not at factory time). 
+ """ + segments = coerce(path).segments + + def mutator(row_dict: dict) -> dict: + if not segments: + raise PathTraversalError(f"Empty path: {path!r}") + result = copy.deepcopy(row_dict) + target: Any = result + for segment in segments[:-1]: + if isinstance(segment, ArraySegment): + target = _descend_through_array(segment, target, path)[0] + else: + target = _scaffold_struct(target, segment.name) + last = segments[-1] + if isinstance(last, ArraySegment): + _descend_through_array(last, target, path)[0] = value + else: + target[last.name] = value + return result + + return mutator diff --git a/packages/overture-schema-pyspark/tests/_support/mutations.py b/packages/overture-schema-pyspark/tests/_support/mutations.py new file mode 100644 index 000000000..4ed3466e3 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/_support/mutations.py @@ -0,0 +1,388 @@ +"""Model-level mutation functions for generated conformance tests. + +Each function takes a row dict and returns a modified copy that should +trigger a specific model-level constraint violation. Generated test +files import these by name. +""" + +from __future__ import annotations + +import copy +from collections.abc import Callable +from typing import Any + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + FieldPath, + PathSegment, + ScalarPath, + coerce, +) + +from .helpers import PathTraversalError + +_SENTINEL = "__FORBIDDEN_PRESENT__" +_NOT_EQUAL_PREFIX = "__NOT_" + + +def mutate_require_any_of( + row_dict: dict, + field_names: list[FieldPath | str], + *, + array_path: FieldPath | str | None = None, + struct_path: str | None = None, +) -> dict: + """Null every named field so `require_any_of` fires. + + Parameters + ---------- + array_path + Array column the constrained model lives inside. When None, the + fields live at the row root. + struct_path + Optional single intermediate struct field between the array + element and the target fields. 
+ + See `_null_all_named_fields` for the full nesting semantics. + """ + return _null_all_named_fields( + row_dict, field_names, array_path=array_path, struct_path=struct_path + ) + + +def mutate_radio_group(row_dict: dict, field_names: list[FieldPath | str]) -> dict: + """Set first two fields to True so radio_group fires.""" + result = copy.deepcopy(row_dict) + for name in field_names[:2]: + _set_nested(result, name, True) + return result + + +def mutate_min_fields_set( + row_dict: dict, + field_names: list[FieldPath | str], + *, + array_path: FieldPath | str | None = None, + struct_path: str | None = None, +) -> dict: + """Null every named field so `min_fields_set(N)` fires (0 < N). + + The descriptor enumerates every field of the constrained model, so + nulling all of them drops the non-null count to zero -- below any + positive `count`. Nulling required fields incidentally trips their + `check_required` checks; the conformance test only asserts the + expected violation is present, so the extra failures don't matter. + + `array_path` / `struct_path` mirror `mutate_require_any_of` for the + case where the constrained model is reached through array iteration + (and optionally one intermediate struct field). + """ + return _null_all_named_fields( + row_dict, field_names, array_path=array_path, struct_path=struct_path + ) + + +def _null_all_named_fields( + row_dict: dict, + field_names: list[FieldPath | str], + *, + array_path: FieldPath | str | None, + struct_path: str | None, +) -> dict: + """Return a deep copy of *row_dict* with every named field set to None. + + Without *array_path*, the fields live at the row root. With *array_path*, + the fields live inside elements of that array column; *struct_path* + names an optional single intermediate struct field between the array + element and the target fields. A null array is replaced with a single + stub element so the violation has a row to fire on. 
+ """ + result = copy.deepcopy(row_dict) + if array_path is None: + for name in field_names: + _set_nested(result, name, None) + return result + + arr: list[dict] | None = _get_nested(result, array_path) # type: ignore[assignment] + if arr is None: + stub: dict = {} + for name in field_names: + _set_nested(stub, name, None, create=True) + element = {struct_path: stub} if struct_path else stub + _set_nested(result, array_path, [element]) + else: + for element in arr: + if struct_path: + target = element.get(struct_path) + if target is None: + target = {} + element[struct_path] = target + else: + target = element + for name in field_names: + _set_nested(target, name, None) + return result + + +def mutate_require_if( + row_dict: dict, + field_names: list[FieldPath | str], + condition_field: FieldPath | str, + condition_value: object, + *, + negate: bool = False, + array_path: FieldPath | str | None = None, + inner_array_path: FieldPath | str | None = None, +) -> dict: + """Set condition to trigger require_if, then null target fields.""" + result = copy.deepcopy(row_dict) + + def _apply(target: dict) -> None: + _ensure_condition(target, condition_field, condition_value, negate=negate) + for name in field_names: + _set_nested(target, name, None) + + _apply_to_targets(result, _apply, array_path, inner_array_path) + return result + + +def mutate_forbid_if( + row_dict: dict, + field_names: list[str], + condition_field: FieldPath | str, + condition_value: object, + *, + negate: bool = False, + fill_values: dict[str, object] | None = None, + array_path: FieldPath | str | None = None, + inner_array_path: FieldPath | str | None = None, +) -> dict: + """Set condition to trigger forbid_if, ensure target fields are non-null. + + `field_names` are flat scalar field names — model-level forbid_if + references fields by name on the enclosing model. `fill_values` is + keyed by the same names. 
+ """ + result = copy.deepcopy(row_dict) + fills = fill_values or {} + + def _apply(target: dict) -> None: + _ensure_condition(target, condition_field, condition_value, negate=negate) + for name in field_names: + if _get_nested(target, name) is None: + _set_nested(target, name, fills.get(name, _SENTINEL)) + + _apply_to_targets(result, _apply, array_path, inner_array_path) + return result + + +def mutate_unique_items(row_dict: dict, path: FieldPath | str) -> dict: + """Duplicate the first array element so unique_items fires. + + Supports bracket paths like `"restrictions[].when.mode"` -- enters + element 0 at each `[]` segment, then duplicates the first element + of the final array. A terminal `[]` (e.g. `"hierarchies[]"`) + targets the inner array at element 0 of the named field -- the + walker descends one extra level per bracket on the terminal + segment and duplicates the first element of the array it lands on. + """ + result = copy.deepcopy(row_dict) + segments = coerce(path).segments + + parent: Any = _walk_strict(result, segments[:-1], path) + last = segments[-1] + if not isinstance(parent, dict) or last.name not in parent: + raise PathTraversalError(f"Missing key '{last.name}' in path '{path}'") + + # When the terminal is an array segment, descend `iter_count` levels of + # `[0]`. Otherwise the terminal struct already references the list to + # mutate. The final `container[key]` must itself be a list. 
+ container: Any = parent + key: int | str = last.name + iter_count = last.iter_count if isinstance(last, ArraySegment) else 0 + for depth in range(iter_count): + inner = container[key] + _require_non_empty_array(inner, f"{last.name}{'[]' * depth}", path) + container, key = inner, 0 + arr = container[key] + if not isinstance(arr, list): + raise PathTraversalError( + f"Expected list at terminal of path '{path}', got {type(arr).__name__}" + ) + _duplicate_first(container, key, arr) + return result + + +def _walk_strict( + target: Any, segments: tuple[PathSegment, ...], path: FieldPath | str +) -> Any: + """Walk segments without scaffolding. + + Raises `PathTraversalError` on missing or null struct intermediates, + and on empty arrays encountered at array intermediates (each `[]` in + a segment's `iter_count` descends one element, which requires a + non-empty list). + """ + for segment in segments: + if not isinstance(target, dict) or target.get(segment.name) is None: + raise PathTraversalError( + f"Missing or null key '{segment.name}' in path '{path}'" + ) + target = target[segment.name] + if isinstance(segment, ArraySegment): + for _ in range(segment.iter_count): + _require_non_empty_array(target, segment.name, path) + target = target[0] + return target + + +def _require_non_empty_array(value: Any, name: str, path: FieldPath | str) -> None: + """Raise PathTraversalError unless *value* is a non-empty list.""" + if not isinstance(value, list) or len(value) == 0: + raise PathTraversalError(f"Empty or missing array at '{name}' in path '{path}'") + + +def _duplicate_first(container: Any, key: int | str, arr: list) -> None: + """Replace `container[key]` with `arr` having its first element duplicated. + + No-op when `arr` is empty. Both elements are deep-copied so callers + cannot accidentally share state between the duplicates. 
+ """ + if not arr: + return + dup = copy.deepcopy(arr[0]) + container[key] = [dup, copy.deepcopy(dup)] + list(arr[1:]) + + +_Applicator = Callable[[dict], None] + + +def _apply_to_targets( + row: dict, + fn: _Applicator, + array_path: FieldPath | str | None, + inner_array_path: FieldPath | str | None, +) -> None: + """Apply a mutation function to target dicts at the appropriate nesting level. + + Without array paths, applies directly to the row. With `array_path`, + iterates over elements of that array. With both `array_path` and + `inner_array_path`, iterates over outer elements, navigates the + inner struct path to a nested array, then iterates those elements. + + Creates stub array elements when the arrays are null so the mutation + can populate them. + """ + if array_path is None: + fn(row) + return + outer_arr: list[dict] | None = _get_nested(row, array_path) # type: ignore[assignment] + if outer_arr is None: + outer_stub: dict = {} + _stub_apply(outer_stub, inner_array_path, fn) + _set_nested(row, array_path, [outer_stub]) + return + if inner_array_path is None: + for element in outer_arr: + fn(element) + else: + for element in outer_arr: + inner_arr: list[dict] | None = _get_nested(element, inner_array_path) # type: ignore[assignment] + if inner_arr is not None: + for inner_element in inner_arr: + fn(inner_element) + else: + _stub_apply(element, inner_array_path, fn) + + +def _stub_apply( + parent: dict, + inner_array_path: FieldPath | str | None, + fn: _Applicator, +) -> None: + """Build a stub element at `inner_array_path` inside *parent* and run `fn`. + + When `inner_array_path` is None, *parent* itself is the stub that + `fn` mutates. Otherwise an empty stub is inserted as the sole + element of `[stub]` at `inner_array_path` inside *parent* + (scaffolding intermediate dicts), and `fn` mutates the stub. 
+ """ + if inner_array_path is None: + fn(parent) + return + stub: dict = {} + fn(stub) + _set_nested(parent, inner_array_path, [stub], create=True) + + +def _ensure_condition( + d: dict, + condition_field: FieldPath | str, + condition_value: object, + *, + negate: bool, +) -> None: + """Set condition_field so the constraint condition evaluates to True. + + When *negate* is False, sets the field to *condition_value* (the + condition is `field == value`). When True, ensures the field is + NOT equal to *condition_value* (the condition is `field != value`); + if it already differs, leaves it alone. + """ + if negate: + current = _get_nested(d, condition_field) + if current == condition_value: + _set_nested(d, condition_field, f"{_NOT_EQUAL_PREFIX}{condition_value}__") + else: + _set_nested(d, condition_field, condition_value) + + +def _as_scalar_path(path: FieldPath | str) -> ScalarPath: + """Coerce *path* to a ScalarPath, rejecting any array markers. + + The dict-walking helpers operate only on struct fields; an array + marker indicates the caller wanted array-aware navigation and picked + the wrong helper. + """ + coerced = coerce(path) + if isinstance(coerced, ArrayPath): + raise ValueError(f"struct-only path expected, got array segment in {path!r}") + return coerced + + +def _set_nested( + d: dict, path: FieldPath | str, value: object, *, create: bool = False +) -> None: + """Set a value in a nested dict using a struct-field path. + + When *create* is True, intermediate dicts are created if missing or + None. When an intermediate is None and *value* is also None, the path + is already effectively null — returns without error. 
+ """ + segments = _as_scalar_path(path).segments + target = d + for segment in segments[:-1]: + part = segment.name + if create and (part not in target or target[part] is None): + target[part] = {} + child = target.get(part) if isinstance(target, dict) else None + if child is None: + if value is None: + return + raise TypeError(f"None intermediate at '{part}' in path '{path}'") + target = child + target[segments[-1].name] = value + + +def _get_nested(d: dict, path: FieldPath | str) -> object: + """Get a value from a nested dict using a struct-field path. + + Returns None when any intermediate key is missing. + """ + target: object = d + for segment in _as_scalar_path(path).segments: + if not isinstance(target, dict) or segment.name not in target: + return None + target = target[segment.name] + return target diff --git a/packages/overture-schema-pyspark/tests/_support/scenarios.py b/packages/overture-schema-pyspark/tests/_support/scenarios.py new file mode 100644 index 000000000..a2f58abbd --- /dev/null +++ b/packages/overture-schema-pyspark/tests/_support/scenarios.py @@ -0,0 +1,34 @@ +"""Scenario dataclass for generated conformance tests.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True, slots=True) +class Scenario: + """A test scenario: a mutation that should produce a specific violation. + + Parameters + ---------- + id + Human-readable scenario identifier, e.g. `"building::id:required"`. + scaffold + Dict merged onto the base row before mutation to provide valid values + for fields the base row lacks (e.g. array elements for nested paths). + mutate + Callable applied to `deep_merge(base_row, scaffold)` to produce the + invalid row. Must return a new dict; must not mutate its argument. + expected_field + Field name expected in the violation output. + expected_check + Check name expected in the violation output. 
+ """ + + id: str + scaffold: dict[str, Any] + mutate: Callable[[dict], dict] + expected_field: str + expected_check: str diff --git a/packages/overture-schema-pyspark/tests/conftest.py b/packages/overture-schema-pyspark/tests/conftest.py new file mode 100644 index 000000000..ccacb1aac --- /dev/null +++ b/packages/overture-schema-pyspark/tests/conftest.py @@ -0,0 +1,50 @@ +"""Shared pytest fixtures for overture-schema-pyspark tests.""" + +import os +import socket +import sys +from collections.abc import Callable +from typing import Any + +import pytest +from pyspark.sql import SparkSession + +# Ensure PySpark workers use the same Python as the driver to avoid +# version mismatch errors when a different system Python is on PATH. +os.environ.setdefault("PYSPARK_PYTHON", sys.executable) +os.environ.setdefault("PYSPARK_DRIVER_PYTHON", sys.executable) + + +def pytest_configure(config: pytest.Config) -> None: + """Suppress ResourceWarning from PySpark's unclosed py4j sockets. + + PySpark uses py4j to communicate with the JVM. py4j socket proxies + are GC'd between tests and their __del__ fires ResourceWarning via + sys.unraisablehook. With -W error this becomes a test failure. + + The original hook is preserved for all other unraisable exceptions. 
+ """ + original_hook: Callable[[Any], None] = sys.unraisablehook + + def _hook(unraisable: Any) -> None: + if isinstance(unraisable.exc_value, ResourceWarning) and isinstance( + unraisable.object, socket.socket + ): + return + original_hook(unraisable) + + sys.unraisablehook = _hook + + +@pytest.fixture(scope="session") +def spark() -> SparkSession: + """Provide a local SparkSession for testing.""" + session = ( + SparkSession.builder.master("local[1]") + .appName("overture-pyspark-tests") + .config("spark.ui.enabled", "false") + .config("spark.sql.shuffle.partitions", "1") + .getOrCreate() + ) + session.sparkContext.setLogLevel("ERROR") + return session diff --git a/packages/overture-schema-pyspark/tests/expressions/__init__.py b/packages/overture-schema-pyspark/tests/expressions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py new file mode 100644 index 000000000..6720da35e --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_column_patterns.py @@ -0,0 +1,258 @@ +"""Tests for column_patterns — structural PySpark composition helpers.""" + +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + coalesce_errors, + error_msg, + nested_array_check, +) +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F + + +def test_error_msg_concatenates(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="bad")]) + result = df.select(error_msg("field: got ", F.col("val")).alias("msg")).collect() + assert result[0]["msg"] == "field: got bad" + + +def test_error_msg_multiple_values(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a="x", b="y")]) + result = df.select( + error_msg("prefix ", F.col("a"), F.lit(" and "), F.col("b")).alias("msg") + ).collect() + assert 
result[0]["msg"] == "prefix x and y" + + +def test_array_check_null_column_returns_null(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=None)], + schema="items array<struct<val:string>>", + ) + result = df.select( + array_check("items", lambda el: F.lit("err")).alias("errs") + ).collect() + assert result[0]["errs"] is None + + +def test_array_check_filters_nulls(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(val="ok"), Row(val="bad")])], + schema="items array<struct<val:string>>", + ) + result = df.select( + array_check( + "items", + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["error"] + + +def test_array_check_empty_when_all_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(val="ok")])], + schema="items array<struct<val:string>>", + ) + result = df.select( + array_check( + "items", + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_struct_unique_no_duplicates(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="b")])], + schema="items array<struct<id:string>>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_with_duplicates(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="a")])], + schema="items array<struct<id:string>>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is not None + assert "duplicate" in result[0]["err"] + + +def test_struct_unique_null_column(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=None)], + schema="items array<struct<id:string>>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_repeated_value_different_fields(spark: SparkSession) -> None: + """Structs with
same value subfield but different other fields are unique.""" + df = spark.createDataFrame( + [ + Row( + items=[ + Row(value="a", pos=0.0), + Row(value="b", pos=0.5), + Row(value="a", pos=0.7), + ] + ) + ] + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_struct_unique_single_element(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=[Row(id="a")])], + schema="items array<struct<id:string>>", + ) + result = df.select(check_struct_unique("items").alias("err")).collect() + assert result[0]["err"] is None + + +def test_array_check_accepts_column(spark: SparkSession) -> None: + """array_check works when passed a Column instead of a string name.""" + df = spark.createDataFrame( + [Row(items=[Row(val="ok"), Row(val="bad")])], + schema="items array<struct<val:string>>", + ) + result = df.select( + array_check( + F.col("items"), + lambda el: F.when(el["val"] == "bad", F.lit("error")), + ).alias("errs") + ).collect() + assert result[0]["errs"] == ["error"] + + +def test_check_struct_unique_accepts_column(spark: SparkSession) -> None: + """check_struct_unique works when passed a Column instead of a string name.""" + df = spark.createDataFrame( + [Row(items=[Row(id="a"), Row(id="a")])], + schema="items array<struct<id:string>>", + ) + result = df.select(check_struct_unique(F.col("items")).alias("err")).collect() + assert result[0]["err"] is not None + assert "duplicate" in result[0]["err"] + + +def test_check_struct_unique_column_null(spark: SparkSession) -> None: + """check_struct_unique with Column input handles null.""" + df = spark.createDataFrame( + [Row(items=None)], schema="items array<struct<id:string>>" + ) + result = df.select(check_struct_unique(F.col("items")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_nested_array_check_flattens(spark: SparkSession) -> None: + """Inner array_check per outer element produces flat error list.""" + schema = "items array<struct<tags:array<string>>>" + df = spark.createDataFrame( + [ + Row( + items=[ +
Row(tags=["good", "bad"]), + Row(tags=["worse"]), + ] + ) + ], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.concat(F.lit("bad: "), tag)), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + errors = result[0]["errs"] + assert len(errors) == 2 + assert all(isinstance(e, str) for e in errors) + + +def test_nested_array_check_null_outer(spark: SparkSession) -> None: + schema = "items array<struct<tags:array<string>>>" + df = spark.createDataFrame([Row(items=None)], schema=schema) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.lit("bad")), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == [] + + +def test_nested_array_check_mixed_null_inner_with_sibling_errors( + spark: SparkSession, +) -> None: + """A null inner array must not nullify sibling errors during flatten. + + `F.flatten` returns NULL whenever any sub-array is NULL. Without + guarding inner nulls, the outer transform produces NULL and every + sibling error is silently dropped.
+ """ + schema = "items array<struct<tags:array<string>>>" + df = spark.createDataFrame( + [ + Row( + items=[ + Row(tags=["good"]), + Row(tags=None), + Row(tags=["bad"]), + ] + ) + ], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.concat(F.lit("bad: "), tag)), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == ["bad: bad"] + + +def test_nested_array_check_no_errors(spark: SparkSession) -> None: + schema = "items array<struct<tags:array<string>>>" + df = spark.createDataFrame( + [Row(items=[Row(tags=["good"])])], + schema=schema, + ) + result_col = nested_array_check( + "items", + lambda el: array_check( + el["tags"], + lambda tag: F.when(tag != "good", F.lit("bad")), + ), + ) + result = df.select(coalesce_errors(result_col).alias("errs")).collect() + assert result[0]["errs"] == [] + + +def test_coalesce_errors_null_becomes_empty(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(x=1)]) + result = df.select( + coalesce_errors(F.lit(None).cast("array<string>")).alias("errs") + ).collect() + assert result[0]["errs"] == [] + + +def test_coalesce_errors_preserves_array(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(x=1)]) + result = df.select(coalesce_errors(F.array(F.lit("err"))).alias("errs")).collect() + assert result[0]["errs"] == ["err"] diff --git a/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py new file mode 100644 index 000000000..6a4289e35 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_constraint_expressions.py @@ -0,0 +1,1341 @@ +"""Tests for constraint_expressions — constraint type to Column translation.""" + +import struct + +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, +
check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_email, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_min_fields_set, + check_pattern, + check_radio_group, + check_require_any_of, + check_require_if, + check_required, + check_string_max_length, + check_string_min_length, + check_stripped, + check_url_format, + check_url_length, +) +from overture.schema.system.primitive import GeometryType +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import DoubleType, StructField, StructType +from shapely.geometry import LineString, MultiPolygon, Point, Polygon + + +def test_check_bounds_ge_le_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=5)]) + result = df.select(check_bounds(F.col("val"), ge=1, le=10).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bounds_ge_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=0)]) + result = df.select(check_bounds(F.col("val"), ge=1).alias("err")).collect() + assert result[0]["err"] is not None + assert ">= 1" in result[0]["err"] + + +def test_check_bounds_gt_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=0)]) + result = df.select(check_bounds(F.col("val"), gt=0).alias("err")).collect() + assert result[0]["err"] is not None + assert "> 0" in result[0]["err"] + + +def test_check_bounds_le_violation(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=100)]) + result = df.select(check_bounds(F.col("val"), le=50).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bounds_null_passthrough(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val int") + result = df.select(check_bounds(F.col("val"), ge=1).alias("err")).collect() + assert result[0]["err"] is 
None + + +def test_check_enum_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="road")]) + result = df.select( + check_enum(F.col("val"), ["road", "rail", "water"]).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_enum_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="sky")]) + result = df.select( + check_enum(F.col("val"), ["road", "rail", "water"]).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "sky" in result[0]["err"] + + +class TestCheckPattern: + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("AB",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("abc",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + err = result.collect()[0]["e"] + assert "invalid test pattern" in err + assert "abc" in err + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), r"^[A-Z]{2}$", label="test pattern").alias("e") + ) + assert result.collect()[0]["e"] is None + + +class TestCheckMinLength: + def test_at_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array<string>" + ) + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=["a"])], schema="items array<string>") + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "minimum length 2" in result[0]["err"] + + def
test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=None)], schema="items array<string>") + result = df.select( + check_array_min_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckMaxLength: + def test_within_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array<string>" + ) + result = df.select( + check_array_max_length(F.col("items"), 3).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_at_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b"])], schema="items array<string>" + ) + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exceeds_limit(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(items=["a", "b", "c"])], schema="items array<string>" + ) + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "maximum length 2" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(items=None)], schema="items array<string>") + result = df.select( + check_array_max_length(F.col("items"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_require_any_of_satisfied(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a=1, b=None)], schema="a int, b int") + result = df.select( + check_require_any_of([F.col("a"), F.col("b")], ["a", "b"]).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_require_any_of_all_null(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(a=None, b=None)], schema="a int, b int") + result = df.select( + check_require_any_of([F.col("a"), F.col("b")], ["a", "b"]).alias("err") + ).collect() + assert result[0]["err"] is not
None + assert "a" in result[0]["err"] + assert "b" in result[0]["err"] + + +class TestCheckRequireIf: + def test_required_present(self, spark: SparkSession) -> None: + """Target is present when condition is true -> no error.""" + df = spark.createDataFrame( + [("road", "primary")], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_required_absent(self, spark: SparkSession) -> None: + """Target is null when condition is true -> error.""" + df = spark.createDataFrame( + [("road", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "required" in result[0]["err"] + + def test_condition_false_skips(self, spark: SparkSession) -> None: + """Target is null but condition is false -> no error.""" + df = spark.createDataFrame( + [("water", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_with_value_cols(self, spark: SparkSession) -> None: + """Error message includes actual discriminator value.""" + df = spark.createDataFrame( + [("road", None)], schema="subtype string, road_class string" + ) + result = df.select( + check_require_if( + F.col("road_class"), + F.col("subtype").isin(["road", "rail"]), + "subtype in [road, rail]", + F.col("subtype"), + ).alias("err") + ).collect() + assert "road" in result[0]["err"] + + +class TestCheckForbidIf: + def test_forbidden_absent(self, spark: SparkSession) -> None: + """Target is null when condition 
is true -> no error.""" + df = spark.createDataFrame( + [Row(subtype="country", parent=None)], + schema="subtype string, parent string", + ) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_forbidden_present(self, spark: SparkSession) -> None: + """Target is present when condition is true -> error.""" + df = spark.createDataFrame([Row(subtype="country", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "forbidden" in result[0]["err"] + + def test_condition_false_skips(self, spark: SparkSession) -> None: + """Target is present but condition is false -> no error.""" + df = spark.createDataFrame([Row(subtype="region", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_with_value_cols(self, spark: SparkSession) -> None: + """Error message includes actual discriminator value.""" + df = spark.createDataFrame([Row(subtype="country", parent="abc")]) + result = df.select( + check_forbid_if( + F.col("parent"), + F.col("subtype") == "country", + "subtype = country", + F.col("subtype"), + ).alias("err") + ).collect() + assert "country" in result[0]["err"] + + +class TestCheckStringMinLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abc")]) + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_empty_string_violation(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="")]) + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + 
).collect() + assert result[0]["err"] is not None + assert "minimum length" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select( + check_string_min_length(F.col("val"), 1).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exact_min_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="ab")]) + result = df.select( + check_string_min_length(F.col("val"), 2).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_min_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="a")]) + result = df.select( + check_string_min_length(F.col("val"), 2).alias("err") + ).collect() + assert result[0]["err"] is not None + + +class TestCheckStringMaxLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abc")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_above_max_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abcdef")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "maximum length" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exact_max_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="abcde")]) + result = df.select( + check_string_max_length(F.col("val"), 5).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckRadioGroup: + def test_exactly_one_true(self, spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(is_land=True, is_territorial=False)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_none_true(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(is_land=False, is_territorial=False)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "exactly one" in result[0]["err"] + assert "0" in result[0]["err"] + + def test_both_true(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(is_land=True, is_territorial=True)]) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2" in result[0]["err"] + + def test_null_treated_as_false(self, spark: SparkSession) -> None: + """Null booleans count as not-true (0 toward the count).""" + df = spark.createDataFrame( + [Row(is_land=True, is_territorial=None)], + schema="is_land boolean, is_territorial boolean", + ) + result = df.select( + check_radio_group( + [F.col("is_land"), F.col("is_territorial")], + ["is_land", "is_territorial"], + ).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckGeometryType: + def test_point_matches(self, spark: SparkSession) -> None: + wkb_bytes = Point(0, 0).wkb + df = spark.createDataFrame( + [Row(geometry=bytearray(wkb_bytes))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_point_rejects_linestring(self, spark: SparkSession) -> None: + wkb_bytes = LineString([(0, 0), (1, 1)]).wkb + df = spark.createDataFrame( + 
[Row(geometry=bytearray(wkb_bytes))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "Point" in result[0]["err"] + + def test_multiple_allowed_types(self, spark: SparkSession) -> None: + wkb_polygon = Polygon([(0, 0), (1, 0), (1, 1), (0, 0)]).wkb + wkb_multi = MultiPolygon([Polygon([(0, 0), (1, 0), (1, 1), (0, 0)])]).wkb + df = spark.createDataFrame( + [ + Row(geometry=bytearray(wkb_polygon)), + Row(geometry=bytearray(wkb_multi)), + ], + schema="geometry binary", + ) + result = df.select( + check_geometry_type( + F.col("geometry"), + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + ).alias("err") + ).collect() + assert all(r["err"] is None for r in result) + + def test_multiple_allowed_rejects_wrong_type(self, spark: SparkSession) -> None: + wkb_point = Point(0, 0).wkb + df = spark.createDataFrame( + [Row(geometry=bytearray(wkb_point))], schema="geometry binary" + ) + result = df.select( + check_geometry_type( + F.col("geometry"), + GeometryType.POLYGON, + GeometryType.MULTI_POLYGON, + ).alias("err") + ).collect() + assert result[0]["err"] is not None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(geometry=None)], schema="geometry binary") + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_big_endian_wkb(self, spark: SparkSession) -> None: + """Verify BE byte order handling. + + Shapely writes LE by default. Construct BE WKB for a Point + manually: byte_order=0x00, type=0x00000001, x=0.0, y=0.0. 
+ """ + be_point = struct.pack(">bIdd", 0, 1, 0.0, 0.0) + df = spark.createDataFrame( + [Row(geometry=bytearray(be_point))], schema="geometry binary" + ) + result = df.select( + check_geometry_type(F.col("geometry"), GeometryType.POINT).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckStripped: + def test_clean_string(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello world")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_single_char(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="x")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_leading_space(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=" hello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "whitespace" in result[0]["err"] + + def test_trailing_space(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello ")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "whitespace" in result[0]["err"] + + def test_leading_tab(self, spark: SparkSession) -> None: + """Tab is Unicode whitespace -- must be caught (not just ASCII space).""" + df = spark.createDataFrame([Row(val="\thello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_newline(self, spark: SparkSession) -> None: + """Trailing newline requires \\z anchor -- $ matches before it in Java regex.""" + df = spark.createDataFrame([Row(val="hello\n")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_empty_string(self, spark: SparkSession) -> None: + """Empty string has no leading/trailing whitespace -- passes.""" + df = spark.createDataFrame([Row(val="")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_trailing_unit_separator(self, spark: SparkSession) -> None: + """U+001F (unit separator) -- Python strips it, Java \\S with (?U) does not.""" + df = spark.createDataFrame([Row(val="Main St \x1f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_leading_file_separator(self, spark: SparkSession) -> None: + """U+001C (file separator) -- C0 control char Python treats as whitespace.""" + df = spark.createDataFrame([Row(val="\x1chello")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_soh(self, spark: SparkSession) -> None: + """U+0001 (SOH) -- C0 control char that even Python's strip() misses.""" + df = spark.createDataFrame([Row(val="hello\x01")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_del(self, spark: SparkSession) -> None: + """U+007F (DEL) -- control char outside C0 range.""" + df = spark.createDataFrame([Row(val="hello\x7f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_trailing_c1_control(self, spark: SparkSession) -> None: + """U+009F (APC) -- C1 control char.""" + df = spark.createDataFrame([Row(val="hello\x9f")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + def test_control_char_in_middle_passes(self, 
spark: SparkSession) -> None: + """Control chars in the middle of a string are not a stripped concern.""" + df = spark.createDataFrame([Row(val="hel\x1flo")]) + result = df.select(check_stripped(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +class TestCheckJsonPointer: + def test_valid_pointer(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="/properties/name")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_root_pointer(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="/")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_empty_string_valid(self, spark: SparkSession) -> None: + """Empty string is valid per RFC 6901 (references whole document).""" + df = spark.createDataFrame([Row(val="")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + def test_missing_leading_slash(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="properties/name")]) + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "JSON pointer" in result[0]["err"] + assert "properties/name" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_json_pointer(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeLength: + def test_valid_length(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 1.0])], schema="between array<double>" + ) + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_wrong_length_one(self, spark:
SparkSession) -> None: + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_wrong_length_three(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 0.5, 1.0])], schema="between array" + ) + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_empty_array(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=[])], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "2 elements" in result[0]["err"] + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_length(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeBounds: + def test_valid_bounds(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.2, 0.8])], schema="between array" + ) + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_value_below_zero(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[-0.1, 0.5])], schema="between array" + ) + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "[0.0, 1.0]" in result[0]["err"] + + def test_value_above_one(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.0, 1.1])], schema="between array" + ) + result = df.select( 
+ check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "[0.0, 1.0]" in result[0]["err"] + + def test_wrong_length_passthrough(self, spark: SparkSession) -> None: + """Wrong-length arrays are not this function's concern.""" + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_bounds(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +class TestCheckLinearRangeOrder: + def test_valid_order(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.2, 0.8])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_start_equals_end(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.5, 0.5])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "start must be < end" in result[0]["err"] + + def test_start_after_end(self, spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(between=[0.8, 0.2])], schema="between array" + ) + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is not None + assert "start must be < end" in result[0]["err"] + + def test_wrong_length_passthrough(self, spark: SparkSession) -> None: + """Wrong-length arrays are not this function's concern.""" + df = spark.createDataFrame([Row(between=[0.5])], schema="between array") + result = df.select( + 
check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_null_passthrough(self, spark: SparkSession) -> None: + df = spark.createDataFrame([Row(between=None)], schema="between array") + result = df.select( + check_linear_range_order(F.col("between")).alias("err") + ).collect() + assert result[0]["err"] is None + + +def test_check_required_null_is_error(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_required(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + assert "missing" in result[0]["err"] + + +def test_check_required_non_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="hello")]) + result = df.select(check_required(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_required_composes_with_enum(spark: SparkSession) -> None: + """check_required + check_enum via F.coalesce catches both null and invalid.""" + df = spark.createDataFrame([Row(val=None)], schema="val string") + expr = F.coalesce( + check_required(F.col("val")), + check_enum(F.col("val"), ["a", "b"]), + ) + result = df.select(expr.alias("err")).collect() + assert result[0]["err"] is not None + assert "missing" in result[0]["err"] + + +_COUNTRY_CODE_PATTERN = r"^[A-Z]{2}\z" +_COUNTRY_CODE_LABEL = "ISO 3166-1 alpha-2 country code" + + +class TestCheckCountryCodeViaPattern: + """Country code validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("US",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_lowercase_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("us",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), 
_COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_COUNTRY_CODE_LABEL}" in err + assert "us" in err + + def test_three_chars_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("USA",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + F.col("v"), _COUNTRY_CODE_PATTERN, label=_COUNTRY_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_REGION_CODE_PATTERN = r"^[A-Z]{2}-[A-Z0-9]{1,3}\z" +_REGION_CODE_LABEL = "ISO 3166-2 subdivision code" + + +class TestCheckRegionCodeViaPattern: + """Region code validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("US-NY",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_valid_numeric(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("CN-11",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_no_dash_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("USNY",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_REGION_CODE_LABEL}" in err + assert "USNY" in err + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + 
F.col("v"), _REGION_CODE_PATTERN, label=_REGION_CODE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_SNAKE_CASE_PATTERN = r"^[a-z0-9]+(_[a-z0-9]+)*\z" +_SNAKE_CASE_LABEL = "Category in snake_case format" + + +class TestCheckSnakeCaseViaPattern: + """Snake_case validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello_world",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_single_word(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_with_numbers(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello_123",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_uppercase_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Hello_World",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_SNAKE_CASE_LABEL}" in err + + def test_spaces_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("hello world",)], ["v"]) + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern( + F.col("v"), _SNAKE_CASE_PATTERN, label=_SNAKE_CASE_LABEL + ).alias("e") + ) + assert 
result.collect()[0]["e"] is None + + +def test_check_url_format_http_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="http://example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_format_https_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="https://example.com/path?q=1")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_format_no_scheme_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_format_ftp_scheme_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="ftp://example.com")]) + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_format_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_url_format(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_length_exceeds_2083_chars_invalid(spark: SparkSession) -> None: + long_url = "https://example.com/" + "a" * 2064 # 2084 chars + df = spark.createDataFrame([Row(val=long_url)]) + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_url_length_exactly_2083_chars_valid(spark: SparkSession) -> None: + url = "https://example.com/" + "a" * 2063 # 2083 chars + df = spark.createDataFrame([Row(val=url)]) + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_url_length_null_passes(spark: SparkSession) -> None: + df = 
spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_url_length(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_no_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="userexample.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_no_domain_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_spaces_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user @example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_null_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=None)], schema="val string") + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_trailing_period_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example.com.")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_leading_period_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val=".user@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_period_before_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user.@example.com")]) + result = 
df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_period_after_at_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@.example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_double_period_domain_invalid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@example..com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_email_dotted_local_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user.name@example.com")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_email_subdomain_valid(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(val="user@mail.example.co.uk")]) + result = df.select(check_email(F.col("val")).alias("err")).collect() + assert result[0]["err"] is None + + +_PHONE_PATTERN = r"^\+\d{1,3}[\s\-\(\)0-9]+\z" +_PHONE_LABEL = "International phone number (+ followed by country code and number)" + + +class TestCheckPhoneViaPattern: + """Phone number validation through check_pattern with label.""" + + def test_valid_us(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+1 555-555-5555",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_valid_international(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+44 20 7946 0958",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + def test_no_plus_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("555-555-5555",)], ["v"]) + result = 
df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + err = result.collect()[0]["e"] + assert f"invalid {_PHONE_LABEL}" in err + + def test_letters_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("+1 abc-defg",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), _PHONE_PATTERN, label=_PHONE_LABEL).alias("e") + ) + assert result.collect()[0]["e"] is None + + +_WIKIDATA_PATTERN = r"^Q\d+\z" +_WIKIDATA_LABEL = "Wikidata identifier (Q followed by digits)" + + +class TestCheckWikidataIdViaPattern: + """Wikidata ID validation through check_pattern with label.""" + + def test_valid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + def test_large_number(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q123456789",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + def test_lowercase_q_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("q42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + err = result.collect()[0]["e"] + assert f"invalid {_WIKIDATA_LABEL}" in err + + def test_no_digits_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("Q",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert 
result.collect()[0]["e"] is not None + + def test_p_prefix_invalid(self, spark: SparkSession) -> None: + df = spark.createDataFrame([("P42",)], ["v"]) + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is not None + + def test_null_passes(self, spark: SparkSession) -> None: + df = spark.createDataFrame([(None,)], schema="v string") + result = df.select( + check_pattern(F.col("v"), _WIKIDATA_PATTERN, label=_WIKIDATA_LABEL).alias( + "e" + ) + ) + assert result.collect()[0]["e"] is None + + +class TestCheckMinFieldsSet: + def test_meets_threshold(self, spark: SparkSession) -> None: + """Count at threshold -> no error.""" + df = spark.createDataFrame( + [Row(a=1, b=2, c=None)], schema="a int, b int, c int" + ) + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_exceeds_threshold(self, spark: SparkSession) -> None: + """Count above threshold -> no error.""" + df = spark.createDataFrame([Row(a=1, b=2, c=3)], schema="a int, b int, c int") + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + assert result[0]["err"] is None + + def test_below_threshold(self, spark: SparkSession) -> None: + """Count below threshold -> error with field names and actual count.""" + df = spark.createDataFrame( + [Row(a=1, b=None, c=None)], schema="a int, b int, c int" + ) + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b"), F.col("c")], + ["a", "b", "c"], + 2, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err is not None + assert "at least 2" in err + assert "a, b, c" in err + assert "1" in err + + def test_all_null_below_threshold(self, spark: SparkSession) -> None: + """All null -> error showing 0 non-null.""" + df = 
spark.createDataFrame([Row(a=None, b=None)], schema="a int, b int") + result = df.select( + check_min_fields_set( + [F.col("a"), F.col("b")], + ["a", "b"], + 1, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err is not None + assert "0" in err + + def test_error_message_format(self, spark: SparkSession) -> None: + """Error message matches expected format exactly.""" + df = spark.createDataFrame([Row(x=None, y=None)], schema="x int, y int") + result = df.select( + check_min_fields_set( + [F.col("x"), F.col("y")], + ["x", "y"], + 1, + ).alias("err") + ).collect() + err = result[0]["err"] + assert err == "at least 1 of x, y required, got 0 non-null" + + +_BBOX_SCHEMA = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + StructField("xmax", DoubleType(), True), + StructField("ymin", DoubleType(), True), + StructField("ymax", DoubleType(), True), + ] + ), + True, + ), + ] +) + + +def test_check_bbox_completeness_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=0.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_completeness_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_completeness_null_subfield_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=None, xmax=1.0, ymin=0.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_completeness(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_ordering_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, 
ymin=-10.0, ymax=10.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_ordering_equal_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=5.0, ymax=5.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_ordering_inverted_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=10.0, ymax=-10.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_ordering_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_lat_ordering(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_range_valid(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=-90.0, ymax=90.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None + + +def test_check_bbox_lat_range_ymin_below_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=-91.0, ymax=1.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is not None + + +def test_check_bbox_lat_range_ymax_above_fails(spark: SparkSession) -> None: + df = spark.createDataFrame( + [Row(bbox=Row(xmin=0.0, xmax=1.0, ymin=0.0, ymax=91.0))], + schema=_BBOX_SCHEMA, + ) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert 
result[0]["err"] is not None + + +def test_check_bbox_lat_range_null_bbox_passes(spark: SparkSession) -> None: + df = spark.createDataFrame([Row(bbox=None)], schema=_BBOX_SCHEMA) + result = df.select(check_bbox_lat_range(F.col("bbox")).alias("err")).collect() + assert result[0]["err"] is None diff --git a/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py new file mode 100644 index 000000000..937b8862d --- /dev/null +++ b/packages/overture-schema-pyspark/tests/expressions/test_schema_check.py @@ -0,0 +1,268 @@ +"""Tests for schema comparison.""" + +from overture.schema.pyspark.schema_check import ( + SchemaMismatch, + compare_schemas, +) +from pyspark.sql.types import ( + ArrayType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + + +class TestIdenticalSchemas: + def test_empty_schemas(self) -> None: + assert compare_schemas(StructType(), StructType()) == [] + + def test_flat_schema(self) -> None: + schema = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + assert compare_schemas(schema, schema) == [] + + def test_nested_struct(self) -> None: + schema = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + ] + ), + True, + ), + ] + ) + assert compare_schemas(schema, schema) == [] + + def test_array_of_structs(self) -> None: + schema = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + ] + ) + ), + True, + ), + ] + ) + assert compare_schemas(schema, schema) == [] + + +class TestMissingFields: + def test_missing_in_actual(self) -> None: + actual = StructType([StructField("id", StringType(), True)]) + expected = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + result = compare_schemas(actual, 
expected) + assert result == [SchemaMismatch("version", "missing", "IntegerType")] + + def test_extra_in_actual(self) -> None: + actual = StructType( + [ + StructField("id", StringType(), True), + StructField("extra", StringType(), True), + ] + ) + expected = StructType([StructField("id", StringType(), True)]) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("extra", "StringType", "missing")] + + +class TestTypeMismatches: + def test_top_level_type_mismatch(self) -> None: + actual = StructType([StructField("version", StringType(), True)]) + expected = StructType([StructField("version", IntegerType(), True)]) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("version", "StringType", "IntegerType")] + + def test_nested_struct_mismatch(self) -> None: + actual = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", IntegerType(), True), + ] + ), + True, + ), + ] + ) + expected = StructType( + [ + StructField( + "bbox", + StructType( + [ + StructField("xmin", DoubleType(), True), + ] + ), + True, + ), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("bbox.xmin", "IntegerType", "DoubleType")] + + def test_array_element_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", ArrayType(IntegerType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", ArrayType(StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags[]", "IntegerType", "StringType")] + + def test_array_struct_field_mismatch(self) -> None: + actual = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", IntegerType(), True), + ] + ) + ), + True, + ), + ] + ) + expected = StructType( + [ + StructField( + "items", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + ] + ) + ), + True, + ), + ] + ) + result = 
compare_schemas(actual, expected) + assert result == [SchemaMismatch("items[].name", "IntegerType", "StringType")] + + def test_map_key_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", MapType(IntegerType(), StringType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", MapType(StringType(), StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags{key}", "IntegerType", "StringType")] + + def test_map_value_type_mismatch(self) -> None: + actual = StructType( + [ + StructField("tags", MapType(StringType(), IntegerType()), True), + ] + ) + expected = StructType( + [ + StructField("tags", MapType(StringType(), StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("tags{value}", "IntegerType", "StringType")] + + +class TestFieldOrdering: + def test_different_order_is_ok(self) -> None: + actual = StructType( + [ + StructField("b", StringType(), True), + StructField("a", IntegerType(), True), + ] + ) + expected = StructType( + [ + StructField("a", IntegerType(), True), + StructField("b", StringType(), True), + ] + ) + assert compare_schemas(actual, expected) == [] + + +class TestMultipleMismatches: + def test_missing_and_extra_and_wrong_type(self) -> None: + actual = StructType( + [ + StructField("id", IntegerType(), True), + StructField("extra", StringType(), True), + ] + ) + expected = StructType( + [ + StructField("id", StringType(), True), + StructField("version", IntegerType(), True), + ] + ) + result = compare_schemas(actual, expected) + assert SchemaMismatch("id", "IntegerType", "StringType") in result + assert SchemaMismatch("extra", "StringType", "missing") in result + assert SchemaMismatch("version", "missing", "IntegerType") in result + + +class TestKindMismatch: + def test_struct_vs_primitive(self) -> None: + actual = StructType([StructField("x", StringType(), True)]) + expected = StructType( + [ + 
StructField( + "x", + StructType( + [ + StructField("y", StringType(), True), + ] + ), + True, + ), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("x", "StringType", "StructType")] + + def test_array_vs_primitive(self) -> None: + actual = StructType([StructField("x", StringType(), True)]) + expected = StructType( + [ + StructField("x", ArrayType(StringType()), True), + ] + ) + result = compare_schemas(actual, expected) + assert result == [SchemaMismatch("x", "StringType", "ArrayType")] diff --git a/packages/overture-schema-pyspark/tests/test_check.py b/packages/overture-schema-pyspark/tests/test_check.py new file mode 100644 index 000000000..681add76b --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_check.py @@ -0,0 +1,20 @@ +"""Tests for Check dataclass and CheckShape enum.""" + +import dataclasses + +import pytest +from overture.schema.pyspark.check import Check, CheckShape +from pyspark.sql import SparkSession +from pyspark.sql import functions as F + + +def test_check_is_frozen(spark: SparkSession) -> None: + check = Check( + field="subtype", + name="required", + expr=F.lit("error"), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + with pytest.raises(dataclasses.FrozenInstanceError): + check.field = "other" # type: ignore[misc] diff --git a/packages/overture-schema-pyspark/tests/test_cli.py b/packages/overture-schema-pyspark/tests/test_cli.py new file mode 100644 index 000000000..037d6aaeb --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_cli.py @@ -0,0 +1,475 @@ +"""Tests for CLI entry points.""" + +from collections.abc import Iterator +from pathlib import Path + +import pytest +from click.testing import CliRunner +from overture.schema.pyspark._registry import REGISTRY +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.cli import ( + ReadSpec, + _spark_config, + read_feature, + resolve_read, + validate_cli, +) +from 
pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import StringType, StructField, StructType + +_TEST_TYPE = "_test_cli" + +# Tests that branch on registered geometry types require the runtime registry +# to be populated (i.e. generated expression modules present). +_requires_generated = pytest.mark.skipif( + not REGISTRY, reason="requires generated expression modules" +) + + +class TestSparkConfig: + """Tests for S3A auto-configuration.""" + + @_requires_generated + def test_large_geometry_disables_vectorized_reader(self) -> None: + config = _spark_config( + "samples/segment.parquet", (), "overture.schema.transportation:Segment" + ) + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + @_requires_generated + def test_point_geometry_keeps_vectorized_reader(self) -> None: + config = _spark_config( + "samples/place.parquet", (), "overture.schema.places:Place" + ) + assert "spark.sql.parquet.enableVectorizedReader" not in config + + def test_unspecified_geometry_disables_vectorized_reader(self) -> None: + # _TEST_TYPE registers no geometry_types -- safe default disables the reader + config = _spark_config("samples/test.parquet", (), _TEST_TYPE) + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + def test_s3a_path_applies_defaults(self) -> None: + config = _spark_config("s3a://bucket/path", (), _TEST_TYPE) + assert "org.apache.hadoop:hadoop-aws" in config["spark.jars.packages"] + assert "S3AFileSystem" in config["spark.hadoop.fs.s3a.impl"] + assert ( + "AnonymousAWSCredentialsProvider" + in config["spark.hadoop.fs.s3a.aws.credentials.provider"] + ) + + def test_user_conf_overrides_s3a_defaults(self) -> None: + config = _spark_config( + "s3a://bucket/path", + ( + "spark.hadoop.fs.s3a.aws.credentials.provider=" + "software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider", + ), + _TEST_TYPE, + ) + assert ( + "ProfileCredentialsProvider" + in 
config["spark.hadoop.fs.s3a.aws.credentials.provider"] + ) + + def test_user_conf_merges_with_s3a_defaults(self) -> None: + config = _spark_config( + "s3a://bucket/path", ("spark.master=local[4]",), _TEST_TYPE + ) + assert config["spark.master"] == "local[4]" + assert "spark.jars.packages" in config + + def test_local_path_passes_user_conf(self) -> None: + config = _spark_config( + "samples/test.parquet", ("spark.master=local[4]",), _TEST_TYPE + ) + assert config["spark.master"] == "local[4]" + assert config["spark.sql.parquet.enableVectorizedReader"] == "false" + + +def _test_checks() -> list[Check]: + """Minimal checks for CLI testing: value must be 'good'.""" + return [ + Check( + field="value", + name="enum", + expr=F.when(F.col("value") != "good", F.lit("not good")), + shape=CheckShape.SCALAR, + root_field="value", + ), + ] + + +@pytest.fixture(autouse=True) +def _register_test_checks() -> Iterator[None]: + REGISTRY[_TEST_TYPE] = FeatureValidation( + schema=StructType( + [ + StructField("id", StringType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("value", StringType(), True), + ] + ), + checks=_test_checks, + ) + yield + del REGISTRY[_TEST_TYPE] + + +def test_validate_missing_args() -> None: + runner = CliRunner() + result = runner.invoke(validate_cli, []) + assert result.exit_code != 0 + + +def test_validate_unknown_type() -> None: + runner = CliRunner() + result = runner.invoke(validate_cli, ["nonexistent", "/dev/null"]) + assert result.exit_code != 0 + assert "nonexistent" in result.output + + +def test_validate_clean_data(spark: SparkSession, tmp_path: Path) -> None: + """Valid data exits 0, no output file written.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good")] + ).write.parquet(input_path) + + runner = CliRunner() + result = 
runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + assert not Path(output_path).exists() + + +def test_validate_error_count(spark: SparkSession, tmp_path: Path) -> None: + """Rows with errors are counted in summary.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "1 / 1 rows with errors" in result.output + + +def test_validate_shows_error_rows(spark: SparkSession, tmp_path: Path) -> None: + """Error rows are displayed with violation columns.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="row1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "row1" in result.output + assert "value" in result.output + + +def test_validate_head_zero(spark: SparkSession, tmp_path: Path) -> None: + """--head 0 suppresses the error row table.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [Row(id="row1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "-o", output_path, "--head", "0"] + ) + assert result.exit_code != 0 + assert "1 / 1 rows with errors" in result.output + assert "row1" not in result.output + + +def test_validate_schema_mismatch_exits(spark: SparkSession, tmp_path: Path) -> None: + 
"""Schema mismatch prints diff and exits before validation.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + # Write data with wrong schema (IntegerType where StringType expected) + spark.createDataFrame( + [Row(id="r1", value=42)], schema="id string, value int" + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + assert "value" in result.output + + +def test_validate_skip_schema_check(spark: SparkSession, tmp_path: Path) -> None: + """--skip-schema-check warns on mismatches but continues validation.""" + input_path = str(tmp_path / "input.parquet") + + # Extra column causes a mismatch but doesn't break check evaluation + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good", extra="x")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--skip-schema-check"] + ) + assert "Schema mismatch" in result.output + assert "rows with errors" in result.output + + +def test_validate_skip_columns(spark: SparkSession, tmp_path: Path) -> None: + """--skip-columns skips checks for absent columns.""" + input_path = str(tmp_path / "input.parquet") + + # Data missing 'value' column — declare it absent via --skip-columns + spark.createDataFrame([Row(id="r1", theme="test", type="test_cli")]).write.parquet( + input_path + ) + + runner = CliRunner() + result = runner.invoke( + validate_cli, + [_TEST_TYPE, input_path, "--skip-columns", "value", "--skip-schema-check"], + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_ignore_extra_columns(spark: SparkSession, tmp_path: Path) -> None: + """--ignore-extra-columns suppresses 'expected missing' schema mismatches.""" + input_path = str(tmp_path / 
"input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="good", extra="x")] + ).write.parquet(input_path) + + runner = CliRunner() + # Without the flag, schema mismatch exits + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + + # With the flag, extra column is tolerated + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--ignore-extra-columns", "extra"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_suppress_field(spark: SparkSession, tmp_path: Path) -> None: + """--suppress FIELD removes all checks on that field.""" + input_path = str(tmp_path / "input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--suppress", "value"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_suppress_field_check(spark: SparkSession, tmp_path: Path) -> None: + """--suppress FIELD:CHECK removes a specific check.""" + input_path = str(tmp_path / "input.parquet") + + spark.createDataFrame( + [Row(id="r1", theme="test", type="test_cli", value="bad")] + ).write.parquet(input_path) + + runner = CliRunner() + result = runner.invoke( + validate_cli, [_TEST_TYPE, input_path, "--suppress", "value:enum"] + ) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output + + +def test_validate_output_contains_explained_violations( + spark: SparkSession, tmp_path: Path +) -> None: + """Output Parquet contains explain() violations with field/check/message.""" + input_path = str(tmp_path / "input.parquet") + output_path = str(tmp_path / "output.parquet") + + spark.createDataFrame( + [ + Row(id="r1", 
theme="test", type="test_cli", value="good"), + Row(id="r2", theme="test", type="test_cli", value="bad"), + ] + ).write.parquet(input_path) + + runner = CliRunner() + runner.invoke(validate_cli, [_TEST_TYPE, input_path, "-o", output_path]) + + result_df = spark.read.parquet(output_path) + assert {"field", "check", "message"} <= set(result_df.columns) + assert result_df.count() == 1 # one violation from r2 + + +_BATHYMETRY_PARTITIONS = {"theme": "base", "type": "bathymetry"} +_SEGMENT_PARTITIONS = {"theme": "transportation", "type": "segment"} + + +class TestResolveRead: + """Pure-function tests for path resolution logic.""" + + def test_release_root(self) -> None: + spec = resolve_read("/data/release/2026-02-18.0/", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry", + base_path="/data/release/2026-02-18.0", + ) + + def test_release_root_no_trailing_slash(self) -> None: + spec = resolve_read("/data/release/2026-02-18.0", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry", + base_path="/data/release/2026-02-18.0", + ) + + def test_leaf_partition(self) -> None: + spec = resolve_read( + "/data/release/2026-02-18.0/theme=base/type=bathymetry/", + _BATHYMETRY_PARTITIONS, + ) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/type=bathymetry/", + base_path="/data/release/2026-02-18.0", + ) + + def test_theme_partition_without_type(self) -> None: + spec = resolve_read( + "/data/release/2026-02-18.0/theme=base/", _BATHYMETRY_PARTITIONS + ) + assert spec == ReadSpec( + data_path="/data/release/2026-02-18.0/theme=base/", + base_path="/data/release/2026-02-18.0", + ) + + def test_individual_file(self) -> None: + spec = resolve_read("/tmp/bathymetry.parquet", _BATHYMETRY_PARTITIONS) + assert spec == ReadSpec(data_path="/tmp/bathymetry.parquet") + + def test_individual_file_no_partitions(self) -> None: + spec = 
resolve_read("/tmp/data.parquet", None) + assert spec == ReadSpec(data_path="/tmp/data.parquet") + + def test_plain_directory_no_partitions(self) -> None: + spec = resolve_read("/tmp/data/", None) + assert spec == ReadSpec(data_path="/tmp/data/") + + def test_s3a_release_root(self) -> None: + spec = resolve_read("s3a://bucket/release/2026-02-18.0/", _SEGMENT_PARTITIONS) + assert spec == ReadSpec( + data_path="s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment", + base_path="s3a://bucket/release/2026-02-18.0", + ) + + def test_s3a_leaf_partition(self) -> None: + spec = resolve_read( + "s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment/", + _SEGMENT_PARTITIONS, + ) + assert spec == ReadSpec( + data_path="s3a://bucket/release/2026-02-18.0/theme=transportation/type=segment/", + base_path="s3a://bucket/release/2026-02-18.0", + ) + + +def _write_partitioned(spark: SparkSession, base_dir: Path, rows: list[Row]) -> None: + """Write test rows as Hive-partitioned Parquet under *base_dir*.""" + spark.createDataFrame(rows).write.partitionBy("theme", "type").parquet( + str(base_dir) + ) + + +class TestReadFeature: + """Integration tests: resolve_read + read_feature against local Parquet.""" + + def test_read_from_release_root(self, spark: SparkSession, tmp_path: Path) -> None: + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + spec = resolve_read(str(base), {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_read_from_leaf_partition( + self, spark: SparkSession, tmp_path: Path + ) -> None: + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + leaf = str(base / f"theme=test/type={_TEST_TYPE}") + spec = resolve_read(leaf, {"theme": "test", "type": _TEST_TYPE}) 
+ df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_read_from_individual_file( + self, spark: SparkSession, tmp_path: Path + ) -> None: + file_path = str(tmp_path / "data.parquet") + spark.createDataFrame( + [Row(id="r1", theme="test", type=_TEST_TYPE, value="good")] + ).write.parquet(file_path) + spec = resolve_read(file_path, {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert set(df.columns) >= {"id", "theme", "type", "value"} + + def test_release_root_filters_to_type( + self, spark: SparkSession, tmp_path: Path + ) -> None: + """Only the target type's rows are returned from a multi-type release.""" + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [ + Row(id="r1", value="good", theme="test", type=_TEST_TYPE), + Row(id="r2", value="good", theme="test", type="other"), + ], + ) + spec = resolve_read(str(base), {"theme": "test", "type": _TEST_TYPE}) + df = read_feature(spark, spec) + assert df.count() == 1 + assert df.collect()[0]["id"] == "r1" + + +def test_validate_from_partitioned_release(spark: SparkSession, tmp_path: Path) -> None: + """Full CLI round-trip reading from a Hive-partitioned release root.""" + base = tmp_path / "release" + _write_partitioned( + spark, + base, + [Row(id="r1", value="good", theme="test", type=_TEST_TYPE)], + ) + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, str(base)]) + assert result.exit_code == 0, result.output + assert "0 / 1 rows with errors" in result.output diff --git a/packages/overture-schema-pyspark/tests/test_harness.py b/packages/overture-schema-pyspark/tests/test_harness.py new file mode 100644 index 000000000..188bdc3ac --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_harness.py @@ -0,0 +1,361 @@ +"""Tests for the conformance test harness.""" + +from __future__ import annotations + +import re + +import pytest +from 
overture.schema.pyspark.check import Check, CheckShape +from pyspark.sql import Row, SparkSession +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + IntegerType, + StringType, + StructField, + StructType, +) + +from ._support.harness import ( + assert_schema_covers_checks, + build_scenario_map, + build_scenario_rows, + index_violations, + sanitize_row, + scenario_uuid, +) +from ._support.helpers import PathTraversalError, set_at_path +from ._support.scenarios import Scenario + + +class TestScenarioUuid: + def test_deterministic(self) -> None: + """Same ID produces same UUID.""" + assert scenario_uuid("building::id:required") == scenario_uuid( + "building::id:required" + ) + + def test_different_ids_different_uuids(self) -> None: + assert scenario_uuid("a::b:c") != scenario_uuid("d::e:f") + + def test_valid_uuid_format(self) -> None: + uuid_str = scenario_uuid("test::x:y") + assert re.match( + r"^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", + uuid_str, + ) + + +class TestBuildScenarioMap: + def test_scenarios_get_valid_and_invalid_entries(self) -> None: + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + scenario_map = build_scenario_map(scenarios, feature_name="f") + assert scenario_uuid("f::x:required::valid") in scenario_map + assert ( + scenario_map[scenario_uuid("f::x:required::valid")] + == "f::x:required::valid" + ) + assert scenario_uuid("f::x:required::invalid") in scenario_map + assert ( + scenario_map[scenario_uuid("f::x:required::invalid")] + == "f::x:required::invalid" + ) + + def test_baseline_plus_two_entries_per_scenario(self) -> None: + scenarios = [ + Scenario( + id="f::x:check", + scaffold={}, + mutate=set_at_path("x", 0), + expected_field="x", + expected_check="check", + ), + ] + scenario_map = build_scenario_map(scenarios, feature_name="f") + # baseline + (::valid, ::invalid) 
for the one scenario + assert len(scenario_map) == 3 + + def test_duplicate_id_values_raises(self) -> None: + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + with pytest.raises(ValueError, match="Duplicate"): + build_scenario_map(scenarios, feature_name="f") + + +class TestBuildScenarioRows: + def test_baseline_row_included(self) -> None: + base = {"id": "original-uuid", "theme": "buildings", "type": "building", "x": 1} + rows, scenario_map, skipped = build_scenario_rows( + base, [], feature_name="building" + ) + assert len(rows) == 1 + assert rows[0]["theme"] == "buildings" + assert "_scenario_id" in rows[0] + + def test_path_traversal_error_skips(self) -> None: + """Mutation functions that raise PathTraversalError produce skips.""" + base = {"theme": "t", "type": "ty"} + + def bad_mutation(row: dict) -> dict: + raise PathTraversalError("cannot traverse") + + scenarios = [ + Scenario( + id="f::x:check", + scaffold={}, + mutate=bad_mutation, + expected_field="x", + expected_check="check", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + assert len(rows) == 1 + assert "f::x:check" in skipped + + def test_scenario_creates_valid_and_invalid_rows(self) -> None: + """Each Scenario produces both a valid and an invalid row.""" + base = {"id": "orig", "theme": "t", "type": "ty", "x": 1} + scenarios = [ + Scenario( + id="f::x:required", + scaffold={}, + mutate=set_at_path("x", None), + expected_field="x", + expected_check="required", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + # baseline + valid + invalid + assert len(rows) == 3 + assert rows[1]["x"] == 1 # valid row is a copy of base_row + assert rows[2]["x"] is None 
+ assert rows[1]["_scenario_id"] == scenario_uuid("f::x:required::valid") + assert rows[2]["_scenario_id"] == scenario_uuid("f::x:required::invalid") + + def test_valid_row_uses_base_row_not_scaffold(self) -> None: + """Valid row is a copy of base_row, not the scaffold-merged row.""" + base = {"id": "orig", "theme": "t", "type": "ty", "items": [{"a": 1, "b": 2}]} + scenarios = [ + Scenario( + id="f::items[].a:required", + scaffold={"items": [{"a": 0}]}, + mutate=set_at_path("items[].a", None), + expected_field="items[].a", + expected_check="required", + ), + ] + rows, scenario_map, skipped = build_scenario_rows( + base, scenarios, feature_name="f" + ) + assert len(rows) == 3 + # Valid row uses base_row (preserves all fields in items element) + assert rows[1]["items"] == [{"a": 1, "b": 2}] + # Invalid row uses scaffold-merged row + assert rows[2]["items"][0]["a"] is None + + def test_scaffold_merged_onto_invalid_row(self) -> None: + base_row = {"id": "x", "a": 1} + s = Scenario( + id="test::b:check", + scaffold={"b": 10}, + mutate=set_at_path("b", 0), + expected_field="b", + expected_check="check", + ) + rows, scenario_map, skipped = build_scenario_rows( + base_row, [s], feature_name="test" + ) + invalid_id = scenario_uuid("test::b:check::invalid") + invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) + # base field preserved, scaffold provides b, path overrides b + assert invalid_row["a"] == 1 + assert invalid_row["b"] == 0 + + def test_applies_scaffold_then_mutation(self) -> None: + base_row = {"id": "x", "a": 1} + s = Scenario( + id="test::model:check", + scaffold={"b": 10}, + mutate=lambda row: {**row, "a": None}, + expected_field="a", + expected_check="required", + ) + rows, scenario_map, skipped = build_scenario_rows( + base_row, [s], feature_name="test" + ) + assert len(rows) == 3 + assert not skipped + invalid_id = scenario_uuid("test::model:check::invalid") + invalid_row = next(r for r in rows if r["_scenario_id"] == invalid_id) + # 
scaffold merged: b exists + assert invalid_row["b"] == 10 + # mutation applied: a is None + assert invalid_row["a"] is None + + +class TestSanitizeRow: + def test_nested_geometry_converted(self) -> None: + row = { + "id": "x", + "nested": {"geometry": "POINT (1 2)"}, + } + result = sanitize_row(row) + assert isinstance(result["nested"]["geometry"], bytes) + + def test_top_level_geometry_converted(self) -> None: + row = {"id": "x", "geometry": "POINT (1 2)"} + result = sanitize_row(row) + assert isinstance(result["geometry"], bytes) + + def test_non_wkt_string_at_geometry_key_unchanged(self) -> None: + row = {"id": "x", "geometry": "not-a-geometry"} + result = sanitize_row(row) + assert result["geometry"] == "not-a-geometry" + + def test_non_geometry_keys_unchanged(self) -> None: + row = {"id": "x", "name": "POINT (1 2)"} + result = sanitize_row(row) + assert result["name"] == "POINT (1 2)" + + +class TestSchemaAssertions: + def test_assert_schema_covers_checks_passes(self, spark: SparkSession) -> None: + schema = StructType( + [ + StructField("id", StringType()), + StructField("x", IntegerType()), + ] + ) + checks = [ + Check( + field="id", + name="required", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="id", + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + def test_assert_schema_covers_synthetic_field(self, spark: SparkSession) -> None: + schema = StructType([StructField("sources", ArrayType(StringType()))]) + checks = [ + Check( + field="sources_min_length", + name="min_length", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="sources", + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + def test_assert_schema_covers_checks_missing_field( + self, spark: SparkSession + ) -> None: + schema = StructType([StructField("id", StringType())]) + checks = [ + Check( + field="missing", + name="required", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field="missing", + ) + ] + with 
pytest.raises(AssertionError, match="missing"): + assert_schema_covers_checks(schema, checks) + + def test_assert_schema_covers_synthetic_model_check( + self, spark: SparkSession + ) -> None: + """root_field=None passes regardless of schema (radio_group, etc.).""" + schema = StructType([StructField("id", StringType())]) + checks = [ + Check( + field="radio_group", + name="radio_group", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field=None, + ) + ] + assert_schema_covers_checks(schema, checks) # should not raise + + +class TestIndexViolations: + def test_groups_by_scenario_id(self) -> None: + uuid_a = scenario_uuid("f::a:required") + uuid_b = scenario_uuid("f::b:enum") + scenario_map = {uuid_a: "f::a:required", uuid_b: "f::b:enum"} + violation_rows = [ + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="required", + message="missing", + ), + Row( + _scenario_id=uuid_b, + x=2, + field="b", + check="enum", + message="invalid", + ), + ] + result = index_violations(violation_rows, scenario_map) + assert result["f::a:required"] == {("a", "required")} + assert result["f::b:enum"] == {("b", "enum")} + + def test_multiple_violations_per_scenario(self) -> None: + uuid_a = scenario_uuid("f::a:r") + scenario_map = {uuid_a: "f::a:r"} + violation_rows = [ + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="required", + message="m1", + ), + Row( + _scenario_id=uuid_a, + x=1, + field="a", + check="bounds", + message="m2", + ), + ] + result = index_violations(violation_rows, scenario_map) + assert result["f::a:r"] == {("a", "required"), ("a", "bounds")} + + def test_empty_violations(self) -> None: + result = index_violations([], {}) + assert result == {} diff --git a/packages/overture-schema-pyspark/tests/test_helpers.py b/packages/overture-schema-pyspark/tests/test_helpers.py new file mode 100644 index 000000000..b202ce3d1 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_helpers.py @@ -0,0 +1,147 @@ +"""Tests for the conformance test 
helpers.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from ._support.helpers import PathTraversalError, deep_merge, set_at_path + + +class TestSetAtPath: + def test_simple_field(self) -> None: + row = {"name": "Alice"} + result = set_at_path("name", "Bob")(row) + assert result["name"] == "Bob" + + def test_does_not_mutate_original(self) -> None: + row = {"name": "Alice"} + set_at_path("name", "Bob")(row) + assert row["name"] == "Alice" + + def test_nested_field(self) -> None: + row = {"outer": {"inner": "old"}} + result = set_at_path("outer.inner", "new")(row) + assert result["outer"]["inner"] == "new" + + def test_array_index_zero(self) -> None: + row = {"items": [{"value": 1}, {"value": 2}]} + result = set_at_path("items[].value", 99)(row) + assert result["items"][0]["value"] == 99 + assert result["items"][1]["value"] == 2 # untouched + + def test_set_to_none(self) -> None: + row = {"country": "US"} + result = set_at_path("country", None)(row) + assert result["country"] is None + + def test_nested_array(self) -> None: + row = {"rules": [{"tags": [{"v": "x"}]}]} + result = set_at_path("rules[].tags[].v", "y")(row) + assert result["rules"][0]["tags"][0]["v"] == "y" + + def test_deep_nested(self) -> None: + row = {"a": {"b": {"c": {"d": "old"}}}} + result = set_at_path("a.b.c.d", "new")(row) + assert result["a"]["b"]["c"]["d"] == "new" + + def test_returns_callable(self) -> None: + mutate = set_at_path("a.b", 1) + assert callable(mutate) + assert mutate({"a": {"b": 0}}) == {"a": {"b": 1}} + + +class TestSetAtPathTraversalErrors: + def test_raises_on_empty_array(self) -> None: + row: dict[str, Any] = {"items": []} + with pytest.raises(PathTraversalError): + set_at_path("items[].value", "x")(row) + + def test_raises_on_empty_nested_array(self) -> None: + row: dict[str, Any] = {"names": {"rules": []}} + with pytest.raises(PathTraversalError): + set_at_path("names.rules[].value", "x")(row) + + def 
test_error_message_empty_array_names_path(self) -> None: + row: dict[str, Any] = {"names": {"rules": []}} + with pytest.raises(PathTraversalError, match="rules"): + set_at_path("names.rules[].value", "x")(row) + + def test_raises_on_empty_path(self) -> None: + mutator = set_at_path("", "x") + with pytest.raises(PathTraversalError, match="Empty path"): + mutator({}) + + +class TestSetAtPathScaffolding: + def test_null_struct_intermediate_scaffolded(self) -> None: + row = {"id": "x", "names": None} + result = set_at_path("names.primary", "test")(row) + assert result["names"]["primary"] == "test" + + def test_null_array_intermediate_scaffolded(self) -> None: + row = {"id": "x", "rules": None} + result = set_at_path("rules[].value", "test")(row) + assert result["rules"][0]["value"] == "test" + + def test_null_nested_struct_in_array_scaffolded(self) -> None: + row = {"id": "x", "items": [{"nested": None}]} + result = set_at_path("items[].nested.field", "test")(row) + assert result["items"][0]["nested"]["field"] == "test" + + def test_deep_null_chain_scaffolded(self) -> None: + row = {"id": "x", "a": None} + result = set_at_path("a.b[].c", "test")(row) + assert result["a"]["b"][0]["c"] == "test" + + def test_chained_calls_preserve_prior_content(self) -> None: + """Chaining set_at_path preserves values set by prior calls.""" + row = {"items": None} + with_kind = set_at_path("items[].kind", "height")(row) + with_both = set_at_path("items[].value", 5.2)(with_kind) + assert with_both["items"][0]["kind"] == "height" + assert with_both["items"][0]["value"] == 5.2 + + def test_chained_calls_through_deep_null_path(self) -> None: + """Chained calls scaffold and preserve through deeply nested nulls.""" + row = {"outer": None} + with_disc = set_at_path("outer[].inner[].dimension", "height")(row) + with_value = set_at_path("outer[].inner[].value", None)(with_disc) + assert with_value["outer"][0]["inner"][0]["dimension"] == "height" + assert 
with_value["outer"][0]["inner"][0]["value"] is None + + +class TestDeepMerge: + def test_flat_merge(self) -> None: + base = {"a": 1, "b": 2} + scaffold = {"b": 3, "c": 4} + assert deep_merge(base, scaffold) == {"a": 1, "b": 3, "c": 4} + + def test_nested_dict_merge(self) -> None: + base = {"a": {"x": 1, "y": 2}} + scaffold = {"a": {"y": 3, "z": 4}} + assert deep_merge(base, scaffold) == {"a": {"x": 1, "y": 3, "z": 4}} + + def test_array_replace(self) -> None: + base = {"items": [{"a": 1}]} + scaffold = {"items": [{"b": 2}]} + assert deep_merge(base, scaffold) == {"items": [{"b": 2}]} + + def test_does_not_mutate_base(self) -> None: + base = {"a": {"x": 1}} + scaffold = {"a": {"y": 2}} + result = deep_merge(base, scaffold) + assert "y" not in base["a"] + assert result == {"a": {"x": 1, "y": 2}} + + def test_empty_scaffold(self) -> None: + base = {"a": 1} + assert deep_merge(base, {}) == {"a": 1} + + def test_scaffold_adds_new_key(self) -> None: + base = {"a": 1} + scaffold = {"speed_limits": [{"max_speed": {"value": 60}}]} + result = deep_merge(base, scaffold) + assert result["a"] == 1 + assert result["speed_limits"] == [{"max_speed": {"value": 60}}] diff --git a/packages/overture-schema-pyspark/tests/test_mutations.py b/packages/overture-schema-pyspark/tests/test_mutations.py new file mode 100644 index 000000000..f4e233dad --- /dev/null +++ b/packages/overture-schema-pyspark/tests/test_mutations.py @@ -0,0 +1,263 @@ +"""Tests for model-level mutation functions.""" + +import pytest + +from ._support.helpers import PathTraversalError +from ._support.mutations import ( + mutate_forbid_if, + mutate_min_fields_set, + mutate_radio_group, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) + + +class TestMutateRequireAnyOf: + def test_nulls_all_named_fields(self) -> None: + row = {"a": 1, "b": 2, "c": 3} + result = mutate_require_any_of(row, ["a", "b"]) + assert result["a"] is None + assert result["b"] is None + assert result["c"] == 3 + + def 
test_does_not_mutate_original(self) -> None: + row = {"a": 1, "b": 2} + mutate_require_any_of(row, ["a"]) + assert row["a"] == 1 + + +class TestMutateRadioGroup: + def test_sets_two_fields_to_true(self) -> None: + row = {"is_land": True, "is_territorial": False, "other": "x"} + result = mutate_radio_group(row, ["is_land", "is_territorial"]) + assert result["is_land"] is True + assert result["is_territorial"] is True + + def test_does_not_mutate_original(self) -> None: + row = {"a": False, "b": False} + mutate_radio_group(row, ["a", "b"]) + assert row["a"] is False + + +class TestMutateMinFieldsSet: + def test_nulls_all_named_fields(self) -> None: + row = {"a": 1, "b": 2, "c": 3} + result = mutate_min_fields_set(row, ["a", "b", "c"]) + assert result["a"] is None + assert result["b"] is None + assert result["c"] is None + + def test_leaves_unlisted_fields_alone(self) -> None: + row = {"a": 1, "b": 2, "other": "keep"} + result = mutate_min_fields_set(row, ["a", "b"]) + assert result["other"] == "keep" + + def test_does_not_mutate_original(self) -> None: + row = {"a": 1, "b": 2} + mutate_min_fields_set(row, ["a", "b"]) + assert row["a"] == 1 + + def test_with_array_path_nulls_inside_each_element(self) -> None: + row = {"items": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]} + result = mutate_min_fields_set(row, ["a", "b"], array_path="items") + assert result["items"] == [{"a": None, "b": None}, {"a": None, "b": None}] + + +class TestMutateRequireIf: + def test_sets_condition_and_nulls_targets(self) -> None: + row = {"subtype": "other", "admin_level": 5} + result = mutate_require_if(row, ["admin_level"], "subtype", "country") + assert result["subtype"] == "country" + assert result["admin_level"] is None + + def test_does_not_mutate_original(self) -> None: + row = {"subtype": "other", "admin_level": 5} + mutate_require_if(row, ["admin_level"], "subtype", "country") + assert row["subtype"] == "other" + + +class TestMutateForbidIf: + def 
test_sets_condition_and_ensures_non_null(self) -> None: + row = {"subtype": "other", "admin_level": None} + result = mutate_forbid_if(row, ["admin_level"], "subtype", "country") + assert result["subtype"] == "country" + assert result["admin_level"] is not None + + def test_preserves_existing_non_null(self) -> None: + row = {"subtype": "other", "admin_level": 5} + result = mutate_forbid_if(row, ["admin_level"], "subtype", "country") + assert result["admin_level"] == 5 + + def test_uses_fill_value_for_array_field(self) -> None: + row = {"subtype": "other", "destinations": None} + result = mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + fill_values={"destinations": [{}]}, + ) + assert result["destinations"] == [{}] + + def test_uses_fill_value_for_struct_field(self) -> None: + row = {"subtype": "other", "road_surface": None} + result = mutate_forbid_if( + row, + ["road_surface"], + "subtype", + "road", + fill_values={"road_surface": {}}, + ) + assert result["road_surface"] == {} + + def test_fill_value_ignored_when_field_already_non_null(self) -> None: + row = {"subtype": "other", "destinations": [{"id": "x"}]} + result = mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + fill_values={"destinations": [{}]}, + ) + assert result["destinations"] == [{"id": "x"}] + + +class TestMutateRequireAnyOfNested: + def test_nulls_fields_within_array_elements(self) -> None: + row = { + "items": [ + {"a": 1, "b": 2, "c": 3}, + {"a": 4, "b": 5, "c": 6}, + ] + } + result = mutate_require_any_of(row, ["a", "b"], array_path="items") + for item in result["items"]: + assert item["a"] is None + assert item["b"] is None + assert item["c"] is not None + + def test_nulls_fields_within_nested_struct(self) -> None: + row = { + "items": [ + {"when": {"a": 1, "b": 2}}, + ] + } + result = mutate_require_any_of( + row, ["a", "b"], array_path="items", struct_path="when" + ) + assert result["items"][0]["when"]["a"] is None + assert 
result["items"][0]["when"]["b"] is None + + def test_creates_stub_element_when_array_is_null(self) -> None: + row = {"items": None} + result = mutate_require_any_of(row, ["a", "b"], array_path="items") + assert isinstance(result["items"], list) + assert len(result["items"]) == 1 + assert result["items"][0]["a"] is None + assert result["items"][0]["b"] is None + + def test_creates_stub_with_struct_path_when_null(self) -> None: + row = {"items": None} + result = mutate_require_any_of( + row, ["a", "b"], array_path="items", struct_path="when" + ) + assert result["items"][0]["when"]["a"] is None + assert result["items"][0]["when"]["b"] is None + + def test_does_not_mutate_original(self) -> None: + row = {"items": [{"a": 1, "b": 2}]} + mutate_require_any_of(row, ["a", "b"], array_path="items") + assert row["items"][0]["a"] == 1 + + +class TestMutateForbidIfNegate: + def test_negate_changes_condition_value(self) -> None: + """negate=True sets condition_field to something != condition_value.""" + row = {"subtype": "road", "destinations": [{"id": "x"}]} + result = mutate_forbid_if(row, ["destinations"], "subtype", "road", negate=True) + assert result["subtype"] != "road" + assert result["destinations"] is not None + + def test_negate_preserves_non_matching_value(self) -> None: + """When condition_field already != condition_value, leave it.""" + row = {"subtype": "water", "class": "canal"} + result = mutate_forbid_if(row, ["class"], "subtype", "road", negate=True) + assert result["subtype"] == "water" + + +class TestMutateRequireIfNegate: + def test_negate_changes_condition_value(self) -> None: + """negate=True sets condition_field to something != condition_value.""" + row = {"subtype": "road", "class": "motorway"} + result = mutate_require_if(row, ["class"], "subtype", "road", negate=True) + assert result["subtype"] != "road" + assert result["class"] is None + + def test_negate_preserves_non_matching_value(self) -> None: + """When condition_field already != 
condition_value, leave it.""" + row = {"subtype": "water", "class": "canal"} + result = mutate_require_if(row, ["class"], "subtype", "road", negate=True) + assert result["subtype"] == "water" + assert result["class"] is None + + +class TestMutateUniqueItems: + def test_duplicates_first_element(self) -> None: + row = {"ids": [{"value": "a"}, {"value": "b"}]} + result = mutate_unique_items(row, "ids") + assert result["ids"][0] == result["ids"][1] + assert len(result["ids"]) == 3 + + def test_nested_path(self) -> None: + row = {"outer": {"ids": [{"v": 1}, {"v": 2}]}} + result = mutate_unique_items(row, "outer.ids") + assert result["outer"]["ids"][0] == result["outer"]["ids"][1] + + def test_does_not_mutate_original(self) -> None: + row = {"ids": [{"value": "a"}, {"value": "b"}]} + mutate_unique_items(row, "ids") + assert len(row["ids"]) == 2 + + def test_bracket_path_enters_array_element(self) -> None: + row = {"restrictions": [{"when": {"mode": [{"type": "car"}, {"type": "bus"}]}}]} + result = mutate_unique_items(row, "restrictions[].when.mode") + mode = result["restrictions"][0]["when"]["mode"] + assert mode[0] == mode[1] + assert len(mode) == 3 + + def test_empty_array_returns_unchanged(self) -> None: + row: dict = {"items": []} + result = mutate_unique_items(row, "items") + assert result["items"] == [] + + def test_none_array_raises_traversal_error(self) -> None: + row: dict = {"ids": None} + with pytest.raises(PathTraversalError): + mutate_unique_items(row, "ids") + + def test_missing_key_raises_traversal_error(self) -> None: + row: dict = {"other": "x"} + with pytest.raises(PathTraversalError): + mutate_unique_items(row, "missing.nested") + + def test_nested_bracket_deep(self) -> None: + """Two levels of bracket nesting.""" + row: dict = {"outer": [{"inner": [{"vals": [{"x": 1}]}]}]} + result = mutate_unique_items(row, "outer[].inner[].vals") + vals = result["outer"][0]["inner"][0]["vals"] + assert vals[0] == vals[1] + + def 
def _build_check(
    shape: CheckShape,
    field: str,
    name: str,
    expr: F.Column,
    root_field: str | None,
) -> Check:
    """Assemble a `Check` of the given shape.

    When `root_field` is None the check's own `field` is used as its root.
    """
    resolved_root = field if root_field is None else root_field
    return Check(
        field=field,
        name=name,
        expr=expr,
        shape=shape,
        root_field=resolved_root,
    )


def _scalar_check(
    field: str, name: str, expr: F.Column, *, root_field: str | None = None
) -> Check:
    """Build a `CheckShape.SCALAR` check; `root_field` defaults to `field`."""
    return _build_check(CheckShape.SCALAR, field, name, expr, root_field)


def _array_check(
    field: str, name: str, expr: F.Column, *, root_field: str | None = None
) -> Check:
    """Build a `CheckShape.ARRAY` check; `root_field` defaults to `field`."""
    return _build_check(CheckShape.ARRAY, field, name, expr, root_field)
class TestEvaluateChecks:
    """Tests for evaluate_checks()."""

    def test_appends_error_columns(self, spark: SparkSession) -> None:
        source = spark.createDataFrame([_row()])
        always_fails = [_scalar_check("value", "required", F.lit("fail"))]
        out = evaluate_checks(source, always_fails)
        assert "_err_0" in out.columns
        # Strict subset: all input columns survive and at least one is added.
        assert set(source.columns) < set(out.columns)

    def test_multiple_checks(self, spark: SparkSession) -> None:
        source = spark.createDataFrame([_row()])
        first = _scalar_check("a", "c1", F.lit("e1"))
        second = _scalar_check("b", "c2", F.lit("e2"))
        out = evaluate_checks(source, [first, second])
        # One positional error column per check.
        for expected in ("_err_0", "_err_1"):
            assert expected in out.columns

    def test_error_column_is_array_string(self, spark: SparkSession) -> None:
        source = spark.createDataFrame([_row()])
        always_fails = [_scalar_check("value", "required", F.lit("fail"))]
        collected = evaluate_checks(source, always_fails).collect()
        assert collected[0]["_err_0"] == ["fail"]

    def test_null_error_for_passing_check(self, spark: SparkSession) -> None:
        source = spark.createDataFrame([_row()])
        # A null message expression stands for a check that passes.
        always_passes = [_scalar_check("value", "ok", F.lit(None).cast("string"))]
        collected = evaluate_checks(source, always_passes).collect()
        assert collected[0]["_err_0"] == []
evaluated = evaluate_checks(df, checks) + result = filter_errors(evaluated, checks) + assert result.count() == 0 + + def test_strips_error_columns(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + result = filter_errors(evaluated, checks) + assert not any(c.startswith("_err_") for c in result.columns) + assert set(result.columns) == set(df.columns) + + def test_preserves_schema(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + result = filter_errors(evaluated, checks) + assert result.schema == df.schema + + def test_mixed_rows(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row(id="pass"), _row(id="fail")]) + checks = [ + _scalar_check( + "id", + "not_fail", + F.when(F.col("id") == "fail", F.lit("bad")), + ), + ] + evaluated = evaluate_checks(df, checks) + result = filter_errors(evaluated, checks) + assert result.count() == 1 + assert result.collect()[0]["id"] == "fail" + + +class TestExplainErrors: + """Tests for explain_errors().""" + + def test_scalar_violation(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "required", F.lit("missing"))] + evaluated = evaluate_checks(df, checks) + result = explain_errors(evaluated, checks) + rows = result.collect() + assert len(rows) == 1 + assert rows[0]["field"] == "value" + assert rows[0]["check"] == "required" + assert rows[0]["message"] == "missing" + + def test_array_violation(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_array_check("arr", "elem", F.array(F.lit("e1"), F.lit("e2")))] + evaluated = evaluate_checks(df, checks) + result = explain_errors(evaluated, checks) + messages = sorted(r["message"] for r in result.collect()) + assert 
messages == ["e1", "e2"] + + def test_no_violations(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "ok", F.lit(None).cast("string"))] + evaluated = evaluate_checks(df, checks) + result = explain_errors(evaluated, checks) + assert result.count() == 0 + + def test_preserves_original_columns(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("value", "required", F.lit("fail"))] + evaluated = evaluate_checks(df, checks) + result = explain_errors(evaluated, checks) + rows = result.collect() + assert rows[0]["id"] == "id1" + assert set(result.columns) == {*df.columns, "field", "check", "message"} + + def test_output_columns(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row()]) + checks = [_scalar_check("x", "required", F.lit("err"))] + evaluated = evaluate_checks(df, checks) + result = explain_errors(evaluated, checks) + expected_cols = {*df.columns, "field", "check", "message"} + assert set(result.columns) == expected_cols + + def test_empty_checks_returns_empty_dataframe_with_schema( + self, spark: SparkSession + ) -> None: + # Regression: explain_errors([]) on rows with no checks must + # return a typed empty DataFrame, not invoke `stack(0, ...)` + # (which Spark rejects). Consumers expect the standard + # `field/check/message` columns even when nothing fired. + df = spark.createDataFrame([_row()]) + result = explain_errors(df, []) + assert result.count() == 0 + assert set(result.columns) == {*df.columns, "field", "check", "message"} + + +class TestUserErrColumn: + """`_err_` is reserved; user `_err_*` names are passed through.""" + + def test_user_err_named_column_preserved(self, spark: SparkSession) -> None: + # Regression: `_orig_columns` strips only `_err_`. A + # user-supplied column like `_err_foo` must survive + # filter_errors / explain_errors round-trips. 
class TestSinglePassPipeline:
    """Tests for the evaluate-once pattern used by the CLI."""

    def test_shared_evaluated_gives_same_results(self, spark: SparkSession) -> None:
        """filter_errors + explain_errors from the same evaluated DataFrame."""
        frame = spark.createDataFrame([_row(id="ok"), _row(id="bad")])
        flag_bad = F.when(F.col("id") == "bad", F.lit("is bad"))
        checks = [_scalar_check("id", "not_bad", flag_bad)]

        # Evaluate once, then derive both views from the same DataFrame.
        evaluated = evaluate_checks(frame, checks)
        failing_rows = filter_errors(evaluated, checks)
        violations = explain_errors(evaluated, checks)

        assert failing_rows.count() == 1
        assert failing_rows.collect()[0]["id"] == "bad"
        assert violations.count() == 1
        assert violations.collect()[0]["field"] == "id"
"radio_group")} + + def test_mixed(self, spark: SparkSession) -> None: + check = Check( + field="radio_group", + name="radio_group", + expr=F.lit(None), + shape=CheckShape.SCALAR, + root_field=None, + ) + roots, pairs = _normalize_suppress( + [ + "sources", + ("theme", "enum"), + check, + ] + ) + assert roots == {"sources"} + assert pairs == {("theme", "enum"), ("radio_group", "radio_group")} + + +# These exercise the populated REGISTRY built by runtime discovery, so they +# require generated expression modules to be present on disk. When the +# generated tree is absent (e.g. a fresh checkout before `make +# generate-pyspark`), the registry is empty and these assertions can't hold. +_requires_generated = pytest.mark.skipif( + not REGISTRY, reason="requires generated expression modules" +) + + +@_requires_generated +def test_feature_names_includes_aliases() -> None: + result = feature_names() + assert isinstance(result, list) + assert result == sorted(result) + assert "building" in result + assert "segment" in result + assert "overture.schema.buildings:Building" in result + + +@_requires_generated +def test_feature_keys_only_canonical() -> None: + result = feature_keys() + assert isinstance(result, list) + assert result == sorted(result) + assert "overture.schema.buildings:Building" in result + assert "building" not in result + + +class TestValidationResult: + def test_error_rows_delegates_to_filter_errors(self, spark: SparkSession) -> None: + df = spark.createDataFrame([_row(id="ok"), _row(id="bad")]) + checks = [ + _scalar_check( + "id", + "not_bad", + F.when(F.col("id") == "bad", F.lit("is bad")), + ), + ] + evaluated = evaluate_checks(df, checks) + result = ValidationResult( + evaluated=evaluated, + checks=checks, + schema_mismatches=[], + suppressed_checks=[], + ) + error_rows = result.error_rows() + assert error_rows.count() == 1 + assert error_rows.collect()[0]["id"] == "bad" + assert not any(c.startswith("_err_") for c in error_rows.columns) + + def 
def _vf_checks() -> list[Check]:
    """Build the scalar checks for the synthetic feature type used in these tests.

    Returned in a fixed order: theme enum, value required, sources min length.
    """
    # (field, name, expr, root_field) -- one row per check.
    specs = (
        (
            "theme",
            "enum",
            F.when(F.col("theme") != "test", F.lit("bad theme")),
            "theme",
        ),
        (
            "value",
            "required",
            F.when(F.col("value").isNull(), F.lit("missing")),
            "value",
        ),
        (
            "sources_min_length",
            "min_length",
            F.when(F.length("sources") < 1, F.lit("too short")),
            "sources",
        ),
    )
    return [
        Check(
            field=field,
            name=name,
            expr=expr,
            shape=CheckShape.SCALAR,
            root_field=root,
        )
        for field, name, expr, root in specs
    ]
3 + assert result.error_rows().count() == 0 + + def test_skip_columns_errors_if_present(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="skip_columns.*theme.*present"): + validate_feature(vf_df, _VF_TYPE, skip_columns=["theme"]) + + def test_skip_columns_filters_checks(self, spark: SparkSession) -> None: + schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + check_fields = [c.field for c in result.checks] + assert "theme" not in check_fields + assert "value" in check_fields + + def test_skip_columns_filters_schema_mismatches(self, spark: SparkSession) -> None: + schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE, skip_columns=["theme"]) + mismatch_fields = [m.path for m in result.schema_mismatches] + assert "theme" not in mismatch_fields + + def test_ignore_extra_columns(self, spark: SparkSession) -> None: + schema_extra = StructType( + _VF_SCHEMA.fields + [StructField("extra_score", StringType(), True)] + ) + df = spark.createDataFrame( + [ + Row( + id="1", + theme="test", + type=_VF_TYPE, + value="ok", + sources="s", + extra_score="9", + ) + ], + schema=schema_extra, + ) + result = validate_feature(df, _VF_TYPE, ignore_extra_columns=["extra_score"]) + mismatch_paths = [m.path for m in result.schema_mismatches] + assert "extra_score" not in mismatch_paths + + def test_suppress_unknown_root_raises(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="unknown root fields.*typo_field"): + validate_feature(vf_df, _VF_TYPE, suppress=["typo_field"]) + + def test_suppress_unknown_pair_raises(self, vf_df: DataFrame) -> 
None: + with pytest.raises(ValueError, match=r"unknown \(field, name\) pairs"): + validate_feature(vf_df, _VF_TYPE, suppress=[("theme", "wrong_name")]) + + def test_suppress_mixed_unknown_lists_both(self, vf_df: DataFrame) -> None: + with pytest.raises(ValueError, match="unknown root fields.*unknown"): + validate_feature( + vf_df, + _VF_TYPE, + suppress=["typo_field", ("theme", "wrong_name")], + ) + + def test_suppress_bare_string(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, suppress=["sources"]) + check_fields = [c.field for c in result.checks] + assert not any(f.startswith("sources") for f in check_fields) + assert len(result.suppressed_checks) == 1 + assert result.suppressed_checks[0].field == "sources_min_length" + + def test_suppress_tuple(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, suppress=[("value", "required")]) + check_fields_names = [(c.field, c.name) for c in result.checks] + assert ("value", "required") not in check_fields_names + assert len(result.suppressed_checks) == 1 + + def test_suppress_check_object(self, vf_df: DataFrame) -> None: + initial = validate_feature(vf_df, _VF_TYPE) + target = [c for c in initial.checks if c.name == "required"][0] + result = validate_feature(vf_df, _VF_TYPE, suppress=[target]) + # Column objects can't be compared with ==, so compare by (field, name) + result_pairs = [(c.field, c.name) for c in result.checks] + suppressed_pairs = [(c.field, c.name) for c in result.suppressed_checks] + assert (target.field, target.name) not in result_pairs + assert (target.field, target.name) in suppressed_pairs + + def test_evaluated_has_err_columns(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE) + err_cols = [c for c in result.evaluated.columns if c.startswith("_err_")] + assert len(err_cols) == len(result.checks) + + def test_suppressed_checks_not_in_checks(self, vf_df: DataFrame) -> None: + result = validate_feature(vf_df, _VF_TYPE, 
# An uppercase run followed by a capitalised word: split before the last capital.
_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])")
# A lowercase letter or digit followed by a capital.
_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])")


def to_snake_case(name: str) -> str:
    """Convert PascalCase to snake_case.

    Acronym runs stay together: "HTMLParser" becomes "html_parser",
    not "h_t_m_l_parser".

    >>> to_snake_case("HTMLParser")
    'html_parser'
    >>> to_snake_case("BuildingPart")
    'building_part'
    >>> to_snake_case("simple")
    'simple'
    """
    underscored = name
    # Acronym boundaries go first so the camel pass sees "HTML_Parser".
    for boundary in (_ACRONYM_BOUNDARY, _CAMEL_BOUNDARY):
        underscored = boundary.sub(r"\1_\2", underscored)
    return underscored.lower()
def resolve_entry_point_key(name: str, registry: Mapping[str, object]) -> str:
    """Resolve a user-supplied name to a canonical entry-point key.

    Tries exact match first, then snake-case class-name alias. Raises
    `ValueError` when the alias is ambiguous (matches more than one
    registered key) or when the name is unknown.

    Parameters
    ----------
    name
        User-supplied identifier: an entry-point key or a snake-case
        class-name alias.
    registry
        Mapping whose keys are entry-point strings.

    Returns
    -------
    str
        The canonical registry key.

    Raises
    ------
    ValueError
        If `name` matches multiple registry entries via alias, or no
        registry entry at all. The message lists the candidates or the
        known keys; an empty registry is reported explicitly.
    """
    if name in registry:
        return name
    candidates = sorted(k for k in registry if entry_point_class_alias(k) == name)
    if len(candidates) == 1:
        return candidates[0]
    if candidates:
        raise ValueError(
            f"Entry-point alias {name!r} is ambiguous. "
            f"Specify one of: {', '.join(candidates)}"
        )
    # With nothing registered the message would otherwise end in a dangling
    # "Known: ", which hides the real problem (an empty registry).
    known = ", ".join(sorted(registry)) or "(none registered)"
    raise ValueError(f"Unknown entry-point alias {name!r}. Known: {known}")
`list[list[X]]` parses as a single `ArraySegment` with + `iter_count=2`). + +The canonical string form (`str(path)`) round-trips through `parse`. +Code that needs to emit a path into source or labels calls `str(path)` +at the boundary; everything else operates on segments. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TypeAlias + +__all__ = [ + "ArrayPath", + "ArraySegment", + "FieldPath", + "PathSegment", + "ScalarPath", + "StructSegment", + "coerce", + "parse", + "promote_terminal_array", +] + + +@dataclass(frozen=True, slots=True) +class StructSegment: + """A struct field navigation step.""" + + name: str + + +@dataclass(frozen=True, slots=True) +class ArraySegment: + """An array column entered with one or more levels of iteration. + + `iter_count` records the number of `[]` markers immediately following + the segment name; values > 1 correspond to nested lists like + `list[list[X]]`. + """ + + name: str + iter_count: int = 1 + + +PathSegment: TypeAlias = StructSegment | ArraySegment + + +@dataclass(frozen=True, slots=True) +class ScalarPath: + """Locate a non-iterated value in a row.""" + + segments: tuple[StructSegment, ...] = () + + def append_struct(self, name: str) -> ScalarPath: + return ScalarPath(segments=self.segments + (StructSegment(name=name),)) + + def append_array(self, name: str, iter_count: int = 1) -> ArrayPath: + return ArrayPath( + segments=self.segments + (ArraySegment(name=name, iter_count=iter_count),) + ) + + def __str__(self) -> str: + return ".".join(s.name for s in self.segments) + + +@dataclass(frozen=True, slots=True) +class ArrayPath: + """Locate an iterated value; iteration structure is part of the location. + + Invariant: `segments` contains at least one `ArraySegment`. + """ + + segments: tuple[PathSegment, ...] 
@property
def leaf(self) -> tuple[str, ...]:
    """Names of the segments that follow the last ArraySegment.

    Empty when the path ends on an ArraySegment.
    """
    segs = self.segments
    # Scan from the end; the first hit is the last ArraySegment.
    last_array = next(
        idx
        for idx in reversed(range(len(segs)))
        if isinstance(segs[idx], ArraySegment)
    )
    return tuple(seg.name for seg in segs[last_array + 1 :])
+ """ + chunks: list[tuple[tuple[str, ...], str, int]] = [] + prefix: list[str] = [] + for seg in self.segments: + if isinstance(seg, ArraySegment): + chunks.append((tuple(prefix), seg.name, seg.iter_count)) + prefix = [] + else: + prefix.append(seg.name) + return tuple(chunks) + + def element_relative_gate(self, gate: FieldPath) -> tuple[str, ...] | None: + """Path inside this array's element scope that names *gate*. + + Three return states: + + - ``tuple[str, ...]`` (non-empty) -- "reachable with descent": + `gate` enters the same outer array as this path and names a + struct descendant inside its element. The returned segments + name that descendant relative to the element. + - ``()`` -- "reachable, no descent": `gate` is the outer array + itself; the element variable IS the gated value. + - ``None`` -- "not reachable": `gate` does not cross into this + path's element scope (different outer array, scalar gate, + mismatched struct prefix, etc.). Callers must apply the gate + at column level instead. + + Raises `NotImplementedError` when `gate` enters the same outer + array but contains a nested `ArraySegment` past the boundary; + the element scope is a struct, so a gate path inside it must be + struct-only. + + Example: `parse("items[].x").element_relative_gate(parse( + "items[].nested")) == ("nested",)`. 
+ """ + column_prefix = self.column_prefix.segments + n_prefix = len(column_prefix) + if not isinstance(gate, ArrayPath): + return None + gate_segs = gate.segments + if len(gate_segs) <= n_prefix: + return None + for i in range(n_prefix): + if not isinstance(gate_segs[i], StructSegment): + return None + if gate_segs[i].name != column_prefix[i].name: + return None + target_first_array_name = self.segments[n_prefix].name + gate_boundary = gate_segs[n_prefix] + if not isinstance(gate_boundary, ArraySegment): + return None + if gate_boundary.name != target_first_array_name: + return None + inner_segments = gate_segs[n_prefix + 1 :] + for seg in inner_segments: + if not isinstance(seg, StructSegment): + raise NotImplementedError( + f"gate path contains a nested array segment past the " + f"element boundary (gate={gate!r}, self={self!r})" + ) + return tuple(s.name for s in inner_segments) + + @property + def iter_struct_paths(self) -> tuple[tuple[str, ...], ...]: + """Per non-outermost iteration: the struct path that reaches its array. + + For each ArraySegment past the first, emit `(prefix_structs + + array_name)` -- the navigation FROM the previous iteration's + element TO this array. For each `iter_count > 1` on an + ArraySegment, emit `iter_count - 1` additional `()` entries + representing extra iterations inside the same (already-named) + array. + + Returns an empty tuple when the path iterates only once. 
+ """ + paths: list[tuple[str, ...]] = [] + for chunk_idx, (prefix_structs, arr_name, iter_count) in enumerate( + self.array_chunks + ): + if chunk_idx > 0: + paths.append(prefix_structs + (arr_name,)) + for _ in range(iter_count - 1): + paths.append(()) + return tuple(paths) + + def __str__(self) -> str: + return ".".join(_segment_str(s) for s in self.segments) + + +FieldPath: TypeAlias = ScalarPath | ArrayPath + + +def _segment_str(seg: PathSegment) -> str: + if isinstance(seg, ArraySegment): + return seg.name + "[]" * seg.iter_count + return seg.name + + +def parse(encoded: str) -> FieldPath: + """Parse a canonical encoded path like `"items[].nested.value"`. + + Trailing `[]` markers on a dotted part produce an `ArraySegment` + with matching `iter_count`. The empty string returns the empty + `ScalarPath`. Raises `ValueError` when any dotted part has an empty + name (e.g. `".a"`, `"a..b"`, `"[]"`). + """ + if not encoded: + return ScalarPath() + segments: list[PathSegment] = [] + struct_segments: list[StructSegment] = [] + has_array = False + for part in encoded.split("."): + depth = 0 + while part.endswith("[]"): + part = part[:-2] + depth += 1 + if not part: + raise ValueError(f"FieldPath part has empty name in {encoded!r}") + if depth > 0: + has_array = True + segments.append(ArraySegment(name=part, iter_count=depth)) + else: + struct = StructSegment(name=part) + segments.append(struct) + struct_segments.append(struct) + if has_array: + return ArrayPath(segments=tuple(segments)) + return ScalarPath(segments=tuple(struct_segments)) + + +def coerce(value: FieldPath | str) -> FieldPath: + """Return *value* as a `FieldPath`, parsing it from string if needed.""" + if isinstance(value, str): + return parse(value) + return value + + +def promote_terminal_array(path: FieldPath) -> ArrayPath: + """Promote *path*'s terminal segment to an iterated `ArraySegment`. 
+ + A `StructSegment` terminal is *replaced* with `ArraySegment(name, + iter_count=1)`; an `ArraySegment` terminal has its `iter_count` + incremented. This is how a walker records entering a `list[...]` + layer on the field it is already pointing at -- unlike `append_array`, + which adds a new segment for a fresh nested array. Repeated calls + build the multi-iteration terminal of a `list[list[X]]` field. + + Raises `ValueError` on an empty path: there is no terminal segment + to promote. + """ + if not path.segments: + raise ValueError("cannot promote the terminal of an empty path") + *prefix, last = path.segments + if isinstance(last, ArraySegment): + promoted = ArraySegment(name=last.name, iter_count=last.iter_count + 1) + else: + promoted = ArraySegment(name=last.name, iter_count=1) + return ArrayPath(segments=(*prefix, promoted)) diff --git a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py index 14a1ebae1..0ba1be6ce 100644 --- a/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py +++ b/packages/overture-schema-system/tests/field_constraint/test_string_constraints.py @@ -1,3 +1,4 @@ +import re from typing import Annotated import pytest @@ -210,7 +211,6 @@ class TestModel(BaseModel): def test_stripped_constraint_json_schema_pattern(self) -> None: """StrippedConstraint's JSON schema pattern accepts empty string and rejects leading/trailing whitespace.""" - import re class TestModel(BaseModel): text: Annotated[str, StrippedConstraint()] diff --git a/packages/overture-schema-codegen/tests/test_naming.py b/packages/overture-schema-system/tests/test_case.py similarity index 67% rename from packages/overture-schema-codegen/tests/test_naming.py rename to packages/overture-schema-system/tests/test_case.py index 77e4d5773..21cddcb5a 100644 --- a/packages/overture-schema-codegen/tests/test_naming.py +++ 
b/packages/overture-schema-system/tests/test_case.py @@ -1,7 +1,8 @@ """Tests for PascalCase to snake_case conversion.""" import pytest -from overture.schema.codegen.extraction.case_conversion import to_snake_case + +from overture.schema.system.case import to_snake_case class TestToSnakeCase: @@ -14,10 +15,11 @@ class TestToSnakeCase: ("BuildingPart", "building_part"), ("RoadSegment", "road_segment"), ("Place", "place"), - ("simple", "simple"), # Already lowercase - ("HTTPServer", "http_server"), # Consecutive caps + ("simple", "simple"), + ("HTTPServer", "http_server"), + ("HTMLParser", "html_parser"), ], ) def test_converts_pascal_to_snake(self, input_name: str, expected: str) -> None: - """PascalCase names should convert to snake_case.""" + """PascalCase names convert to snake_case; acronyms collapse.""" assert to_snake_case(input_name) == expected diff --git a/packages/overture-schema-system/tests/test_discovery_entry_point.py b/packages/overture-schema-system/tests/test_discovery_entry_point.py new file mode 100644 index 000000000..3f8c766af --- /dev/null +++ b/packages/overture-schema-system/tests/test_discovery_entry_point.py @@ -0,0 +1,97 @@ +"""Tests for entry-point string utilities.""" + +from pathlib import PurePosixPath + +import pytest + +from overture.schema.system.discovery.entry_point import ( + entry_point_class_alias, + entry_point_to_path, + resolve_entry_point_key, +) + + +class TestEntryPointToPath: + def test_typical_overture_entry_point(self) -> None: + path, cls = entry_point_to_path("overture.schema.places:Place") + assert path == PurePosixPath("overture/schema/places") + assert cls == "Place" + + def test_single_segment_module(self) -> None: + path, cls = entry_point_to_path("myschema:Foo") + assert path == PurePosixPath("myschema") + assert cls == "Foo" + + def test_deeply_nested_module(self) -> None: + path, cls = entry_point_to_path("a.b.c.d.e:Thing") + assert path == PurePosixPath("a/b/c/d/e") + assert cls == "Thing" + + def 
test_missing_colon_raises(self) -> None: + with pytest.raises(ValueError, match="module:Class"): + entry_point_to_path("overture.schema.places.Place") + + def test_class_name_with_dot_kept(self) -> None: + # Class name after the colon is taken verbatim — Python class + # names can't contain dots, but we don't validate. + path, cls = entry_point_to_path("a.b:Outer.Inner") + assert path == PurePosixPath("a/b") + assert cls == "Outer.Inner" + + +class TestEntryPointClassAlias: + def test_returns_snake_case_class_name(self) -> None: + assert entry_point_class_alias("overture.schema.places:Place") == "place" + + def test_handles_pascal_case_class(self) -> None: + assert ( + entry_point_class_alias("overture.schema.buildings:BuildingPart") + == "building_part" + ) + + def test_handles_acronyms(self) -> None: + assert ( + entry_point_class_alias("overture.schema.places:HTMLParser") + == "html_parser" + ) + + def test_bare_name_is_snake_cased(self) -> None: + # Tolerant of registry keys that aren't entry-point-formatted — + # the snake-case form of the whole string is returned. 
+ assert entry_point_class_alias("BareName") == "bare_name" + + +class TestResolveEntryPointKey: + def test_exact_match(self) -> None: + registry = {"overture.schema.places:Place": object()} + assert ( + resolve_entry_point_key("overture.schema.places:Place", registry) + == "overture.schema.places:Place" + ) + + def test_snake_case_alias_match(self) -> None: + registry = {"overture.schema.places:Place": object()} + assert ( + resolve_entry_point_key("place", registry) == "overture.schema.places:Place" + ) + + def test_ambiguous_lists_candidates(self) -> None: + registry = { + "overture.schema.places:Place": object(), + "annex.schema.places:Place": object(), + } + with pytest.raises(ValueError, match="ambiguous"): + resolve_entry_point_key("place", registry) + + def test_unknown_lists_known(self) -> None: + registry = {"overture.schema.places:Place": object()} + with pytest.raises(ValueError, match="Unknown"): + resolve_entry_point_key("zzz", registry) + + def test_acronym_class_name_resolves(self) -> None: + registry = { + "ns.a:HTMLParser": object(), + "ns.b:HTMLParser": object(), + } + with pytest.raises(ValueError, match=r"ns\.a:HTMLParser"): + resolve_entry_point_key("html_parser", registry) diff --git a/packages/overture-schema-system/tests/test_field_path.py b/packages/overture-schema-system/tests/test_field_path.py new file mode 100644 index 000000000..0b3614ebf --- /dev/null +++ b/packages/overture-schema-system/tests/test_field_path.py @@ -0,0 +1,376 @@ +"""Tests for FieldPath, the structural path type for nested schemas.""" + +from __future__ import annotations + +import re + +import pytest + +from overture.schema.system.field_path import ( + ArrayPath, + ArraySegment, + ScalarPath, + StructSegment, + coerce, + parse, + promote_terminal_array, +) + + +class TestParseAndRoundTrip: + def test_empty_path_parses_to_empty_scalar(self) -> None: + assert parse("") == ScalarPath(segments=()) + + def test_single_segment(self) -> None: + path = parse("name") + 
assert path == ScalarPath(segments=(StructSegment(name="name"),)) + + def test_dotted_path(self) -> None: + path = parse("bbox.xmin") + assert path == ScalarPath( + segments=(StructSegment(name="bbox"), StructSegment(name="xmin")) + ) + + def test_array_segment(self) -> None: + path = parse("items[]") + assert path == ArrayPath(segments=(ArraySegment(name="items", iter_count=1),)) + + def test_array_with_nested_field(self) -> None: + path = parse("items[].value") + assert path == ArrayPath( + segments=( + ArraySegment(name="items", iter_count=1), + StructSegment(name="value"), + ) + ) + + def test_nested_list_depth(self) -> None: + path = parse("hierarchies[][]") + assert path == ArrayPath( + segments=(ArraySegment(name="hierarchies", iter_count=2),) + ) + + def test_nested_list_with_leaf(self) -> None: + path = parse("hierarchies[][].value") + assert path == ArrayPath( + segments=( + ArraySegment(name="hierarchies", iter_count=2), + StructSegment(name="value"), + ) + ) + + def test_complex_path(self) -> None: + path = parse("speed_limits[].when.vehicle[].dimension") + assert path == ArrayPath( + segments=( + ArraySegment(name="speed_limits", iter_count=1), + StructSegment(name="when"), + ArraySegment(name="vehicle", iter_count=1), + StructSegment(name="dimension"), + ) + ) + + @pytest.mark.parametrize( + "encoded", + [ + "", + "name", + "bbox.xmin", + "items[]", + "items[].value", + "hierarchies[][]", + "hierarchies[][].value", + "speed_limits[].when.vehicle[].dimension", + "tags_min_length", + ], + ) + def test_str_round_trip(self, encoded: str) -> None: + assert str(parse(encoded)) == encoded + + +class TestScalarVsArrayPartition: + def test_no_array_returns_scalar_path(self) -> None: + assert isinstance(parse("a.b.c"), ScalarPath) + + def test_with_array_returns_array_path(self) -> None: + assert isinstance(parse("a.b[].c"), ArrayPath) + + def test_empty_is_scalar(self) -> None: + assert isinstance(parse(""), ScalarPath) + + +class TestStr: + def 
test_empty_renders_as_empty(self) -> None: + assert str(ScalarPath()) == "" + + def test_scalar_path_renders_dotted(self) -> None: + path = ScalarPath( + segments=(StructSegment(name="bbox"), StructSegment(name="xmin")) + ) + assert str(path) == "bbox.xmin" + + def test_array_path_renders_with_brackets(self) -> None: + path = ArrayPath( + segments=( + ArraySegment(name="speed_limits", iter_count=1), + StructSegment(name="when"), + ) + ) + assert str(path) == "speed_limits[].when" + + def test_array_path_renders_multi_depth(self) -> None: + path = ArrayPath(segments=(ArraySegment(name="hierarchies", iter_count=2),)) + assert str(path) == "hierarchies[][]" + + +class TestAppendStruct: + def test_scalar_append_struct_returns_scalar(self) -> None: + path = ScalarPath().append_struct("name") + assert path == parse("name") + assert isinstance(path, ScalarPath) + + def test_scalar_chain_struct(self) -> None: + path = ScalarPath().append_struct("bbox").append_struct("xmin") + assert path == parse("bbox.xmin") + + def test_array_append_struct_returns_array(self) -> None: + path = parse("items[]") + assert isinstance(path, ArrayPath) + result = path.append_struct("value") + assert result == parse("items[].value") + assert isinstance(result, ArrayPath) + + +class TestAppendArray: + def test_scalar_append_array_returns_array_path(self) -> None: + path = ScalarPath().append_array("items") + assert path == parse("items[]") + assert isinstance(path, ArrayPath) + + def test_scalar_append_array_after_struct(self) -> None: + path = ScalarPath().append_struct("outer").append_array("items") + assert path == parse("outer.items[]") + + def test_scalar_append_array_multi_depth(self) -> None: + path = ScalarPath().append_array("hierarchies", iter_count=2) + assert path == parse("hierarchies[][]") + + def test_array_append_array(self) -> None: + path = parse("outer[]") + assert isinstance(path, ArrayPath) + result = path.append_array("inner") + assert result == parse("outer[].inner[]") + + 
+class TestPromoteTerminalArray: + def test_scalar_struct_terminal_becomes_array(self) -> None: + assert promote_terminal_array(parse("tags")) == parse("tags[]") + + def test_struct_prefix_is_preserved(self) -> None: + assert promote_terminal_array(parse("outer.tags")) == parse("outer.tags[]") + + def test_struct_terminal_inside_array_path(self) -> None: + assert promote_terminal_array(parse("items[].tags")) == parse("items[].tags[]") + + def test_array_terminal_increments_iter_count(self) -> None: + assert promote_terminal_array(parse("tags[]")) == parse("tags[][]") + + def test_consecutive_promotions_stack(self) -> None: + assert promote_terminal_array(promote_terminal_array(parse("grid"))) == parse( + "grid[][]" + ) + + def test_array_terminal_inside_array_path(self) -> None: + assert promote_terminal_array(parse("items[].grid[]")) == parse( + "items[].grid[][]" + ) + + def test_empty_path_raises(self) -> None: + with pytest.raises(ValueError, match="empty path"): + promote_terminal_array(ScalarPath()) + + +class TestColumnPrefix: + def test_array_at_start_has_empty_prefix(self) -> None: + path = parse("items[].value") + assert isinstance(path, ArrayPath) + assert path.column_prefix == ScalarPath(()) + + def test_struct_prefix_before_array(self) -> None: + path = parse("parent.items[].value") + assert isinstance(path, ArrayPath) + assert path.column_prefix == parse("parent") + + def test_dotted_struct_prefix(self) -> None: + path = parse("a.b.c[].d") + assert isinstance(path, ArrayPath) + assert path.column_prefix == parse("a.b") + + +class TestLeaf: + def test_no_leaf_after_array(self) -> None: + path = parse("items[]") + assert isinstance(path, ArrayPath) + assert path.leaf == () + + def test_single_struct_leaf(self) -> None: + path = parse("items[].value") + assert isinstance(path, ArrayPath) + assert path.leaf == ("value",) + + def test_nested_struct_leaf(self) -> None: + path = parse("items[].nested.value") + assert isinstance(path, ArrayPath) + assert 
path.leaf == ("nested", "value") + + def test_uses_last_array(self) -> None: + path = parse("speed_limits[].when.vehicle[].dimension") + assert isinstance(path, ArrayPath) + assert path.leaf == ("dimension",) + + +class TestArrayChunks: + def test_single_top_level_array(self) -> None: + path = parse("items[]") + assert isinstance(path, ArrayPath) + assert path.array_chunks == (((), "items", 1),) + + def test_single_array_with_struct_prefix(self) -> None: + path = parse("parent.items[].value") + assert isinstance(path, ArrayPath) + assert path.array_chunks == ((("parent",), "items", 1),) + + def test_nested_arrays(self) -> None: + path = parse("speed_limits[].when.vehicle[].dimension") + assert isinstance(path, ArrayPath) + assert path.array_chunks == ( + ((), "speed_limits", 1), + (("when",), "vehicle", 1), + ) + + def test_multi_depth_array(self) -> None: + path = parse("hierarchies[][].value") + assert isinstance(path, ArrayPath) + assert path.array_chunks == (((), "hierarchies", 2),) + + +class TestIterStructPaths: + def test_single_iteration_is_empty(self) -> None: + path = parse("items[].value") + assert isinstance(path, ArrayPath) + assert path.iter_struct_paths == () + + def test_nested_arrays_emit_navigation_path(self) -> None: + path = parse("speed_limits[].when.vehicle[].dimension") + assert isinstance(path, ArrayPath) + assert path.iter_struct_paths == (("when", "vehicle"),) + + def test_multi_depth_array_expands_extra_iterations(self) -> None: + path = parse("hierarchies[][].value") + assert isinstance(path, ArrayPath) + assert path.iter_struct_paths == ((),) + + def test_multi_depth_inner_array_combines_navigation_and_expansion(self) -> None: + path = parse("rules[].tags[][].value") + assert isinstance(path, ArrayPath) + assert path.iter_struct_paths == (("tags",), ()) + + +class TestElementRelativeGate: + def test_gate_inside_same_outer_array(self) -> None: + target = parse("items[].value") + gate = parse("items[].nested") + assert isinstance(target, 
ArrayPath) + assert target.element_relative_gate(gate) == ("nested",) + + def test_gate_at_outer_array_root_returns_empty(self) -> None: + target = parse("items[].value") + gate = parse("items[]") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) == () + + def test_gate_with_dotted_struct_inside_element(self) -> None: + target = parse("items[].value") + gate = parse("items[].a.b") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) == ("a", "b") + + def test_scalar_gate_returns_none(self) -> None: + target = parse("items[].value") + gate = parse("other") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) is None + + def test_different_outer_array_returns_none(self) -> None: + target = parse("items[].value") + gate = parse("other[].x") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) is None + + def test_struct_prefix_must_match(self) -> None: + target = parse("parent.items[].value") + gate = parse("items[].x") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) is None + + def test_matching_struct_prefix(self) -> None: + target = parse("parent.items[].value") + gate = parse("parent.items[].x") + assert isinstance(target, ArrayPath) + assert target.element_relative_gate(gate) == ("x",) + + def test_inner_array_segment_raises(self) -> None: + target = parse("items[].value") + gate = parse("items[].nested[]") + assert isinstance(target, ArrayPath) + with pytest.raises(NotImplementedError, match="nested array segment"): + target.element_relative_gate(gate) + + +class TestArrayPathInvariant: + def test_rejects_segments_without_array(self) -> None: + with pytest.raises(ValueError, match="at least one ArraySegment"): + ArrayPath(segments=(StructSegment(name="a"),)) + + +class TestEqualityAndHashing: + def test_paths_with_same_segments_are_equal(self) -> None: + assert parse("items[].value") == 
parse("items[].value") + + def test_different_paths_unequal(self) -> None: + assert parse("items[].value") != parse("items[].other") + + def test_scalar_array_unequal(self) -> None: + assert parse("items") != parse("items[]") + + def test_hashable(self) -> None: + s = {parse("a.b"), parse("a.b"), parse("c")} + assert len(s) == 2 + + def test_string_is_not_equal_to_path(self) -> None: + assert parse("items[].value") != "items[].value" + + +class TestCoerce: + def test_passes_through_scalar(self) -> None: + path = parse("a.b") + assert coerce(path) is path + + def test_passes_through_array(self) -> None: + path = parse("items[].value") + assert coerce(path) is path + + def test_parses_string(self) -> None: + assert coerce("items[].value") == parse("items[].value") + + +class TestParseRejectsEmptyParts: + @pytest.mark.parametrize("encoded", [".a", "a..b", "[]", "a.[]", ".[]"]) + def test_raises_value_error_on_empty_part(self, encoded: str) -> None: + with pytest.raises(ValueError, match="empty name"): + parse(encoded) + + @pytest.mark.parametrize("encoded", [".a", "a..b", "[]"]) + def test_error_includes_input_string(self, encoded: str) -> None: + with pytest.raises(ValueError, match=re.escape(repr(encoded))): + parse(encoded) diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 51614e4ae..7824ec665 100644 --- a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -160,7 +160,6 @@ network = "za:regional" ref = "R33" # Rail segment: disused railway, Mpulungu, Zambia (2026-02-18.0) -# Populates rail_flags with values to cover the rail_flags[].values xfail. 
[[examples.Segment]] class = "unknown" geometry = "LINESTRING (30.9844394 -12.7185733, 30.9818611 -12.7207838, 30.9815908 -12.7210751)" diff --git a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py index 0d2685df9..260fd4574 100644 --- a/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py +++ b/packages/overture-schema-transportation-theme/src/overture/schema/transportation/models.py @@ -37,7 +37,7 @@ def _connector_type() -> type[OvertureFeature]: - from .connector import Connector + from .connector import Connector # noqa: PLC0415 return Connector diff --git a/pyproject.toml b/pyproject.toml index c21f4bc17..154546081 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ pythonpath = [ "packages/overture-schema-common/tests", "packages/overture-schema-divisions-theme/tests", "packages/overture-schema-places-theme/tests", + "packages/overture-schema-pyspark/tests", "packages/overture-schema-system/tests", "packages/overture-schema-transportation-theme/tests", "packages/overture-schema/tests", diff --git a/uv.lock b/uv.lock index e22235af7..e1cfed99f 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,8 @@ version = 1 revision = 3 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", "python_full_version < '3.11'", ] @@ -22,6 +23,7 @@ members = [ "overture-schema-common", "overture-schema-divisions-theme", "overture-schema-places-theme", + "overture-schema-pyspark", "overture-schema-system", "overture-schema-transportation-theme", "overture-schema-workspace", @@ -38,14 +40,14 @@ wheels = [ [[package]] name = "click" -version = "8.3.1" +version = "8.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = 
"sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, + { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, ] [[package]] @@ -59,101 +61,115 @@ wheels = [ [[package]] name = "coverage" -version = "7.13.2" +version = "7.13.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ad/49/349848445b0e53660e258acbcc9b0d014895b6739237920886672240f84b/coverage-7.13.2.tar.gz", hash = "sha256:044c6951ec37146b72a50cc81ef02217d27d4c3640efd2640311393cbbf143d3", size = 826523, upload-time = "2026-01-25T13:00:04.889Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a4/2d/63e37369c8e81a643afe54f76073b020f7b97ddbe698c5c944b51b0a2bc5/coverage-7.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4af3b01763909f477ea17c962e2cca8f39b350a4e46e3a30838b2c12e31b81b", size = 218842, upload-time = "2026-01-25T12:57:15.3Z" }, - { url = "https://files.pythonhosted.org/packages/57/06/86ce882a8d58cbcb3030e298788988e618da35420d16a8c66dac34f138d0/coverage-7.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:36393bd2841fa0b59498f75466ee9bdec4f770d3254f031f23e8fd8e140ffdd2", size = 219360, upload-time = "2026-01-25T12:57:17.572Z" }, - { url = "https://files.pythonhosted.org/packages/cd/84/70b0eb1ee19ca4ef559c559054c59e5b2ae4ec9af61398670189e5d276e9/coverage-7.13.2-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9cc7573518b7e2186bd229b1a0fe24a807273798832c27032c4510f47ffdb896", size = 246123, upload-time = "2026-01-25T12:57:19.087Z" }, - { url = "https://files.pythonhosted.org/packages/35/fb/05b9830c2e8275ebc031e0019387cda99113e62bb500ab328bb72578183b/coverage-7.13.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ca9566769b69a5e216a4e176d54b9df88f29d750c5b78dbb899e379b4e14b30c", size = 247930, upload-time = "2026-01-25T12:57:20.929Z" }, - { url = "https://files.pythonhosted.org/packages/81/aa/3f37858ca2eed4f09b10ca3c6ddc9041be0a475626cd7fd2712f4a2d526f/coverage-7.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c9bdea644e94fd66d75a6f7e9a97bb822371e1fe7eadae2cacd50fcbc28e4dc", size = 249804, upload-time = "2026-01-25T12:57:22.904Z" }, - { url = "https://files.pythonhosted.org/packages/b6/b3/c904f40c56e60a2d9678a5ee8df3d906d297d15fb8bec5756c3b0a67e2df/coverage-7.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5bd447332ec4f45838c1ad42268ce21ca87c40deb86eabd59888859b66be22a5", size = 246815, upload-time = 
"2026-01-25T12:57:24.314Z" }, - { url = "https://files.pythonhosted.org/packages/41/91/ddc1c5394ca7fd086342486440bfdd6b9e9bda512bf774599c7c7a0081e0/coverage-7.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7c79ad5c28a16a1277e1187cf83ea8dafdcc689a784228a7d390f19776db7c31", size = 247843, upload-time = "2026-01-25T12:57:26.544Z" }, - { url = "https://files.pythonhosted.org/packages/87/d2/cdff8f4cd33697883c224ea8e003e9c77c0f1a837dc41d95a94dd26aad67/coverage-7.13.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:76e06ccacd1fb6ada5d076ed98a8c6f66e2e6acd3df02819e2ee29fd637b76ad", size = 245850, upload-time = "2026-01-25T12:57:28.507Z" }, - { url = "https://files.pythonhosted.org/packages/f5/42/e837febb7866bf2553ab53dd62ed52f9bb36d60c7e017c55376ad21fbb05/coverage-7.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:49d49e9a5e9f4dc3d3dac95278a020afa6d6bdd41f63608a76fa05a719d5b66f", size = 246116, upload-time = "2026-01-25T12:57:30.16Z" }, - { url = "https://files.pythonhosted.org/packages/09/b1/4a3f935d7df154df02ff4f71af8d61298d713a7ba305d050ae475bfbdde2/coverage-7.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ed2bce0e7bfa53f7b0b01c722da289ef6ad4c18ebd52b1f93704c21f116360c8", size = 246720, upload-time = "2026-01-25T12:57:32.165Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fe/538a6fd44c515f1c5197a3f078094cbaf2ce9f945df5b44e29d95c864bff/coverage-7.13.2-cp310-cp310-win32.whl", hash = "sha256:1574983178b35b9af4db4a9f7328a18a14a0a0ce76ffaa1c1bacb4cc82089a7c", size = 221465, upload-time = "2026-01-25T12:57:33.511Z" }, - { url = "https://files.pythonhosted.org/packages/5e/09/4b63a024295f326ec1a40ec8def27799300ce8775b1cbf0d33b1790605c4/coverage-7.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:a360a8baeb038928ceb996f5623a4cd508728f8f13e08d4e96ce161702f3dd99", size = 222397, upload-time = "2026-01-25T12:57:34.927Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/01/abca50583a8975bb6e1c59eff67ed8e48bb127c07dad5c28d9e96ccc09ec/coverage-7.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:060ebf6f2c51aff5ba38e1f43a2095e087389b1c69d559fde6049a4b0001320e", size = 218971, upload-time = "2026-01-25T12:57:36.953Z" }, - { url = "https://files.pythonhosted.org/packages/eb/0e/b6489f344d99cd1e5b4d5e1be52dfd3f8a3dc5112aa6c33948da8cabad4e/coverage-7.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1ea8ca9db5e7469cd364552985e15911548ea5b69c48a17291f0cac70484b2e", size = 219473, upload-time = "2026-01-25T12:57:38.934Z" }, - { url = "https://files.pythonhosted.org/packages/17/11/db2f414915a8e4ec53f60b17956c27f21fb68fcf20f8a455ce7c2ccec638/coverage-7.13.2-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b780090d15fd58f07cf2011943e25a5f0c1c894384b13a216b6c86c8a8a7c508", size = 249896, upload-time = "2026-01-25T12:57:40.365Z" }, - { url = "https://files.pythonhosted.org/packages/80/06/0823fe93913663c017e508e8810c998c8ebd3ec2a5a85d2c3754297bdede/coverage-7.13.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:88a800258d83acb803c38175b4495d293656d5fac48659c953c18e5f539a274b", size = 251810, upload-time = "2026-01-25T12:57:42.045Z" }, - { url = "https://files.pythonhosted.org/packages/61/dc/b151c3cc41b28cdf7f0166c5fa1271cbc305a8ec0124cce4b04f74791a18/coverage-7.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6326e18e9a553e674d948536a04a80d850a5eeefe2aae2e6d7cf05d54046c01b", size = 253920, upload-time = "2026-01-25T12:57:44.026Z" }, - { url = "https://files.pythonhosted.org/packages/2d/35/e83de0556e54a4729a2b94ea816f74ce08732e81945024adee46851c2264/coverage-7.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:59562de3f797979e1ff07c587e2ac36ba60ca59d16c211eceaa579c266c5022f", size = 250025, upload-time = 
"2026-01-25T12:57:45.624Z" }, - { url = "https://files.pythonhosted.org/packages/39/67/af2eb9c3926ce3ea0d58a0d2516fcbdacf7a9fc9559fe63076beaf3f2596/coverage-7.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:27ba1ed6f66b0e2d61bfa78874dffd4f8c3a12f8e2b5410e515ab345ba7bc9c3", size = 251612, upload-time = "2026-01-25T12:57:47.713Z" }, - { url = "https://files.pythonhosted.org/packages/26/62/5be2e25f3d6c711d23b71296f8b44c978d4c8b4e5b26871abfc164297502/coverage-7.13.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8be48da4d47cc68754ce643ea50b3234557cbefe47c2f120495e7bd0a2756f2b", size = 249670, upload-time = "2026-01-25T12:57:49.378Z" }, - { url = "https://files.pythonhosted.org/packages/b3/51/400d1b09a8344199f9b6a6fc1868005d766b7ea95e7882e494fa862ca69c/coverage-7.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2a47a4223d3361b91176aedd9d4e05844ca67d7188456227b6bf5e436630c9a1", size = 249395, upload-time = "2026-01-25T12:57:50.86Z" }, - { url = "https://files.pythonhosted.org/packages/e0/36/f02234bc6e5230e2f0a63fd125d0a2093c73ef20fdf681c7af62a140e4e7/coverage-7.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6f141b468740197d6bd38f2b26ade124363228cc3f9858bd9924ab059e00059", size = 250298, upload-time = "2026-01-25T12:57:52.287Z" }, - { url = "https://files.pythonhosted.org/packages/b0/06/713110d3dd3151b93611c9cbfc65c15b4156b44f927fced49ac0b20b32a4/coverage-7.13.2-cp311-cp311-win32.whl", hash = "sha256:89567798404af067604246e01a49ef907d112edf2b75ef814b1364d5ce267031", size = 221485, upload-time = "2026-01-25T12:57:53.876Z" }, - { url = "https://files.pythonhosted.org/packages/16/0c/3ae6255fa1ebcb7dec19c9a59e85ef5f34566d1265c70af5b2fc981da834/coverage-7.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:21dd57941804ae2ac7e921771a5e21bbf9aabec317a041d164853ad0a96ce31e", size = 222421, upload-time = "2026-01-25T12:57:55.433Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/37/fabc3179af4d61d89ea47bd04333fec735cd5e8b59baad44fed9fc4170d7/coverage-7.13.2-cp311-cp311-win_arm64.whl", hash = "sha256:10758e0586c134a0bafa28f2d37dd2cdb5e4a90de25c0fc0c77dabbad46eca28", size = 221088, upload-time = "2026-01-25T12:57:57.41Z" }, - { url = "https://files.pythonhosted.org/packages/46/39/e92a35f7800222d3f7b2cbb7bbc3b65672ae8d501cb31801b2d2bd7acdf1/coverage-7.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f106b2af193f965d0d3234f3f83fc35278c7fb935dfbde56ae2da3dd2c03b84d", size = 219142, upload-time = "2026-01-25T12:58:00.448Z" }, - { url = "https://files.pythonhosted.org/packages/45/7a/8bf9e9309c4c996e65c52a7c5a112707ecdd9fbaf49e10b5a705a402bbb4/coverage-7.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f45d21dc4d5d6bd29323f0320089ef7eae16e4bef712dff79d184fa7330af3", size = 219503, upload-time = "2026-01-25T12:58:02.451Z" }, - { url = "https://files.pythonhosted.org/packages/87/93/17661e06b7b37580923f3f12406ac91d78aeed293fb6da0b69cc7957582f/coverage-7.13.2-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fae91dfecd816444c74531a9c3d6ded17a504767e97aa674d44f638107265b99", size = 251006, upload-time = "2026-01-25T12:58:04.059Z" }, - { url = "https://files.pythonhosted.org/packages/12/f0/f9e59fb8c310171497f379e25db060abef9fa605e09d63157eebec102676/coverage-7.13.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:264657171406c114787b441484de620e03d8f7202f113d62fcd3d9688baa3e6f", size = 253750, upload-time = "2026-01-25T12:58:05.574Z" }, - { url = "https://files.pythonhosted.org/packages/e5/b1/1935e31add2232663cf7edd8269548b122a7d100047ff93475dbaaae673e/coverage-7.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae47d8dcd3ded0155afbb59c62bd8ab07ea0fd4902e1c40567439e6db9dcaf2f", size = 254862, upload-time = "2026-01-25T12:58:07.647Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/59/b5e97071ec13df5f45da2b3391b6cdbec78ba20757bc92580a5b3d5fa53c/coverage-7.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8a0b33e9fd838220b007ce8f299114d406c1e8edb21336af4c97a26ecfd185aa", size = 251420, upload-time = "2026-01-25T12:58:09.309Z" }, - { url = "https://files.pythonhosted.org/packages/3f/75/9495932f87469d013dc515fb0ce1aac5fa97766f38f6b1a1deb1ee7b7f3a/coverage-7.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3becbea7f3ce9a2d4d430f223ec15888e4deb31395840a79e916368d6004cce", size = 252786, upload-time = "2026-01-25T12:58:10.909Z" }, - { url = "https://files.pythonhosted.org/packages/6a/59/af550721f0eb62f46f7b8cb7e6f1860592189267b1c411a4e3a057caacee/coverage-7.13.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f819c727a6e6eeb8711e4ce63d78c620f69630a2e9d53bc95ca5379f57b6ba94", size = 250928, upload-time = "2026-01-25T12:58:12.449Z" }, - { url = "https://files.pythonhosted.org/packages/9b/b1/21b4445709aae500be4ab43bbcfb4e53dc0811c3396dcb11bf9f23fd0226/coverage-7.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:4f7b71757a3ab19f7ba286e04c181004c1d61be921795ee8ba6970fd0ec91da5", size = 250496, upload-time = "2026-01-25T12:58:14.047Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b1/0f5d89dfe0392990e4f3980adbde3eb34885bc1effb2dc369e0bf385e389/coverage-7.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b7fc50d2afd2e6b4f6f2f403b70103d280a8e0cb35320cbbe6debcda02a1030b", size = 252373, upload-time = "2026-01-25T12:58:15.976Z" }, - { url = "https://files.pythonhosted.org/packages/01/c9/0cf1a6a57a9968cc049a6b896693faa523c638a5314b1fc374eb2b2ac904/coverage-7.13.2-cp312-cp312-win32.whl", hash = "sha256:292250282cf9bcf206b543d7608bda17ca6fc151f4cbae949fc7e115112fbd41", size = 221696, upload-time = "2026-01-25T12:58:17.517Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/05/d7540bf983f09d32803911afed135524570f8c47bb394bf6206c1dc3a786/coverage-7.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:eeea10169fac01549a7921d27a3e517194ae254b542102267bef7a93ed38c40e", size = 222504, upload-time = "2026-01-25T12:58:19.115Z" }, - { url = "https://files.pythonhosted.org/packages/15/8b/1a9f037a736ced0a12aacf6330cdaad5008081142a7070bc58b0f7930cbc/coverage-7.13.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a5b567f0b635b592c917f96b9a9cb3dbd4c320d03f4bf94e9084e494f2e8894", size = 221120, upload-time = "2026-01-25T12:58:21.334Z" }, - { url = "https://files.pythonhosted.org/packages/a7/f0/3d3eac7568ab6096ff23791a526b0048a1ff3f49d0e236b2af6fb6558e88/coverage-7.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed75de7d1217cf3b99365d110975f83af0528c849ef5180a12fd91b5064df9d6", size = 219168, upload-time = "2026-01-25T12:58:23.376Z" }, - { url = "https://files.pythonhosted.org/packages/a3/a6/f8b5cfeddbab95fdef4dcd682d82e5dcff7a112ced57a959f89537ee9995/coverage-7.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97e596de8fa9bada4d88fde64a3f4d37f1b6131e4faa32bad7808abc79887ddc", size = 219537, upload-time = "2026-01-25T12:58:24.932Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e6/8d8e6e0c516c838229d1e41cadcec91745f4b1031d4db17ce0043a0423b4/coverage-7.13.2-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:68c86173562ed4413345410c9480a8d64864ac5e54a5cda236748031e094229f", size = 250528, upload-time = "2026-01-25T12:58:26.567Z" }, - { url = "https://files.pythonhosted.org/packages/8e/78/befa6640f74092b86961f957f26504c8fba3d7da57cc2ab7407391870495/coverage-7.13.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7be4d613638d678b2b3773b8f687537b284d7074695a43fe2fbbfc0e31ceaed1", size = 253132, upload-time = "2026-01-25T12:58:28.251Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/10/1630db1edd8ce675124a2ee0f7becc603d2bb7b345c2387b4b95c6907094/coverage-7.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7f63ce526a96acd0e16c4af8b50b64334239550402fb1607ce6a584a6d62ce9", size = 254374, upload-time = "2026-01-25T12:58:30.294Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1d/0d9381647b1e8e6d310ac4140be9c428a0277330991e0c35bdd751e338a4/coverage-7.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:406821f37f864f968e29ac14c3fccae0fec9fdeba48327f0341decf4daf92d7c", size = 250762, upload-time = "2026-01-25T12:58:32.036Z" }, - { url = "https://files.pythonhosted.org/packages/43/e4/5636dfc9a7c871ee8776af83ee33b4c26bc508ad6cee1e89b6419a366582/coverage-7.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ee68e5a4e3e5443623406b905db447dceddffee0dceb39f4e0cd9ec2a35004b5", size = 252502, upload-time = "2026-01-25T12:58:33.961Z" }, - { url = "https://files.pythonhosted.org/packages/02/2a/7ff2884d79d420cbb2d12fed6fff727b6d0ef27253140d3cdbbd03187ee0/coverage-7.13.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2ee0e58cca0c17dd9c6c1cdde02bb705c7b3fbfa5f3b0b5afeda20d4ebff8ef4", size = 250463, upload-time = "2026-01-25T12:58:35.529Z" }, - { url = "https://files.pythonhosted.org/packages/91/c0/ba51087db645b6c7261570400fc62c89a16278763f36ba618dc8657a187b/coverage-7.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:6e5bbb5018bf76a56aabdb64246b5288d5ae1b7d0dd4d0534fe86df2c2992d1c", size = 250288, upload-time = "2026-01-25T12:58:37.226Z" }, - { url = "https://files.pythonhosted.org/packages/03/07/44e6f428551c4d9faf63ebcefe49b30e5c89d1be96f6a3abd86a52da9d15/coverage-7.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a55516c68ef3e08e134e818d5e308ffa6b1337cc8b092b69b24287bf07d38e31", size = 252063, upload-time = "2026-01-25T12:58:38.821Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/67/35b730ad7e1859dd57e834d1bc06080d22d2f87457d53f692fce3f24a5a9/coverage-7.13.2-cp313-cp313-win32.whl", hash = "sha256:5b20211c47a8abf4abc3319d8ce2464864fa9f30c5fcaf958a3eed92f4f1fef8", size = 221716, upload-time = "2026-01-25T12:58:40.484Z" }, - { url = "https://files.pythonhosted.org/packages/0d/82/e5fcf5a97c72f45fc14829237a6550bf49d0ab882ac90e04b12a69db76b4/coverage-7.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:14f500232e521201cf031549fb1ebdfc0a40f401cf519157f76c397e586c3beb", size = 222522, upload-time = "2026-01-25T12:58:43.247Z" }, - { url = "https://files.pythonhosted.org/packages/b1/f1/25d7b2f946d239dd2d6644ca2cc060d24f97551e2af13b6c24c722ae5f97/coverage-7.13.2-cp313-cp313-win_arm64.whl", hash = "sha256:9779310cb5a9778a60c899f075a8514c89fa6d10131445c2207fc893e0b14557", size = 221145, upload-time = "2026-01-25T12:58:45Z" }, - { url = "https://files.pythonhosted.org/packages/9e/f7/080376c029c8f76fadfe43911d0daffa0cbdc9f9418a0eead70c56fb7f4b/coverage-7.13.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5a1e41ce5df6b547cbc3d3699381c9e2c2c369c67837e716ed0f549d48e", size = 219861, upload-time = "2026-01-25T12:58:46.586Z" }, - { url = "https://files.pythonhosted.org/packages/42/11/0b5e315af5ab35f4c4a70e64d3314e4eec25eefc6dec13be3a7d5ffe8ac5/coverage-7.13.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b01899e82a04085b6561eb233fd688474f57455e8ad35cd82286463ba06332b7", size = 220207, upload-time = "2026-01-25T12:58:48.277Z" }, - { url = "https://files.pythonhosted.org/packages/b2/0c/0874d0318fb1062117acbef06a09cf8b63f3060c22265adaad24b36306b7/coverage-7.13.2-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:838943bea48be0e2768b0cf7819544cdedc1bbb2f28427eabb6eb8c9eb2285d3", size = 261504, upload-time = "2026-01-25T12:58:49.904Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/5e/1cd72c22ecb30751e43a72f40ba50fcef1b7e93e3ea823bd9feda8e51f9a/coverage-7.13.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:93d1d25ec2b27e90bcfef7012992d1f5121b51161b8bffcda756a816cf13c2c3", size = 263582, upload-time = "2026-01-25T12:58:51.582Z" }, - { url = "https://files.pythonhosted.org/packages/9b/da/8acf356707c7a42df4d0657020308e23e5a07397e81492640c186268497c/coverage-7.13.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93b57142f9621b0d12349c43fc7741fe578e4bc914c1e5a54142856cfc0bf421", size = 266008, upload-time = "2026-01-25T12:58:53.234Z" }, - { url = "https://files.pythonhosted.org/packages/41/41/ea1730af99960309423c6ea8d6a4f1fa5564b2d97bd1d29dda4b42611f04/coverage-7.13.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f06799ae1bdfff7ccb8665d75f8291c69110ba9585253de254688aa8a1ccc6c5", size = 260762, upload-time = "2026-01-25T12:58:55.372Z" }, - { url = "https://files.pythonhosted.org/packages/22/fa/02884d2080ba71db64fdc127b311db60e01fe6ba797d9c8363725e39f4d5/coverage-7.13.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f9405ab4f81d490811b1d91c7a20361135a2df4c170e7f0b747a794da5b7f23", size = 263571, upload-time = "2026-01-25T12:58:57.52Z" }, - { url = "https://files.pythonhosted.org/packages/d2/6b/4083aaaeba9b3112f55ac57c2ce7001dc4d8fa3fcc228a39f09cc84ede27/coverage-7.13.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f9ab1d5b86f8fbc97a5b3cd6280a3fd85fef3b028689d8a2c00918f0d82c728c", size = 261200, upload-time = "2026-01-25T12:58:59.255Z" }, - { url = "https://files.pythonhosted.org/packages/e9/d2/aea92fa36d61955e8c416ede9cf9bf142aa196f3aea214bb67f85235a050/coverage-7.13.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:f674f59712d67e841525b99e5e2b595250e39b529c3bda14764e4f625a3fa01f", size = 260095, upload-time = "2026-01-25T12:59:01.066Z" }, - { url 
= "https://files.pythonhosted.org/packages/0d/ae/04ffe96a80f107ea21b22b2367175c621da920063260a1c22f9452fd7866/coverage-7.13.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c6cadac7b8ace1ba9144feb1ae3cb787a6065ba6d23ffc59a934b16406c26573", size = 262284, upload-time = "2026-01-25T12:59:02.802Z" }, - { url = "https://files.pythonhosted.org/packages/1c/7a/6f354dcd7dfc41297791d6fb4e0d618acb55810bde2c1fd14b3939e05c2b/coverage-7.13.2-cp313-cp313t-win32.whl", hash = "sha256:14ae4146465f8e6e6253eba0cccd57423e598a4cb925958b240c805300918343", size = 222389, upload-time = "2026-01-25T12:59:04.563Z" }, - { url = "https://files.pythonhosted.org/packages/8d/d5/080ad292a4a3d3daf411574be0a1f56d6dee2c4fdf6b005342be9fac807f/coverage-7.13.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9074896edd705a05769e3de0eac0a8388484b503b68863dd06d5e473f874fd47", size = 223450, upload-time = "2026-01-25T12:59:06.677Z" }, - { url = "https://files.pythonhosted.org/packages/88/96/df576fbacc522e9fb8d1c4b7a7fc62eb734be56e2cba1d88d2eabe08ea3f/coverage-7.13.2-cp313-cp313t-win_arm64.whl", hash = "sha256:69e526e14f3f854eda573d3cf40cffd29a1a91c684743d904c33dbdcd0e0f3e7", size = 221707, upload-time = "2026-01-25T12:59:08.363Z" }, - { url = "https://files.pythonhosted.org/packages/55/53/1da9e51a0775634b04fcc11eb25c002fc58ee4f92ce2e8512f94ac5fc5bf/coverage-7.13.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:387a825f43d680e7310e6f325b2167dd093bc8ffd933b83e9aa0983cf6e0a2ef", size = 219213, upload-time = "2026-01-25T12:59:11.909Z" }, - { url = "https://files.pythonhosted.org/packages/46/35/b3caac3ebbd10230fea5a33012b27d19e999a17c9285c4228b4b2e35b7da/coverage-7.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f0d7fea9d8e5d778cd5a9e8fc38308ad688f02040e883cdc13311ef2748cb40f", size = 219549, upload-time = "2026-01-25T12:59:13.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/9c/e1cf7def1bdc72c1907e60703983a588f9558434a2ff94615747bd73c192/coverage-7.13.2-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080afb413be106c95c4ee96b4fffdc9e2fa56a8bbf90b5c0918e5c4449412f5", size = 250586, upload-time = "2026-01-25T12:59:15.808Z" }, - { url = "https://files.pythonhosted.org/packages/ba/49/f54ec02ed12be66c8d8897270505759e057b0c68564a65c429ccdd1f139e/coverage-7.13.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a7fc042ba3c7ce25b8a9f097eb0f32a5ce1ccdb639d9eec114e26def98e1f8a4", size = 253093, upload-time = "2026-01-25T12:59:17.491Z" }, - { url = "https://files.pythonhosted.org/packages/fb/5e/aaf86be3e181d907e23c0f61fccaeb38de8e6f6b47aed92bf57d8fc9c034/coverage-7.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0ba505e021557f7f8173ee8cd6b926373d8653e5ff7581ae2efce1b11ef4c27", size = 254446, upload-time = "2026-01-25T12:59:19.752Z" }, - { url = "https://files.pythonhosted.org/packages/28/c8/a5fa01460e2d75b0c853b392080d6829d3ca8b5ab31e158fa0501bc7c708/coverage-7.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7de326f80e3451bd5cc7239ab46c73ddb658fe0b7649476bc7413572d36cd548", size = 250615, upload-time = "2026-01-25T12:59:21.928Z" }, - { url = "https://files.pythonhosted.org/packages/86/0b/6d56315a55f7062bb66410732c24879ccb2ec527ab6630246de5fe45a1df/coverage-7.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:abaea04f1e7e34841d4a7b343904a3f59481f62f9df39e2cd399d69a187a9660", size = 252452, upload-time = "2026-01-25T12:59:23.592Z" }, - { url = "https://files.pythonhosted.org/packages/30/19/9bc550363ebc6b0ea121977ee44d05ecd1e8bf79018b8444f1028701c563/coverage-7.13.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9f93959ee0c604bccd8e0697be21de0887b1f73efcc3aa73a3ec0fd13feace92", size = 250418, upload-time = 
"2026-01-25T12:59:25.392Z" }, - { url = "https://files.pythonhosted.org/packages/1f/53/580530a31ca2f0cc6f07a8f2ab5460785b02bb11bdf815d4c4d37a4c5169/coverage-7.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:13fe81ead04e34e105bf1b3c9f9cdf32ce31736ee5d90a8d2de02b9d3e1bcb82", size = 250231, upload-time = "2026-01-25T12:59:27.888Z" }, - { url = "https://files.pythonhosted.org/packages/e2/42/dd9093f919dc3088cb472893651884bd675e3df3d38a43f9053656dca9a2/coverage-7.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d6d16b0f71120e365741bca2cb473ca6fe38930bc5431c5e850ba949f708f892", size = 251888, upload-time = "2026-01-25T12:59:29.636Z" }, - { url = "https://files.pythonhosted.org/packages/fa/a6/0af4053e6e819774626e133c3d6f70fae4d44884bfc4b126cb647baee8d3/coverage-7.13.2-cp314-cp314-win32.whl", hash = "sha256:9b2f4714bb7d99ba3790ee095b3b4ac94767e1347fe424278a0b10acb3ff04fe", size = 221968, upload-time = "2026-01-25T12:59:31.424Z" }, - { url = "https://files.pythonhosted.org/packages/c4/cc/5aff1e1f80d55862442855517bb8ad8ad3a68639441ff6287dde6a58558b/coverage-7.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:e4121a90823a063d717a96e0a0529c727fb31ea889369a0ee3ec00ed99bf6859", size = 222783, upload-time = "2026-01-25T12:59:33.118Z" }, - { url = "https://files.pythonhosted.org/packages/de/20/09abafb24f84b3292cc658728803416c15b79f9ee5e68d25238a895b07d9/coverage-7.13.2-cp314-cp314-win_arm64.whl", hash = "sha256:6873f0271b4a15a33e7590f338d823f6f66f91ed147a03938d7ce26efd04eee6", size = 221348, upload-time = "2026-01-25T12:59:34.939Z" }, - { url = "https://files.pythonhosted.org/packages/b6/60/a3820c7232db63be060e4019017cd3426751c2699dab3c62819cdbcea387/coverage-7.13.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f61d349f5b7cd95c34017f1927ee379bfbe9884300d74e07cf630ccf7a610c1b", size = 219950, upload-time = "2026-01-25T12:59:36.624Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/37/e4ef5975fdeb86b1e56db9a82f41b032e3d93a840ebaf4064f39e770d5c5/coverage-7.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a43d34ce714f4ca674c0d90beb760eb05aad906f2c47580ccee9da8fe8bfb417", size = 220209, upload-time = "2026-01-25T12:59:38.339Z" }, - { url = "https://files.pythonhosted.org/packages/54/df/d40e091d00c51adca1e251d3b60a8b464112efa3004949e96a74d7c19a64/coverage-7.13.2-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bff1b04cb9d4900ce5c56c4942f047dc7efe57e2608cb7c3c8936e9970ccdbee", size = 261576, upload-time = "2026-01-25T12:59:40.446Z" }, - { url = "https://files.pythonhosted.org/packages/c5/44/5259c4bed54e3392e5c176121af9f71919d96dde853386e7730e705f3520/coverage-7.13.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6ae99e4560963ad8e163e819e5d77d413d331fd00566c1e0856aa252303552c1", size = 263704, upload-time = "2026-01-25T12:59:42.346Z" }, - { url = "https://files.pythonhosted.org/packages/16/bd/ae9f005827abcbe2c70157459ae86053971c9fa14617b63903abbdce26d9/coverage-7.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e79a8c7d461820257d9aa43716c4efc55366d7b292e46b5b37165be1d377405d", size = 266109, upload-time = "2026-01-25T12:59:44.073Z" }, - { url = "https://files.pythonhosted.org/packages/a2/c0/8e279c1c0f5b1eaa3ad9b0fb7a5637fc0379ea7d85a781c0fe0bb3cfc2ab/coverage-7.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:060ee84f6a769d40c492711911a76811b4befb6fba50abb450371abb720f5bd6", size = 260686, upload-time = "2026-01-25T12:59:45.804Z" }, - { url = "https://files.pythonhosted.org/packages/b2/47/3a8112627e9d863e7cddd72894171c929e94491a597811725befdcd76bce/coverage-7.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bca209d001fd03ea2d978f8a4985093240a355c93078aee3f799852c23f561a", size = 263568, upload-time = 
"2026-01-25T12:59:47.929Z" }, - { url = "https://files.pythonhosted.org/packages/92/bc/7ea367d84afa3120afc3ce6de294fd2dcd33b51e2e7fbe4bbfd200f2cb8c/coverage-7.13.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6b8092aa38d72f091db61ef83cb66076f18f02da3e1a75039a4f218629600e04", size = 261174, upload-time = "2026-01-25T12:59:49.717Z" }, - { url = "https://files.pythonhosted.org/packages/33/b7/f1092dcecb6637e31cc2db099581ee5c61a17647849bae6b8261a2b78430/coverage-7.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4a3158dc2dcce5200d91ec28cd315c999eebff355437d2765840555d765a6e5f", size = 260017, upload-time = "2026-01-25T12:59:51.463Z" }, - { url = "https://files.pythonhosted.org/packages/2b/cd/f3d07d4b95fbe1a2ef0958c15da614f7e4f557720132de34d2dc3aa7e911/coverage-7.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3973f353b2d70bd9796cc12f532a05945232ccae966456c8ed7034cb96bbfd6f", size = 262337, upload-time = "2026-01-25T12:59:53.407Z" }, - { url = "https://files.pythonhosted.org/packages/e0/db/b0d5b2873a07cb1e06a55d998697c0a5a540dcefbf353774c99eb3874513/coverage-7.13.2-cp314-cp314t-win32.whl", hash = "sha256:79f6506a678a59d4ded048dc72f1859ebede8ec2b9a2d509ebe161f01c2879d3", size = 222749, upload-time = "2026-01-25T12:59:56.316Z" }, - { url = "https://files.pythonhosted.org/packages/e5/2f/838a5394c082ac57d85f57f6aba53093b30d9089781df72412126505716f/coverage-7.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:196bfeabdccc5a020a57d5a368c681e3a6ceb0447d153aeccc1ab4d70a5032ba", size = 223857, upload-time = "2026-01-25T12:59:58.201Z" }, - { url = "https://files.pythonhosted.org/packages/44/d4/b608243e76ead3a4298824b50922b89ef793e50069ce30316a65c1b4d7ef/coverage-7.13.2-cp314-cp314t-win_arm64.whl", hash = "sha256:69269ab58783e090bfbf5b916ab3d188126e22d6070bbfc93098fdd474ef937c", size = 221881, upload-time = "2026-01-25T13:00:00.449Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" }, + { url = "https://files.pythonhosted.org/packages/69/33/e8c48488c29a73fd089f9d71f9653c1be7478f2ad6b5bc870db11a55d23d/coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5", size = 219255, upload-time = "2026-03-17T10:29:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/da/bd/b0ebe9f677d7f4b74a3e115eec7ddd4bcf892074963a00d91e8b164a6386/coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf", size = 219772, upload-time = "2026-03-17T10:29:52.867Z" }, + { url = "https://files.pythonhosted.org/packages/48/cc/5cb9502f4e01972f54eedd48218bb203fe81e294be606a2bc93970208013/coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8", size = 246532, upload-time = "2026-03-17T10:29:54.688Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d8/3217636d86c7e7b12e126e4f30ef1581047da73140614523af7495ed5f2d/coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4", size = 248333, upload-time = "2026-03-17T10:29:56.221Z" }, + { url = "https://files.pythonhosted.org/packages/2b/30/2002ac6729ba2d4357438e2ed3c447ad8562866c8c63fc16f6dfc33afe56/coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d", size = 250211, upload-time = "2026-03-17T10:29:57.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/85/552496626d6b9359eb0e2f86f920037c9cbfba09b24d914c6e1528155f7d/coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930", size = 252125, upload-time = "2026-03-17T10:29:59.388Z" }, + { url = "https://files.pythonhosted.org/packages/44/21/40256eabdcbccdb6acf6b381b3016a154399a75fe39d406f790ae84d1f3c/coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d", size = 247219, upload-time = "2026-03-17T10:30:01.199Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/96e2a6c3f21a0ea77d7830b254a1542d0328acc8d7bdf6a284ba7e529f77/coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40", size = 248248, upload-time = "2026-03-17T10:30:03.317Z" }, + { url = "https://files.pythonhosted.org/packages/da/ba/8477f549e554827da390ec659f3c38e4b6d95470f4daafc2d8ff94eaa9c2/coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878", size = 246254, upload-time = "2026-03-17T10:30:04.832Z" }, + { url = "https://files.pythonhosted.org/packages/55/59/bc22aef0e6aa179d5b1b001e8b3654785e9adf27ef24c93dc4228ebd5d68/coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400", size = 250067, upload-time = "2026-03-17T10:30:06.535Z" }, + { url = "https://files.pythonhosted.org/packages/de/1b/c6a023a160806a5137dca53468fd97530d6acad24a22003b1578a9c2e429/coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0", size = 246521, upload-time = "2026-03-17T10:30:08.486Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/3f/3532c85a55aa2f899fa17c186f831cfa1aa434d88ff792a709636f64130e/coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0", size = 247126, upload-time = "2026-03-17T10:30:09.966Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2e/b9d56af4a24ef45dfbcda88e06870cb7d57b2b0bfa3a888d79b4c8debd76/coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58", size = 221860, upload-time = "2026-03-17T10:30:11.393Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cc/d938417e7a4d7f0433ad4edee8bb2acdc60dc7ac5af19e2a07a048ecbee3/coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e", size = 222788, upload-time = "2026-03-17T10:30:12.886Z" }, + { url = "https://files.pythonhosted.org/packages/4b/37/d24c8f8220ff07b839b2c043ea4903a33b0f455abe673ae3c03bbdb7f212/coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d", size = 219381, upload-time = "2026-03-17T10:30:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/cd129b0ca4afe886a6ce9d183c44d8301acbd4ef248622e7c49a23145605/coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587", size = 219880, upload-time = "2026-03-17T10:30:16.231Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/e0e5b237bffdb5d6c530ce87cc1d413a5b7d7dfd60fb067ad6d254c35c76/coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642", size = 250303, upload-time = "2026-03-17T10:30:17.748Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/be/b1afb692be85b947f3401375851484496134c5554e67e822c35f28bf2fbc/coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b", size = 252218, upload-time = "2026-03-17T10:30:19.804Z" }, + { url = "https://files.pythonhosted.org/packages/da/69/2f47bb6fa1b8d1e3e5d0c4be8ccb4313c63d742476a619418f85740d597b/coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686", size = 254326, upload-time = "2026-03-17T10:30:21.321Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d0/79db81da58965bd29dabc8f4ad2a2af70611a57cba9d1ec006f072f30a54/coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743", size = 256267, upload-time = "2026-03-17T10:30:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e5/32/d0d7cc8168f91ddab44c0ce4806b969df5f5fdfdbb568eaca2dbc2a04936/coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75", size = 250430, upload-time = "2026-03-17T10:30:25.311Z" }, + { url = "https://files.pythonhosted.org/packages/4d/06/a055311d891ddbe231cd69fdd20ea4be6e3603ffebddf8704b8ca8e10a3c/coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209", size = 252017, upload-time = "2026-03-17T10:30:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f6/d0fd2d21e29a657b5f77a2fe7082e1568158340dceb941954f776dce1b7b/coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a", size = 250080, upload-time 
= "2026-03-17T10:30:29.481Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ab/0d7fb2efc2e9a5eb7ddcc6e722f834a69b454b7e6e5888c3a8567ecffb31/coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e", size = 253843, upload-time = "2026-03-17T10:30:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6f/7467b917bbf5408610178f62a49c0ed4377bb16c1657f689cc61470da8ce/coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd", size = 249802, upload-time = "2026-03-17T10:30:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/75/2c/1172fb689df92135f5bfbbd69fc83017a76d24ea2e2f3a1154007e2fb9f8/coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8", size = 250707, upload-time = "2026-03-17T10:30:35.2Z" }, + { url = "https://files.pythonhosted.org/packages/67/21/9ac389377380a07884e3b48ba7a620fcd9dbfaf1d40565facdc6b36ec9ef/coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf", size = 221880, upload-time = "2026-03-17T10:30:36.775Z" }, + { url = "https://files.pythonhosted.org/packages/af/7f/4cd8a92531253f9d7c1bbecd9fa1b472907fb54446ca768c59b531248dc5/coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9", size = 222816, upload-time = "2026-03-17T10:30:38.891Z" }, + { url = "https://files.pythonhosted.org/packages/12/a6/1d3f6155fb0010ca68eba7fe48ca6c9da7385058b77a95848710ecf189b1/coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028", size = 221483, upload-time = "2026-03-17T10:30:40.463Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = "2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = "https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, 
upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" }, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time 
= "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = "https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, 
upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = "2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time 
= "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = "2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = "https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, 
upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + { url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = "2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, ] [package.optional-dependencies] @@ -163,14 +179,14 @@ toml = [ [[package]] name = "deepdiff" -version = "8.6.1" +version = "9.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "orderly-set" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/76/36c9aab3d5c19a94091f7c6c6e784efca50d87b124bf026c36e94719f33c/deepdiff-8.6.1.tar.gz", hash = "sha256:ec56d7a769ca80891b5200ec7bd41eec300ced91ebcc7797b41eb2b3f3ff643a", size = 634054, upload-time = "2025-09-03T19:40:41.461Z" } +sdist = { url = "https://files.pythonhosted.org/packages/24/20/63dd34163ed07393968128dc8c7ab948c96e47c4ce76976ea533de64909d/deepdiff-9.0.0.tar.gz", hash = "sha256:4872005306237b5b50829803feff58a1dfd20b2b357a55de22e7ded65b2008a7", size = 151952, upload-time = "2026-03-30T05:52:23.769Z" } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/e6/efe534ef0952b531b630780e19cabd416e2032697019d5295defc6ef9bd9/deepdiff-8.6.1-py3-none-any.whl", hash = "sha256:ee8708a7f7d37fb273a541fa24ad010ed484192cd0c4ffc0fa0ed5e2d4b9e78b", size = 91378, upload-time = "2025-09-03T19:40:39.679Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c4/da7089cd7aa4ab554f56e18a7fb08dcfed8fd2ae91fa528f5b1be207a148/deepdiff-9.0.0-py3-none-any.whl", hash = "sha256:b1ae0dd86290d86a03de5fbee728fde43095c1472ae4974bdab23ab4656305bd", size = 170540, upload-time = "2026-03-30T05:52:22.008Z" }, ] [[package]] @@ -209,11 +225,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/cc/762dfb036166873f0059f3b7de4565e1b5bc3d6f28a414c13da27e442f99/idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242", size = 194210, upload-time = "2026-04-22T16:42:42.314Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/5d/13/ad7d7ca3808a898b4612b6fe93cde56b53f3034dcde235acb1f0e1df24c6/idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3", size = 68629, upload-time = "2026-04-22T16:42:40.909Z" }, ] [[package]] @@ -239,87 +255,96 @@ wheels = [ [[package]] name = "jsonpath-ng" -version = 
"1.7.0" +version = "1.8.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ply" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6d/86/08646239a313f895186ff0a4573452038eed8c86f54380b3ebac34d32fb2/jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c", size = 37838, upload-time = "2024-10-11T15:41:42.404Z" } +sdist = { url = "https://files.pythonhosted.org/packages/32/58/250751940d75c8019659e15482d548a4aa3b6ce122c515102a4bfdac50e3/jsonpath_ng-1.8.0.tar.gz", hash = "sha256:54252968134b5e549ea5b872f1df1168bd7defe1a52fed5a358c194e1943ddc3", size = 74513, upload-time = "2026-02-24T14:42:06.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/5a/73ecb3d82f8615f32ccdadeb9356726d6cae3a4bbc840b437ceb95708063/jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6", size = 30105, upload-time = "2024-11-20T17:58:30.418Z" }, + { url = "https://files.pythonhosted.org/packages/03/99/33c7d78a3fb70d545fd5411ac67a651c81602cc09c9cf0df383733f068c5/jsonpath_ng-1.8.0-py3-none-any.whl", hash = "sha256:b8dde192f8af58d646fc031fac9c99fe4d00326afc4148f1f043c601a8cfe138", size = 67844, upload-time = "2026-02-28T00:53:19.637Z" }, ] [[package]] name = "librt" -version = "0.7.8" +version = "0.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/24/5f3646ff414285e0f7708fa4e946b9bf538345a41d1c375c439467721a5e/librt-0.7.8.tar.gz", hash = "sha256:1a4ede613941d9c3470b0368be851df6bb78ab218635512d0370b27a277a0862", size = 148323, upload-time = "2026-01-14T12:56:16.876Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/6b/3d5c13fb3e3c4f43206c8f9dfed13778c2ed4f000bacaa0b7ce3c402a265/librt-0.9.0.tar.gz", hash = "sha256:a0951822531e7aee6e0dfb556b30d5ee36bbe234faf60c20a16c01be3530869d", size = 184368, upload-time = "2026-04-09T16:06:26.173Z" } wheels = 
[ - { url = "https://files.pythonhosted.org/packages/44/13/57b06758a13550c5f09563893b004f98e9537ee6ec67b7df85c3571c8832/librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d", size = 56521, upload-time = "2026-01-14T12:54:40.066Z" }, - { url = "https://files.pythonhosted.org/packages/c2/24/bbea34d1452a10612fb45ac8356f95351ba40c2517e429602160a49d1fd0/librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b", size = 58456, upload-time = "2026-01-14T12:54:41.471Z" }, - { url = "https://files.pythonhosted.org/packages/04/72/a168808f92253ec3a810beb1eceebc465701197dbc7e865a1c9ceb3c22c7/librt-0.7.8-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6938cc2de153bc927ed8d71c7d2f2ae01b4e96359126c602721340eb7ce1a92d", size = 164392, upload-time = "2026-01-14T12:54:42.843Z" }, - { url = "https://files.pythonhosted.org/packages/14/5c/4c0d406f1b02735c2e7af8ff1ff03a6577b1369b91aa934a9fa2cc42c7ce/librt-0.7.8-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:66daa6ac5de4288a5bbfbe55b4caa7bf0cd26b3269c7a476ffe8ce45f837f87d", size = 172959, upload-time = "2026-01-14T12:54:44.602Z" }, - { url = "https://files.pythonhosted.org/packages/82/5f/3e85351c523f73ad8d938989e9a58c7f59fb9c17f761b9981b43f0025ce7/librt-0.7.8-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4864045f49dc9c974dadb942ac56a74cd0479a2aafa51ce272c490a82322ea3c", size = 186717, upload-time = "2026-01-14T12:54:45.986Z" }, - { url = "https://files.pythonhosted.org/packages/08/f8/18bfe092e402d00fe00d33aa1e01dda1bd583ca100b393b4373847eade6d/librt-0.7.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a36515b1328dc5b3ffce79fe204985ca8572525452eacabee2166f44bb387b2c", size = 184585, upload-time = "2026-01-14T12:54:47.139Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/fc/f43972ff56fd790a9fa55028a52ccea1875100edbb856b705bd393b601e3/librt-0.7.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b7e7f140c5169798f90b80d6e607ed2ba5059784968a004107c88ad61fb3641d", size = 180497, upload-time = "2026-01-14T12:54:48.946Z" }, - { url = "https://files.pythonhosted.org/packages/e1/3a/25e36030315a410d3ad0b7d0f19f5f188e88d1613d7d3fd8150523ea1093/librt-0.7.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff71447cb778a4f772ddc4ce360e6ba9c95527ed84a52096bd1bbf9fee2ec7c0", size = 200052, upload-time = "2026-01-14T12:54:50.382Z" }, - { url = "https://files.pythonhosted.org/packages/fc/b8/f3a5a1931ae2a6ad92bf6893b9ef44325b88641d58723529e2c2935e8abe/librt-0.7.8-cp310-cp310-win32.whl", hash = "sha256:047164e5f68b7a8ebdf9fae91a3c2161d3192418aadd61ddd3a86a56cbe3dc85", size = 43477, upload-time = "2026-01-14T12:54:51.815Z" }, - { url = "https://files.pythonhosted.org/packages/fe/91/c4202779366bc19f871b4ad25db10fcfa1e313c7893feb942f32668e8597/librt-0.7.8-cp310-cp310-win_amd64.whl", hash = "sha256:d6f254d096d84156a46a84861183c183d30734e52383602443292644d895047c", size = 49806, upload-time = "2026-01-14T12:54:53.149Z" }, - { url = "https://files.pythonhosted.org/packages/1b/a3/87ea9c1049f2c781177496ebee29430e4631f439b8553a4969c88747d5d8/librt-0.7.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3e9c11aa260c31493d4b3197d1e28dd07768594a4f92bec4506849d736248f", size = 56507, upload-time = "2026-01-14T12:54:54.156Z" }, - { url = "https://files.pythonhosted.org/packages/5e/4a/23bcef149f37f771ad30203d561fcfd45b02bc54947b91f7a9ac34815747/librt-0.7.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddb52499d0b3ed4aa88746aaf6f36a08314677d5c346234c3987ddc506404eac", size = 58455, upload-time = "2026-01-14T12:54:55.978Z" }, - { url = 
"https://files.pythonhosted.org/packages/22/6e/46eb9b85c1b9761e0f42b6e6311e1cc544843ac897457062b9d5d0b21df4/librt-0.7.8-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e9c0afebbe6ce177ae8edba0c7c4d626f2a0fc12c33bb993d163817c41a7a05c", size = 164956, upload-time = "2026-01-14T12:54:57.311Z" }, - { url = "https://files.pythonhosted.org/packages/7a/3f/aa7c7f6829fb83989feb7ba9aa11c662b34b4bd4bd5b262f2876ba3db58d/librt-0.7.8-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:631599598e2c76ded400c0a8722dec09217c89ff64dc54b060f598ed68e7d2a8", size = 174364, upload-time = "2026-01-14T12:54:59.089Z" }, - { url = "https://files.pythonhosted.org/packages/3f/2d/d57d154b40b11f2cb851c4df0d4c4456bacd9b1ccc4ecb593ddec56c1a8b/librt-0.7.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c1ba843ae20db09b9d5c80475376168feb2640ce91cd9906414f23cc267a1ff", size = 188034, upload-time = "2026-01-14T12:55:00.141Z" }, - { url = "https://files.pythonhosted.org/packages/59/f9/36c4dad00925c16cd69d744b87f7001792691857d3b79187e7a673e812fb/librt-0.7.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b5b007bb22ea4b255d3ee39dfd06d12534de2fcc3438567d9f48cdaf67ae1ae3", size = 186295, upload-time = "2026-01-14T12:55:01.303Z" }, - { url = "https://files.pythonhosted.org/packages/23/9b/8a9889d3df5efb67695a67785028ccd58e661c3018237b73ad081691d0cb/librt-0.7.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dbd79caaf77a3f590cbe32dc2447f718772d6eea59656a7dcb9311161b10fa75", size = 181470, upload-time = "2026-01-14T12:55:02.492Z" }, - { url = "https://files.pythonhosted.org/packages/43/64/54d6ef11afca01fef8af78c230726a9394759f2addfbf7afc5e3cc032a45/librt-0.7.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:87808a8d1e0bd62a01cafc41f0fd6818b5a5d0ca0d8a55326a81643cdda8f873", size = 201713, upload-time = "2026-01-14T12:55:03.919Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/29/73e7ed2991330b28919387656f54109139b49e19cd72902f466bd44415fd/librt-0.7.8-cp311-cp311-win32.whl", hash = "sha256:31724b93baa91512bd0a376e7cf0b59d8b631ee17923b1218a65456fa9bda2e7", size = 43803, upload-time = "2026-01-14T12:55:04.996Z" }, - { url = "https://files.pythonhosted.org/packages/3f/de/66766ff48ed02b4d78deea30392ae200bcbd99ae61ba2418b49fd50a4831/librt-0.7.8-cp311-cp311-win_amd64.whl", hash = "sha256:978e8b5f13e52cf23a9e80f3286d7546baa70bc4ef35b51d97a709d0b28e537c", size = 50080, upload-time = "2026-01-14T12:55:06.489Z" }, - { url = "https://files.pythonhosted.org/packages/6f/e3/33450438ff3a8c581d4ed7f798a70b07c3206d298cf0b87d3806e72e3ed8/librt-0.7.8-cp311-cp311-win_arm64.whl", hash = "sha256:20e3946863d872f7cabf7f77c6c9d370b8b3d74333d3a32471c50d3a86c0a232", size = 43383, upload-time = "2026-01-14T12:55:07.49Z" }, - { url = "https://files.pythonhosted.org/packages/56/04/79d8fcb43cae376c7adbab7b2b9f65e48432c9eced62ac96703bcc16e09b/librt-0.7.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b6943885b2d49c48d0cff23b16be830ba46b0152d98f62de49e735c6e655a63", size = 57472, upload-time = "2026-01-14T12:55:08.528Z" }, - { url = "https://files.pythonhosted.org/packages/b4/ba/60b96e93043d3d659da91752689023a73981336446ae82078cddf706249e/librt-0.7.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46ef1f4b9b6cc364b11eea0ecc0897314447a66029ee1e55859acb3dd8757c93", size = 58986, upload-time = "2026-01-14T12:55:09.466Z" }, - { url = "https://files.pythonhosted.org/packages/7c/26/5215e4cdcc26e7be7eee21955a7e13cbf1f6d7d7311461a6014544596fac/librt-0.7.8-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:907ad09cfab21e3c86e8f1f87858f7049d1097f77196959c033612f532b4e592", size = 168422, upload-time = "2026-01-14T12:55:10.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/84/e8d1bc86fa0159bfc24f3d798d92cafd3897e84c7fea7fe61b3220915d76/librt-0.7.8-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2991b6c3775383752b3ca0204842743256f3ad3deeb1d0adc227d56b78a9a850", size = 177478, upload-time = "2026-01-14T12:55:11.577Z" }, - { url = "https://files.pythonhosted.org/packages/57/11/d0268c4b94717a18aa91df1100e767b010f87b7ae444dafaa5a2d80f33a6/librt-0.7.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03679b9856932b8c8f674e87aa3c55ea11c9274301f76ae8dc4d281bda55cf62", size = 192439, upload-time = "2026-01-14T12:55:12.7Z" }, - { url = "https://files.pythonhosted.org/packages/8d/56/1e8e833b95fe684f80f8894ae4d8b7d36acc9203e60478fcae599120a975/librt-0.7.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3968762fec1b2ad34ce57458b6de25dbb4142713e9ca6279a0d352fa4e9f452b", size = 191483, upload-time = "2026-01-14T12:55:13.838Z" }, - { url = "https://files.pythonhosted.org/packages/17/48/f11cf28a2cb6c31f282009e2208312aa84a5ee2732859f7856ee306176d5/librt-0.7.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb7a7807523a31f03061288cc4ffc065d684c39db7644c676b47d89553c0d714", size = 185376, upload-time = "2026-01-14T12:55:15.017Z" }, - { url = "https://files.pythonhosted.org/packages/b8/6a/d7c116c6da561b9155b184354a60a3d5cdbf08fc7f3678d09c95679d13d9/librt-0.7.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad64a14b1e56e702e19b24aae108f18ad1bf7777f3af5fcd39f87d0c5a814449", size = 206234, upload-time = "2026-01-14T12:55:16.571Z" }, - { url = "https://files.pythonhosted.org/packages/61/de/1975200bb0285fc921c5981d9978ce6ce11ae6d797df815add94a5a848a3/librt-0.7.8-cp312-cp312-win32.whl", hash = "sha256:0241a6ed65e6666236ea78203a73d800dbed896cf12ae25d026d75dc1fcd1dac", size = 44057, upload-time = "2026-01-14T12:55:18.077Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/cd/724f2d0b3461426730d4877754b65d39f06a41ac9d0a92d5c6840f72b9ae/librt-0.7.8-cp312-cp312-win_amd64.whl", hash = "sha256:6db5faf064b5bab9675c32a873436b31e01d66ca6984c6f7f92621656033a708", size = 50293, upload-time = "2026-01-14T12:55:19.179Z" }, - { url = "https://files.pythonhosted.org/packages/bd/cf/7e899acd9ee5727ad8160fdcc9994954e79fab371c66535c60e13b968ffc/librt-0.7.8-cp312-cp312-win_arm64.whl", hash = "sha256:57175aa93f804d2c08d2edb7213e09276bd49097611aefc37e3fa38d1fb99ad0", size = 43574, upload-time = "2026-01-14T12:55:20.185Z" }, - { url = "https://files.pythonhosted.org/packages/a1/fe/b1f9de2829cf7fc7649c1dcd202cfd873837c5cc2fc9e526b0e7f716c3d2/librt-0.7.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4c3995abbbb60b3c129490fa985dfe6cac11d88fc3c36eeb4fb1449efbbb04fc", size = 57500, upload-time = "2026-01-14T12:55:21.219Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d4/4a60fbe2e53b825f5d9a77325071d61cd8af8506255067bf0c8527530745/librt-0.7.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:44e0c2cbc9bebd074cf2cdbe472ca185e824be4e74b1c63a8e934cea674bebf2", size = 59019, upload-time = "2026-01-14T12:55:22.256Z" }, - { url = "https://files.pythonhosted.org/packages/6a/37/61ff80341ba5159afa524445f2d984c30e2821f31f7c73cf166dcafa5564/librt-0.7.8-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4d2f1e492cae964b3463a03dc77a7fe8742f7855d7258c7643f0ee32b6651dd3", size = 169015, upload-time = "2026-01-14T12:55:23.24Z" }, - { url = "https://files.pythonhosted.org/packages/1c/86/13d4f2d6a93f181ebf2fc953868826653ede494559da8268023fe567fca3/librt-0.7.8-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:451e7ffcef8f785831fdb791bd69211f47e95dc4c6ddff68e589058806f044c6", size = 178161, upload-time = "2026-01-14T12:55:24.826Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/26/e24ef01305954fc4d771f1f09f3dd682f9eb610e1bec188ffb719374d26e/librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3469e1af9f1380e093ae06bedcbdd11e407ac0b303a56bbe9afb1d6824d4982d", size = 193015, upload-time = "2026-01-14T12:55:26.04Z" }, - { url = "https://files.pythonhosted.org/packages/88/a0/92b6bd060e720d7a31ed474d046a69bd55334ec05e9c446d228c4b806ae3/librt-0.7.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f11b300027ce19a34f6d24ebb0a25fd0e24a9d53353225a5c1e6cadbf2916b2e", size = 192038, upload-time = "2026-01-14T12:55:27.208Z" }, - { url = "https://files.pythonhosted.org/packages/06/bb/6f4c650253704279c3a214dad188101d1b5ea23be0606628bc6739456624/librt-0.7.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4adc73614f0d3c97874f02f2c7fd2a27854e7e24ad532ea6b965459c5b757eca", size = 186006, upload-time = "2026-01-14T12:55:28.594Z" }, - { url = "https://files.pythonhosted.org/packages/dc/00/1c409618248d43240cadf45f3efb866837fa77e9a12a71481912135eb481/librt-0.7.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60c299e555f87e4c01b2eca085dfccda1dde87f5a604bb45c2906b8305819a93", size = 206888, upload-time = "2026-01-14T12:55:30.214Z" }, - { url = "https://files.pythonhosted.org/packages/d9/83/b2cfe8e76ff5c1c77f8a53da3d5de62d04b5ebf7cf913e37f8bca43b5d07/librt-0.7.8-cp313-cp313-win32.whl", hash = "sha256:b09c52ed43a461994716082ee7d87618096851319bf695d57ec123f2ab708951", size = 44126, upload-time = "2026-01-14T12:55:31.44Z" }, - { url = "https://files.pythonhosted.org/packages/a9/0b/c59d45de56a51bd2d3a401fc63449c0ac163e4ef7f523ea8b0c0dee86ec5/librt-0.7.8-cp313-cp313-win_amd64.whl", hash = "sha256:f8f4a901a3fa28969d6e4519deceab56c55a09d691ea7b12ca830e2fa3461e34", size = 50262, upload-time = "2026-01-14T12:55:33.01Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/b9/973455cec0a1ec592395250c474164c4a58ebf3e0651ee920fef1a2623f1/librt-0.7.8-cp313-cp313-win_arm64.whl", hash = "sha256:43d4e71b50763fcdcf64725ac680d8cfa1706c928b844794a7aa0fa9ac8e5f09", size = 43600, upload-time = "2026-01-14T12:55:34.054Z" }, - { url = "https://files.pythonhosted.org/packages/1a/73/fa8814c6ce2d49c3827829cadaa1589b0bf4391660bd4510899393a23ebc/librt-0.7.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:be927c3c94c74b05128089a955fba86501c3b544d1d300282cc1b4bd370cb418", size = 57049, upload-time = "2026-01-14T12:55:35.056Z" }, - { url = "https://files.pythonhosted.org/packages/53/fe/f6c70956da23ea235fd2e3cc16f4f0b4ebdfd72252b02d1164dd58b4e6c3/librt-0.7.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7b0803e9008c62a7ef79058233db7ff6f37a9933b8f2573c05b07ddafa226611", size = 58689, upload-time = "2026-01-14T12:55:36.078Z" }, - { url = "https://files.pythonhosted.org/packages/1f/4d/7a2481444ac5fba63050d9abe823e6bc16896f575bfc9c1e5068d516cdce/librt-0.7.8-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:79feb4d00b2a4e0e05c9c56df707934f41fcb5fe53fd9efb7549068d0495b758", size = 166808, upload-time = "2026-01-14T12:55:37.595Z" }, - { url = "https://files.pythonhosted.org/packages/ac/3c/10901d9e18639f8953f57c8986796cfbf4c1c514844a41c9197cf87cb707/librt-0.7.8-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b9122094e3f24aa759c38f46bd8863433820654927370250f460ae75488b66ea", size = 175614, upload-time = "2026-01-14T12:55:38.756Z" }, - { url = "https://files.pythonhosted.org/packages/db/01/5cbdde0951a5090a80e5ba44e6357d375048123c572a23eecfb9326993a7/librt-0.7.8-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e03bea66af33c95ce3addf87a9bf1fcad8d33e757bc479957ddbc0e4f7207ac", size = 189955, upload-time = "2026-01-14T12:55:39.939Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/b4/e80528d2f4b7eaf1d437fcbd6fc6ba4cbeb3e2a0cb9ed5a79f47c7318706/librt-0.7.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f1ade7f31675db00b514b98f9ab9a7698c7282dad4be7492589109471852d398", size = 189370, upload-time = "2026-01-14T12:55:41.057Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ab/938368f8ce31a9787ecd4becb1e795954782e4312095daf8fd22420227c8/librt-0.7.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a14229ac62adcf1b90a15992f1ab9c69ae8b99ffb23cb64a90878a6e8a2f5b81", size = 183224, upload-time = "2026-01-14T12:55:42.328Z" }, - { url = "https://files.pythonhosted.org/packages/3c/10/559c310e7a6e4014ac44867d359ef8238465fb499e7eb31b6bfe3e3f86f5/librt-0.7.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5bcaaf624fd24e6a0cb14beac37677f90793a96864c67c064a91458611446e83", size = 203541, upload-time = "2026-01-14T12:55:43.501Z" }, - { url = "https://files.pythonhosted.org/packages/f8/db/a0db7acdb6290c215f343835c6efda5b491bb05c3ddc675af558f50fdba3/librt-0.7.8-cp314-cp314-win32.whl", hash = "sha256:7aa7d5457b6c542ecaed79cec4ad98534373c9757383973e638ccced0f11f46d", size = 40657, upload-time = "2026-01-14T12:55:44.668Z" }, - { url = "https://files.pythonhosted.org/packages/72/e0/4f9bdc2a98a798511e81edcd6b54fe82767a715e05d1921115ac70717f6f/librt-0.7.8-cp314-cp314-win_amd64.whl", hash = "sha256:3d1322800771bee4a91f3b4bd4e49abc7d35e65166821086e5afd1e6c0d9be44", size = 46835, upload-time = "2026-01-14T12:55:45.655Z" }, - { url = "https://files.pythonhosted.org/packages/f9/3d/59c6402e3dec2719655a41ad027a7371f8e2334aa794ed11533ad5f34969/librt-0.7.8-cp314-cp314-win_arm64.whl", hash = "sha256:5363427bc6a8c3b1719f8f3845ea53553d301382928a86e8fab7984426949bce", size = 39885, upload-time = "2026-01-14T12:55:47.138Z" }, - { url = "https://files.pythonhosted.org/packages/4e/9c/2481d80950b83085fb14ba3c595db56330d21bbc7d88a19f20165f3538db/librt-0.7.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:ca916919793a77e4a98d4a1701e345d337ce53be4a16620f063191f7322ac80f", size = 59161, upload-time = "2026-01-14T12:55:48.45Z" }, - { url = "https://files.pythonhosted.org/packages/96/79/108df2cfc4e672336765d54e3ff887294c1cc36ea4335c73588875775527/librt-0.7.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:54feb7b4f2f6706bb82325e836a01be805770443e2400f706e824e91f6441dde", size = 61008, upload-time = "2026-01-14T12:55:49.527Z" }, - { url = "https://files.pythonhosted.org/packages/46/f2/30179898f9994a5637459d6e169b6abdc982012c0a4b2d4c26f50c06f911/librt-0.7.8-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:39a4c76fee41007070f872b648cc2f711f9abf9a13d0c7162478043377b52c8e", size = 187199, upload-time = "2026-01-14T12:55:50.587Z" }, - { url = "https://files.pythonhosted.org/packages/b4/da/f7563db55cebdc884f518ba3791ad033becc25ff68eb70902b1747dc0d70/librt-0.7.8-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac9c8a458245c7de80bc1b9765b177055efff5803f08e548dd4bb9ab9a8d789b", size = 198317, upload-time = "2026-01-14T12:55:51.991Z" }, - { url = "https://files.pythonhosted.org/packages/b3/6c/4289acf076ad371471fa86718c30ae353e690d3de6167f7db36f429272f1/librt-0.7.8-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b67aa7eff150f075fda09d11f6bfb26edffd300f6ab1666759547581e8f666", size = 210334, upload-time = "2026-01-14T12:55:53.682Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7f/377521ac25b78ac0a5ff44127a0360ee6d5ddd3ce7327949876a30533daa/librt-0.7.8-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:535929b6eff670c593c34ff435d5440c3096f20fa72d63444608a5aef64dd581", size = 211031, upload-time = "2026-01-14T12:55:54.827Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b1/e1e96c3e20b23d00cf90f4aad48f0deb4cdfec2f0ed8380d0d85acf98bbf/librt-0.7.8-cp314-cp314t-musllinux_1_2_i686.whl", hash = 
"sha256:63937bd0f4d1cb56653dc7ae900d6c52c41f0015e25aaf9902481ee79943b33a", size = 204581, upload-time = "2026-01-14T12:55:56.811Z" }, - { url = "https://files.pythonhosted.org/packages/43/71/0f5d010e92ed9747e14bef35e91b6580533510f1e36a8a09eb79ee70b2f0/librt-0.7.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf243da9e42d914036fd362ac3fa77d80a41cadcd11ad789b1b5eec4daaf67ca", size = 224731, upload-time = "2026-01-14T12:55:58.175Z" }, - { url = "https://files.pythonhosted.org/packages/22/f0/07fb6ab5c39a4ca9af3e37554f9d42f25c464829254d72e4ebbd81da351c/librt-0.7.8-cp314-cp314t-win32.whl", hash = "sha256:171ca3a0a06c643bd0a2f62a8944e1902c94aa8e5da4db1ea9a8daf872685365", size = 41173, upload-time = "2026-01-14T12:55:59.315Z" }, - { url = "https://files.pythonhosted.org/packages/24/d4/7e4be20993dc6a782639625bd2f97f3c66125c7aa80c82426956811cfccf/librt-0.7.8-cp314-cp314t-win_amd64.whl", hash = "sha256:445b7304145e24c60288a2f172b5ce2ca35c0f81605f5299f3fa567e189d2e32", size = 47668, upload-time = "2026-01-14T12:56:00.261Z" }, - { url = "https://files.pythonhosted.org/packages/fc/85/69f92b2a7b3c0f88ffe107c86b952b397004b5b8ea5a81da3d9c04c04422/librt-0.7.8-cp314-cp314t-win_arm64.whl", hash = "sha256:8766ece9de08527deabcd7cb1b4f1a967a385d26e33e536d6d8913db6ef74f06", size = 40550, upload-time = "2026-01-14T12:56:01.542Z" }, + { url = "https://files.pythonhosted.org/packages/f3/4a/c64265d71b84030174ff3ac2cd16d8b664072afab8c41fccd8e2ee5a6f8d/librt-0.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f8e12706dcb8ff6b3ed57514a19e45c49ad00bcd423e87b2b2e4b5f64578443", size = 67529, upload-time = "2026-04-09T16:04:27.373Z" }, + { url = "https://files.pythonhosted.org/packages/23/b1/30ca0b3a8bdac209a00145c66cf42e5e7da2cc056ffc6ebc5c7b430ddd34/librt-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4e3dda8345307fd7306db0ed0cb109a63a2c85ba780eb9dc2d09b2049a931f9c", size = 70248, upload-time = "2026-04-09T16:04:28.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/fc/c6018dc181478d6ac5aa24a5846b8185101eb90894346db239eb3ea53209/librt-0.9.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:de7dac64e3eb832ffc7b840eb8f52f76420cde1b845be51b2a0f6b870890645e", size = 202184, upload-time = "2026-04-09T16:04:29.893Z" }, + { url = "https://files.pythonhosted.org/packages/bf/58/d69629f002203370ef41ea69ff71c49a2c618aec39b226ff49986ecd8623/librt-0.9.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a904cbdb678f7cb348c90d543d3c52f581663d687992fee47fd566dcbf5285", size = 212926, upload-time = "2026-04-09T16:04:31.126Z" }, + { url = "https://files.pythonhosted.org/packages/cc/55/01d859f57824e42bd02465c77bec31fa5ef9d8c2bcee702ccf8ef1b9f508/librt-0.9.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:224b9727eb8bc188bc3bcf29d969dba0cd61b01d9bac80c41575520cc4baabb2", size = 225664, upload-time = "2026-04-09T16:04:32.352Z" }, + { url = "https://files.pythonhosted.org/packages/9b/02/32f63ad0ef085a94a70315291efe1151a48b9947af12261882f8445b2a30/librt-0.9.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e94cbc6ad9a6aeea46d775cbb11f361022f778a9cc8cc90af653d3a594b057ce", size = 219534, upload-time = "2026-04-09T16:04:33.667Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5a/9d77111a183c885acf3b3b6e4c00f5b5b07b5817028226499a55f1fedc59/librt-0.9.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7bc30ad339f4e1a01d4917d645e522a0bc0030644d8973f6346397c93ba1503f", size = 227322, upload-time = "2026-04-09T16:04:34.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e7/05d700c93063753e12ab230b972002a3f8f3b9c95d8a980c2f646c8b6963/librt-0.9.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:56d65b583cf43b8cf4c8fbe1e1da20fa3076cc32a1149a141507af1062718236", size = 223407, upload-time = "2026-04-09T16:04:36.22Z" }, + 
{ url = "https://files.pythonhosted.org/packages/c0/26/26c3124823c67c987456977c683da9a27cc874befc194ddcead5f9988425/librt-0.9.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0a1be03168b2691ba61927e299b352a6315189199ca18a57b733f86cb3cc8d38", size = 221302, upload-time = "2026-04-09T16:04:37.62Z" }, + { url = "https://files.pythonhosted.org/packages/50/2b/c7cc2be5cf4ff7b017d948a789256288cb33a517687ff1995e72a7eea79f/librt-0.9.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:63c12efcd160e1d14da11af0c46c0217473e1e0d2ae1acbccc83f561ea4c2a7b", size = 243893, upload-time = "2026-04-09T16:04:38.909Z" }, + { url = "https://files.pythonhosted.org/packages/62/d3/da553d37417a337d12660450535d5fd51373caffbedf6962173c87867246/librt-0.9.0-cp310-cp310-win32.whl", hash = "sha256:e9002e98dcb1c0a66723592520decd86238ddcef168b37ff6cfb559200b4b774", size = 55375, upload-time = "2026-04-09T16:04:40.148Z" }, + { url = "https://files.pythonhosted.org/packages/9b/5a/46fa357bab8311b6442a83471591f2f9e5b15ecc1d2121a43725e0c529b8/librt-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:9fcb461fbf70654a52a7cc670e606f04449e2374c199b1825f754e16dacfedd8", size = 62581, upload-time = "2026-04-09T16:04:41.452Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1e/2ec7afcebcf3efea593d13aee18bbcfdd3a243043d848ebf385055e9f636/librt-0.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90904fac73c478f4b83f4ed96c99c8208b75e6f9a8a1910548f69a00f1eaa671", size = 67155, upload-time = "2026-04-09T16:04:42.933Z" }, + { url = "https://files.pythonhosted.org/packages/18/77/72b85afd4435268338ad4ec6231b3da8c77363f212a0227c1ff3b45e4d35/librt-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:789fff71757facc0738e8d89e3b84e4f0251c1c975e85e81b152cdaca927cc2d", size = 69916, upload-time = "2026-04-09T16:04:44.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/fb/948ea0204fbe2e78add6d46b48330e58d39897e425560674aee302dca81c/librt-0.9.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1bf465d1e5b0a27713862441f6467b5ab76385f4ecf8f1f3a44f8aa3c695b4b6", size = 199635, upload-time = "2026-04-09T16:04:45.5Z" }, + { url = "https://files.pythonhosted.org/packages/ac/cd/894a29e251b296a27957856804cfd21e93c194aa131de8bb8032021be07e/librt-0.9.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f819e0c6413e259a17a7c0d49f97f405abadd3c2a316a3b46c6440b7dbbedbb1", size = 211051, upload-time = "2026-04-09T16:04:47.016Z" }, + { url = "https://files.pythonhosted.org/packages/18/8f/dcaed0bc084a35f3721ff2d081158db569d2c57ea07d35623ddaca5cfc8e/librt-0.9.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e0785c2fb4a81e1aece366aa3e2e039f4a4d7d21aaaded5227d7f3c703427882", size = 224031, upload-time = "2026-04-09T16:04:48.207Z" }, + { url = "https://files.pythonhosted.org/packages/03/44/88f6c1ed1132cd418601cc041fbd92fed28b3a09f39de81978e0822d13ff/librt-0.9.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:80b25c7b570a86c03b5da69e665809deb39265476e8e21d96a9328f9762f9990", size = 218069, upload-time = "2026-04-09T16:04:50.025Z" }, + { url = "https://files.pythonhosted.org/packages/a3/90/7d02e981c2db12188d82b4410ff3e35bfdb844b26aecd02233626f46af2b/librt-0.9.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d4d16b608a1c43d7e33142099a75cd93af482dadce0bf82421e91cad077157f4", size = 224857, upload-time = "2026-04-09T16:04:51.684Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c3/c77e706b7215ca32e928d47535cf13dbc3d25f096f84ddf8fbc06693e229/librt-0.9.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:194fc1a32e1e21fe809d38b5faea66cc65eaa00217c8901fbdb99866938adbdb", size = 219865, upload-time = "2026-04-09T16:04:52.949Z" }, + 
{ url = "https://files.pythonhosted.org/packages/52/d1/32b0c1a0eb8461c70c11656c46a29f760b7c7edf3c36d6f102470c17170f/librt-0.9.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:8c6bc1384d9738781cfd41d09ad7f6e8af13cfea2c75ece6bd6d2566cdea2076", size = 218451, upload-time = "2026-04-09T16:04:54.174Z" }, + { url = "https://files.pythonhosted.org/packages/74/d1/adfd0f9c44761b1d49b1bec66173389834c33ee2bd3c7fd2e2367f1942d4/librt-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:15cb151e52a044f06e54ac7f7b47adbfc89b5c8e2b63e1175a9d587c43e8942a", size = 241300, upload-time = "2026-04-09T16:04:55.452Z" }, + { url = "https://files.pythonhosted.org/packages/09/b0/9074b64407712f0003c27f5b1d7655d1438979155f049720e8a1abd9b1a1/librt-0.9.0-cp311-cp311-win32.whl", hash = "sha256:f100bfe2acf8a3689af9d0cc660d89f17286c9c795f9f18f7b62dd1a6b247ae6", size = 55668, upload-time = "2026-04-09T16:04:56.689Z" }, + { url = "https://files.pythonhosted.org/packages/24/19/40b77b77ce80b9389fb03971431b09b6b913911c38d412059e0b3e2a9ef2/librt-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:0b73e4266307e51c95e09c0750b7ec383c561d2e97d58e473f6f6a209952fbb8", size = 62976, upload-time = "2026-04-09T16:04:57.733Z" }, + { url = "https://files.pythonhosted.org/packages/70/9d/9fa7a64041e29035cb8c575af5f0e3840be1b97b4c4d9061e0713f171849/librt-0.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc5518873822d2faa8ebdd2c1a4d7c8ef47b01a058495ab7924cb65bdbf5fc9a", size = 53502, upload-time = "2026-04-09T16:04:58.806Z" }, + { url = "https://files.pythonhosted.org/packages/bf/90/89ddba8e1c20b0922783cd93ed8e64f34dc05ab59c38a9c7e313632e20ff/librt-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9b3e3bc363f71bda1639a4ee593cb78f7fbfeacc73411ec0d4c92f00730010a4", size = 68332, upload-time = "2026-04-09T16:05:00.09Z" }, + { url = "https://files.pythonhosted.org/packages/a8/40/7aa4da1fb08bdeeb540cb07bfc8207cb32c5c41642f2594dbd0098a0662d/librt-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:0a09c2f5869649101738653a9b7ab70cf045a1105ac66cbb8f4055e61df78f2d", size = 70581, upload-time = "2026-04-09T16:05:01.213Z" }, + { url = "https://files.pythonhosted.org/packages/48/ac/73a2187e1031041e93b7e3a25aae37aa6f13b838c550f7e0f06f66766212/librt-0.9.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5ca8e133d799c948db2ab1afc081c333a825b5540475164726dcbf73537e5c2f", size = 203984, upload-time = "2026-04-09T16:05:02.542Z" }, + { url = "https://files.pythonhosted.org/packages/5e/3d/23460d571e9cbddb405b017681df04c142fb1b04cbfce77c54b08e28b108/librt-0.9.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:603138ee838ee1583f1b960b62d5d0007845c5c423feb68e44648b1359014e27", size = 215762, upload-time = "2026-04-09T16:05:04.127Z" }, + { url = "https://files.pythonhosted.org/packages/de/1e/42dc7f8ab63e65b20640d058e63e97fd3e482c1edbda3570d813b4d0b927/librt-0.9.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4003f70c56a5addd6aa0897f200dd59afd3bf7bcd5b3cce46dd21f925743bc2", size = 230288, upload-time = "2026-04-09T16:05:05.883Z" }, + { url = "https://files.pythonhosted.org/packages/dc/08/ca812b6d8259ad9ece703397f8ad5c03af5b5fedfce64279693d3ce4087c/librt-0.9.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:78042f6facfd98ecb25e9829c7e37cce23363d9d7c83bc5f72702c5059eb082b", size = 224103, upload-time = "2026-04-09T16:05:07.148Z" }, + { url = "https://files.pythonhosted.org/packages/b6/3f/620490fb2fa66ffd44e7f900254bc110ebec8dac6c1b7514d64662570e6f/librt-0.9.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a361c9434a64d70a7dbb771d1de302c0cc9f13c0bffe1cf7e642152814b35265", size = 232122, upload-time = "2026-04-09T16:05:08.386Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/83/12864700a1b6a8be458cf5d05db209b0d8e94ae281e7ec261dbe616597b4/librt-0.9.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:dd2c7e082b0b92e1baa4da28163a808672485617bc855cc22a2fd06978fa9084", size = 225045, upload-time = "2026-04-09T16:05:09.707Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1b/845d339c29dc7dbc87a2e992a1ba8d28d25d0e0372f9a0a2ecebde298186/librt-0.9.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7e6274fd33fc5b2a14d41c9119629d3ff395849d8bcbc80cf637d9e8d2034da8", size = 227372, upload-time = "2026-04-09T16:05:10.942Z" }, + { url = "https://files.pythonhosted.org/packages/8d/fe/277985610269d926a64c606f761d58d3db67b956dbbf40024921e95e7fcb/librt-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5093043afb226ecfa1400120d1ebd4442b4f99977783e4f4f7248879009b227f", size = 248224, upload-time = "2026-04-09T16:05:12.254Z" }, + { url = "https://files.pythonhosted.org/packages/92/1b/ee486d244b8de6b8b5dbaefabe6bfdd4a72e08f6353edf7d16d27114da8d/librt-0.9.0-cp312-cp312-win32.whl", hash = "sha256:9edcc35d1cae9fd5320171b1a838c7da8a5c968af31e82ecc3dff30b4be0957f", size = 55986, upload-time = "2026-04-09T16:05:13.529Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/ba1737012308c17dc6d5516143b5dce9a2c7ba3474afd54e11f44a4d1ef3/librt-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc2917258e131ae5f958a4d872e07555b51cb7466a43433218061c74ef33745", size = 63260, upload-time = "2026-04-09T16:05:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/36/e4/01752c113da15127f18f7bf11142f5640038f062407a611c059d0036c6aa/librt-0.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:90e6d5420fc8a300518d4d2288154ff45005e920425c22cbbfe8330f3f754bd9", size = 53694, upload-time = "2026-04-09T16:05:16.095Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d7/1b3e26fffde1452d82f5666164858a81c26ebe808e7ae8c9c88628981540/librt-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f29b68cd9714531672db62cc54f6e8ff981900f824d13fa0e00749189e13778e", size = 68367, upload-time = "2026-04-09T16:05:17.243Z" }, + { url = "https://files.pythonhosted.org/packages/a5/5b/c61b043ad2e091fbe1f2d35d14795e545d0b56b03edaa390fa1dcee3d160/librt-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d5c8a5929ac325729f6119802070b561f4db793dffc45e9ac750992a4ed4d22", size = 70595, upload-time = "2026-04-09T16:05:18.471Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/2448471196d8a73370aa2f23445455dc42712c21404081fcd7a03b9e0749/librt-0.9.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:756775d25ec8345b837ab52effee3ad2f3b2dfd6bbee3e3f029c517bd5d8f05a", size = 204354, upload-time = "2026-04-09T16:05:19.593Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5e/39fc4b153c78cfd2c8a2dcb32700f2d41d2312aa1050513183be4540930d/librt-0.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b8f5d00b49818f4e2b1667db994488b045835e0ac16fe2f924f3871bd2b8ac5", size = 216238, upload-time = "2026-04-09T16:05:20.868Z" }, + { url = "https://files.pythonhosted.org/packages/d7/42/bc2d02d0fa7badfa63aa8d6dcd8793a9f7ef5a94396801684a51ed8d8287/librt-0.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c81aef782380f0f13ead670aae01825eb653b44b046aa0e5ebbb79f76ed4aa11", size = 230589, upload-time = "2026-04-09T16:05:22.305Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7b/e2d95cc513866373692aa5edf98080d5602dd07cabfb9e5d2f70df2f25f7/librt-0.9.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66b58fed90a545328e80d575467244de3741e088c1af928f0b489ebec3ef3858", size = 224610, upload-time = "2026-04-09T16:05:23.647Z" }, + { url = "https://files.pythonhosted.org/packages/31/d5/6cec4607e998eaba57564d06a1295c21b0a0c8de76e4e74d699e627bd98c/librt-0.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", 
hash = "sha256:e78fb7419e07d98c2af4b8567b72b3eaf8cb05caad642e9963465569c8b2d87e", size = 232558, upload-time = "2026-04-09T16:05:25.025Z" }, + { url = "https://files.pythonhosted.org/packages/95/8c/27f1d8d3aaf079d3eb26439bf0b32f1482340c3552e324f7db9dca858671/librt-0.9.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c3786f0f4490a5cd87f1ed6cefae833ad6b1060d52044ce0434a2e85893afd0", size = 225521, upload-time = "2026-04-09T16:05:26.311Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d8/1e0d43b1c329b416017619469b3c3801a25a6a4ef4a1c68332aeaa6f72ca/librt-0.9.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8494cfc61e03542f2d381e71804990b3931175a29b9278fdb4a5459948778dc2", size = 227789, upload-time = "2026-04-09T16:05:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/2c/b4/d3d842e88610fcd4c8eec7067b0c23ef2d7d3bff31496eded6a83b0f99be/librt-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:07cf11f769831186eeac424376e6189f20ace4f7263e2134bdb9757340d84d4d", size = 248616, upload-time = "2026-04-09T16:05:29.181Z" }, + { url = "https://files.pythonhosted.org/packages/ec/28/527df8ad0d1eb6c8bdfa82fc190f1f7c4cca5a1b6d7b36aeabf95b52d74d/librt-0.9.0-cp313-cp313-win32.whl", hash = "sha256:850d6d03177e52700af605fd60db7f37dcb89782049a149674d1a9649c2138fd", size = 56039, upload-time = "2026-04-09T16:05:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a7/413652ad0d92273ee5e30c000fc494b361171177c83e57c060ecd3c21538/librt-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:a5af136bfba820d592f86c67affcef9b3ff4d4360ac3255e341e964489b48519", size = 63264, upload-time = "2026-04-09T16:05:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/a4/0a/92c244309b774e290ddb15e93363846ae7aa753d9586b8aad511c5e6145b/librt-0.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:4c4d0440a3a8e31d962340c3e1cc3fc9ee7febd34c8d8f770d06adb947779ea5", size = 53728, upload-time = "2026-04-09T16:05:33.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/c1/184e539543f06ea2912f4b92a5ffaede4f9b392689e3f00acbf8134bee92/librt-0.9.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:3f05d145df35dca5056a8bc3838e940efebd893a54b3e19b2dda39ceaa299bcb", size = 67830, upload-time = "2026-04-09T16:05:34.517Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ad/23399bdcb7afca819acacdef31b37ee59de261bd66b503a7995c03c4b0dc/librt-0.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1c587494461ebd42229d0f1739f3aa34237dd9980623ecf1be8d3bcba79f4499", size = 70280, upload-time = "2026-04-09T16:05:35.649Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0b/4542dc5a2b8772dbf92cafb9194701230157e73c14b017b6961a23598b03/librt-0.9.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0a2040f801406b93657a70b72fa12311063a319fee72ce98e1524da7200171f", size = 201925, upload-time = "2026-04-09T16:05:36.739Z" }, + { url = "https://files.pythonhosted.org/packages/31/d4/8ee7358b08fd0cfce051ef96695380f09b3c2c11b77c9bfbc367c921cce5/librt-0.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f38bc489037eca88d6ebefc9c4d41a4e07c8e8b4de5188a9e6d290273ad7ebb1", size = 212381, upload-time = "2026-04-09T16:05:38.043Z" }, + { url = "https://files.pythonhosted.org/packages/f2/94/a2025fe442abedf8b038038dab3dba942009ad42b38ea064a1a9e6094241/librt-0.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3fd278f5e6bf7c75ccd6d12344eb686cc020712683363b66f46ac79d37c799f", size = 227065, upload-time = "2026-04-09T16:05:39.394Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e9/b9fcf6afa909f957cfbbf918802f9dada1bd5d3c1da43d722fd6a310dc3f/librt-0.9.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fcbdf2a9ca24e87bbebb47f1fe34e531ef06f104f98c9ccfc953a3f3344c567a", size = 221333, upload-time = "2026-04-09T16:05:40.999Z" }, + { 
url = "https://files.pythonhosted.org/packages/ac/7c/ba54cd6aa6a3c8cd12757a6870e0c79a64b1e6327f5248dcff98423f4d43/librt-0.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e306d956cfa027fe041585f02a1602c32bfa6bb8ebea4899d373383295a6c62f", size = 229051, upload-time = "2026-04-09T16:05:42.605Z" }, + { url = "https://files.pythonhosted.org/packages/4b/4b/8cfdbad314c8677a0148bf0b70591d6d18587f9884d930276098a235461b/librt-0.9.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:465814ab157986acb9dfa5ccd7df944be5eefc0d08d31ec6e8d88bc71251d845", size = 222492, upload-time = "2026-04-09T16:05:43.842Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d1/2eda69563a1a88706808decdce035e4b32755dbfbb0d05e1a65db9547ed1/librt-0.9.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:703f4ae36d6240bfe24f542bac784c7e4194ec49c3ba5a994d02891649e2d85b", size = 223849, upload-time = "2026-04-09T16:05:45.054Z" }, + { url = "https://files.pythonhosted.org/packages/04/44/b2ed37df6be5b3d42cfe36318e0598e80843d5c6308dd63d0bf4e0ce5028/librt-0.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3be322a15ee5e70b93b7a59cfd074614f22cc8c9ff18bd27f474e79137ea8d3b", size = 245001, upload-time = "2026-04-09T16:05:46.34Z" }, + { url = "https://files.pythonhosted.org/packages/47/e7/617e412426df89169dd2a9ed0cc8752d5763336252c65dbf945199915119/librt-0.9.0-cp314-cp314-win32.whl", hash = "sha256:b8da9f8035bb417770b1e1610526d87ad4fc58a2804dc4d79c53f6d2cf5a6eb9", size = 51799, upload-time = "2026-04-09T16:05:47.738Z" }, + { url = "https://files.pythonhosted.org/packages/24/ed/c22ca4db0ca3cbc285e4d9206108746beda561a9792289c3c31281d7e9df/librt-0.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:b8bd70d5d816566a580d193326912f4a76ec2d28a97dc4cd4cc831c0af8e330e", size = 59165, upload-time = "2026-04-09T16:05:49.198Z" }, + { url = "https://files.pythonhosted.org/packages/24/56/875398fafa4cbc8f15b89366fc3287304ddd3314d861f182a4b87595ace0/librt-0.9.0-cp314-cp314-win_arm64.whl", hash = 
"sha256:fc5758e2b7a56532dc33e3c544d78cbaa9ecf0a0f2a2da2df882c1d6b99a317f", size = 49292, upload-time = "2026-04-09T16:05:50.362Z" }, + { url = "https://files.pythonhosted.org/packages/4c/61/bc448ecbf9b2d69c5cff88fe41496b19ab2a1cbda0065e47d4d0d51c0867/librt-0.9.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f24b90b0e0c8cc9491fb1693ae91fe17cb7963153a1946395acdbdd5818429a4", size = 70175, upload-time = "2026-04-09T16:05:51.564Z" }, + { url = "https://files.pythonhosted.org/packages/60/f2/c47bb71069a73e2f04e70acbd196c1e5cc411578ac99039a224b98920fd4/librt-0.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fe56e80badb66fdcde06bef81bbaa5bfcf6fbd7aefb86222d9e369c38c6b228", size = 72951, upload-time = "2026-04-09T16:05:52.699Z" }, + { url = "https://files.pythonhosted.org/packages/29/19/0549df59060631732df758e8886d92088da5fdbedb35b80e4643664e8412/librt-0.9.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:527b5b820b47a09e09829051452bb0d1dd2122261254e2a6f674d12f1d793d54", size = 225864, upload-time = "2026-04-09T16:05:53.895Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f8/3b144396d302ac08e50f89e64452c38db84bc7b23f6c60479c5d3abd303c/librt-0.9.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d429bdd4ac0ab17c8e4a8af0ed2a7440b16eba474909ab357131018fe8c7e71", size = 241155, upload-time = "2026-04-09T16:05:55.191Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ce/ee67ec14581de4043e61d05786d2aed6c9b5338816b7859bcf07455c6a9f/librt-0.9.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7202bdcac47d3a708271c4304a474a8605a4a9a4a709e954bf2d3241140aa938", size = 252235, upload-time = "2026-04-09T16:05:56.549Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/fa/0ead15daa2b293a54101550b08d4bafe387b7d4a9fc6d2b985602bae69b6/librt-0.9.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0d620e74897f8c2613b3c4e2e9c1e422eb46d2ddd07df540784d44117836af3", size = 244963, upload-time = "2026-04-09T16:05:57.858Z" }, + { url = "https://files.pythonhosted.org/packages/29/68/9fbf9a9aa704ba87689e40017e720aced8d9a4d2b46b82451d8142f91ec9/librt-0.9.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d69fc39e627908f4c03297d5a88d9284b73f4d90b424461e32e8c2485e21c283", size = 257364, upload-time = "2026-04-09T16:05:59.686Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8d/9d60869f1b6716c762e45f66ed945b1e5dd649f7377684c3b176ae424648/librt-0.9.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:c2640e23d2b7c98796f123ffd95cf2022c7777aa8a4a3b98b36c570d37e85eee", size = 247661, upload-time = "2026-04-09T16:06:00.938Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/a5c365093962310bfdb4f6af256f191085078ffb529b3f0cbebb5b33ebe2/librt-0.9.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:451daa98463b7695b0a30aa56bf637831ea559e7b8101ac2ef6382e8eb15e29c", size = 248238, upload-time = "2026-04-09T16:06:02.537Z" }, + { url = "https://files.pythonhosted.org/packages/a0/3c/2d34365177f412c9e19c0a29f969d70f5343f27634b76b765a54d8b27705/librt-0.9.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:928bd06eca2c2bbf4349e5b817f837509b0604342e65a502de1d50a7570afd15", size = 269457, upload-time = "2026-04-09T16:06:03.833Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/de45b239ea3bdf626f982a00c14bfcf2e12d261c510ba7db62c5969a27cd/librt-0.9.0-cp314-cp314t-win32.whl", hash = "sha256:a9c63e04d003bc0fb6a03b348018b9a3002f98268200e22cc80f146beac5dc40", size = 52453, upload-time = "2026-04-09T16:06:05.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/f9/bfb32ae428aa75c0c533915622176f0a17d6da7b72b5a3c6363685914f70/librt-0.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f162af66a2ed3f7d1d161a82ca584efd15acd9c1cff190a373458c32f7d42118", size = 60044, upload-time = "2026-04-09T16:06:06.398Z" }, + { url = "https://files.pythonhosted.org/packages/aa/47/7d70414bcdbb3bc1f458a8d10558f00bbfdb24e5a11740fc8197e12c3255/librt-0.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:a4b25c6c25cac5d0d9d6d6da855195b254e0021e513e0249f0e3b444dc6e0e61", size = 50009, upload-time = "2026-04-09T16:06:07.995Z" }, ] [[package]] @@ -336,11 +361,11 @@ wheels = [ [[package]] name = "markdown2" -version = "2.5.4" +version = "2.5.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652, upload-time = "2025-07-27T16:16:24.307Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/ae/07d4a5fcaa5509221287d289323d75ac8eda5a5a4ac9de2accf7bbcc2b88/markdown2-2.5.5.tar.gz", hash = "sha256:001547e68f6e7fcf0f1cb83f7e82f48aa7d48b2c6a321f0cd20a853a8a2d1664", size = 157249, upload-time = "2026-03-02T20:46:53.411Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954, upload-time = "2025-07-27T16:16:23.026Z" }, + { url = "https://files.pythonhosted.org/packages/43/af/4b3891eb0a49d6cfd5cbf3e9bf514c943afc2b0f13e2c57cc57cd88ecc21/markdown2-2.5.5-py3-none-any.whl", hash = "sha256:be798587e09d1f52d2e4d96a649c4b82a778c75f9929aad52a2c95747fa26941", size = 56250, upload-time = "2026-03-02T20:46:52.032Z" }, ] [[package]] @@ -439,7 +464,7 @@ wheels = [ [[package]] name = 
"mypy" -version = "1.19.1" +version = "1.20.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, @@ -448,39 +473,51 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/af/e3d4b3e9ec91a0ff9aabfdb38692952acf49bbb899c2e4c29acb3a6da3ae/mypy-1.20.2.tar.gz", hash = "sha256:e8222c26daaafd9e8626dec58ae36029f82585890589576f769a650dd20fd665", size = 3817349, upload-time = "2026-04-21T17:12:28.473Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/63/e499890d8e39b1ff2df4c0c6ce5d371b6844ee22b8250687a99fd2f657a8/mypy-1.19.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f05aa3d375b385734388e844bc01733bd33c644ab48e9684faa54e5389775ec", size = 13101333, upload-time = "2025-12-15T05:03:03.28Z" }, - { url = "https://files.pythonhosted.org/packages/72/4b/095626fc136fba96effc4fd4a82b41d688ab92124f8c4f7564bffe5cf1b0/mypy-1.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:022ea7279374af1a5d78dfcab853fe6a536eebfda4b59deab53cd21f6cd9f00b", size = 12164102, upload-time = "2025-12-15T05:02:33.611Z" }, - { url = "https://files.pythonhosted.org/packages/0c/5b/952928dd081bf88a83a5ccd49aaecfcd18fd0d2710c7ff07b8fb6f7032b9/mypy-1.19.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee4c11e460685c3e0c64a4c5de82ae143622410950d6be863303a1c4ba0e36d6", size = 12765799, upload-time = "2025-12-15T05:03:28.44Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/0d/93c2e4a287f74ef11a66fb6d49c7a9f05e47b0a4399040e6719b57f500d2/mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74", size = 13522149, upload-time = "2025-12-15T05:02:36.011Z" }, - { url = "https://files.pythonhosted.org/packages/7b/0e/33a294b56aaad2b338d203e3a1d8b453637ac36cb278b45005e0901cf148/mypy-1.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ab43590f9cd5108f41aacf9fca31841142c786827a74ab7cc8a2eacb634e09a1", size = 13810105, upload-time = "2025-12-15T05:02:40.327Z" }, - { url = "https://files.pythonhosted.org/packages/0e/fd/3e82603a0cb66b67c5e7abababce6bf1a929ddf67bf445e652684af5c5a0/mypy-1.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:2899753e2f61e571b3971747e302d5f420c3fd09650e1951e99f823bc3089dac", size = 10057200, upload-time = "2025-12-15T05:02:51.012Z" }, - { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" }, - { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" }, - { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" }, - { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" }, - { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, - { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, - { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, - { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, - { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, - { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, - { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, - { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, - { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, - { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, - { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, + { url = "https://files.pythonhosted.org/packages/76/97/ce2502df2cecf2ef997b6c6527c4a223b92feb9e7b790cdc8dcd683f3a8a/mypy-1.20.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cf5a4db6dca263010e2c7bff081c89383c72d187ba2cf4c44759aac970e2f0c4", size = 14457059, upload-time = "2026-04-21T17:06:14.935Z" }, + { url = "https://files.pythonhosted.org/packages/c9/34/417ee60b822cc80c0f3dc9f495ad7fd8dbb8d8b2cf4baf22d4046d25d01d/mypy-1.20.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b0e817b518bff7facd7f85ea05b643ad8bdcce684cf29784987b0a7c8e1f997", size = 13346816, upload-time = "2026-04-21T17:10:41.433Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/85/e20951978702df58379d0bcc2e8f7ccdca4e78cd7dc66dd3ddbf9b29d517/mypy-1.20.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97d7b9a485b40f8ca425460e89bf1da2814625b2da627c0dcc6aa46c92631d14", size = 13772593, upload-time = "2026-04-21T17:08:11.24Z" }, + { url = "https://files.pythonhosted.org/packages/63/a5/5441a13259ec516c56fd5de0fd96a69a9590ae6c5e5d3e5174aa84b97973/mypy-1.20.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e1c12f6d2db3d78b909b5f77513c11eb7f2dd2782b96a3ab6dffc7d44575c99", size = 14656635, upload-time = "2026-04-21T17:09:54.042Z" }, + { url = "https://files.pythonhosted.org/packages/3b/51/b89c69157c5e1f19fd125a65d991166a26906e7902f026f00feebbcfa2b9/mypy-1.20.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:89dce27e142d25ffbc154c1819383b69f2e9234dc4ed4766f42e0e8cb264ab5c", size = 14943278, upload-time = "2026-04-21T17:09:15.599Z" }, + { url = "https://files.pythonhosted.org/packages/e9/44/6b0eeecfe96d7cce1d71c66b8e03cb304aa70ec11f1955dc1d6b46aca3c3/mypy-1.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:f376e37f9bf2a946872fc5fd1199c99310748e3c26c7a26683f13f8bdb756cbd", size = 10851915, upload-time = "2026-04-21T17:06:03.5Z" }, + { url = "https://files.pythonhosted.org/packages/3c/36/6593dc88545d75fb96416184be5392da5e2a8e8c2802a8597913e16ae25c/mypy-1.20.2-cp310-cp310-win_arm64.whl", hash = "sha256:6e2b469efd811707bc530fd1effef0f5d6eebcb7fe376affae69025da4b979a2", size = 9786676, upload-time = "2026-04-21T17:07:02.035Z" }, + { url = "https://files.pythonhosted.org/packages/1f/4d/9ebeae211caccbdaddde7ed5e31dfcf57faac66be9b11deb1dc6526c8078/mypy-1.20.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4077797a273e56e8843d001e9dfe4ba10e33323d6ade647ff260e5cd97d9758c", size = 14371307, upload-time = "2026-04-21T17:08:56.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/d7/93473d34b61f04fac1aecc01368485c89c5c4af7a4b9a0cab5d77d04b63f/mypy-1.20.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cdecf62abcc4292500d7858aeae87a1f8f1150f4c4dd08fb0b336ee79b2a6df3", size = 13258917, upload-time = "2026-04-21T17:05:50.978Z" }, + { url = "https://files.pythonhosted.org/packages/e2/30/3dd903e8bafb7b5f7bf87fcd58f8382086dea2aa19f0a7b357f21f63071b/mypy-1.20.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c566c3a88b6ece59b3d70f65bedef17304f48eb52ff040a6a18214e1917b3254", size = 13700516, upload-time = "2026-04-21T17:11:33.161Z" }, + { url = "https://files.pythonhosted.org/packages/07/05/c61a140aba4c729ac7bc99ae26fc627c78a6e08f5b9dd319244ea71a3d7e/mypy-1.20.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0deb80d062b2479f2c87ae568f89845afc71d11bc41b04179e58165fd9f31e98", size = 14562889, upload-time = "2026-04-21T17:05:27.674Z" }, + { url = "https://files.pythonhosted.org/packages/fd/87/da78243742ffa8a36d98c3010f0d829f93d5da4e6786f1a1a6f2ad616502/mypy-1.20.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bba9ad231e92a3e424b3e56b65aa17704993425bba97e302c832f9466bb85bac", size = 14803844, upload-time = "2026-04-21T17:10:06.2Z" }, + { url = "https://files.pythonhosted.org/packages/37/52/10a1ddf91b40f843943a3c6db51e2df59c9e237f29d355e95eaab427461f/mypy-1.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:baf593f2765fa3a6b1ef95807dbaa3d25b594f6a52adcc506a6b9cb115e1be67", size = 10846300, upload-time = "2026-04-21T17:12:23.886Z" }, + { url = "https://files.pythonhosted.org/packages/20/02/f9a4415b664c53bd34d6709be59da303abcae986dc4ac847b402edb6fa1e/mypy-1.20.2-cp311-cp311-win_arm64.whl", hash = "sha256:20175a1c0f49863946ec20b7f63255768058ac4f07d2b9ded6a6b46cfb5a9100", size = 9779498, upload-time = "2026-04-21T17:09:23.695Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/4e/7560e4528db9e9b147e4c0f22660466bf30a0a1fe3d63d1b9d3b0fd354ee/mypy-1.20.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4dbfcf869f6b0517f70cf0030ba6ea1d6645e132337a7d5204a18d8d5636c02b", size = 14539393, upload-time = "2026-04-21T17:07:12.52Z" }, + { url = "https://files.pythonhosted.org/packages/32/d9/34a5efed8124f5a9234f55ac6a4ced4201e2c5b81e1109c49ad23190ec8c/mypy-1.20.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b6481b228d072315b053210b01ac320e1be243dc17f9e5887ef167f23f5fae4", size = 13361642, upload-time = "2026-04-21T17:06:53.742Z" }, + { url = "https://files.pythonhosted.org/packages/d1/14/eb377acf78c03c92d566a1510cda8137348215b5335085ef662ab82ecd3a/mypy-1.20.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34397cdced6b90b836e38182076049fdb41424322e0b0728c946b0939ebdf9f6", size = 13740347, upload-time = "2026-04-21T17:12:04.73Z" }, + { url = "https://files.pythonhosted.org/packages/b9/94/7e4634a32b641aa1c112422eed1bbece61ee16205f674190e8b536f884de/mypy-1.20.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5da6976f20cae27059ea8d0c86e7cef3de720e04c4bb9ee18e3690fdb792066", size = 14734042, upload-time = "2026-04-21T17:07:43.16Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f3/f7e62395cb7f434541b4491a01149a4439e28ace4c0c632bbf5431e92d1f/mypy-1.20.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:56908d7e08318d39f85b1f0c6cfd47b0cac1a130da677630dac0de3e0623e102", size = 14964958, upload-time = "2026-04-21T17:11:00.665Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0d/47e3c3a0ec2a876e35aeac365df3cac7776c36bbd4ed18cc521e1b9d255b/mypy-1.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:d52ad8d78522da1d308789df651ee5379088e77c76cb1994858d40a426b343b9", size = 10911340, upload-time = "2026-04-21T17:10:49.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/b2/6c852d72e0ea8b01f49da817fb52539993cde327e7d010e0103dc12d0dac/mypy-1.20.2-cp312-cp312-win_arm64.whl", hash = "sha256:785b08db19c9f214dc37d65f7c165d19a30fcecb48abfa30f31b01b5acaabb58", size = 9833947, upload-time = "2026-04-21T17:09:05.267Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c4/b93812d3a192c9bcf5df405bd2f30277cd0e48106a14d1023c7f6ed6e39b/mypy-1.20.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:edfbfca868cdd6bd8d974a60f8a3682f5565d3f5c99b327640cedd24c4264026", size = 14524670, upload-time = "2026-04-21T17:10:30.737Z" }, + { url = "https://files.pythonhosted.org/packages/f3/47/42c122501bff18eaf1e8f457f5c017933452d8acdc52918a9f59f6812955/mypy-1.20.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e2877a02380adfcdbc69071a0f74d6e9dbbf593c0dc9d174e1f223ffd5281943", size = 13336218, upload-time = "2026-04-21T17:08:44.069Z" }, + { url = "https://files.pythonhosted.org/packages/92/8f/75bbc92f41725fbd585fb17b440b1119b576105df1013622983e18640a93/mypy-1.20.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7488448de6007cd5177c6cea0517ac33b4c0f5ee9b5e9f2be51ce75511a85517", size = 13724906, upload-time = "2026-04-21T17:08:01.02Z" }, + { url = "https://files.pythonhosted.org/packages/a1/32/4c49da27a606167391ff0c39aa955707a00edc500572e562f7c36c08a71f/mypy-1.20.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb9c2fa06887e21d6a3a868762acb82aec34e2c6fd0174064f27c93ede68ad15", size = 14726046, upload-time = "2026-04-21T17:11:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fc/4e354a1bd70216359deb0c9c54847ee6b32ef78dfb09f5131ff99b494078/mypy-1.20.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d56a78b646f2e3daa865bc70cd5ec5a46c50045801ca8ff17a0c43abc97e3ee", size = 14955587, upload-time = "2026-04-21T17:12:16.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/b2/c0f2056e9eb8f08c62cafd9715e4584b89132bdc832fcf85d27d07b5f3e5/mypy-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:2a4102b03bb7481d9a91a6da8d174740c9c8c4401024684b9ca3b7cc5e49852f", size = 10922681, upload-time = "2026-04-21T17:06:35.842Z" }, + { url = "https://files.pythonhosted.org/packages/e5/14/065e333721f05de8ef683d0aa804c23026bcc287446b61cac657b902ccac/mypy-1.20.2-cp313-cp313-win_arm64.whl", hash = "sha256:a95a9248b0c6fd933a442c03c3b113c3b61320086b88e2c444676d3fd1ca3330", size = 9830560, upload-time = "2026-04-21T17:07:51.023Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d1/b4ec96b0ecc620a4443570c6e95c867903428cfcde4206518eafdd5880c3/mypy-1.20.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:419413398fe250aae057fd2fe50166b61077083c9b82754c341cf4fd73038f30", size = 14524561, upload-time = "2026-04-21T17:06:27.325Z" }, + { url = "https://files.pythonhosted.org/packages/3a/63/d2c2ff4fa66bc49477d32dfa26e8a167ba803ea6a69c5efb416036909d30/mypy-1.20.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e73c07f23009962885c197ccb9b41356a30cc0e5a1d0c2ea8fd8fb1362d7f924", size = 13363883, upload-time = "2026-04-21T17:11:11.239Z" }, + { url = "https://files.pythonhosted.org/packages/2a/56/983916806bf4eddeaaa2c9230903c3669c6718552a921154e1c5182c701f/mypy-1.20.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c64e5973df366b747646fc98da921f9d6eba9716d57d1db94a83c026a08e0fb", size = 13742945, upload-time = "2026-04-21T17:08:34.181Z" }, + { url = "https://files.pythonhosted.org/packages/19/65/0cd9285ab010ee8214c83d67c6b49417c40d86ce46f1aa109457b5a9b8d7/mypy-1.20.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a65aa591af023864fd08a97da9974e919452cfe19cb146c8a5dc692626445dc", size = 14706163, upload-time = "2026-04-21T17:05:15.51Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/97/48ff3b297cafcc94d185243a9190836fb1b01c1b0918fff64e941e973cc9/mypy-1.20.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4fef51b01e638974a6e69885687e9bd40c8d1e09a6cd291cca0619625cf1f558", size = 14938677, upload-time = "2026-04-21T17:05:39.562Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a1/1b4233d255bdd0b38a1f284feeb1c143ca508c19184964e22f8d837ec851/mypy-1.20.2-cp314-cp314-win_amd64.whl", hash = "sha256:913485a03f1bcf5d279409a9d2b9ed565c151f61c09f29991e5faa14033da4c8", size = 11089322, upload-time = "2026-04-21T17:06:44.29Z" }, + { url = "https://files.pythonhosted.org/packages/78/c2/ce7ee2ba36aeb954ba50f18fa25d9c1188578654b97d02a66a15b6f09531/mypy-1.20.2-cp314-cp314-win_arm64.whl", hash = "sha256:c3bae4f855d965b5453784300c12ffc63a548304ac7f99e55d4dc7c898673aa3", size = 10017775, upload-time = "2026-04-21T17:07:20.732Z" }, + { url = "https://files.pythonhosted.org/packages/4e/a1/9d93a7d0b5859af0ead82b4888b46df6c8797e1bc5e1e262a08518c6d48e/mypy-1.20.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2de3dcea53babc1c3237a19002bc3d228ce1833278f093b8d619e06e7cc79609", size = 15549002, upload-time = "2026-04-21T17:08:23.107Z" }, + { url = "https://files.pythonhosted.org/packages/00/d2/09a6a10ee1bf0008f6c144d9676f2ca6a12512151b4e0ad0ff6c4fac5337/mypy-1.20.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:52b176444e2e5054dfcbcb8c75b0b719865c96247b37407184bbfca5c353f2c2", size = 14401942, upload-time = "2026-04-21T17:07:31.837Z" }, + { url = "https://files.pythonhosted.org/packages/57/da/9594b75c3c019e805250bed3583bdf4443ff9e6ef08f97e39ae308cb06f2/mypy-1.20.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:688c3312e5dadb573a2c69c82af3a298d43ecf9e6d264e0f95df960b5f6ac19c", size = 15041649, upload-time = "2026-04-21T17:09:34.653Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/77/f75a65c278e6e8eba2071f7f5a90481891053ecc39878cc444634d892abe/mypy-1.20.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29752dbbf8cc53f89f6ac096d363314333045c257c9c75cbd189ca2de0455744", size = 15864588, upload-time = "2026-04-21T17:11:44.936Z" }, + { url = "https://files.pythonhosted.org/packages/d7/46/1a4e1c66e96c1a3246ddf5403d122ac9b0a8d2b7e65730b9d6533ba7a6d3/mypy-1.20.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:803203d2b6ea644982c644895c2f78b28d0e208bba7b27d9b921e0ec5eb207c6", size = 16093956, upload-time = "2026-04-21T17:10:17.683Z" }, + { url = "https://files.pythonhosted.org/packages/5a/2c/78a8851264dec38cd736ca5b8bc9380674df0dd0be7792f538916157716c/mypy-1.20.2-cp314-cp314t-win_amd64.whl", hash = "sha256:9bcb8aa397ff0093c824182fd76a935a9ba7ad097fcbef80ae89bf6c1731d8ec", size = 12568661, upload-time = "2026-04-21T17:11:54.473Z" }, + { url = "https://files.pythonhosted.org/packages/83/01/cd7318aa03493322ce275a0e14f4f52b8896335e4e79d4fb8153a7ad2b77/mypy-1.20.2-cp314-cp314t-win_arm64.whl", hash = "sha256:e061b58443f1736f8a37c48978d7ab581636d6ab03e3d4f99e3fa90463bb9382", size = 10389240, upload-time = "2026-04-21T17:09:42.719Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/f23c163e25b11074188251b0b5a0342625fc1cdb6af604757174fa9acc9b/mypy-1.20.2-py3-none-any.whl", hash = "sha256:a94c5a76ab46c5e6257c7972b6c8cff0574201ca7dc05647e33e795d78680563", size = 2637314, upload-time = "2026-04-21T17:05:54.5Z" }, ] [[package]] @@ -559,84 +596,85 @@ wheels = [ [[package]] name = "numpy" -version = "2.4.1" +version = "2.4.4" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.15'", + "python_full_version >= '3.11' and python_full_version < '3.15'", ] -sdist = { url = 
"https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/34/2b1bc18424f3ad9af577f6ce23600319968a70575bd7db31ce66731bbef9/numpy-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5", size = 16944563, upload-time = "2026-01-10T06:42:14.615Z" }, - { url = "https://files.pythonhosted.org/packages/2c/57/26e5f97d075aef3794045a6ca9eada6a4ed70eb9a40e7a4a93f9ac80d704/numpy-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:899d2c18024984814ac7e83f8f49d8e8180e2fbe1b2e252f2e7f1d06bea92425", size = 12645658, upload-time = "2026-01-10T06:42:17.298Z" }, - { url = "https://files.pythonhosted.org/packages/8e/ba/80fc0b1e3cb2fd5c6143f00f42eb67762aa043eaa05ca924ecc3222a7849/numpy-2.4.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:09aa8a87e45b55a1c2c205d42e2808849ece5c484b2aab11fecabec3841cafba", size = 5474132, upload-time = "2026-01-10T06:42:19.637Z" }, - { url = "https://files.pythonhosted.org/packages/40/ae/0a5b9a397f0e865ec171187c78d9b57e5588afc439a04ba9cab1ebb2c945/numpy-2.4.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:edee228f76ee2dab4579fad6f51f6a305de09d444280109e0f75df247ff21501", size = 6804159, upload-time = "2026-01-10T06:42:21.44Z" }, - { url = "https://files.pythonhosted.org/packages/86/9c/841c15e691c7085caa6fd162f063eff494099c8327aeccd509d1ab1e36ab/numpy-2.4.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:a92f227dbcdc9e4c3e193add1a189a9909947d4f8504c576f4a732fd0b54240a", size = 14708058, upload-time = "2026-01-10T06:42:23.546Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9d/7862db06743f489e6a502a3b93136d73aea27d97b2cf91504f70a27501d6/numpy-2.4.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:538bf4ec353709c765ff75ae616c34d3c3dca1a68312727e8f2676ea644f8509", size = 16651501, upload-time = "2026-01-10T06:42:25.909Z" }, - { url = "https://files.pythonhosted.org/packages/a6/9c/6fc34ebcbd4015c6e5f0c0ce38264010ce8a546cb6beacb457b84a75dfc8/numpy-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ac08c63cb7779b85e9d5318e6c3518b424bc1f364ac4cb2c6136f12e5ff2dccc", size = 16492627, upload-time = "2026-01-10T06:42:28.938Z" }, - { url = "https://files.pythonhosted.org/packages/aa/63/2494a8597502dacda439f61b3c0db4da59928150e62be0e99395c3ad23c5/numpy-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f9c360ecef085e5841c539a9a12b883dff005fbd7ce46722f5e9cef52634d82", size = 18585052, upload-time = "2026-01-10T06:42:31.312Z" }, - { url = "https://files.pythonhosted.org/packages/6a/93/098e1162ae7522fc9b618d6272b77404c4656c72432ecee3abc029aa3de0/numpy-2.4.1-cp311-cp311-win32.whl", hash = "sha256:0f118ce6b972080ba0758c6087c3617b5ba243d806268623dc34216d69099ba0", size = 6236575, upload-time = "2026-01-10T06:42:33.872Z" }, - { url = "https://files.pythonhosted.org/packages/8c/de/f5e79650d23d9e12f38a7bc6b03ea0835b9575494f8ec94c11c6e773b1b1/numpy-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:18e14c4d09d55eef39a6ab5b08406e84bc6869c1e34eef45564804f90b7e0574", size = 12604479, upload-time = "2026-01-10T06:42:35.778Z" }, - { url = "https://files.pythonhosted.org/packages/dd/65/e1097a7047cff12ce3369bd003811516b20ba1078dbdec135e1cd7c16c56/numpy-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:6461de5113088b399d655d45c3897fa188766415d0f568f175ab071c8873bd73", size = 10578325, upload-time = "2026-01-10T06:42:38.518Z" 
}, - { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888, upload-time = "2026-01-10T06:42:40.913Z" }, - { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956, upload-time = "2026-01-10T06:42:43.091Z" }, - { url = "https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567, upload-time = "2026-01-10T06:42:45.107Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459, upload-time = "2026-01-10T06:42:48.152Z" }, - { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859, upload-time = "2026-01-10T06:42:49.947Z" }, - { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419, upload-time = "2026-01-10T06:42:52.409Z" }, - { url = 
"https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131, upload-time = "2026-01-10T06:42:54.694Z" }, - { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342, upload-time = "2026-01-10T06:42:56.991Z" }, - { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015, upload-time = "2026-01-10T06:42:59.631Z" }, - { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730, upload-time = "2026-01-10T06:43:01.627Z" }, - { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166, upload-time = "2026-01-10T06:43:03.673Z" }, - { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495, upload-time = "2026-01-10T06:43:06.283Z" }, - { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657, upload-time = "2026-01-10T06:43:09.094Z" }, - { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256, upload-time = "2026-01-10T06:43:13.634Z" }, - { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212, upload-time = "2026-01-10T06:43:15.661Z" }, - { url = "https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871, upload-time = "2026-01-10T06:43:17.324Z" }, - { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305, upload-time = "2026-01-10T06:43:19.376Z" }, - { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909, upload-time = "2026-01-10T06:43:21.808Z" }, - { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 
18284380, upload-time = "2026-01-10T06:43:23.957Z" }, - { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089, upload-time = "2026-01-10T06:43:27.535Z" }, - { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230, upload-time = "2026-01-10T06:43:29.298Z" }, - { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125, upload-time = "2026-01-10T06:43:31.782Z" }, - { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156, upload-time = "2026-01-10T06:43:34.237Z" }, - { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663, upload-time = "2026-01-10T06:43:36.211Z" }, - { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224, upload-time = "2026-01-10T06:43:37.884Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352, upload-time = "2026-01-10T06:43:39.479Z" }, - { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279, upload-time = "2026-01-10T06:43:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316, upload-time = "2026-01-10T06:43:44.121Z" }, - { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884, upload-time = "2026-01-10T06:43:46.613Z" }, - { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138, upload-time = "2026-01-10T06:43:48.854Z" }, - { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478, upload-time = "2026-01-10T06:43:50.476Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981, upload-time = "2026-01-10T06:43:52.575Z" }, - { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046, upload-time = "2026-01-10T06:43:54.797Z" }, - { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858, upload-time = "2026-01-10T06:43:57.099Z" }, - { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417, upload-time = "2026-01-10T06:43:59.037Z" }, - { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643, upload-time = "2026-01-10T06:44:01.852Z" }, - { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963, upload-time = "2026-01-10T06:44:04.047Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811, upload-time = "2026-01-10T06:44:06.207Z" }, - { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643, upload-time = "2026-01-10T06:44:08.33Z" }, - { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601, upload-time = "2026-01-10T06:44:10.841Z" }, - { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722, upload-time = "2026-01-10T06:44:13.332Z" }, - { url = "https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590, upload-time = "2026-01-10T06:44:15.006Z" }, - { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180, upload-time = "2026-01-10T06:44:17.386Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774, upload-time = "2026-01-10T06:44:19.467Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274, upload-time = "2026-01-10T06:44:23.189Z" }, - { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306, upload-time = "2026-01-10T06:44:25.012Z" }, - { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653, upload-time = "2026-01-10T06:44:26.706Z" }, - { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144, upload-time = "2026-01-10T06:44:29.378Z" }, - { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425, upload-time = "2026-01-10T06:44:31.721Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053, upload-time = "2026-01-10T06:44:34.617Z" }, - { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482, upload-time = "2026-01-10T06:44:36.798Z" }, - { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117, upload-time = "2026-01-10T06:44:38.828Z" }, - { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, - { url = "https://files.pythonhosted.org/packages/1e/48/d86f97919e79314a1cdee4c832178763e6e98e623e123d0bada19e92c15a/numpy-2.4.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8ad35f20be147a204e28b6a0575fbf3540c5e5f802634d4258d55b1ff5facce1", size = 16822202, upload-time = "2026-01-10T06:44:43.738Z" }, - { url = "https://files.pythonhosted.org/packages/51/e9/1e62a7f77e0f37dcfb0ad6a9744e65df00242b6ea37dfafb55debcbf5b55/numpy-2.4.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8097529164c0f3e32bb89412a0905d9100bf434d9692d9fc275e18dcf53c9344", size = 12569985, upload-time = "2026-01-10T06:44:45.945Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/7e/914d54f0c801342306fdcdce3e994a56476f1b818c46c47fc21ae968088c/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ea66d2b41ca4a1630aae5507ee0a71647d3124d1741980138aa8f28f44dac36e", size = 5398484, upload-time = "2026-01-10T06:44:48.012Z" }, - { url = "https://files.pythonhosted.org/packages/1c/d8/9570b68584e293a33474e7b5a77ca404f1dcc655e40050a600dee81d27fb/numpy-2.4.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d3f8f0df9f4b8be57b3bf74a1d087fec68f927a2fab68231fdb442bf2c12e426", size = 6713216, upload-time = "2026-01-10T06:44:49.725Z" }, - { url = "https://files.pythonhosted.org/packages/33/9b/9dd6e2db8d49eb24f86acaaa5258e5f4c8ed38209a4ee9de2d1a0ca25045/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2023ef86243690c2791fd6353e5b4848eedaa88ca8a2d129f462049f6d484696", size = 14538937, upload-time = "2026-01-10T06:44:51.498Z" }, - { url = "https://files.pythonhosted.org/packages/53/87/d5bd995b0f798a37105b876350d346eea5838bd8f77ea3d7a48392f3812b/numpy-2.4.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8361ea4220d763e54cff2fbe7d8c93526b744f7cd9ddab47afeff7e14e8503be", size = 16479830, upload-time = "2026-01-10T06:44:53.931Z" }, - { url = "https://files.pythonhosted.org/packages/5b/c7/b801bf98514b6ae6475e941ac05c58e6411dd863ea92916bfd6d510b08c1/numpy-2.4.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4f1b68ff47680c2925f8063402a693ede215f0257f02596b1318ecdfb1d79e33", size = 12492579, upload-time = "2026-01-10T06:44:57.094Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c6/4218570d8c8ecc9704b5157a3348e486e84ef4be0ed3e38218ab473c83d2/numpy-2.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f983334aea213c99992053ede6168500e5f086ce74fbc4acc3f2b00f5762e9db", size = 16976799, upload-time = "2026-03-29T13:18:15.438Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/92/b4d922c4a5f5dab9ed44e6153908a5c665b71acf183a83b93b690996e39b/numpy-2.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72944b19f2324114e9dc86a159787333b77874143efcf89a5167ef83cfee8af0", size = 14971552, upload-time = "2026-03-29T13:18:18.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/dc/df98c095978fa6ee7b9a9387d1d58cbb3d232d0e69ad169a4ce784bde4fd/numpy-2.4.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:86b6f55f5a352b48d7fbfd2dbc3d5b780b2d79f4d3c121f33eb6efb22e9a2015", size = 5476566, upload-time = "2026-03-29T13:18:21.532Z" }, + { url = "https://files.pythonhosted.org/packages/28/34/b3fdcec6e725409223dd27356bdf5a3c2cc2282e428218ecc9cb7acc9763/numpy-2.4.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:ba1f4fc670ed79f876f70082eff4f9583c15fb9a4b89d6188412de4d18ae2f40", size = 6806482, upload-time = "2026-03-29T13:18:23.634Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/63417c13aa35d57bee1337c67446761dc25ea6543130cf868eace6e8157b/numpy-2.4.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a87ec22c87be071b6bdbd27920b129b94f2fc964358ce38f3822635a3e2e03d", size = 15973376, upload-time = "2026-03-29T13:18:26.677Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/9fcb7e0e69cef59cf10c746b84f7d58b08bc66a6b7d459783c5a4f6101a6/numpy-2.4.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df3775294accfdd75f32c74ae39fcba920c9a378a2fc18a12b6820aa8c1fb502", size = 16925137, upload-time = "2026-03-29T13:18:30.14Z" }, + { url = "https://files.pythonhosted.org/packages/7e/43/80020edacb3f84b9efdd1591120a4296462c23fd8db0dde1666f6ef66f13/numpy-2.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d4e437e295f18ec29bc79daf55e8a47a9113df44d66f702f02a293d93a2d6dd", size = 17329414, upload-time = "2026-03-29T13:18:33.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/06/af0658593b18a5f73532d377188b964f239eb0894e664a6c12f484472f97/numpy-2.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6aa3236c78803afbcb255045fbef97a9e25a1f6c9888357d205ddc42f4d6eba5", size = 18658397, upload-time = "2026-03-29T13:18:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ce/13a09ed65f5d0ce5c7dd0669250374c6e379910f97af2c08c57b0608eee4/numpy-2.4.4-cp311-cp311-win32.whl", hash = "sha256:30caa73029a225b2d40d9fae193e008e24b2026b7ee1a867b7ee8d96ca1a448e", size = 6239499, upload-time = "2026-03-29T13:18:40.372Z" }, + { url = "https://files.pythonhosted.org/packages/bd/63/05d193dbb4b5eec1eca73822d80da98b511f8328ad4ae3ca4caf0f4db91d/numpy-2.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6bbe4eb67390b0a0265a2c25458f6b90a409d5d069f1041e6aff1e27e3d9a79e", size = 12614257, upload-time = "2026-03-29T13:18:42.95Z" }, + { url = "https://files.pythonhosted.org/packages/87/c5/8168052f080c26fa984c413305012be54741c9d0d74abd7fbeeccae3889f/numpy-2.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:fcfe2045fd2e8f3cb0ce9d4ba6dba6333b8fa05bb8a4939c908cd43322d14c7e", size = 10486775, upload-time = "2026-03-29T13:18:45.835Z" }, + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = 
"sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, 
upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = "https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 
18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/6b/33/8fae8f964a4f63ed528264ddf25d2b683d0b663e3cba26961eb838a7c1bd/numpy-2.4.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:58c8b5929fcb8287cbd6f0a3fae19c6e03a5c48402ae792962ac465224a629a4", size = 16854491, upload-time = "2026-03-29T13:21:38.03Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d0/1aabee441380b981cf8cdda3ae7a46aa827d1b5a8cce84d14598bc94d6d9/numpy-2.4.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:eea7ac5d2dce4189771cedb559c738a71512768210dc4e4753b107a2048b3d0e", size = 14895830, upload-time = "2026-03-29T13:21:41.509Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b8/aafb0d1065416894fccf4df6b49ef22b8db045187949545bced89c034b8e/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:51fc224f7ca4d92656d5a5eb315f12eb5fe2c97a66249aa7b5f562528a3be38c", size = 5400927, upload-time = "2026-03-29T13:21:44.747Z" }, + { url = "https://files.pythonhosted.org/packages/d6/77/063baa20b08b431038c7f9ff5435540c7b7265c78cf56012a483019ca72d/numpy-2.4.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:28a650663f7314afc3e6ec620f44f333c386aad9f6fc472030865dc0ebb26ee3", size = 6715557, upload-time = "2026-03-29T13:21:47.406Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a8/379542d45a14f149444c5c4c4e7714707239ce9cc1de8c2803958889da14/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19710a9ca9992d7174e9c52f643d4272dcd1558c5f7af7f6f8190f633bd651a7", size = 15804253, upload-time = "2026-03-29T13:21:50.753Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/c8/f0a45426d6d21e7ea3310a15cf90c43a14d9232c31a837702dba437f3373/numpy-2.4.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b2aec6af35c113b05695ebb5749a787acd63cafc83086a05771d1e1cd1e555f", size = 16753552, upload-time = "2026-03-29T13:21:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/f4c001f4714c3ad9ce037e18cf2b9c64871a84951eaa0baf683a9ca9301c/numpy-2.4.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2cf083b324a467e1ab358c105f6cad5ea950f50524668a80c486ff1db24e119", size = 12509075, upload-time = "2026-03-29T13:21:57.644Z" }, ] [[package]] @@ -806,6 +844,16 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, ] +[package.dev-dependencies] +test = [ + { name = "overture-schema-addresses-theme" }, + { name = "overture-schema-base-theme" }, + { name = "overture-schema-buildings-theme" }, + { name = "overture-schema-divisions-theme" }, + { name = "overture-schema-places-theme" }, + { name = "overture-schema-transportation-theme" }, +] + [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.1" }, @@ -816,6 +864,16 @@ requires-dist = [ { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, ] +[package.metadata.requires-dev] +test = [ + { name = "overture-schema-addresses-theme", editable = "packages/overture-schema-addresses-theme" }, + { name = "overture-schema-base-theme", editable = "packages/overture-schema-base-theme" }, + { name = "overture-schema-buildings-theme", editable = "packages/overture-schema-buildings-theme" }, + { name = "overture-schema-divisions-theme", editable = "packages/overture-schema-divisions-theme" }, + { name = "overture-schema-places-theme", editable = "packages/overture-schema-places-theme" }, + { name = "overture-schema-transportation-theme", editable = "packages/overture-schema-transportation-theme" }, +] + [[package]] name = "overture-schema-common" source = { 
editable = "packages/overture-schema-common" } @@ -878,6 +936,22 @@ requires-dist = [ { name = "pydantic", extras = ["email"], specifier = ">=2.12.0" }, ] +[[package]] +name = "overture-schema-pyspark" +source = { editable = "packages/overture-schema-pyspark" } +dependencies = [ + { name = "click" }, + { name = "overture-schema-system" }, + { name = "pyspark" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.0" }, + { name = "overture-schema-system", editable = "packages/overture-schema-system" }, + { name = "pyspark", specifier = ">=3.4" }, +] + [[package]] name = "overture-schema-system" source = { editable = "packages/overture-schema-system" } @@ -953,20 +1027,20 @@ dev = [ [[package]] name = "packaging" -version = "26.0" +version = "26.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = 
"2026-04-24T20:15:22.081Z" }, ] [[package]] name = "pathspec" -version = "1.0.4" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/82/42f767fc1c1143d6fd36efb827202a2d997a375e160a71eb2888a925aac1/pathspec-1.1.1.tar.gz", hash = "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a", size = 135180, upload-time = "2026-04-27T01:46:08.907Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, ] [[package]] @@ -994,17 +1068,17 @@ wheels = [ ] [[package]] -name = "ply" -version = "3.11" +name = "py4j" +version = "0.10.9.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload-time = "2018-02-15T19:01:31.097Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/31/0b210511177070c8d5d3059556194352e5753602fa64b85b7ab81ec1a009/py4j-0.10.9.9.tar.gz", hash = 
"sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879", size = 761089, upload-time = "2025-01-15T03:53:18.624Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, + { url = "https://files.pythonhosted.org/packages/bd/db/ea0203e495be491c85af87b66e37acfd3bf756fd985f87e46fc5e3bf022c/py4j-0.10.9.9-py2.py3-none-any.whl", hash = "sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533", size = 203008, upload-time = "2025-01-15T03:53:15.648Z" }, ] [[package]] name = "pydantic" -version = "2.12.5" +version = "2.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -1012,9 +1086,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/e4/40d09941a2cebcb20609b86a559817d5b9291c49dd6f8c87e5feffbe703a/pydantic-2.13.3.tar.gz", hash = "sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d", size = 844068, upload-time = "2026-04-20T14:46:43.632Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/0a/fd7d723f8f8153418fb40cf9c940e82004fce7e987026b08a68a36dd3fe7/pydantic-2.13.3-py3-none-any.whl", hash = "sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927", size = 471981, upload-time = "2026-04-20T14:46:41.402Z" }, ] [package.optional-dependencies] @@ -1024,120 +1098,118 @@ email = [ [[package]] name = "pydantic-core" -version = "2.41.5" +version = "2.46.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/ef/f7abb56c49382a246fd2ce9c799691e3c3e7175ec74b14d99e798bcddb1a/pydantic_core-2.46.3.tar.gz", hash = "sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c", size = 471412, upload-time = "2026-04-20T14:40:56.672Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, - { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, - { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, - { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, - { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, - { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, - { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, - { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, - { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, - { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, - { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, - { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, - { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, - { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, - { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, - { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = 
"2025-11-04T13:39:42.523Z" }, - { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, - { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, - { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, - { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, - { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, - { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, - { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, - { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, - { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, - { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, - { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, - { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, - { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, - { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, - { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, - { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, - { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, - { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, - { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, - { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, - { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, - { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, - { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, - { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = 
"2025-11-04T13:40:44.752Z" }, - { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, - { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, - { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, - { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, - { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, - { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, - { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, - { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, - { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, - { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = 
"2025-11-04T13:41:09.827Z" }, - { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, - { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, - { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, - { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, - { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, - { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, - { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, - { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, - { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, - { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, - { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, - { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, - { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, - { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, - { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, - { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, - { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, - { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, - { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, - { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, - { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, - { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, - { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, - { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, - { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, - { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, - { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, - { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, - { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, - { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, - { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, - { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/22/98/b50eb9a411e87483b5c65dba4fa430a06bac4234d3403a40e5a9905ebcd0/pydantic_core-2.46.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:1da3786b8018e60349680720158cc19161cc3b4bdd815beb0a321cd5ce1ad5b1", size = 2108971, upload-time = "2026-04-20T14:43:51.945Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f364b9d161718ff2217160a4b5d41ce38de60aed91c3689ebffa1c939d23/pydantic_core-2.46.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc0988cb29d21bf4a9d5cf2ef970b5c0e38d8d8e107a493278c05dc6c1dda69f", size = 1949588, upload-time = "2026-04-20T14:44:10.386Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8b/30bd03ee83b2f5e29f5ba8e647ab3c456bf56f2ec72fdbcc0215484a0854/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:27f9067c3bfadd04c55484b89c0d267981b2f3512850f6f66e1e74204a4e4ce3", size = 1975986, upload-time = "2026-04-20T14:43:57.106Z" }, + { url = "https://files.pythonhosted.org/packages/3c/54/13ccf954d84ec275d5d023d5786e4aa48840bc9f161f2838dc98e1153518/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a642ac886ecf6402d9882d10c405dcf4b902abeb2972cd5fb4a48c83cd59279a", size = 2055830, upload-time = "2026-04-20T14:44:15.499Z" }, + { url = "https://files.pythonhosted.org/packages/be/0e/65f38125e660fdbd72aa858e7dfae893645cfa0e7b13d333e174a367cd23/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79f561438481f28681584b89e2effb22855e2179880314bcddbf5968e935e807", size = 2222340, upload-time = "2026-04-20T14:41:51.353Z" }, + { url = "https://files.pythonhosted.org/packages/d1/88/f3ab7739efe0e7e80777dbb84c59eb98518e3f57ea433206194c2e425272/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57a973eae4665352a47cf1a99b4ee864620f2fe663a217d7a8da68a1f3a5bfda", size = 2280727, upload-time = "2026-04-20T14:41:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6d/c228219080817bec4982f9531cadb18da6aaa770fdeb114f49c237ac2c9f/pydantic_core-2.46.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83d002b97072a53ea150d63e0a3adfae5670cef5aa8a6e490240e482d3b22e57", size = 2092158, upload-time = "2026-04-20T14:44:07.305Z" }, + { url = "https://files.pythonhosted.org/packages/0f/b1/525a16711e7c6d61635fac3b0bd54600b5c5d9f60c6fc5aaab26b64a2297/pydantic_core-2.46.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:b40ddd51e7c44b28cfaef746c9d3c506d658885e0a46f9eeef2ee815cbf8e045", size = 2116626, upload-time = "2026-04-20T14:42:34.118Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/7c/17d30673351439a6951bf54f564cf2443ab00ae264ec9df00e2efd710eb5/pydantic_core-2.46.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac5ec7fb9b87f04ee839af2d53bcadea57ded7d229719f56c0ed895bff987943", size = 2160691, upload-time = "2026-04-20T14:41:14.023Z" }, + { url = "https://files.pythonhosted.org/packages/86/66/af8adbcbc0886ead7f1a116606a534d75a307e71e6e08226000d51b880d2/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a3b11c812f61b3129c4905781a2601dfdfdea5fe1e6c1cfb696b55d14e9c054f", size = 2182543, upload-time = "2026-04-20T14:40:48.886Z" }, + { url = "https://files.pythonhosted.org/packages/b0/37/6de71e0f54c54a4190010f57deb749e1ddf75c568ada3b1320b70067f121/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:1108da631e602e5b3c38d6d04fe5bb3bfa54349e6918e3ca6cf570b2e2b2f9d4", size = 2324513, upload-time = "2026-04-20T14:42:36.121Z" }, + { url = "https://files.pythonhosted.org/packages/51/b1/9fc74ce94f603d5ef59ff258ca9c2c8fb902fb548d340a96f77f4d1c3b7f/pydantic_core-2.46.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de885175515bcfa98ae618c1df7a072f13d179f81376c8007112af20567fd08a", size = 2361853, upload-time = "2026-04-20T14:43:24.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/d0/4c652fc592db35f100279ee751d5a145aca1b9a7984b9684ba7c1b5b0535/pydantic_core-2.46.3-cp310-cp310-win32.whl", hash = "sha256:d11058e3201527d41bc6b545c79187c9e4bf85e15a236a6007f0e991518882b7", size = 1980465, upload-time = "2026-04-20T14:44:46.239Z" }, + { url = "https://files.pythonhosted.org/packages/27/b8/a920453c38afbe1f355e1ea0b0d94a0a3e0b0879d32d793108755fa171d5/pydantic_core-2.46.3-cp310-cp310-win_amd64.whl", hash = "sha256:3612edf65c8ea67ac13616c4d23af12faef1ae435a8a93e5934c2a0cbbdd1fd6", size = 2073884, upload-time = "2026-04-20T14:43:01.201Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/a2/1ba90a83e85a3f94c796b184f3efde9c72f2830dcda493eea8d59ba78e6d/pydantic_core-2.46.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ab124d49d0459b2373ecf54118a45c28a1e6d4192a533fbc915e70f556feb8e5", size = 2106740, upload-time = "2026-04-20T14:41:20.932Z" }, + { url = "https://files.pythonhosted.org/packages/b6/f6/99ae893c89a0b9d3daec9f95487aa676709aa83f67643b3f0abaf4ab628a/pydantic_core-2.46.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cca67d52a5c7a16aed2b3999e719c4bcf644074eac304a5d3d62dd70ae7d4b2c", size = 1948293, upload-time = "2026-04-20T14:43:42.115Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b8/2e8e636dc9e3f16c2e16bf0849e24be82c5ee82c603c65fc0326666328fc/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c024e08c0ba23e6fd68c771a521e9d6a792f2ebb0fa734296b36394dc30390e", size = 1973222, upload-time = "2026-04-20T14:41:57.841Z" }, + { url = "https://files.pythonhosted.org/packages/34/36/0e730beec4d83c5306f417afbd82ff237d9a21e83c5edf675f31ed84c1fe/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6645ce7eec4928e29a1e3b3d5c946621d105d3e79f0c9cddf07c2a9770949287", size = 2053852, upload-time = "2026-04-20T14:40:43.077Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f0/3071131f47e39136a17814576e0fada9168569f7f8c0e6ac4d1ede6a4958/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a712c7118e6c5ea96562f7b488435172abb94a3c53c22c9efc1412264a45cbbe", size = 2221134, upload-time = "2026-04-20T14:43:03.349Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a9/a2dc023eec5aa4b02a467874bad32e2446957d2adcab14e107eab502e978/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69a868ef3ff206343579021c40faf3b1edc64b1cc508ff243a28b0a514ccb050", size = 2279785, upload-time = "2026-04-20T14:41:19.285Z" }, + { 
url = "https://files.pythonhosted.org/packages/0a/44/93f489d16fb63fbd41c670441536541f6e8cfa1e5a69f40bc9c5d30d8c90/pydantic_core-2.46.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc7e8c32db809aa0f6ea1d6869ebc8518a65d5150fdfad8bcae6a49ae32a22e2", size = 2089404, upload-time = "2026-04-20T14:43:10.108Z" }, + { url = "https://files.pythonhosted.org/packages/2a/78/8692e3aa72b2d004f7a5d937f1dfdc8552ba26caf0bec75f342c40f00dec/pydantic_core-2.46.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:3481bd1341dc85779ee506bc8e1196a277ace359d89d28588a9468c3ecbe63fa", size = 2114898, upload-time = "2026-04-20T14:44:51.475Z" }, + { url = "https://files.pythonhosted.org/packages/6a/62/e83133f2e7832532060175cebf1f13748f4c7e7e7165cdd1f611f174494b/pydantic_core-2.46.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8690eba565c6d68ffd3a8655525cbdd5246510b44a637ee2c6c03a7ebfe64d3c", size = 2157856, upload-time = "2026-04-20T14:43:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ec/6a500e3ad7718ee50583fae79c8651f5d37e3abce1fa9ae177ae65842c53/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4de88889d7e88d50d40ee5b39d5dac0bcaef9ba91f7e536ac064e6b2834ecccf", size = 2180168, upload-time = "2026-04-20T14:42:00.302Z" }, + { url = "https://files.pythonhosted.org/packages/d8/53/8267811054b1aa7fc1dc7ded93812372ef79a839f5e23558136a6afbfde1/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:e480080975c1ef7f780b8f99ed72337e7cc5efea2e518a20a692e8e7b278eb8b", size = 2322885, upload-time = "2026-04-20T14:41:05.253Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c1/1c0acdb3aa0856ddc4ecc55214578f896f2de16f400cf51627eb3c26c1c4/pydantic_core-2.46.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de3a5c376f8cd94da9a1b8fd3dd1c16c7a7b216ed31dc8ce9fd7a22bf13b836e", size = 2360328, upload-time = "2026-04-20T14:41:43.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/d0/ef39cd0f4a926814f360e71c1adeab48ad214d9727e4deb48eedfb5bce1a/pydantic_core-2.46.3-cp311-cp311-win32.whl", hash = "sha256:fc331a5314ffddd5385b9ee9d0d2fee0b13c27e0e02dad71b1ae5d6561f51eeb", size = 1979464, upload-time = "2026-04-20T14:43:12.215Z" }, + { url = "https://files.pythonhosted.org/packages/18/9c/f41951b0d858e343f1cf09398b2a7b3014013799744f2c4a8ad6a3eec4f2/pydantic_core-2.46.3-cp311-cp311-win_amd64.whl", hash = "sha256:b5b9c6cf08a8a5e502698f5e153056d12c34b8fb30317e0c5fd06f45162a6346", size = 2070837, upload-time = "2026-04-20T14:41:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1e/264a17cd582f6ed50950d4d03dd5fefd84e570e238afe1cb3e25cf238769/pydantic_core-2.46.3-cp311-cp311-win_arm64.whl", hash = "sha256:5dfd51cf457482f04ec49491811a2b8fd5b843b64b11eecd2d7a1ee596ea78a6", size = 2053647, upload-time = "2026-04-20T14:42:27.535Z" }, + { url = "https://files.pythonhosted.org/packages/4b/cb/5b47425556ecc1f3fe18ed2a0083188aa46e1dd812b06e406475b3a5d536/pydantic_core-2.46.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b11b59b3eee90a80a36701ddb4576d9ae31f93f05cb9e277ceaa09e6bf074a67", size = 2101946, upload-time = "2026-04-20T14:40:52.581Z" }, + { url = "https://files.pythonhosted.org/packages/a1/4f/2fb62c2267cae99b815bbf4a7b9283812c88ca3153ef29f7707200f1d4e5/pydantic_core-2.46.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af8653713055ea18a3abc1537fe2ebc42f5b0bbb768d1eb79fd74eb47c0ac089", size = 1951612, upload-time = "2026-04-20T14:42:42.996Z" }, + { url = "https://files.pythonhosted.org/packages/50/6e/b7348fd30d6556d132cddd5bd79f37f96f2601fe0608afac4f5fb01ec0b3/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75a519dab6d63c514f3a81053e5266c549679e4aa88f6ec57f2b7b854aceb1b0", size = 1977027, upload-time = "2026-04-20T14:42:02.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/11/31d60ee2b45540d3fb0b29302a393dbc01cd771c473f5b5147bcd353e593/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6cd87cb1575b1ad05ba98894c5b5c96411ef678fa2f6ed2576607095b8d9789", size = 2063008, upload-time = "2026-04-20T14:44:17.952Z" }, + { url = "https://files.pythonhosted.org/packages/8a/db/3a9d1957181b59258f44a2300ab0f0be9d1e12d662a4f57bb31250455c52/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f80a55484b8d843c8ada81ebf70a682f3f00a3d40e378c06cf17ecb44d280d7d", size = 2233082, upload-time = "2026-04-20T14:40:57.934Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e1/3277c38792aeb5cfb18c2f0c5785a221d9ff4e149abbe1184d53d5f72273/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3861f1731b90c50a3266316b9044f5c9b405eecb8e299b0a7120596334e4fe9c", size = 2304615, upload-time = "2026-04-20T14:42:12.584Z" }, + { url = "https://files.pythonhosted.org/packages/5e/d5/e3d9717c9eba10855325650afd2a9cba8e607321697f18953af9d562da2f/pydantic_core-2.46.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb528e295ed31570ac3dcc9bfdd6e0150bc11ce6168ac87a8082055cf1a67395", size = 2094380, upload-time = "2026-04-20T14:43:05.522Z" }, + { url = "https://files.pythonhosted.org/packages/a1/20/abac35dedcbfd66c6f0b03e4e3564511771d6c9b7ede10a362d03e110d9b/pydantic_core-2.46.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:367508faa4973b992b271ba1494acaab36eb7e8739d1e47be5035fb1ea225396", size = 2135429, upload-time = "2026-04-20T14:41:55.549Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a5/41bfd1df69afad71b5cf0535055bccc73022715ad362edbc124bc1e021d7/pydantic_core-2.46.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ad3c826fe523e4becf4fe39baa44286cff85ef137c729a2c5e269afbfd0905d", size = 2174582, upload-time = 
"2026-04-20T14:41:45.96Z" }, + { url = "https://files.pythonhosted.org/packages/79/65/38d86ea056b29b2b10734eb23329b7a7672ca604df4f2b6e9c02d4ee22fe/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ec638c5d194ef8af27db69f16c954a09797c0dc25015ad6123eb2c73a4d271ca", size = 2187533, upload-time = "2026-04-20T14:40:55.367Z" }, + { url = "https://files.pythonhosted.org/packages/b6/55/a1129141678a2026badc539ad1dee0a71d06f54c2f06a4bd68c030ac781b/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:28ed528c45446062ee66edb1d33df5d88828ae167de76e773a3c7f64bd14e976", size = 2332985, upload-time = "2026-04-20T14:44:13.05Z" }, + { url = "https://files.pythonhosted.org/packages/d7/60/cb26f4077719f709e54819f4e8e1d43f4091f94e285eb6bd21e1190a7b7c/pydantic_core-2.46.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aed19d0c783886d5bd86d80ae5030006b45e28464218747dcf83dabfdd092c7b", size = 2373670, upload-time = "2026-04-20T14:41:53.421Z" }, + { url = "https://files.pythonhosted.org/packages/6b/7e/c3f21882bdf1d8d086876f81b5e296206c69c6082551d776895de7801fa0/pydantic_core-2.46.3-cp312-cp312-win32.whl", hash = "sha256:06d5d8820cbbdb4147578c1fe7ffcd5b83f34508cb9f9ab76e807be7db6ff0a4", size = 1966722, upload-time = "2026-04-20T14:44:30.588Z" }, + { url = "https://files.pythonhosted.org/packages/57/be/6b5e757b859013ebfbd7adba02f23b428f37c86dcbf78b5bb0b4ffd36e99/pydantic_core-2.46.3-cp312-cp312-win_amd64.whl", hash = "sha256:c3212fda0ee959c1dd04c60b601ec31097aaa893573a3a1abd0a47bcac2968c1", size = 2072970, upload-time = "2026-04-20T14:42:54.248Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f8/a989b21cc75e9a32d24192ef700eea606521221a89faa40c919ce884f2b1/pydantic_core-2.46.3-cp312-cp312-win_arm64.whl", hash = "sha256:f1f8338dd7a7f31761f1f1a3c47503a9a3b34eea3c8b01fa6ee96408affb5e72", size = 2035963, upload-time = "2026-04-20T14:44:20.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/3c/9b5e8eb9821936d065439c3b0fb1490ffa64163bfe7e1595985a47896073/pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37", size = 2102109, upload-time = "2026-04-20T14:41:24.219Z" }, + { url = "https://files.pythonhosted.org/packages/91/97/1c41d1f5a19f241d8069f1e249853bcce378cdb76eec8ab636d7bc426280/pydantic_core-2.46.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f", size = 1951820, upload-time = "2026-04-20T14:42:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/30/b4/d03a7ae14571bc2b6b3c7b122441154720619afe9a336fa3a95434df5e2f/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8", size = 1977785, upload-time = "2026-04-20T14:42:31.648Z" }, + { url = "https://files.pythonhosted.org/packages/ae/0c/4086f808834b59e3c8f1aa26df8f4b6d998cdcf354a143d18ef41529d1fe/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad", size = 2062761, upload-time = "2026-04-20T14:40:37.093Z" }, + { url = "https://files.pythonhosted.org/packages/fa/71/a649be5a5064c2df0db06e0a512c2281134ed2fcc981f52a657936a7527c/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c", size = 2232989, upload-time = "2026-04-20T14:42:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/7756e75763e810b3a710f4724441d1ecc5883b94aacb07ca71c5fb5cfb69/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f", size = 2303975, upload-time = "2026-04-20T14:41:32.287Z" }, + { 
url = "https://files.pythonhosted.org/packages/6c/35/68a762e0c1e31f35fa0dac733cbd9f5b118042853698de9509c8e5bf128b/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35", size = 2095325, upload-time = "2026-04-20T14:42:47.685Z" }, + { url = "https://files.pythonhosted.org/packages/77/bf/1bf8c9a8e91836c926eae5e3e51dce009bf495a60ca56060689d3df3f340/pydantic_core-2.46.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687", size = 2133368, upload-time = "2026-04-20T14:41:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/e5/50/87d818d6bab915984995157ceb2380f5aac4e563dddbed6b56f0ed057aba/pydantic_core-2.46.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3", size = 2173908, upload-time = "2026-04-20T14:42:52.044Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/a311fb306d0bd6185db41fa14ae888fb81d0baf648a761ae760d30819d33/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022", size = 2186422, upload-time = "2026-04-20T14:43:29.55Z" }, + { url = "https://files.pythonhosted.org/packages/8f/79/28fd0d81508525ab2054fef7c77a638c8b5b0afcbbaeee493cf7c3fef7e1/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23", size = 2332709, upload-time = "2026-04-20T14:42:16.134Z" }, + { url = "https://files.pythonhosted.org/packages/b3/21/795bf5fe5c0f379308b8ef19c50dedab2e7711dbc8d0c2acf08f1c7daa05/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7", size = 2372428, upload-time = "2026-04-20T14:41:10.974Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/b3/ed14c659cbe7605e3ef063077680a64680aec81eb1a04763a05190d49b7f/pydantic_core-2.46.3-cp313-cp313-win32.whl", hash = "sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13", size = 1965601, upload-time = "2026-04-20T14:41:42.128Z" }, + { url = "https://files.pythonhosted.org/packages/ef/bb/adb70d9a762ddd002d723fbf1bd492244d37da41e3af7b74ad212609027e/pydantic_core-2.46.3-cp313-cp313-win_amd64.whl", hash = "sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0", size = 2071517, upload-time = "2026-04-20T14:43:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/52/eb/66faefabebfe68bd7788339c9c9127231e680b11906368c67ce112fdb47f/pydantic_core-2.46.3-cp313-cp313-win_arm64.whl", hash = "sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec", size = 2035802, upload-time = "2026-04-20T14:43:38.507Z" }, + { url = "https://files.pythonhosted.org/packages/7f/db/a7bcb4940183fda36022cd18ba8dd12f2dff40740ec7b58ce7457befa416/pydantic_core-2.46.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b", size = 2097614, upload-time = "2026-04-20T14:44:38.374Z" }, + { url = "https://files.pythonhosted.org/packages/24/35/e4066358a22e3e99519db370494c7528f5a2aa1367370e80e27e20283543/pydantic_core-2.46.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018", size = 1951896, upload-time = "2026-04-20T14:40:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/87/92/37cf4049d1636996e4b888c05a501f40a43ff218983a551d57f9d5e14f0d/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34", size = 1979314, upload-time = "2026-04-20T14:41:49.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/36/9ff4d676dfbdfb2d591cf43f3d90ded01e15b1404fd101180ed2d62a2fd3/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7", size = 2056133, upload-time = "2026-04-20T14:42:23.574Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f0/405b442a4d7ba855b06eec8b2bf9c617d43b8432d099dfdc7bf999293495/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2", size = 2228726, upload-time = "2026-04-20T14:44:22.816Z" }, + { url = "https://files.pythonhosted.org/packages/e7/f8/65cd92dd5a0bd89ba277a98ecbfaf6fc36bbd3300973c7a4b826d6ab1391/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba", size = 2301214, upload-time = "2026-04-20T14:44:48.792Z" }, + { url = "https://files.pythonhosted.org/packages/fd/86/ef96a4c6e79e7a2d0410826a68fbc0eccc0fd44aa733be199d5fcac3bb87/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f", size = 2099927, upload-time = "2026-04-20T14:41:40.196Z" }, + { url = "https://files.pythonhosted.org/packages/6d/53/269caf30e0096e0a8a8f929d1982a27b3879872cca2d917d17c2f9fdf4fe/pydantic_core-2.46.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22", size = 2128789, upload-time = "2026-04-20T14:41:15.868Z" }, + { url = "https://files.pythonhosted.org/packages/00/b0/1a6d9b6a587e118482910c244a1c5acf4d192604174132efd12bf0ac486f/pydantic_core-2.46.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f", size = 2173815, upload-time = 
"2026-04-20T14:44:25.152Z" }, + { url = "https://files.pythonhosted.org/packages/87/56/e7e00d4041a7e62b5a40815590114db3b535bf3ca0bf4dca9f16cef25246/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127", size = 2181608, upload-time = "2026-04-20T14:41:28.933Z" }, + { url = "https://files.pythonhosted.org/packages/e8/22/4bd23c3d41f7c185d60808a1de83c76cf5aeabf792f6c636a55c3b1ec7f9/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c", size = 2326968, upload-time = "2026-04-20T14:42:03.962Z" }, + { url = "https://files.pythonhosted.org/packages/24/ac/66cd45129e3915e5ade3b292cb3bc7fd537f58f8f8dbdaba6170f7cabb74/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1", size = 2369842, upload-time = "2026-04-20T14:41:35.52Z" }, + { url = "https://files.pythonhosted.org/packages/a2/51/dd4248abb84113615473aa20d5545b7c4cd73c8644003b5259686f93996c/pydantic_core-2.46.3-cp314-cp314-win32.whl", hash = "sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505", size = 1959661, upload-time = "2026-04-20T14:41:00.042Z" }, + { url = "https://files.pythonhosted.org/packages/20/eb/59980e5f1ae54a3b86372bd9f0fa373ea2d402e8cdcd3459334430f91e91/pydantic_core-2.46.3-cp314-cp314-win_amd64.whl", hash = "sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e", size = 2071686, upload-time = "2026-04-20T14:43:16.471Z" }, + { url = "https://files.pythonhosted.org/packages/8c/db/1cf77e5247047dfee34bc01fa9bca134854f528c8eb053e144298893d370/pydantic_core-2.46.3-cp314-cp314-win_arm64.whl", hash = "sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df", size = 2026907, upload-time = "2026-04-20T14:43:31.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/c0/b3df9f6a543276eadba0a48487b082ca1f201745329d97dbfa287034a230/pydantic_core-2.46.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf", size = 2095047, upload-time = "2026-04-20T14:42:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/57/886a938073b97556c168fd99e1a7305bb363cd30a6d2c76086bf0587b32a/pydantic_core-2.46.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee", size = 1934329, upload-time = "2026-04-20T14:43:49.655Z" }, + { url = "https://files.pythonhosted.org/packages/0b/7c/b42eaa5c34b13b07ecb51da21761297a9b8eb43044c864a035999998f328/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a", size = 1974847, upload-time = "2026-04-20T14:42:10.737Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9b/92b42db6543e7de4f99ae977101a2967b63122d4b6cf7773812da2d7d5b5/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c", size = 2041742, upload-time = "2026-04-20T14:40:44.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/19/46fbe1efabb5aa2834b43b9454e70f9a83ad9c338c1291e48bdc4fecf167/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1", size = 2236235, upload-time = "2026-04-20T14:41:27.307Z" }, + { url = "https://files.pythonhosted.org/packages/77/da/b3f95bc009ad60ec53120f5d16c6faa8cabdbe8a20d83849a1f2b8728148/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64", size = 2282633, upload-time = "2026-04-20T14:44:33.271Z" 
}, + { url = "https://files.pythonhosted.org/packages/cc/6e/401336117722e28f32fb8220df676769d28ebdf08f2f4469646d404c43a3/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb", size = 2109679, upload-time = "2026-04-20T14:44:41.065Z" }, + { url = "https://files.pythonhosted.org/packages/fc/53/b289f9bc8756a32fe718c46f55afaeaf8d489ee18d1a1e7be1db73f42cc4/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6", size = 2108342, upload-time = "2026-04-20T14:42:50.144Z" }, + { url = "https://files.pythonhosted.org/packages/10/5b/8292fc7c1f9111f1b2b7c1b0dcf1179edcd014fc3ea4517499f50b829d71/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c", size = 2157208, upload-time = "2026-04-20T14:42:08.133Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9e/f80044e9ec07580f057a89fc131f78dda7a58751ddf52bbe05eaf31db50f/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47", size = 2167237, upload-time = "2026-04-20T14:42:25.412Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/6781a1b037f3b96be9227edbd1101f6d3946746056231bf4ac48cdff1a8d/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab", size = 2312540, upload-time = "2026-04-20T14:40:40.313Z" }, + { url = "https://files.pythonhosted.org/packages/3e/db/19c0839feeb728e7df03255581f198dfdf1c2aeb1e174a8420b63c5252e5/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba", size = 2369556, upload-time = "2026-04-20T14:41:09.427Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/15/3228774cb7cd45f5f721ddf1b2242747f4eb834d0c491f0c02d606f09fed/pydantic_core-2.46.3-cp314-cp314t-win32.whl", hash = "sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56", size = 1949756, upload-time = "2026-04-20T14:41:25.717Z" }, + { url = "https://files.pythonhosted.org/packages/b8/2a/c79cf53fd91e5a87e30d481809f52f9a60dd221e39de66455cf04deaad37/pydantic_core-2.46.3-cp314-cp314t-win_amd64.whl", hash = "sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8", size = 2051305, upload-time = "2026-04-20T14:43:18.627Z" }, + { url = "https://files.pythonhosted.org/packages/0b/db/d8182a7f1d9343a032265aae186eb063fe26ca4c40f256b21e8da4498e89/pydantic_core-2.46.3-cp314-cp314t-win_arm64.whl", hash = "sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374", size = 2026310, upload-time = "2026-04-20T14:41:01.778Z" }, + { url = "https://files.pythonhosted.org/packages/66/7f/03dbad45cd3aa9083fbc93c210ae8b005af67e4136a14186950a747c6874/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:9715525891ed524a0a1eb6d053c74d4d4ad5017677fb00af0b7c2644a31bae46", size = 2105683, upload-time = "2026-04-20T14:42:19.779Z" }, + { url = "https://files.pythonhosted.org/packages/26/22/4dc186ac8ea6b257e9855031f51b62a9637beac4d68ac06bee02f046f836/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:9d2f400712a99a013aff420ef1eb9be077f8189a36c1e3ef87660b4e1088a874", size = 1940052, upload-time = "2026-04-20T14:43:59.274Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/d376391a5aff1f2e8188960d7873543608130a870961c2b6b5236627c116/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd2aab0e2e9dc2daf36bd2686c982535d5e7b1d930a1344a7bb6e82baab42a76", size = 1988172, upload-time = "2026-04-20T14:41:17.469Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/6b/523b9f85c23788755d6ab949329de692a2e3a584bc6beb67fef5e035aa9d/pydantic_core-2.46.3-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e9d76736da5f362fabfeea6a69b13b7f2be405c6d6966f06b2f6bfff7e64531", size = 2128596, upload-time = "2026-04-20T14:40:41.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/42/f426db557e8ab2791bc7562052299944a118655496fbff99914e564c0a94/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:b12dd51f1187c2eb489af8e20f880362db98e954b54ab792fa5d92e8bcc6b803", size = 2091877, upload-time = "2026-04-20T14:43:27.091Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4f/86a832a9d14df58e663bfdf4627dc00d3317c2bd583c4fb23390b0f04b8e/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:f00a0961b125f1a47af7bcc17f00782e12f4cd056f83416006b30111d941dfa3", size = 1932428, upload-time = "2026-04-20T14:40:45.781Z" }, + { url = "https://files.pythonhosted.org/packages/11/1a/fe857968954d93fb78e0d4b6df5c988c74c4aaa67181c60be7cfe327c0ca/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57697d7c056aca4bbb680200f96563e841a6386ac1129370a0102592f4dddff5", size = 1997550, upload-time = "2026-04-20T14:44:02.425Z" }, + { url = "https://files.pythonhosted.org/packages/17/eb/9d89ad2d9b0ba8cd65393d434471621b98912abb10fbe1df08e480ba57b5/pydantic_core-2.46.3-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd35aa21299def8db7ef4fe5c4ff862941a9a158ca7b63d61e66fe67d30416b4", size = 2137657, upload-time = "2026-04-20T14:42:45.149Z" }, + { url = "https://files.pythonhosted.org/packages/1f/da/99d40830684f81dec901cac521b5b91c095394cc1084b9433393cde1c2df/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:13afdd885f3d71280cf286b13b310ee0f7ccfefd1dbbb661514a474b726e2f25", size = 2107973, upload-time = "2026-04-20T14:42:06.175Z" }, + { url = "https://files.pythonhosted.org/packages/99/a5/87024121818d75bbb2a98ddbaf638e40e7a18b5e0f5492c9ca4b1b316107/pydantic_core-2.46.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f91c0aff3e3ee0928edd1232c57f643a7a003e6edf1860bc3afcdc749cb513f3", size = 1947191, upload-time = "2026-04-20T14:43:14.319Z" }, + { url = "https://files.pythonhosted.org/packages/60/62/0c1acfe10945b83a6a59d19fbaa92f48825381509e5701b855c08f13db76/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6529d1d128321a58d30afcc97b49e98836542f68dd41b33c2e972bb9e5290536", size = 2123791, upload-time = "2026-04-20T14:43:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/75/3e/3b2393b4c8f44285561dc30b00cf307a56a2eff7c483a824db3b8221ca51/pydantic_core-2.46.3-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:975c267cff4f7e7272eacbe50f6cc03ca9a3da4c4fbd66fffd89c94c1e311aa1", size = 2153197, upload-time = "2026-04-20T14:44:27.932Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/5af02fb35505051eee727c061f2881c555ab4f8ddb2d42da715a42c9731b/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:2b8e4f2bbdf71415c544b4b1138b8060db7b6611bc927e8064c769f64bed651c", size = 2181073, upload-time = "2026-04-20T14:43:20.729Z" }, + { url = "https://files.pythonhosted.org/packages/10/92/7e0e1bd9ca3c68305db037560ca2876f89b2647deb2f8b6319005de37505/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e61ea8e9fff9606d09178f577ff8ccdd7206ff73d6552bcec18e1033c4254b85", size = 2315886, upload-time = "2026-04-20T14:44:04.826Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d8/101655f27eaf3e44558ead736b2795d12500598beed4683f279396fa186e/pydantic_core-2.46.3-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:b504bda01bafc69b6d3c7a0c7f039dcf60f47fab70e06fe23f57b5c75bdc82b8", size = 2360528, upload-time = "2026-04-20T14:40:47.431Z" }, + { url = "https://files.pythonhosted.org/packages/07/0f/1c34a74c8d07136f0d729ffe5e1fdab04fbdaa7684f61a92f92511a84a15/pydantic_core-2.46.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b00b76f7142fc60c762ce579bd29c8fa44aaa56592dd3c54fab3928d0d4ca6ff", size = 2184144, upload-time = "2026-04-20T14:42:57Z" }, ] [[package]] @@ -1154,16 +1226,25 @@ wheels = [ [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] +[[package]] +name = "pyspark" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py4j" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/19/bf/58ee13add151469c25825b7125bbf62c3bdcec05eec4d458fcb5c5516066/pyspark-4.1.1.tar.gz", hash = "sha256:77f78984aa84fbe865c717dd37b49913b4e5c97d76ef6824f932f1aefa6621ec", size = 455359625, upload-time = "2026-01-09T09:38:38.28Z" } + [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1174,23 +1255,23 @@ dependencies = [ { name = "pygments" }, { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] name = "pytest-cov" -version = "7.0.0" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage", extra = ["toml"] }, { name = "pluggy" }, { name = "pytest" }, ] -sdist = { 
url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] [[package]] @@ -1272,41 +1353,40 @@ wheels = [ [[package]] name = "rich" -version = "14.3.1" +version = "15.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/84/4831f881aa6ff3c976f6d6809b58cdfa350593ffc0dc3c58f5f6586780fb/rich-14.3.1.tar.gz", hash = "sha256:b8c5f568a3a749f9290ec6bddedf835cec33696bfc1e48bcfecb276c7386e4b8", size = 230125, upload-time = "2026-01-24T21:40:44.847Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/87/2a/a1810c8627b9ec8c57ec5ec325d306701ae7be50235e8fd81266e002a3cc/rich-14.3.1-py3-none-any.whl", hash = "sha256:da750b1aebbff0b372557426fb3f35ba56de8ef954b3190315eb64076d6fb54e", size = 309952, upload-time = "2026-01-24T21:40:42.969Z" }, + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] [[package]] name = "ruff" -version = "0.14.14" +version = "0.15.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +sdist = { url = "https://files.pythonhosted.org/packages/99/43/3291f1cc9106f4c63bdce7a8d0df5047fe8422a75b091c16b5e9355e0b11/ruff-0.15.12.tar.gz", hash = "sha256:ecea26adb26b4232c0c2ca19ccbc0083a68344180bba2a600605538ce51a40a6", size = 4643852, upload-time = "2026-04-24T18:17:14.305Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, - { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, - { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, - { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, - { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, - { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, - { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, - { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, - { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, - { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, - { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, - { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, - { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, - { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6e/e78ffb61d4686f3d96ba3df2c801161843746dcbcbb17a1e927d4829312b/ruff-0.15.12-py3-none-linux_armv6l.whl", hash = "sha256:f86f176e188e94d6bdbc09f09bfd9dc729059ad93d0e7390b5a73efe19f8861c", size = 10640713, upload-time = "2026-04-24T18:17:22.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/08/a317bc231fb9e7b93e4ef3089501e51922ff88d6936ce5cf870c4fe55419/ruff-0.15.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e3bcd123364c3770b8e1b7baaf343cc99a35f197c5c6e8af79015c666c423a6c", size = 11069267, upload-time = "2026-04-24T18:17:30.105Z" }, + { url = "https://files.pythonhosted.org/packages/aa/a4/f828e9718d3dce1f5f11c39c4f65afd32783c8b2aebb2e3d259e492c47bd/ruff-0.15.12-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:fe87510d000220aa1ed530d4448a7c696a0cae1213e5ec30e5874287b66557b5", size = 10397182, upload-time = "2026-04-24T18:17:07.177Z" }, + { url = "https://files.pythonhosted.org/packages/71/e0/3310fc6d1b5e1fdea22bf3b1b807c7e187b581021b0d7d4514cccdb5fb71/ruff-0.15.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84a1630093121375a3e2a95b4a6dc7b59e2b4ee76216e32d81aae550a832d002", size = 10758012, upload-time = "2026-04-24T18:16:55.759Z" }, + { url = "https://files.pythonhosted.org/packages/11/c1/a606911aee04c324ddaa883ae418f3569792fd3c4a10c50e0dd0a2311e1e/ruff-0.15.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb129f40f114f089ebe0ca56c0d251cf2061b17651d464bb6478dc01e69f11f5", size = 10447479, upload-time = "2026-04-24T18:16:51.677Z" }, + { url = "https://files.pythonhosted.org/packages/9d/68/4201e8444f0894f21ab4aeeaee68aa4f10b51613514a20d80bd628d57e88/ruff-0.15.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0c862b172d695db7598426b8af465e7e9ac00a3ea2a3630ee67eb82e366aaa6", size = 11234040, upload-time = "2026-04-24T18:17:16.529Z" }, + { url = "https://files.pythonhosted.org/packages/34/ff/8a6d6cf4ccc23fd67060874e832c18919d1557a0611ebef03fdb01fff11e/ruff-0.15.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2849ea9f3484c3aca43a82f484210370319e7170df4dfe4843395ddf6c57bc33", size = 12087377, upload-time = "2026-04-24T18:17:04.944Z" }, + { url = "https://files.pythonhosted.org/packages/85/f6/c669cf73f5152f623d34e69866a46d5e6185816b19fcd5b6dd8a2d299922/ruff-0.15.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e77c7e51c07fe396826d5969a5b846d9cd4c402535835fb6e21ce8b28fef847", size = 11367784, upload-time = "2026-04-24T18:17:25.409Z" }, + { url = "https://files.pythonhosted.org/packages/e8/39/c61d193b8a1daaa8977f7dea9e8d8ba866e02ea7b65d32f6861693aa4c12/ruff-0.15.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:83b2f4f2f3b1026b5fb449b467d9264bf22067b600f7b6f41fc5958909f449d0", size = 11344088, upload-time = "2026-04-24T18:17:12.258Z" }, + { url = "https://files.pythonhosted.org/packages/c2/8d/49afab3645e31e12c590acb6d3b5b69d7aab5b81926dbaf7461f9441f37a/ruff-0.15.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9ba3b8f1afd7e2e43d8943e55f249e13f9682fde09711644a6e7290eb4f3e339", size = 11271770, upload-time = "2026-04-24T18:17:02.457Z" }, + { url = "https://files.pythonhosted.org/packages/46/06/33f41fe94403e2b755481cdfb9b7ef3e4e0ed031c4581124658d935d52b4/ruff-0.15.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e852ba9fdc890655e1d78f2df1499efbe0e54126bd405362154a75e2bde159c5", size = 10719355, upload-time = "2026-04-24T18:17:27.648Z" }, + { url = "https://files.pythonhosted.org/packages/0d/59/18aa4e014debbf559670e4048e39260a85c7fcee84acfd761ac01e7b8d35/ruff-0.15.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dd8aed930da53780d22fc70bdf84452c843cf64f8cb4eb38984319c24c5cd5fd", size = 10462758, upload-time = "2026-04-24T18:17:32.347Z" }, + { url = "https://files.pythonhosted.org/packages/25/e7/cc9f16fd0f3b5fddcbd7ec3d6ae30c8f3fde1047f32a4093a98d633c6570/ruff-0.15.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:01da3988d225628b709493d7dc67c3b9b12c0210016b08690ef9bd27970b262b", size = 10953498, upload-time = "2026-04-24T18:17:20.674Z" }, + { url = "https://files.pythonhosted.org/packages/72/7a/a9ba7f98c7a575978698f4230c5e8cc54bbc761af34f560818f933dafa0c/ruff-0.15.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:9cae0f92bd5700d1213188b31cd3bdd2b315361296d10b96b8e2337d3d11f53e", size = 11447765, upload-time = "2026-04-24T18:17:09.755Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f9/0ae446942c846b8266059ad8a30702a35afae55f5cdc54c5adf8d7afdc27/ruff-0.15.12-py3-none-win32.whl", hash = "sha256:d0185894e038d7043ba8fd6aee7499ece6462dc0ea9f1e260c7451807c714c20", size = 10657277, upload-time = "2026-04-24T18:17:18.591Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/f1/9614e03e1cdcbf9437570b5400ced8a720b5db22b28d8e0f1bda429f660d/ruff-0.15.12-py3-none-win_amd64.whl", hash = "sha256:c87a162d61ab3adca47c03f7f717c68672edec7d1b5499e652331780fe74950d", size = 11837758, upload-time = "2026-04-24T18:17:00.113Z" }, + { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, ] [[package]] @@ -1315,7 +1395,7 @@ version = "2.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } wheels = [ @@ -1388,78 +1468,78 @@ wheels = [ [[package]] name = "tomli" -version = "2.4.0" +version = "2.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" } +sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = 
"sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" }, - { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" }, - { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" }, - { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" }, - { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" }, - { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" }, - { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" }, - { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" }, - { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" }, - { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" }, - { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" }, - { 
url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" }, - { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" }, - { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" }, - { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" }, - { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" }, - { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" }, - { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" }, - { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" }, - { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" }, - { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" }, - { url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" }, - { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" }, - { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" }, - { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" }, - { url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" }, - { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" }, - { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" }, - { url = 
"https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" }, - { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" }, - { url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" }, - { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" }, - { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" }, - { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" }, - { url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" }, - { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, + { url = "https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f7/675db52c7e46064a9aa928885a9b20f4124ecb9bc2e1ce74c9106648d202/tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a", size = 149454, upload-time = "2026-03-25T20:21:12.036Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/71/81c50943cf953efa35bce7646caab3cf457a7d8c030b27cfb40d7235f9ee/tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076", size = 237561, upload-time = "2026-03-25T20:21:13.098Z" }, + { url = "https://files.pythonhosted.org/packages/48/c1/f41d9cb618acccca7df82aaf682f9b49013c9397212cb9f53219e3abac37/tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9", size = 243824, upload-time = "2026-03-25T20:21:14.569Z" }, + { url = "https://files.pythonhosted.org/packages/22/e4/5a816ecdd1f8ca51fb756ef684b90f2780afc52fc67f987e3c61d800a46d/tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c", size = 242227, upload-time = "2026-03-25T20:21:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/6b/49/2b2a0ef529aa6eec245d25f0c703e020a73955ad7edf73e7f54ddc608aa5/tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc", size = 247859, upload-time = "2026-03-25T20:21:17.001Z" }, + { url = "https://files.pythonhosted.org/packages/83/bd/6c1a630eaca337e1e78c5903104f831bda934c426f9231429396ce3c3467/tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049", size = 97204, upload-time = "2026-03-25T20:21:18.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/59/71461df1a885647e10b6bb7802d0b8e66480c61f3f43079e0dcd315b3954/tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e", size = 108084, upload-time = "2026-03-25T20:21:18.978Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/83/dceca96142499c069475b790e7913b1044c1a4337e700751f48ed723f883/tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece", size = 95285, upload-time = "2026-03-25T20:21:20.309Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ba/42f134a3fe2b370f555f44b1d72feebb94debcab01676bf918d0cb70e9aa/tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a", size = 155924, upload-time = "2026-03-25T20:21:21.626Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c7/62d7a17c26487ade21c5422b646110f2162f1fcc95980ef7f63e73c68f14/tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085", size = 150018, upload-time = "2026-03-25T20:21:23.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/79d13d7c15f13bdef410bdd49a6485b1c37d28968314eabee452c22a7fda/tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9", size = 244948, upload-time = "2026-03-25T20:21:24.04Z" }, + { url = "https://files.pythonhosted.org/packages/10/90/d62ce007a1c80d0b2c93e02cab211224756240884751b94ca72df8a875ca/tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5", size = 253341, upload-time = "2026-03-25T20:21:25.177Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/caf6496d60152ad4ed09282c1885cca4eea150bfd007da84aea07bcc0a3e/tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585", size = 248159, upload-time = "2026-03-25T20:21:26.364Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/e7/c6f69c3120de34bbd882c6fba7975f3d7a746e9218e56ab46a1bc4b42552/tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1", size = 253290, upload-time = "2026-03-25T20:21:27.46Z" }, + { url = "https://files.pythonhosted.org/packages/d6/2f/4a3c322f22c5c66c4b836ec58211641a4067364f5dcdd7b974b4c5da300c/tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917", size = 98141, upload-time = "2026-03-25T20:21:28.492Z" }, + { url = "https://files.pythonhosted.org/packages/24/22/4daacd05391b92c55759d55eaee21e1dfaea86ce5c571f10083360adf534/tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9", size = 108847, upload-time = "2026-03-25T20:21:29.386Z" }, + { url = "https://files.pythonhosted.org/packages/68/fd/70e768887666ddd9e9f5d85129e84910f2db2796f9096aa02b721a53098d/tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257", size = 95088, upload-time = "2026-03-25T20:21:30.677Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866, upload-time = "2026-03-25T20:21:31.65Z" }, + { url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887, upload-time = "2026-03-25T20:21:33.028Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704, upload-time = "2026-03-25T20:21:34.51Z" }, + { url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628, upload-time = "2026-03-25T20:21:36.012Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180, upload-time = "2026-03-25T20:21:37.136Z" }, + { url = "https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674, upload-time = "2026-03-25T20:21:38.298Z" }, + { url = "https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976, upload-time = "2026-03-25T20:21:39.316Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755, upload-time = "2026-03-25T20:21:40.248Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265, upload-time = "2026-03-25T20:21:41.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726, upload-time = "2026-03-25T20:21:42.23Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859, upload-time = "2026-03-25T20:21:43.386Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713, upload-time = "2026-03-25T20:21:44.474Z" }, + { url = "https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084, upload-time = "2026-03-25T20:21:45.62Z" }, + { url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973, upload-time = "2026-03-25T20:21:46.937Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223, upload-time = "2026-03-25T20:21:48.467Z" }, + { url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973, upload-time = "2026-03-25T20:21:49.526Z" }, + { url = "https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082, upload-time = "2026-03-25T20:21:50.506Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490, upload-time = "2026-03-25T20:21:51.474Z" }, + { url = "https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263, upload-time = "2026-03-25T20:21:52.543Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736, upload-time = "2026-03-25T20:21:53.674Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717, upload-time = "2026-03-25T20:21:55.129Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461, upload-time = "2026-03-25T20:21:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855, upload-time = "2026-03-25T20:21:57.653Z" }, + { url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144, upload-time = "2026-03-25T20:21:59.089Z" }, + { url = "https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683, upload-time = "2026-03-25T20:22:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196, upload-time = "2026-03-25T20:22:01.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393, upload-time = "2026-03-25T20:22:02.137Z" }, + { url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250915" +version = "6.0.12.20260408" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/73/b759b1e413c31034cc01ecdfb96b38115d0ab4db55a752a3929f0cd449fd/types_pyyaml-6.0.12.20260408.tar.gz", hash = "sha256:92a73f2b8d7f39ef392a38131f76b970f8c66e4c42b3125ae872b7c93b556307", size = 17735, upload-time = "2026-04-08T04:30:50.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f0/c391068b86abb708882c6d75a08cd7d25b2c7227dab527b3a3685a3c635b/types_pyyaml-6.0.12.20260408-py3-none-any.whl", hash = "sha256:fbc42037d12159d9c801ebfcc79ebd28335a7c13b08a4cfbc6916df78fee9384", size = 20339, upload-time = "2026-04-08T04:30:50.113Z" }, ] [[package]] name = "types-shapely" -version = 
"2.1.0.20250917" +version = "2.1.0.20260408" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/19/7f28b10994433d43b9caa66f3b9bd6a0a9192b7ce8b5a7fc41534e54b821/types_shapely-2.1.0.20250917.tar.gz", hash = "sha256:5c56670742105aebe40c16414390d35fcaa55d6f774d328c1a18273ab0e2134a", size = 26363, upload-time = "2025-09-17T02:47:44.604Z" } +sdist = { url = "https://files.pythonhosted.org/packages/10/8d/bf9e3eb51249601e22d797481999a06fb34998c4db5c76804394f8a3fa28/types_shapely-2.1.0.20260408.tar.gz", hash = "sha256:8552549d9429baa52ec4331e43b5db3b334fc3a7f30da48663010b7454b1451c", size = 26529, upload-time = "2026-04-08T04:34:42.111Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/a9/554ac40810e530263b6163b30a2b623bc16aae3fb64416f5d2b3657d0729/types_shapely-2.1.0.20250917-py3-none-any.whl", hash = "sha256:9334a79339504d39b040426be4938d422cec419168414dc74972aa746a8bf3a1", size = 37813, upload-time = "2025-09-17T02:47:43.788Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3d/cbec691f56e71636192a07bf6809f598bed06d869b03b4e2b1ad2f7df032/types_shapely-2.1.0.20260408-py3-none-any.whl", hash = "sha256:8a31e2b074342a363f0c9d0c7d6e1e6c0dcce302a92ef94d64d0ca2a2b94a1d1", size = 37818, upload-time = "2026-04-08T04:34:41.243Z" }, ] [[package]] From ef2ef01a6cae53630e63586bafbf2a5564ef2053 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 13 May 2026 09:37:46 -0700 Subject: [PATCH 5/8] chore(pyspark): generate PySpark expressions Generate PySpark expressions (and tests) for 
models defined in the workspace Signed-off-by: Seth Fitzsimmons --- .../pyspark/expressions/generated/__init__.py | 0 .../generated/overture/__init__.py | 0 .../generated/overture/schema/__init__.py | 0 .../overture/schema/addresses/__init__.py | 0 .../overture/schema/addresses/address.py | 564 ++ .../overture/schema/annex/__init__.py | 0 .../overture/schema/annex/sources.py | 486 ++ .../overture/schema/base/__init__.py | 0 .../overture/schema/base/bathymetry.py | 478 ++ .../overture/schema/base/infrastructure.py | 997 ++++ .../generated/overture/schema/base/land.py | 848 +++ .../overture/schema/base/land_cover.py | 492 ++ .../overture/schema/base/land_use.py | 948 ++++ .../generated/overture/schema/base/water.py | 791 +++ .../overture/schema/buildings/__init__.py | 0 .../overture/schema/buildings/building.py | 1025 ++++ .../schema/buildings/building_part.py | 930 +++ .../overture/schema/divisions/__init__.py | 0 .../overture/schema/divisions/division.py | 1550 +++++ .../schema/divisions/division_area.py | 962 ++++ .../schema/divisions/division_boundary.py | 782 +++ .../overture/schema/places/__init__.py | 0 .../generated/overture/schema/places/place.py | 1505 +++++ .../schema/transportation/__init__.py | 0 .../schema/transportation/connector.py | 372 ++ .../overture/schema/transportation/segment.py | 5053 +++++++++++++++++ .../tests/generated/__init__.py | 0 .../tests/generated/overture/__init__.py | 0 .../generated/overture/schema/__init__.py | 0 .../overture/schema/addresses/__init__.py | 0 .../overture/schema/addresses/test_address.py | 462 ++ .../overture/schema/annex/__init__.py | 0 .../overture/schema/annex/test_sources.py | 843 +++ .../overture/schema/base/__init__.py | 0 .../overture/schema/base/test_bathymetry.py | 401 ++ .../schema/base/test_infrastructure.py | 650 +++ .../overture/schema/base/test_land.py | 634 +++ .../overture/schema/base/test_land_cover.py | 401 ++ .../overture/schema/base/test_land_use.py | 650 +++ 
.../overture/schema/base/test_water.py | 620 ++ .../overture/schema/buildings/__init__.py | 0 .../schema/buildings/test_building.py | 708 +++ .../schema/buildings/test_building_part.py | 714 +++ .../overture/schema/divisions/__init__.py | 0 .../schema/divisions/test_division.py | 1049 ++++ .../schema/divisions/test_division_area.py | 759 +++ .../divisions/test_division_boundary.py | 574 ++ .../overture/schema/places/__init__.py | 0 .../overture/schema/places/test_place.py | 1207 ++++ .../schema/transportation/__init__.py | 0 .../schema/transportation/test_connector.py | 342 ++ .../transportation/test_segment_rail.py | 1676 ++++++ .../transportation/test_segment_road.py | 3085 ++++++++++ .../transportation/test_segment_water.py | 1596 ++++++ 54 files changed, 34154 insertions(+) create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py create mode 100644 
packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py create mode 100644 
packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py create mode 100644 packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py create mode 100644 packages/overture-schema-pyspark/tests/generated/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py create mode 100644 
packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py create mode 100644 packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py new file mode 100644 index 000000000..19d17b7ba --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/addresses/address.py @@ -0,0 +1,564 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Address validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["addresses"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["address"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + 
expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _address_levels_min_length_check() -> Check: + return Check( + field="address_levels_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("address_levels"), 1), + shape=CheckShape.SCALAR, + root_field="address_levels", + ) + + +def _address_levels_max_length_check() -> Check: + return Check( + field="address_levels_max_length", + name="array_max_length", + expr=check_array_max_length(F.col("address_levels"), 5), + shape=CheckShape.SCALAR, + root_field="address_levels", + ) + + +def _address_levels_value_string_min_length_check() -> Check: + return Check( + field="address_levels[].value", + name="string_min_length", + expr=array_check( + "address_levels", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="address_levels", + ) + + +def _address_levels_value_stripped_check() -> Check: + return Check( + field="address_levels[].value", + name="stripped", + expr=array_check("address_levels", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="address_levels", + ) + + +def _country_required_check() -> Check: + return Check( + field="country", + name="required", + expr=check_required(F.col("country")), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _country_country_code_alpha2_check() -> Check: + return 
Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _number_string_min_length_check() -> Check: + return Check( + field="number", + name="string_min_length", + expr=check_string_min_length(F.col("number"), 1), + shape=CheckShape.SCALAR, + root_field="number", + ) + + +def _number_stripped_check() -> Check: + return Check( + field="number", + name="stripped", + expr=check_stripped(F.col("number")), + shape=CheckShape.SCALAR, + root_field="number", + ) + + +def _postal_city_string_min_length_check() -> Check: + return Check( + field="postal_city", + name="string_min_length", + expr=check_string_min_length(F.col("postal_city"), 1), + shape=CheckShape.SCALAR, + root_field="postal_city", + ) + + +def _postal_city_stripped_check() -> Check: + return Check( + field="postal_city", + name="stripped", + expr=check_stripped(F.col("postal_city")), + shape=CheckShape.SCALAR, + root_field="postal_city", + ) + + +def _postcode_string_min_length_check() -> Check: + return Check( + field="postcode", + name="string_min_length", + expr=check_string_min_length(F.col("postcode"), 1), + shape=CheckShape.SCALAR, + root_field="postcode", + ) + + +def _postcode_stripped_check() -> Check: + return Check( + field="postcode", + name="stripped", + expr=check_stripped(F.col("postcode")), + shape=CheckShape.SCALAR, + root_field="postcode", + ) + + +def _street_string_min_length_check() -> Check: + return Check( + field="street", + name="string_min_length", + expr=check_string_min_length(F.col("street"), 1), + shape=CheckShape.SCALAR, + root_field="street", + ) + + +def _street_stripped_check() -> Check: + return Check( + field="street", + name="stripped", + expr=check_stripped(F.col("street")), + shape=CheckShape.SCALAR, + root_field="street", + ) + + +def _unit_string_min_length_check() -> Check: + return Check( + 
field="unit", + name="string_min_length", + expr=check_string_min_length(F.col("unit"), 1), + shape=CheckShape.SCALAR, + root_field="unit", + ) + + +def _unit_stripped_check() -> Check: + return Check( + field="unit", + name="stripped", + expr=check_stripped(F.col("unit")), + shape=CheckShape.SCALAR, + root_field="unit", + ) + + +def address_checks() -> list[Check]: + """All validation checks for address.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _address_levels_min_length_check(), + _address_levels_max_length_check(), + _address_levels_value_string_min_length_check(), + _address_levels_value_stripped_check(), + _country_required_check(), + _country_country_code_alpha2_check(), + _number_string_min_length_check(), + _number_stripped_check(), + _postal_city_string_min_length_check(), + _postal_city_stripped_check(), + _postcode_string_min_length_check(), + _postcode_stripped_check(), + _street_string_min_length_check(), + _street_stripped_check(), + _unit_string_min_length_check(), + _unit_stripped_check(), + ] + + +ADDRESS_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + 
StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "address_levels", + ArrayType(StructType([StructField("value", StringType(), True)]), True), + True, + ), + StructField("country", StringType(), True), + StructField("number", StringType(), True), + StructField("postal_city", StringType(), True), + StructField("postcode", StringType(), True), + StructField("street", StringType(), True), + StructField("unit", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.addresses:Address" + +PARTITIONS: dict[str, str] = {"theme": "addresses"} + +FEATURE_VALIDATION = FeatureValidation( + schema=ADDRESS_SCHEMA, + checks=address_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py new file mode 100644 index 000000000..026130578 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/annex/sources.py @@ -0,0 +1,486 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Sources validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + DoubleType, + LongType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_enum, + check_pattern, + check_required, + check_url_format, + check_url_length, +) + + +def _datasets_check() -> Check: + return Check( + field="datasets", + name="required", + expr=check_required(F.col("datasets")), + shape=CheckShape.SCALAR, + root_field="datasets", + ) + + +def _datasets_source_name_check() -> Check: + return Check( + field="datasets[].source_name", + name="required", + expr=array_check("datasets", lambda el: check_required(el["source_name"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_source_dataset_name_check() -> Check: + return Check( + field="datasets[].source_dataset_name", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["source_dataset_name"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_required_check() -> Check: + return Check( + field="datasets[].data_url", + name="required", + expr=array_check("datasets", lambda el: check_required(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_url_format_check() -> Check: + return Check( + field="datasets[].data_url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_url_length_check() -> Check: + return Check( + field="datasets[].data_url", 
+ name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["data_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_required_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="url_format", + expr=array_check( + "datasets", lambda el: check_url_format(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].data_url_archived", + name="url_length", + expr=array_check( + "datasets", lambda el: check_url_length(el["data_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_required_check() -> Check: + return Check( + field="datasets[].license_url", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_url_format_check() -> Check: + return Check( + field="datasets[].license_url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_url_length_check() -> Check: + return Check( + field="datasets[].license_url", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["license_url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_required_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="required", + expr=array_check( + "datasets", lambda el: 
check_required(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="url_format", + expr=array_check( + "datasets", lambda el: check_url_format(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].license_url_archived", + name="url_length", + expr=array_check( + "datasets", lambda el: check_url_length(el["license_url_archived"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_type_check() -> Check: + return Check( + field="datasets[].license_type", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_type"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_text_check() -> Check: + return Check( + field="datasets[].license_text", + name="required", + expr=array_check("datasets", lambda el: check_required(el["license_text"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_license_attribution_check() -> Check: + return Check( + field="datasets[].license_attribution", + name="required", + expr=array_check( + "datasets", lambda el: check_required(el["license_attribution"]) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_check() -> Check: + return Check( + field="datasets[].coverage_bbox", + name="required", + expr=array_check("datasets", lambda el: check_required(el["coverage_bbox"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_min_length_check() -> Check: + return Check( + field="datasets[].coverage_bbox_min_length", + name="array_min_length", + expr=array_check( + "datasets", lambda el: check_array_min_length(el["coverage_bbox"], 4) + ), + 
shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_coverage_bbox_max_length_check() -> Check: + return Check( + field="datasets[].coverage_bbox_max_length", + name="array_max_length", + expr=array_check( + "datasets", lambda el: check_array_max_length(el["coverage_bbox"], 4) + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_url_format_check() -> Check: + return Check( + field="datasets[].url", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_url_length_check() -> Check: + return Check( + field="datasets[].url", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["url"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_archived_url_format_check() -> Check: + return Check( + field="datasets[].url_archived", + name="url_format", + expr=array_check("datasets", lambda el: check_url_format(el["url_archived"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_url_archived_url_length_check() -> Check: + return Check( + field="datasets[].url_archived", + name="url_length", + expr=array_check("datasets", lambda el: check_url_length(el["url_archived"])), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_download_url_url_format_check() -> Check: + return Check( + field="datasets[].data_download_url[]", + name="url_format", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["data_download_url"], lambda inner: check_url_format(inner) + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_data_download_url_url_length_check() -> Check: + return Check( + field="datasets[].data_download_url[]", + name="url_length", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["data_download_url"], lambda inner: check_url_length(inner) + 
), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_countries_check() -> Check: + return Check( + field="datasets[].countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "datasets", + lambda el: array_check( + el["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_build_source_check() -> Check: + return Check( + field="datasets[].build_source", + name="enum", + expr=array_check( + "datasets", + lambda el: check_enum( + el["build_source"], ["OpenAddresses", "tf-data-platform"] + ), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _datasets_update_type_check() -> Check: + return Check( + field="datasets[].update_type", + name="enum", + expr=array_check( + "datasets", + lambda el: check_enum(el["update_type"], ["continuous", "manual"]), + ), + shape=CheckShape.ARRAY, + root_field="datasets", + ) + + +def _license_priority_check() -> Check: + return Check( + field="license_priority", + name="required", + expr=check_required(F.col("license_priority")), + shape=CheckShape.SCALAR, + root_field="license_priority", + ) + + +def sources_checks() -> list[Check]: + """All validation checks for sources.""" + return [ + _datasets_check(), + _datasets_source_name_check(), + _datasets_source_dataset_name_check(), + _datasets_data_url_required_check(), + _datasets_data_url_url_format_check(), + _datasets_data_url_url_length_check(), + _datasets_data_url_archived_required_check(), + _datasets_data_url_archived_url_format_check(), + _datasets_data_url_archived_url_length_check(), + _datasets_license_url_required_check(), + _datasets_license_url_url_format_check(), + _datasets_license_url_url_length_check(), + _datasets_license_url_archived_required_check(), + _datasets_license_url_archived_url_format_check(), + _datasets_license_url_archived_url_length_check(), + 
_datasets_license_type_check(), + _datasets_license_text_check(), + _datasets_license_attribution_check(), + _datasets_coverage_bbox_check(), + _datasets_coverage_bbox_min_length_check(), + _datasets_coverage_bbox_max_length_check(), + _datasets_url_url_format_check(), + _datasets_url_url_length_check(), + _datasets_url_archived_url_format_check(), + _datasets_url_archived_url_length_check(), + _datasets_data_download_url_url_format_check(), + _datasets_data_download_url_url_length_check(), + _datasets_countries_check(), + _datasets_build_source_check(), + _datasets_update_type_check(), + _license_priority_check(), + ] + + +SOURCES_SCHEMA = StructType( + [ + StructField( + "datasets", + ArrayType( + StructType( + [ + StructField("source_name", StringType(), True), + StructField("source_dataset_name", StringType(), True), + StructField("data_url", StringType(), True), + StructField("data_url_archived", StringType(), True), + StructField("license_url", StringType(), True), + StructField("license_url_archived", StringType(), True), + StructField("license_type", StringType(), True), + StructField("license_text", StringType(), True), + StructField("license_attribution", StringType(), True), + StructField( + "coverage_bbox", ArrayType(DoubleType(), True), True + ), + StructField("inception_date", StringType(), True), + StructField("url", StringType(), True), + StructField("url_archived", StringType(), True), + StructField( + "data_download_url", ArrayType(StringType(), True), True + ), + StructField("countries", ArrayType(StringType(), True), True), + StructField("coverage_description", StringType(), True), + StructField("data_layer_name", StringType(), True), + StructField("oa_path", ArrayType(StringType(), True), True), + StructField( + "address_levels", ArrayType(StringType(), True), True + ), + StructField("file_format", StringType(), True), + StructField("update_frequency", StringType(), True), + StructField("build_source", StringType(), True), + 
StructField("update_type", StringType(), True), + StructField( + "update_schedule", ArrayType(StringType(), True), True + ), + StructField("known_issues", StringType(), True), + StructField("notes", StringType(), True), + StructField("requires_attribution", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("license_priority", MapType(StringType(), LongType(), True), True), + ] +) + +ENTRY_POINT = "overture.schema.annex:Sources" + +PARTITIONS: dict[str, str] = {} + +FEATURE_VALIDATION = FeatureValidation( + schema=SOURCES_SCHEMA, + checks=sources_checks, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py new file mode 100644 index 000000000..b57a1f074 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/bathymetry.py @@ -0,0 +1,478 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Bathymetry validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["bathymetry"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + 
name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + 
"sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _depth_required_check() -> Check: + return Check( + field="depth", + name="required", + expr=check_required(F.col("depth")), + shape=CheckShape.SCALAR, + root_field="depth", + ) + + +def _depth_bounds_check() -> Check: + return Check( + field="depth", + name="bounds", + expr=check_bounds(F.col("depth"), ge=0), + shape=CheckShape.SCALAR, + root_field="depth", + ) + + +def _cartography_prominence_bounds_check() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), ge=1), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_prominence_bounds_check_1() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), le=100), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), le=23), + shape=CheckShape.SCALAR, + 
root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def bathymetry_checks() -> list[Check]: + """All validation checks for bathymetry.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _depth_required_check(), + _depth_bounds_check(), + _cartography_prominence_bounds_check(), + _cartography_prominence_bounds_check_1(), + _cartography_min_zoom_bounds_check(), + _cartography_min_zoom_bounds_check_1(), + _cartography_max_zoom_bounds_check(), + _cartography_max_zoom_bounds_check_1(), + ] + + +BATHYMETRY_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + 
StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("depth", IntegerType(), True), + StructField( + "cartography", + StructType( + [ + StructField("prominence", IntegerType(), True), + StructField("min_zoom", IntegerType(), True), + StructField("max_zoom", IntegerType(), True), + StructField("sort_key", IntegerType(), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Bathymetry" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=BATHYMETRY_SCHEMA, + checks=bathymetry_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py new file mode 100644 index 000000000..d388b7da1 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/infrastructure.py @@ -0,0 +1,997 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Infrastructure validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["infrastructure"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "aerialway_station", + "airport", + "airport_gate", + "airstrip", + "apron", + "aqueduct", + "artwork", + "atm", + "barrier", + "bell_tower", + "bench", + "bicycle_parking", + "bicycle_rental", + "block", + "boardwalk", + "bollard", + "border_control", + "breakwater", + "bridge", + "bridge_support", + "bump_gate", + "bus_route", + "bus_station", + "bus_stop", + "bus_trap", + "cable", + "cable_barrier", + "cable_car", + "cable_distribution", + "camp_site", + "cantilever", + "catenary_mast", + "cattle_grid", + "chain", + "chair_lift", + "charging_station", + "city_wall", + "communication_line", + "communication_pole", + "communication_tower", + "connection", + "cooling", + "covered", + "crossing", + "cutline", + "cycle_barrier", + "dam", + "defensive", + "ditch", + "diving", + "drag_lift", + "drain", + "drinking_water", + "entrance", + "fence", + "ferry_terminal", + "fire_hydrant", + "fountain", + 
"full-height_turnstile", + "gasometer", + "gate", + "generator", + "give_way", + "gondola", + "goods", + "guard_rail", + "hampshire_gate", + "handrail", + "hedge", + "height_restrictor", + "heliostat", + "helipad", + "heliport", + "hose", + "information", + "insulator", + "international_airport", + "j-bar", + "jersey_barrier", + "kerb", + "kissing_gate", + "launchpad", + "lift_gate", + "lighting", + "lightning_protection", + "magic_carpet", + "manhole", + "milestone", + "military_airport", + "minaret", + "minor_line", + "mixed_lift", + "mobile_phone_tower", + "monitoring", + "motorcycle_parking", + "motorway_junction", + "movable", + "municipal_airport", + "observation", + "parking", + "parking_entrance", + "parking_space", + "pedestrian_crossing", + "picnic_table", + "pier", + "pipeline", + "plant", + "planter", + "platform", + "platter", + "portal", + "post_box", + "power_line", + "power_pole", + "power_tower", + "private_airport", + "pylon", + "quay", + "radar", + "railway_halt", + "railway_station", + "recycling", + "regional_airport", + "reservoir_covered", + "retaining_wall", + "roller_coaster", + "rope_tow", + "runway", + "sally_port", + "seaplane_airport", + "sewer", + "silo", + "siren", + "stile", + "stop", + "stop_position", + "stopway", + "storage_tank", + "street_cabinet", + "street_lamp", + "substation", + "subway_station", + "swing_gate", + "switch", + "t-bar", + "taxilane", + "taxiway", + "terminal", + "toilets", + "toll_booth", + "traffic_signals", + "transformer", + "trestle", + "utility_pole", + "vending_machine", + "viaduct", + "viewpoint", + "wall", + "waste_basket", + "waste_disposal", + "watchtower", + "water_tower", + "weir", + "zip_line", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return 
Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "aerialway", + "airport", + "barrier", + "bridge", + "communication", + "emergency", + "manhole", + "pedestrian", + "pier", + "power", + "quay", + "recreation", + "tower", + "transit", + "transportation", + "utility", + "waste_management", + "water", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + "compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: 
check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def infrastructure_checks() -> list[Check]: + """All validation checks for infrastructure.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + 
_sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_required_check(), + _class_enum_check(), + _subtype_required_check(), + _subtype_enum_check(), + _height_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +INFRASTRUCTURE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + 
StructField("height", DoubleType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Infrastructure" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=INFRASTRUCTURE_SCHEMA, + checks=infrastructure_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py new file mode 100644 index 000000000..53b53b926 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land.py @@ -0,0 +1,848 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", 
+ expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + 
return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + 
name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "archipelago", + "bare_rock", + "beach", + "cave_entrance", + "cliff", + "desert", + "dune", + "fell", + "forest", + "glacier", + "grass", + "grassland", + "heath", + "hill", + "island", + "islet", + "land", + "meadow", + "meteor_crater", + "mountain_range", + "peak", + "peninsula", + "plateau", + "reef", + "ridge", + "rock", + "saddle", + "sand", + "scree", + "scrub", + "shingle", + "shrub", + "shrubbery", + "stone", + "tree", + "tree_row", + "tundra", + "valley", + "volcanic_caldera_rim", + "volcano", + "wetland", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "crater", + "desert", + "forest", + "glacier", + "grass", + "land", + "physical", + "reef", + "rock", + "sand", + "shrub", + "tree", + "wetland", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _elevation_check() -> Check: + return Check( + field="elevation", + name="bounds", + expr=check_bounds(F.col("elevation"), le=9000), + shape=CheckShape.SCALAR, + root_field="elevation", + ) + + +def 
_surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + "compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + 
root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + 
check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: 
check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def land_checks() -> list[Check]: + """All validation checks for land.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_check(), + _subtype_check(), + _elevation_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + 
_names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +LAND_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("elevation", IntegerType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + 
StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Land" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_SCHEMA, + checks=land_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py new file mode 100644 index 000000000..7e65987e2 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_cover.py @@ -0,0 +1,492 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land Cover validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land_cover"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + 
name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + 
"sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "barren", + "crop", + "forest", + "grass", + "mangrove", + "moss", + "shrub", + "snow", + "urban", + "wetland", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _cartography_prominence_bounds_check() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), ge=1), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_prominence_bounds_check_1() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), le=100), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check_1() -> Check: + return Check( + 
field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def land_cover_checks() -> list[Check]: + """All validation checks for land_cover.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _cartography_prominence_bounds_check(), + _cartography_prominence_bounds_check_1(), + _cartography_min_zoom_bounds_check(), + _cartography_min_zoom_bounds_check_1(), + _cartography_max_zoom_bounds_check(), + _cartography_max_zoom_bounds_check_1(), + ] + + +LAND_COVER_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), 
+ StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField( + "cartography", + StructType( + [ + StructField("prominence", IntegerType(), True), + StructField("min_zoom", IntegerType(), True), + StructField("max_zoom", IntegerType(), True), + StructField("sort_key", IntegerType(), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:LandCover" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_COVER_SCHEMA, + checks=land_cover_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py new file mode 100644 index 000000000..bf573f9bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/land_use.py @@ -0,0 +1,948 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Land Use validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["land_use"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "aboriginal_land", + "airfield", + "allotments", + "animal_keeping", + "aquaculture", + "barracks", + "base", + "beach_resort", + "brownfield", + "bunker", + "camp_site", + "cemetery", + "clinic", + "college", + "commercial", + "connection", + "construction", + "danger_area", + "doctors", + "dog_park", + "downhill", + "driving_range", + "driving_school", + "education", + "environmental", + "fairway", + "farmland", + "farmyard", + "fatbike", + "flowerbed", + "forest", + "garages", + "garden", + "golf_course", + "grass", + "grave_yard", + "green", + "greenfield", + "greenhouse_horticulture", + "highway", + "hike", + "hospital", + "ice_skate", + "industrial", + "institutional", + "kindergarten", + "landfill", + "lateral_water_hazard", + "logging", + "marina", + "meadow", + "military", + "military_hospital", + "military_school", + "music_school", + "national_park", + "natural_monument", + "nature_reserve", + 
"naval_base", + "nordic", + "nuclear_explosion_site", + "obstacle_course", + "orchard", + "park", + "peat_cutting", + "pedestrian", + "pitch", + "plant_nursery", + "playground", + "plaza", + "protected", + "protected_landscape_seascape", + "quarry", + "railway", + "range", + "recreation_ground", + "religious", + "residential", + "resort", + "retail", + "rough", + "salt_pond", + "school", + "schoolyard", + "ski_jump", + "skitour", + "sled", + "sleigh", + "snow_park", + "species_management_area", + "stadium", + "state_park", + "static_caravan", + "strict_nature_reserve", + "tee", + "theme_park", + "track", + "traffic_island", + "training_area", + "trench", + "university", + "village_green", + "vineyard", + "water_hazard", + "water_park", + "wilderness_area", + "winter_sports", + "works", + "zoo", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "agriculture", + "aquaculture", + "campground", + "cemetery", + "construction", + "developed", + "education", + "entertainment", + "golf", + "grass", + "horticulture", + "landfill", + "managed", + "medical", + "military", + "park", + "pedestrian", + "protected", + "recreation", + "religious", + "residential", + "resource_extraction", + "transportation", + "winter_sports", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _elevation_check() -> Check: + return Check( + field="elevation", + name="bounds", + expr=check_bounds(F.col("elevation"), le=9000), + shape=CheckShape.SCALAR, + root_field="elevation", + ) + + +def _surface_check() -> Check: + return Check( + field="surface", + name="enum", + expr=check_enum( + F.col("surface"), + [ + "asphalt", + "cobblestone", + 
"compacted", + "concrete", + "concrete_plates", + "dirt", + "earth", + "fine_gravel", + "grass", + "gravel", + "ground", + "paved", + "paving_stones", + "pebblestone", + "recreation_grass", + "recreation_paved", + "recreation_sand", + "rubber", + "sand", + "sett", + "tartan", + "unpaved", + "wood", + "woodchips", + ], + ), + shape=CheckShape.SCALAR, + root_field="surface", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + 
expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() 
-> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def land_use_checks() -> list[Check]: + """All validation checks for land_use.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_required_check(), + _class_enum_check(), + _subtype_required_check(), + _subtype_enum_check(), + _elevation_check(), + _surface_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + 
_names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +LAND_USE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("elevation", IntegerType(), True), + StructField("surface", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", 
ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:LandUse" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=LAND_USE_SCHEMA, + checks=land_use_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py new file mode 100644 index 000000000..d0484e725 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/base/water.py @@ -0,0 +1,791 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Water validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["base"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["water"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "basin", + "bay", + "blowhole", + "canal", + "cape", + "ditch", + "dock", + "drain", + "fairway", + "fish_pass", + "fishpond", + "geyser", + "hot_spring", + "lagoon", + "lake", + "moat", + "ocean", + "oxbow", + "pond", + "reflecting_pool", + "reservoir", + "river", + "salt_pond", + "sea", + "sewage", + "shoal", + "spring", + "strait", + "stream", + "swimming_pool", + "tidal_channel", + "wastewater", + "water", + "water_storage", + "waterfall", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "canal", + "human_made", + "lake", + "ocean", + "physical", + "pond", + "reservoir", + "river", + "spring", + "stream", + "wastewater", + "water", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def 
_names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + 
"^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def water_checks() -> list[Check]: + """All validation checks for water.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + 
_bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _class_check(), + _subtype_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _wikidata_check(), + ] + + +WATER_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + 
StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("class", StringType(), True), + StructField("subtype", StringType(), True), + StructField("is_intermittent", BooleanType(), True), + StructField("is_salt", BooleanType(), True), + StructField("level", IntegerType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("source_tags", MapType(StringType(), StringType(), True), True), + StructField("wikidata", StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= ( + GeometryType.LINE_STRING, + GeometryType.MULTI_POLYGON, + GeometryType.POINT, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.base:Water" + +PARTITIONS: dict[str, str] = {"theme": "base"} + +FEATURE_VALIDATION = FeatureValidation( + schema=WATER_SCHEMA, + checks=water_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py new file mode 100644 index 000000000..a73f69b89 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building.py @@ -0,0 +1,1025 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Building validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["buildings"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["building"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "agricultural", + "civic", + "commercial", + "education", + "entertainment", + "industrial", + "medical", + "military", + "outbuilding", + "religious", + "residential", + "service", + "transportation", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), + [ + "agricultural", + "allotment_house", + "apartments", + "barn", + "beach_hut", + "boathouse", + "bridge_structure", + "bungalow", + "bunker", + "cabin", + "carport", + "cathedral", + "chapel", + "church", + "civic", + "college", + "commercial", + "cowshed", + "detached", + "digester", + "dormitory", + "dwelling_house", + "factory", + "farm", + "farm_auxiliary", + "fire_station", + "garage", + "garages", + "ger", + "glasshouse", + "government", + "grandstand", + "greenhouse", + "guardhouse", + "hangar", + "hospital", + "hotel", + "house", + "houseboat", + "hut", + "industrial", + "kindergarten", + "kiosk", + "library", + "manufacture", + "military", + "monastery", + "mosque", + "office", + "outbuilding", + "parking", + "pavilion", + "post_office", + "presbytery", + 
"public", + "religious", + "residential", + "retail", + "roof", + "school", + "semi", + "semidetached_house", + "service", + "shed", + "shrine", + "silo", + "slurry_tank", + "sports_centre", + "sports_hall", + "stable", + "stadium", + "static_caravan", + "stilt_house", + "storage_tank", + "sty", + "supermarket", + "synagogue", + "temple", + "terrace", + "toilets", + "train_station", + "transformer_tower", + "transportation", + "trullo", + "university", + "warehouse", + "wayside_shrine", + ], + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + 
shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + 
el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + 
"names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _num_floors_check() -> Check: + return Check( + field="num_floors", + name="bounds", + expr=check_bounds(F.col("num_floors"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors", + ) + + +def _num_floors_underground_check() -> Check: + return Check( + field="num_floors_underground", + name="bounds", + expr=check_bounds(F.col("num_floors_underground"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors_underground", + ) + + +def _min_floor_check() -> Check: + return Check( + field="min_floor", + name="bounds", + expr=check_bounds(F.col("min_floor"), gt=0), + shape=CheckShape.SCALAR, + root_field="min_floor", + ) + + +def _facade_color_check() -> Check: + return Check( + field="facade_color", + name="hex_color", + expr=check_pattern( + F.col("facade_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="facade_color", + ) + + +def _facade_material_check() -> Check: + return Check( + field="facade_material", + name="enum", + expr=check_enum( + F.col("facade_material"), + [ + "brick", + "cement_block", + "clay", + "concrete", + "glass", + "metal", + "plaster", + "plastic", + "stone", + "timber_framing", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="facade_material", + ) + + +def _roof_material_check() -> Check: + return Check( + field="roof_material", + name="enum", + 
expr=check_enum( + F.col("roof_material"), + [ + "concrete", + "copper", + "eternit", + "glass", + "grass", + "gravel", + "metal", + "plastic", + "roof_tiles", + "slate", + "solar_panels", + "tar_paper", + "thatch", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_material", + ) + + +def _roof_shape_check() -> Check: + return Check( + field="roof_shape", + name="enum", + expr=check_enum( + F.col("roof_shape"), + [ + "dome", + "flat", + "gabled", + "gambrel", + "half_hipped", + "hipped", + "mansard", + "onion", + "pyramidal", + "round", + "saltbox", + "sawtooth", + "skillion", + "spherical", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_shape", + ) + + +def _roof_direction_bounds_check() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_direction_bounds_check_1() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), lt=360.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_orientation_check() -> Check: + return Check( + field="roof_orientation", + name="enum", + expr=check_enum(F.col("roof_orientation"), ["across", "along"]), + shape=CheckShape.SCALAR, + root_field="roof_orientation", + ) + + +def _roof_color_check() -> Check: + return Check( + field="roof_color", + name="hex_color", + expr=check_pattern( + F.col("roof_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="roof_color", + ) + + +def building_checks() -> list[Check]: + """All validation checks for building.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + 
_geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_check(), + _class_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _height_check(), + _num_floors_check(), + _num_floors_underground_check(), + _min_floor_check(), + _facade_color_check(), + _facade_material_check(), + _roof_material_check(), + _roof_shape_check(), + _roof_direction_bounds_check(), + _roof_direction_bounds_check_1(), + _roof_orientation_check(), + _roof_color_check(), + ] + + +BUILDING_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", 
StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("has_parts", BooleanType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("height", DoubleType(), True), + StructField("is_underground", BooleanType(), True), + StructField("num_floors", IntegerType(), True), + StructField("num_floors_underground", IntegerType(), True), + StructField("min_height", DoubleType(), True), + StructField("min_floor", IntegerType(), True), + StructField("facade_color", StringType(), True), + StructField("facade_material", StringType(), True), + StructField("roof_material", StringType(), True), + StructField("roof_shape", StringType(), True), + 
StructField("roof_direction", DoubleType(), True), + StructField("roof_orientation", StringType(), True), + StructField("roof_color", StringType(), True), + StructField("roof_height", DoubleType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.buildings:Building" + +PARTITIONS: dict[str, str] = {"theme": "buildings"} + +FEATURE_VALIDATION = FeatureValidation( + schema=BUILDING_SCHEMA, + checks=building_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py new file mode 100644 index 000000000..8a3a96eec --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/buildings/building_part.py @@ -0,0 +1,930 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Building Part validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + 
name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["buildings"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["building_part"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _building_id_required_check() -> Check: + return Check( + field="building_id", + name="required", + expr=check_required(F.col("building_id")), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _building_id_string_min_length_check() -> Check: + return Check( + field="building_id", + name="string_min_length", + expr=check_string_min_length(F.col("building_id"), 1), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _building_id_no_whitespace_check() -> Check: + return Check( + field="building_id", + name="no_whitespace", + expr=check_pattern( + F.col("building_id"), + "^\\S+\\z", + label="String without whitespace characters", + ), + shape=CheckShape.SCALAR, + root_field="building_id", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + 
field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", 
label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _height_check() -> Check: + return Check( + field="height", + name="bounds", + expr=check_bounds(F.col("height"), gt=0.0), + shape=CheckShape.SCALAR, + root_field="height", + ) + + +def _num_floors_check() -> Check: + return Check( + field="num_floors", + name="bounds", + expr=check_bounds(F.col("num_floors"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors", + ) + + +def _num_floors_underground_check() -> Check: + return Check( + field="num_floors_underground", + name="bounds", + expr=check_bounds(F.col("num_floors_underground"), gt=0), + shape=CheckShape.SCALAR, + root_field="num_floors_underground", + ) + + +def _min_floor_check() -> Check: + return Check( + field="min_floor", + name="bounds", + 
expr=check_bounds(F.col("min_floor"), gt=0), + shape=CheckShape.SCALAR, + root_field="min_floor", + ) + + +def _facade_color_check() -> Check: + return Check( + field="facade_color", + name="hex_color", + expr=check_pattern( + F.col("facade_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="facade_color", + ) + + +def _facade_material_check() -> Check: + return Check( + field="facade_material", + name="enum", + expr=check_enum( + F.col("facade_material"), + [ + "brick", + "cement_block", + "clay", + "concrete", + "glass", + "metal", + "plaster", + "plastic", + "stone", + "timber_framing", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="facade_material", + ) + + +def _roof_material_check() -> Check: + return Check( + field="roof_material", + name="enum", + expr=check_enum( + F.col("roof_material"), + [ + "concrete", + "copper", + "eternit", + "glass", + "grass", + "gravel", + "metal", + "plastic", + "roof_tiles", + "slate", + "solar_panels", + "tar_paper", + "thatch", + "wood", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_material", + ) + + +def _roof_shape_check() -> Check: + return Check( + field="roof_shape", + name="enum", + expr=check_enum( + F.col("roof_shape"), + [ + "dome", + "flat", + "gabled", + "gambrel", + "half_hipped", + "hipped", + "mansard", + "onion", + "pyramidal", + "round", + "saltbox", + "sawtooth", + "skillion", + "spherical", + ], + ), + shape=CheckShape.SCALAR, + root_field="roof_shape", + ) + + +def _roof_direction_bounds_check() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="roof_direction", + ) + + +def _roof_direction_bounds_check_1() -> Check: + return Check( + field="roof_direction", + name="bounds", + expr=check_bounds(F.col("roof_direction"), lt=360.0), + shape=CheckShape.SCALAR, + 
root_field="roof_direction", + ) + + +def _roof_orientation_check() -> Check: + return Check( + field="roof_orientation", + name="enum", + expr=check_enum(F.col("roof_orientation"), ["across", "along"]), + shape=CheckShape.SCALAR, + root_field="roof_orientation", + ) + + +def _roof_color_check() -> Check: + return Check( + field="roof_color", + name="hex_color", + expr=check_pattern( + F.col("roof_color"), + "^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?\\z", + label="Hexadecimal color code in format #RGB or #RRGGBB", + ), + shape=CheckShape.SCALAR, + root_field="roof_color", + ) + + +def building_part_checks() -> list[Check]: + """All validation checks for building_part.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _building_id_required_check(), + _building_id_string_min_length_check(), + _building_id_no_whitespace_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + 
_names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _height_check(), + _num_floors_check(), + _num_floors_underground_check(), + _min_floor_check(), + _facade_color_check(), + _facade_material_check(), + _roof_material_check(), + _roof_shape_check(), + _roof_direction_bounds_check(), + _roof_direction_bounds_check_1(), + _roof_orientation_check(), + _roof_color_check(), + ] + + +BUILDING_PART_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("building_id", StringType(), True), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + 
"perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("level", IntegerType(), True), + StructField("height", DoubleType(), True), + StructField("is_underground", BooleanType(), True), + StructField("num_floors", IntegerType(), True), + StructField("num_floors_underground", IntegerType(), True), + StructField("min_height", DoubleType(), True), + StructField("min_floor", IntegerType(), True), + StructField("facade_color", StringType(), True), + StructField("facade_material", StringType(), True), + StructField("roof_material", StringType(), True), + StructField("roof_shape", StringType(), True), + StructField("roof_direction", DoubleType(), True), + StructField("roof_orientation", StringType(), True), + StructField("roof_color", StringType(), True), + StructField("roof_height", DoubleType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.buildings:BuildingPart" + +PARTITIONS: dict[str, str] = {"theme": "buildings"} + +FEATURE_VALIDATION = FeatureValidation( + schema=BUILDING_PART_SCHEMA, + checks=building_part_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py new file mode 100644 index 000000000..cd42da918 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division.py @@ -0,0 +1,1550 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Division validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _cartography_prominence_bounds_check() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), ge=1), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_prominence_bounds_check_1() -> Check: + return Check( + field="cartography.prominence", + name="bounds", + expr=check_bounds(F.col("cartography.prominence"), le=100), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_min_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.min_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.min_zoom"), 
le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), ge=0), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _cartography_max_zoom_bounds_check_1() -> Check: + return Check( + field="cartography.max_zoom", + name="bounds", + expr=check_bounds(F.col("cartography.max_zoom"), le=23), + shape=CheckShape.SCALAR, + root_field="cartography", + ) + + +def _names_check() -> Check: + return Check( + field="names", + name="required", + expr=check_required(F.col("names")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=check_required(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + 
field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + 
field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def 
_geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["divisions"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["division"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + 
shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + 
expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "country", + "dependency", + "macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _country_required_check() -> Check: + return Check( + field="country", + name="required", + expr=check_required(F.col("country")), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _country_country_code_alpha2_check() -> Check: + return Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _hierarchies_check() -> Check: + return Check( + field="hierarchies", + name="required", + expr=check_required(F.col("hierarchies")), + shape=CheckShape.SCALAR, + root_field="hierarchies", + ) + + +def _hierarchies_min_length_check() -> Check: + return Check( + field="hierarchies_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("hierarchies"), 1), + shape=CheckShape.SCALAR, + root_field="hierarchies", + ) + + +def _hierarchies_unique_check() -> Check: + return Check( + field="hierarchies_unique", + name="struct_unique", + expr=check_struct_unique(F.col("hierarchies")), + shape=CheckShape.SCALAR, + root_field="hierarchies", + ) + + +def _hierarchies_min_length_check_1() -> Check: + return Check( + field="hierarchies[]_min_length", + name="array_min_length", + 
expr=array_check("hierarchies", lambda el: check_array_min_length(el, 1)), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_unique_check_1() -> Check: + return Check( + field="hierarchies[]_unique", + name="struct_unique", + expr=array_check("hierarchies", lambda el: check_struct_unique(el)), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_division_id_required_check() -> Check: + return Check( + field="hierarchies[][].division_id", + name="required", + expr=nested_array_check( + "hierarchies", + lambda el: array_check( + el, lambda inner: check_required(inner["division_id"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_division_id_string_min_length_check() -> Check: + return Check( + field="hierarchies[][].division_id", + name="string_min_length", + expr=nested_array_check( + "hierarchies", + lambda el: array_check( + el, lambda inner: check_string_min_length(inner["division_id"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_division_id_no_whitespace_check() -> Check: + return Check( + field="hierarchies[][].division_id", + name="no_whitespace", + expr=nested_array_check( + "hierarchies", + lambda el: array_check( + el, + lambda inner: check_pattern( + inner["division_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_subtype_required_check() -> Check: + return Check( + field="hierarchies[][].subtype", + name="required", + expr=nested_array_check( + "hierarchies", + lambda el: array_check(el, lambda inner: check_required(inner["subtype"])), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_subtype_enum_check() -> Check: + return Check( + field="hierarchies[][].subtype", + name="enum", + expr=nested_array_check( + "hierarchies", + lambda el: array_check( + el, + lambda 
inner: check_enum( + inner["subtype"], + [ + "country", + "dependency", + "macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_name_required_check() -> Check: + return Check( + field="hierarchies[][].name", + name="required", + expr=nested_array_check( + "hierarchies", + lambda el: array_check(el, lambda inner: check_required(inner["name"])), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_name_string_min_length_check() -> Check: + return Check( + field="hierarchies[][].name", + name="string_min_length", + expr=nested_array_check( + "hierarchies", + lambda el: array_check( + el, lambda inner: check_string_min_length(inner["name"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _hierarchies_name_stripped_check() -> Check: + return Check( + field="hierarchies[][].name", + name="stripped", + expr=nested_array_check( + "hierarchies", + lambda el: array_check(el, lambda inner: check_stripped(inner["name"])), + ), + shape=CheckShape.ARRAY, + root_field="hierarchies", + ) + + +def _parent_division_id_string_min_length_check() -> Check: + return Check( + field="parent_division_id", + name="string_min_length", + expr=check_string_min_length(F.col("parent_division_id"), 1), + shape=CheckShape.SCALAR, + root_field="parent_division_id", + ) + + +def _parent_division_id_no_whitespace_check() -> Check: + return Check( + field="parent_division_id", + name="no_whitespace", + expr=check_pattern( + F.col("parent_division_id"), + "^\\S+\\z", + label="String without whitespace characters", + ), + shape=CheckShape.SCALAR, + root_field="parent_division_id", + ) + + +def _admin_level_bounds_check() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), ge=0), + 
shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _admin_level_bounds_check_1() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), le=16), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _class_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum( + F.col("class"), ["megacity", "city", "town", "village", "hamlet"] + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _region_check() -> Check: + return Check( + field="region", + name="region_code", + expr=check_pattern( + F.col("region"), + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 subdivision code", + ), + shape=CheckShape.SCALAR, + root_field="region", + ) + + +def _perspectives_mode_required_check() -> Check: + return Check( + field="perspectives.mode", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.mode")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_mode_enum_check() -> Check: + return Check( + field="perspectives.mode", + name="enum", + expr=check_enum(F.col("perspectives.mode"), ["accepted_by", "disputed_by"]), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check() -> Check: + return Check( + field="perspectives.countries", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.countries")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_min_length_check() -> Check: + return Check( + field="perspectives.countries_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("perspectives.countries"), 1), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_unique_check() -> Check: + return Check( + field="perspectives.countries_unique", + name="struct_unique", + 
expr=check_struct_unique(F.col("perspectives.countries")), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check_1() -> Check: + return Check( + field="perspectives.countries[]", + name="country_code_alpha2", + expr=array_check( + "perspectives.countries", + lambda el: check_pattern( + el, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + shape=CheckShape.ARRAY, + root_field="perspectives", + ) + + +def _norms_driving_side_check() -> Check: + return Check( + field="norms.driving_side", + name="enum", + expr=check_enum(F.col("norms.driving_side"), ["left", "right"]), + shape=CheckShape.SCALAR, + root_field="norms", + ) + + +def _population_check() -> Check: + return Check( + field="population", + name="bounds", + expr=check_bounds(F.col("population"), ge=0), + shape=CheckShape.SCALAR, + root_field="population", + ) + + +def _capital_division_ids_min_length_check() -> Check: + return Check( + field="capital_division_ids_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("capital_division_ids"), 1), + shape=CheckShape.SCALAR, + root_field="capital_division_ids", + ) + + +def _capital_division_ids_unique_check() -> Check: + return Check( + field="capital_division_ids_unique", + name="struct_unique", + expr=check_struct_unique(F.col("capital_division_ids")), + shape=CheckShape.SCALAR, + root_field="capital_division_ids", + ) + + +def _capital_division_ids_string_min_length_check() -> Check: + return Check( + field="capital_division_ids[]", + name="string_min_length", + expr=array_check( + "capital_division_ids", lambda el: check_string_min_length(el, 1) + ), + shape=CheckShape.ARRAY, + root_field="capital_division_ids", + ) + + +def _capital_division_ids_no_whitespace_check() -> Check: + return Check( + field="capital_division_ids[]", + name="no_whitespace", + expr=array_check( + "capital_division_ids", + lambda el: check_pattern( + el, "^\\S+\\z", label="String without whitespace 
characters" + ), + ), + shape=CheckShape.ARRAY, + root_field="capital_division_ids", + ) + + +def _capital_of_divisions_min_length_check() -> Check: + return Check( + field="capital_of_divisions_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("capital_of_divisions"), 1), + shape=CheckShape.SCALAR, + root_field="capital_of_divisions", + ) + + +def _capital_of_divisions_unique_check() -> Check: + return Check( + field="capital_of_divisions_unique", + name="struct_unique", + expr=check_struct_unique(F.col("capital_of_divisions")), + shape=CheckShape.SCALAR, + root_field="capital_of_divisions", + ) + + +def _capital_of_divisions_division_id_required_check() -> Check: + return Check( + field="capital_of_divisions[].division_id", + name="required", + expr=array_check( + "capital_of_divisions", lambda el: check_required(el["division_id"]) + ), + shape=CheckShape.ARRAY, + root_field="capital_of_divisions", + ) + + +def _capital_of_divisions_division_id_string_min_length_check() -> Check: + return Check( + field="capital_of_divisions[].division_id", + name="string_min_length", + expr=array_check( + "capital_of_divisions", + lambda el: check_string_min_length(el["division_id"], 1), + ), + shape=CheckShape.ARRAY, + root_field="capital_of_divisions", + ) + + +def _capital_of_divisions_division_id_no_whitespace_check() -> Check: + return Check( + field="capital_of_divisions[].division_id", + name="no_whitespace", + expr=array_check( + "capital_of_divisions", + lambda el: check_pattern( + el["division_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + shape=CheckShape.ARRAY, + root_field="capital_of_divisions", + ) + + +def _capital_of_divisions_subtype_required_check() -> Check: + return Check( + field="capital_of_divisions[].subtype", + name="required", + expr=array_check( + "capital_of_divisions", lambda el: check_required(el["subtype"]) + ), + shape=CheckShape.ARRAY, + root_field="capital_of_divisions", + ) + + +def 
_capital_of_divisions_subtype_enum_check() -> Check: + return Check( + field="capital_of_divisions[].subtype", + name="enum", + expr=array_check( + "capital_of_divisions", + lambda el: check_enum( + el["subtype"], + [ + "country", + "dependency", + "macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + ), + shape=CheckShape.ARRAY, + root_field="capital_of_divisions", + ) + + +def _wikidata_check() -> Check: + return Check( + field="wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="wikidata", + ) + + +def _check_require_if_0_check() -> Check: + return Check( + field="admin_level_required_0", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_1_check() -> Check: + return Check( + field="admin_level_required_1", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macrocounty", + "subtype = 'macrocounty'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_2_check() -> Check: + return Check( + field="admin_level_required_2", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_3_check() -> Check: + return Check( + field="admin_level_required_3", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macroregion", + "subtype = 'macroregion'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_4_check() -> Check: + return Check( + field="admin_level_required_4", + name="require_if", + 
expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "dependency", + "subtype = 'dependency'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_5_check() -> Check: + return Check( + field="admin_level_required_5", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_6_check() -> Check: + return Check( + field="parent_division_id_required", + name="require_if", + expr=check_require_if( + F.col("parent_division_id"), + F.col("subtype") != "country", + "subtype != 'country'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_7_check() -> Check: + return Check( + field="parent_division_id_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("parent_division_id"), + F.col("subtype") == "country", + "subtype = 'country'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def division_checks() -> list[Check]: + """All validation checks for division.""" + return [ + _cartography_prominence_bounds_check(), + _cartography_prominence_bounds_check_1(), + _cartography_min_zoom_bounds_check(), + _cartography_min_zoom_bounds_check_1(), + _cartography_max_zoom_bounds_check(), + _cartography_max_zoom_bounds_check_1(), + _names_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + 
_names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _country_required_check(), + _country_country_code_alpha2_check(), + _hierarchies_check(), + _hierarchies_min_length_check(), + _hierarchies_unique_check(), + _hierarchies_min_length_check_1(), + _hierarchies_unique_check_1(), + _hierarchies_division_id_required_check(), + _hierarchies_division_id_string_min_length_check(), + _hierarchies_division_id_no_whitespace_check(), + _hierarchies_subtype_required_check(), + _hierarchies_subtype_enum_check(), + _hierarchies_name_required_check(), + _hierarchies_name_string_min_length_check(), + _hierarchies_name_stripped_check(), + _parent_division_id_string_min_length_check(), + _parent_division_id_no_whitespace_check(), + _admin_level_bounds_check(), + _admin_level_bounds_check_1(), + _class_check(), + _region_check(), + _perspectives_mode_required_check(), + _perspectives_mode_enum_check(), + _perspectives_countries_check(), + 
_perspectives_countries_min_length_check(), + _perspectives_countries_unique_check(), + _perspectives_countries_check_1(), + _norms_driving_side_check(), + _population_check(), + _capital_division_ids_min_length_check(), + _capital_division_ids_unique_check(), + _capital_division_ids_string_min_length_check(), + _capital_division_ids_no_whitespace_check(), + _capital_of_divisions_min_length_check(), + _capital_of_divisions_unique_check(), + _capital_of_divisions_division_id_required_check(), + _capital_of_divisions_division_id_string_min_length_check(), + _capital_of_divisions_division_id_no_whitespace_check(), + _capital_of_divisions_subtype_required_check(), + _capital_of_divisions_subtype_enum_check(), + _wikidata_check(), + _check_require_if_0_check(), + _check_require_if_1_check(), + _check_require_if_2_check(), + _check_require_if_3_check(), + _check_require_if_4_check(), + _check_require_if_5_check(), + _check_require_if_6_check(), + _check_forbid_if_7_check(), + ] + + +DIVISION_SCHEMA = StructType( + [ + StructField( + "cartography", + StructType( + [ + StructField("prominence", IntegerType(), True), + StructField("min_zoom", IntegerType(), True), + StructField("max_zoom", IntegerType(), True), + StructField("sort_key", IntegerType(), True), + ] + ), + True, + ), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] 
+ ), + True, + ), + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("country", StringType(), True), + StructField( + "hierarchies", + ArrayType( + ArrayType( + StructType( + [ + StructField("division_id", StringType(), True), + StructField("subtype", StringType(), True), + StructField("name", StringType(), True), + ] + ), + True, + ), + True, + ), + True, + ), + StructField("parent_division_id", StringType(), True), + StructField("admin_level", IntegerType(), True), + StructField("class", StringType(), True), + StructField("local_type", MapType(StringType(), StringType(), True), True), + StructField("region", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField("countries", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField( + "norms", StructType([StructField("driving_side", StringType(), True)]), True + ), + StructField("population", IntegerType(), True), + StructField("capital_division_ids", ArrayType(StringType(), True), True), + StructField( + "capital_of_divisions", + ArrayType( + StructType( + [ + StructField("division_id", StringType(), True), + StructField("subtype", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("wikidata", 
StringType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.divisions:Division" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_SCHEMA, + checks=division_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py new file mode 100644 index 000000000..dffe20713 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_area.py @@ -0,0 +1,962 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. + +"""Division Area validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_radio_group, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import 
GeometryType + + +def _names_check() -> Check: + return Check( + field="names", + name="required", + expr=check_required(F.col("names")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=check_required(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + 
name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: 
check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + 
) + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.MULTI_POLYGON, GeometryType.POLYGON + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + 
expr=check_enum(F.col("theme"), ["divisions"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["division_area"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + 
shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "country", + "dependency", + 
"macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum(F.col("class"), ["land", "maritime"]), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _division_id_required_check() -> Check: + return Check( + field="division_id", + name="required", + expr=check_required(F.col("division_id")), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _division_id_string_min_length_check() -> Check: + return Check( + field="division_id", + name="string_min_length", + expr=check_string_min_length(F.col("division_id"), 1), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _division_id_no_whitespace_check() -> Check: + return Check( + field="division_id", + name="no_whitespace", + expr=check_pattern( + F.col("division_id"), + "^\\S+\\z", + label="String without whitespace characters", + ), + shape=CheckShape.SCALAR, + root_field="division_id", + ) + + +def _country_required_check() -> Check: + return Check( + field="country", + name="required", + expr=check_required(F.col("country")), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _country_country_code_alpha2_check() -> Check: + return Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _region_check() -> Check: + return Check( + field="region", + name="region_code", + expr=check_pattern( + F.col("region"), + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 
subdivision code", + ), + shape=CheckShape.SCALAR, + root_field="region", + ) + + +def _admin_level_bounds_check() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), ge=0), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _admin_level_bounds_check_1() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), le=16), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _check_radio_group_0_check() -> Check: + return Check( + field="radio_group", + name="radio_group", + expr=check_radio_group( + [F.col("is_land"), F.col("is_territorial")], ["is_land", "is_territorial"] + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_1_check() -> Check: + return Check( + field="admin_level_required_0", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_2_check() -> Check: + return Check( + field="admin_level_required_1", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macrocounty", + "subtype = 'macrocounty'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_3_check() -> Check: + return Check( + field="admin_level_required_2", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_4_check() -> Check: + return Check( + field="admin_level_required_3", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macroregion", + "subtype = 'macroregion'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_5_check() -> Check: + return Check( + field="admin_level_required_4", + 
name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "dependency", + "subtype = 'dependency'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_6_check() -> Check: + return Check( + field="admin_level_required_5", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def division_area_checks() -> list[Check]: + """All validation checks for division_area.""" + return [ + _names_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + 
_sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _class_required_check(), + _class_enum_check(), + _division_id_required_check(), + _division_id_string_min_length_check(), + _division_id_no_whitespace_check(), + _country_required_check(), + _country_country_code_alpha2_check(), + _region_check(), + _admin_level_bounds_check(), + _admin_level_bounds_check_1(), + _check_radio_group_0_check(), + _check_require_if_1_check(), + _check_require_if_2_check(), + _check_require_if_3_check(), + _check_require_if_4_check(), + _check_require_if_5_check(), + _check_require_if_6_check(), + ] + + +DIVISION_AREA_SCHEMA = StructType( + [ + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), 
True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("is_land", BooleanType(), True), + StructField("is_territorial", BooleanType(), True), + StructField("division_id", StringType(), True), + StructField("country", StringType(), True), + StructField("region", StringType(), True), + StructField("admin_level", IntegerType(), True), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = ( + GeometryType.MULTI_POLYGON, + GeometryType.POLYGON, +) + +ENTRY_POINT = "overture.schema.divisions:DivisionArea" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_AREA_SCHEMA, + checks=division_area_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py new file mode 100644 index 000000000..68c7b1f62 --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/divisions/division_boundary.py @@ -0,0 +1,782 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Division Boundary validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_max_length, + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_radio_group, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def 
_bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type( + F.col("geometry"), GeometryType.LINE_STRING, GeometryType.MULTI_LINE_STRING + ), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["divisions"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["division_boundary"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + 
root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def 
_sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum( + F.col("subtype"), + [ + "country", + "dependency", + "macroregion", + "region", + "macrocounty", + "county", + "localadmin", + "locality", + "borough", + "macrohood", + "neighborhood", + "microhood", + ], + ), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=check_required(F.col("class")), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=check_enum(F.col("class"), ["land", "maritime"]), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _division_ids_check() -> Check: + return Check( + field="division_ids", + name="required", + expr=check_required(F.col("division_ids")), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + 
+def _division_ids_min_length_check() -> Check: + return Check( + field="division_ids_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("division_ids"), 2), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_max_length_check() -> Check: + return Check( + field="division_ids_max_length", + name="array_max_length", + expr=check_array_max_length(F.col("division_ids"), 2), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_unique_check() -> Check: + return Check( + field="division_ids_unique", + name="struct_unique", + expr=check_struct_unique(F.col("division_ids")), + shape=CheckShape.SCALAR, + root_field="division_ids", + ) + + +def _division_ids_string_min_length_check() -> Check: + return Check( + field="division_ids[]", + name="string_min_length", + expr=array_check("division_ids", lambda el: check_string_min_length(el, 1)), + shape=CheckShape.ARRAY, + root_field="division_ids", + ) + + +def _division_ids_no_whitespace_check() -> Check: + return Check( + field="division_ids[]", + name="no_whitespace", + expr=array_check( + "division_ids", + lambda el: check_pattern( + el, "^\\S+\\z", label="String without whitespace characters" + ), + ), + shape=CheckShape.ARRAY, + root_field="division_ids", + ) + + +def _country_check() -> Check: + return Check( + field="country", + name="country_code_alpha2", + expr=check_pattern( + F.col("country"), "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + shape=CheckShape.SCALAR, + root_field="country", + ) + + +def _region_check() -> Check: + return Check( + field="region", + name="region_code", + expr=check_pattern( + F.col("region"), + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 subdivision code", + ), + shape=CheckShape.SCALAR, + root_field="region", + ) + + +def _admin_level_bounds_check() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), ge=0), + 
shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _admin_level_bounds_check_1() -> Check: + return Check( + field="admin_level", + name="bounds", + expr=check_bounds(F.col("admin_level"), le=16), + shape=CheckShape.SCALAR, + root_field="admin_level", + ) + + +def _perspectives_mode_required_check() -> Check: + return Check( + field="perspectives.mode", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.mode")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_mode_enum_check() -> Check: + return Check( + field="perspectives.mode", + name="enum", + expr=check_enum(F.col("perspectives.mode"), ["accepted_by", "disputed_by"]), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check() -> Check: + return Check( + field="perspectives.countries", + name="required", + expr=F.when( + F.col("perspectives").isNotNull(), + check_required(F.col("perspectives.countries")), + ), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_min_length_check() -> Check: + return Check( + field="perspectives.countries_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("perspectives.countries"), 1), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_unique_check() -> Check: + return Check( + field="perspectives.countries_unique", + name="struct_unique", + expr=check_struct_unique(F.col("perspectives.countries")), + shape=CheckShape.SCALAR, + root_field="perspectives", + ) + + +def _perspectives_countries_check_1() -> Check: + return Check( + field="perspectives.countries[]", + name="country_code_alpha2", + expr=array_check( + "perspectives.countries", + lambda el: check_pattern( + el, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + shape=CheckShape.ARRAY, + root_field="perspectives", + ) + + +def 
_check_radio_group_0_check() -> Check: + return Check( + field="radio_group", + name="radio_group", + expr=check_radio_group( + [F.col("is_land"), F.col("is_territorial")], ["is_land", "is_territorial"] + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_1_check() -> Check: + return Check( + field="admin_level_required_0", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "county", "subtype = 'county'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_2_check() -> Check: + return Check( + field="admin_level_required_1", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macrocounty", + "subtype = 'macrocounty'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_3_check() -> Check: + return Check( + field="admin_level_required_2", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "region", "subtype = 'region'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_4_check() -> Check: + return Check( + field="admin_level_required_3", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "macroregion", + "subtype = 'macroregion'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_5_check() -> Check: + return Check( + field="admin_level_required_4", + name="require_if", + expr=check_require_if( + F.col("admin_level"), + F.col("subtype") == "dependency", + "subtype = 'dependency'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_6_check() -> Check: + return Check( + field="admin_level_required_5", + name="require_if", + expr=check_require_if( + F.col("admin_level"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_7_check() -> Check: + return 
Check( + field="country_required", + name="require_if", + expr=check_require_if( + F.col("country"), F.col("subtype") != "country", "subtype != 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_8_check() -> Check: + return Check( + field="country_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("country"), F.col("subtype") == "country", "subtype = 'country'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def division_boundary_checks() -> list[Check]: + """All validation checks for division_boundary.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _class_required_check(), + _class_enum_check(), + _division_ids_check(), + _division_ids_min_length_check(), + _division_ids_max_length_check(), + _division_ids_unique_check(), + _division_ids_string_min_length_check(), + _division_ids_no_whitespace_check(), + _country_check(), + _region_check(), + _admin_level_bounds_check(), + _admin_level_bounds_check_1(), + _perspectives_mode_required_check(), + _perspectives_mode_enum_check(), + _perspectives_countries_check(), + _perspectives_countries_min_length_check(), + 
_perspectives_countries_unique_check(), + _perspectives_countries_check_1(), + _check_radio_group_0_check(), + _check_require_if_1_check(), + _check_require_if_2_check(), + _check_require_if_3_check(), + _check_require_if_4_check(), + _check_require_if_5_check(), + _check_require_if_6_check(), + _check_require_if_7_check(), + _check_forbid_if_8_check(), + ] + + +DIVISION_BOUNDARY_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField("class", StringType(), True), + StructField("is_land", BooleanType(), True), + StructField("is_territorial", BooleanType(), True), + StructField("division_ids", ArrayType(StringType(), True), True), + StructField("country", StringType(), True), + StructField("region", StringType(), True), + StructField("admin_level", IntegerType(), True), + StructField("is_disputed", BooleanType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField("countries", ArrayType(StringType(), True), True), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= ( + GeometryType.LINE_STRING, + GeometryType.MULTI_LINE_STRING, +) + +ENTRY_POINT = "overture.schema.divisions:DivisionBoundary" + +PARTITIONS: dict[str, str] = {"theme": "divisions"} + +FEATURE_VALIDATION = FeatureValidation( + schema=DIVISION_BOUNDARY_SCHEMA, + checks=division_boundary_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py new file mode 100644 index 000000000..c9d448f6a --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/places/place.py @@ -0,0 +1,1505 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Place validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_email, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, + check_url_format, + check_url_length, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + 
return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["places"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["place"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + 
field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", 
+ expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _operating_status_check() -> Check: + return Check( + field="operating_status", + name="enum", + expr=check_enum( + F.col("operating_status"), + ["open", "permanently_closed", "temporarily_closed"], + ), + shape=CheckShape.SCALAR, + root_field="operating_status", + ) + + +def _categories_primary_required_check() -> Check: + return Check( + field="categories.primary", + name="required", + expr=F.when( + F.col("categories").isNotNull(), check_required(F.col("categories.primary")) + ), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_primary_snake_case_check() -> Check: + return Check( + field="categories.primary", + name="snake_case", + expr=check_pattern( + F.col("categories.primary"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_alternate_unique_check() -> Check: + return Check( + field="categories.alternate_unique", + name="struct_unique", + expr=check_struct_unique(F.col("categories.alternate")), + shape=CheckShape.SCALAR, + root_field="categories", + ) + + +def _categories_alternate_check() -> Check: + return Check( + field="categories.alternate[]", + name="snake_case", + expr=array_check( + "categories.alternate", + lambda 
el: check_pattern( + el, "^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="categories", + ) + + +def _basic_category_check() -> Check: + return Check( + field="basic_category", + name="snake_case", + expr=check_pattern( + F.col("basic_category"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="basic_category", + ) + + +def _taxonomy_primary_required_check() -> Check: + return Check( + field="taxonomy.primary", + name="required", + expr=F.when( + F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.primary")) + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_primary_snake_case_check() -> Check: + return Check( + field="taxonomy.primary", + name="snake_case", + expr=check_pattern( + F.col("taxonomy.primary"), + "^[a-z0-9]+(_[a-z0-9]+)*\\z", + label="Category in snake_case format", + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_check() -> Check: + return Check( + field="taxonomy.hierarchy", + name="required", + expr=F.when( + F.col("taxonomy").isNotNull(), check_required(F.col("taxonomy.hierarchy")) + ), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_min_length_check() -> Check: + return Check( + field="taxonomy.hierarchy_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("taxonomy.hierarchy"), 1), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_unique_check() -> Check: + return Check( + field="taxonomy.hierarchy_unique", + name="struct_unique", + expr=check_struct_unique(F.col("taxonomy.hierarchy")), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_hierarchy_check_1() -> Check: + return Check( + field="taxonomy.hierarchy[]", + name="snake_case", + expr=array_check( + "taxonomy.hierarchy", + lambda el: check_pattern( + el, 
"^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_min_length_check() -> Check: + return Check( + field="taxonomy.alternates_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("taxonomy.alternates"), 1), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_unique_check() -> Check: + return Check( + field="taxonomy.alternates_unique", + name="struct_unique", + expr=check_struct_unique(F.col("taxonomy.alternates")), + shape=CheckShape.SCALAR, + root_field="taxonomy", + ) + + +def _taxonomy_alternates_check() -> Check: + return Check( + field="taxonomy.alternates[]", + name="snake_case", + expr=array_check( + "taxonomy.alternates", + lambda el: check_pattern( + el, "^[a-z0-9]+(_[a-z0-9]+)*\\z", label="Category in snake_case format" + ), + ), + shape=CheckShape.ARRAY, + root_field="taxonomy", + ) + + +def _confidence_bounds_check() -> Check: + return Check( + field="confidence", + name="bounds", + expr=check_bounds(F.col("confidence"), ge=0.0), + shape=CheckShape.SCALAR, + root_field="confidence", + ) + + +def _confidence_bounds_check_1() -> Check: + return Check( + field="confidence", + name="bounds", + expr=check_bounds(F.col("confidence"), le=1.0), + shape=CheckShape.SCALAR, + root_field="confidence", + ) + + +def _websites_min_length_check() -> Check: + return Check( + field="websites_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("websites"), 1), + shape=CheckShape.SCALAR, + root_field="websites", + ) + + +def _websites_unique_check() -> Check: + return Check( + field="websites_unique", + name="struct_unique", + expr=check_struct_unique(F.col("websites")), + shape=CheckShape.SCALAR, + root_field="websites", + ) + + +def _websites_url_format_check() -> Check: + return Check( + field="websites[]", + name="url_format", + expr=array_check("websites", lambda el: 
check_url_format(el)), + shape=CheckShape.ARRAY, + root_field="websites", + ) + + +def _websites_url_length_check() -> Check: + return Check( + field="websites[]", + name="url_length", + expr=array_check("websites", lambda el: check_url_length(el)), + shape=CheckShape.ARRAY, + root_field="websites", + ) + + +def _socials_min_length_check() -> Check: + return Check( + field="socials_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("socials"), 1), + shape=CheckShape.SCALAR, + root_field="socials", + ) + + +def _socials_unique_check() -> Check: + return Check( + field="socials_unique", + name="struct_unique", + expr=check_struct_unique(F.col("socials")), + shape=CheckShape.SCALAR, + root_field="socials", + ) + + +def _socials_url_format_check() -> Check: + return Check( + field="socials[]", + name="url_format", + expr=array_check("socials", lambda el: check_url_format(el)), + shape=CheckShape.ARRAY, + root_field="socials", + ) + + +def _socials_url_length_check() -> Check: + return Check( + field="socials[]", + name="url_length", + expr=array_check("socials", lambda el: check_url_length(el)), + shape=CheckShape.ARRAY, + root_field="socials", + ) + + +def _emails_min_length_check() -> Check: + return Check( + field="emails_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("emails"), 1), + shape=CheckShape.SCALAR, + root_field="emails", + ) + + +def _emails_unique_check() -> Check: + return Check( + field="emails_unique", + name="struct_unique", + expr=check_struct_unique(F.col("emails")), + shape=CheckShape.SCALAR, + root_field="emails", + ) + + +def _emails_check() -> Check: + return Check( + field="emails[]", + name="email", + expr=array_check("emails", lambda el: check_email(el)), + shape=CheckShape.ARRAY, + root_field="emails", + ) + + +def _phones_min_length_check() -> Check: + return Check( + field="phones_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("phones"), 1), + 
shape=CheckShape.SCALAR, + root_field="phones", + ) + + +def _phones_unique_check() -> Check: + return Check( + field="phones_unique", + name="struct_unique", + expr=check_struct_unique(F.col("phones")), + shape=CheckShape.SCALAR, + root_field="phones", + ) + + +def _phones_check() -> Check: + return Check( + field="phones[]", + name="phone_number", + expr=array_check( + "phones", + lambda el: check_pattern( + el, + "^\\+\\d{1,3}[\\s\\-\\(\\)0-9]+\\z", + label="International phone number (+ followed by country code and number)", + ), + ), + shape=CheckShape.ARRAY, + root_field="phones", + ) + + +def _brand_names_primary_required_check() -> Check: + return Check( + field="brand.names.primary", + name="required", + expr=F.when( + F.col("brand.names").isNotNull(), + check_required(F.col("brand.names.primary")), + ), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_primary_string_min_length_check() -> Check: + return Check( + field="brand.names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("brand.names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_primary_stripped_check() -> Check: + return Check( + field="brand.names.primary", + name="stripped", + expr=check_stripped(F.col("brand.names.primary")), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _brand_names_rules_value_required_check() -> Check: + return Check( + field="brand.names.rules[].value", + name="required", + expr=array_check("brand.names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_value_string_min_length_check() -> Check: + return Check( + field="brand.names.rules[].value", + name="string_min_length", + expr=array_check( + "brand.names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_value_stripped_check() -> Check: + 
return Check( + field="brand.names.rules[].value", + name="stripped", + expr=array_check("brand.names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_variant_required_check() -> Check: + return Check( + field="brand.names.rules[].variant", + name="required", + expr=array_check("brand.names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_variant_enum_check() -> Check: + return Check( + field="brand.names.rules[].variant", + name="enum", + expr=array_check( + "brand.names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_language_check() -> Check: + return Check( + field="brand.names.rules[].language", + name="language_tag", + expr=array_check( + "brand.names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.mode", + name="required", + expr=array_check( + "brand.names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "brand.names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, 
+ root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries", + name="required", + expr=array_check( + "brand.names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "brand.names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "brand.names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="brand.names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "brand.names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_length_check() -> Check: + return Check( + field="brand.names.rules[].between", + name="linear_range_length", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="brand.names.rules[].between", + 
name="linear_range_bounds", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_between_linear_range_order_check() -> Check: + return Check( + field="brand.names.rules[].between", + name="linear_range_order", + expr=array_check( + "brand.names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_names_rules_side_check() -> Check: + return Check( + field="brand.names.rules[].side", + name="enum", + expr=array_check( + "brand.names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="brand", + ) + + +def _brand_wikidata_check() -> Check: + return Check( + field="brand.wikidata", + name="wikidata_id", + expr=check_pattern( + F.col("brand.wikidata"), + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + shape=CheckShape.SCALAR, + root_field="brand", + ) + + +def _addresses_min_length_check() -> Check: + return Check( + field="addresses_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("addresses"), 1), + shape=CheckShape.SCALAR, + root_field="addresses", + ) + + +def _addresses_region_check() -> Check: + return Check( + field="addresses[].region", + name="region_code", + expr=array_check( + "addresses", + lambda el: check_pattern( + el["region"], + "^[A-Z]{2}-[A-Z0-9]{1,3}\\z", + label="ISO 3166-2 subdivision code", + ), + ), + shape=CheckShape.ARRAY, + root_field="addresses", + ) + + +def _addresses_country_check() -> Check: + return Check( + field="addresses[].country", + name="country_code_alpha2", + expr=array_check( + "addresses", + lambda el: check_pattern( + el["country"], "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + shape=CheckShape.ARRAY, + root_field="addresses", + ) + + +def _names_primary_required_check() -> Check: + return Check( + 
field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + 
field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def place_checks() -> list[Check]: + """All validation checks for place.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + 
_bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _operating_status_check(), + _categories_primary_required_check(), + _categories_primary_snake_case_check(), + _categories_alternate_unique_check(), + _categories_alternate_check(), + _basic_category_check(), + _taxonomy_primary_required_check(), + _taxonomy_primary_snake_case_check(), + _taxonomy_hierarchy_check(), + _taxonomy_hierarchy_min_length_check(), + _taxonomy_hierarchy_unique_check(), + _taxonomy_hierarchy_check_1(), + _taxonomy_alternates_min_length_check(), + _taxonomy_alternates_unique_check(), + _taxonomy_alternates_check(), + _confidence_bounds_check(), + _confidence_bounds_check_1(), + _websites_min_length_check(), + _websites_unique_check(), + _websites_url_format_check(), + _websites_url_length_check(), + _socials_min_length_check(), + _socials_unique_check(), + _socials_url_format_check(), + _socials_url_length_check(), + _emails_min_length_check(), + _emails_unique_check(), + _emails_check(), + _phones_min_length_check(), + _phones_unique_check(), + _phones_check(), + _brand_names_primary_required_check(), + _brand_names_primary_string_min_length_check(), + _brand_names_primary_stripped_check(), + _brand_names_rules_value_required_check(), + _brand_names_rules_value_string_min_length_check(), + 
_brand_names_rules_value_stripped_check(), + _brand_names_rules_variant_required_check(), + _brand_names_rules_variant_enum_check(), + _brand_names_rules_language_check(), + _brand_names_rules_perspectives_mode_required_check(), + _brand_names_rules_perspectives_mode_enum_check(), + _brand_names_rules_perspectives_countries_check(), + _brand_names_rules_perspectives_countries_min_length_check(), + _brand_names_rules_perspectives_countries_unique_check(), + _brand_names_rules_perspectives_countries_check_1(), + _brand_names_rules_between_linear_range_length_check(), + _brand_names_rules_between_linear_range_bounds_check(), + _brand_names_rules_between_linear_range_order_check(), + _brand_names_rules_side_check(), + _brand_wikidata_check(), + _addresses_min_length_check(), + _addresses_region_check(), + _addresses_country_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + ] + + +PLACE_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + 
StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("operating_status", StringType(), True), + StructField( + "categories", + StructType( + [ + StructField("primary", StringType(), True), + StructField("alternate", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField("basic_category", StringType(), True), + StructField( + "taxonomy", + StructType( + [ + StructField("primary", StringType(), True), + StructField("hierarchy", ArrayType(StringType(), True), True), + StructField("alternates", ArrayType(StringType(), True), True), + ] + ), + True, + ), + StructField("confidence", DoubleType(), True), + StructField("websites", ArrayType(StringType(), True), True), + StructField("socials", ArrayType(StringType(), True), True), + StructField("emails", ArrayType(StringType(), True), True), + StructField("phones", ArrayType(StringType(), True), True), + StructField( + "brand", + StructType( + [ + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", + MapType(StringType(), StringType(), True), + True, + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField( + "value", StringType(), True + ), + StructField( + "variant", StringType(), True + ), + StructField( + "language", StringType(), True + ), + StructField( + "perspectives", + StructType( + [ + StructField( + "mode", + StringType(), + True, + ), + StructField( + "countries", + ArrayType( + StringType(), True + ), + True, + ), + ] + ), + True, + ), + StructField( + "between", + ArrayType(DoubleType(), True), + True, + ), + 
StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("wikidata", StringType(), True), + ] + ), + True, + ), + StructField( + "addresses", + ArrayType( + StructType( + [ + StructField("freeform", StringType(), True), + StructField("locality", StringType(), True), + StructField("postcode", StringType(), True), + StructField("region", StringType(), True), + StructField("country", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] 
= (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.places:Place" + +PARTITIONS: dict[str, str] = {"theme": "places"} + +FEATURE_VALIDATION = FeatureValidation( + schema=PLACE_SCHEMA, + checks=place_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py new file mode 100644 index 000000000..5813ca61b --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/connector.py @@ -0,0 +1,372 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Connector validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + 
expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.POINT), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["connector"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + 
expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def connector_checks() -> list[Check]: + """All validation checks for connector.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + ] + + +CONNECTOR_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + 
StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.POINT,) + +ENTRY_POINT = "overture.schema.transportation:Connector" + +PARTITIONS: dict[str, str] = {"theme": "transportation"} + +FEATURE_VALIDATION = FeatureValidation( + schema=CONNECTOR_SCHEMA, + checks=connector_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py new file mode 100644 index 000000000..cc9fd32bc --- /dev/null +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -0,0 +1,5053 @@ +# This file is auto-generated by overture-schema-codegen. Do not edit. 
+ +"""Segment validation expression builders.""" + +from __future__ import annotations + +from pyspark.sql import functions as F +from pyspark.sql.types import ( + ArrayType, + BinaryType, + BooleanType, + DoubleType, + IntegerType, + MapType, + StringType, + StructField, + StructType, +) + +from overture.schema.pyspark.check import Check, CheckShape, FeatureValidation +from overture.schema.pyspark.expressions._schema_structs import ( + BBOX_STRUCT, +) +from overture.schema.pyspark.expressions.column_patterns import ( + array_check, + check_struct_unique, + nested_array_check, +) +from overture.schema.pyspark.expressions.constraint_expressions import ( + check_array_min_length, + check_bbox_completeness, + check_bbox_lat_ordering, + check_bbox_lat_range, + check_bounds, + check_enum, + check_forbid_if, + check_geometry_type, + check_json_pointer, + check_linear_range_bounds, + check_linear_range_length, + check_linear_range_order, + check_pattern, + check_require_any_of, + check_require_if, + check_required, + check_string_min_length, + check_stripped, +) +from overture.schema.system.primitive import GeometryType + + +def _id_required_check() -> Check: + return Check( + field="id", + name="required", + expr=check_required(F.col("id")), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_string_min_length_check() -> Check: + return Check( + field="id", + name="string_min_length", + expr=check_string_min_length(F.col("id"), 1), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _id_no_whitespace_check() -> Check: + return Check( + field="id", + name="no_whitespace", + expr=check_pattern( + F.col("id"), "^\\S+\\z", label="String without whitespace characters" + ), + shape=CheckShape.SCALAR, + root_field="id", + ) + + +def _bbox_bbox_completeness_check() -> Check: + return Check( + field="bbox", + name="bbox_completeness", + expr=check_bbox_completeness(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def 
_bbox_bbox_lat_ordering_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_ordering", + expr=check_bbox_lat_ordering(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _bbox_bbox_lat_range_check() -> Check: + return Check( + field="bbox", + name="bbox_lat_range", + expr=check_bbox_lat_range(F.col("bbox")), + shape=CheckShape.SCALAR, + root_field="bbox", + ) + + +def _geometry_required_check() -> Check: + return Check( + field="geometry", + name="required", + expr=check_required(F.col("geometry")), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _geometry_geometry_type_check() -> Check: + return Check( + field="geometry", + name="geometry_type", + expr=check_geometry_type(F.col("geometry"), GeometryType.LINE_STRING), + shape=CheckShape.SCALAR, + root_field="geometry", + ) + + +def _theme_required_check() -> Check: + return Check( + field="theme", + name="required", + expr=check_required(F.col("theme")), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _theme_enum_check() -> Check: + return Check( + field="theme", + name="enum", + expr=check_enum(F.col("theme"), ["transportation"]), + shape=CheckShape.SCALAR, + root_field="theme", + ) + + +def _type_required_check() -> Check: + return Check( + field="type", + name="required", + expr=check_required(F.col("type")), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _type_enum_check() -> Check: + return Check( + field="type", + name="enum", + expr=check_enum(F.col("type"), ["segment"]), + shape=CheckShape.SCALAR, + root_field="type", + ) + + +def _version_required_check() -> Check: + return Check( + field="version", + name="required", + expr=check_required(F.col("version")), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def _version_bounds_check() -> Check: + return Check( + field="version", + name="bounds", + expr=check_bounds(F.col("version"), ge=0), + shape=CheckShape.SCALAR, + root_field="version", + ) + + +def 
_sources_min_length_check() -> Check: + return Check( + field="sources_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("sources"), 1), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_unique_check() -> Check: + return Check( + field="sources_unique", + name="struct_unique", + expr=check_struct_unique(F.col("sources")), + shape=CheckShape.SCALAR, + root_field="sources", + ) + + +def _sources_property_required_check() -> Check: + return Check( + field="sources[].property", + name="required", + expr=array_check("sources", lambda el: check_required(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_property_json_pointer_check() -> Check: + return Check( + field="sources[].property", + name="json_pointer", + expr=array_check("sources", lambda el: check_json_pointer(el["property"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_dataset_check() -> Check: + return Check( + field="sources[].dataset", + name="required", + expr=array_check("sources", lambda el: check_required(el["dataset"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_license_check() -> Check: + return Check( + field="sources[].license", + name="stripped", + expr=array_check("sources", lambda el: check_stripped(el["license"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_confidence_bounds_check_1() -> Check: + return Check( + field="sources[].confidence", + name="bounds", + expr=array_check("sources", lambda el: check_bounds(el["confidence"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_length_check() -> Check: + return Check( + 
field="sources[].between", + name="linear_range_length", + expr=array_check( + "sources", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_bounds_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_bounds", + expr=array_check( + "sources", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _sources_between_linear_range_order_check() -> Check: + return Check( + field="sources[].between", + name="linear_range_order", + expr=array_check("sources", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="sources", + ) + + +def _subtype_required_check() -> Check: + return Check( + field="subtype", + name="required", + expr=check_required(F.col("subtype")), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _subtype_enum_check() -> Check: + return Check( + field="subtype", + name="enum", + expr=check_enum(F.col("subtype"), ["road", "rail", "water"]), + shape=CheckShape.SCALAR, + root_field="subtype", + ) + + +def _access_restrictions_min_length_check() -> Check: + return Check( + field="access_restrictions_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("access_restrictions"), 1), + shape=CheckShape.SCALAR, + root_field="access_restrictions", + ) + + +def _access_restrictions_unique_check() -> Check: + return Check( + field="access_restrictions_unique", + name="struct_unique", + expr=check_struct_unique(F.col("access_restrictions")), + shape=CheckShape.SCALAR, + root_field="access_restrictions", + ) + + +def _access_restrictions_access_type_required_check() -> Check: + return Check( + field="access_restrictions[].access_type", + name="required", + expr=array_check( + "access_restrictions", lambda el: check_required(el["access_type"]) + ), + shape=CheckShape.ARRAY, + 
root_field="access_restrictions", + ) + + +def _access_restrictions_access_type_enum_check() -> Check: + return Check( + field="access_restrictions[].access_type", + name="enum", + expr=array_check( + "access_restrictions", + lambda el: check_enum( + el["access_type"], ["allowed", "denied", "designated"] + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_length_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_length", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_bounds_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_bounds", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_between_linear_range_order_check() -> Check: + return Check( + field="access_restrictions[].between", + name="linear_range_order", + expr=array_check( + "access_restrictions", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_heading_check() -> Check: + return Check( + field="access_restrictions[].when.heading", + name="enum", + expr=array_check( + "access_restrictions", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.mode_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["mode"], 1), + ), + 
shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_unique_check() -> Check: + return Check( + field="access_restrictions[].when.mode_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["mode"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_mode_check() -> Check: + return Check( + field="access_restrictions[].when.mode[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.using_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_unique_check() -> Check: + return Check( + field="access_restrictions[].when.using_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["using"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_using_check() -> Check: + return Check( + field="access_restrictions[].when.using[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + 
+ +def _access_restrictions_when_recognized_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.recognized_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_recognized_unique_check() -> Check: + return Check( + field="access_restrictions[].when.recognized_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", + lambda el: check_struct_unique(el["when"]["recognized"]), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_recognized_check() -> Check: + return Check( + field="access_restrictions[].when.recognized[]", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_min_length_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle_min_length", + name="array_min_length", + expr=array_check( + "access_restrictions", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unique_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle_unique", + name="struct_unique", + expr=array_check( + "access_restrictions", lambda el: check_struct_unique(el["when"]["vehicle"]) + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_dimension_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].dimension", 
+ name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], lambda inner: check_required(inner["dimension"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].dimension", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_comparison_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].comparison", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], lambda inner: check_required(inner["comparison"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_comparison_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].comparison", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + 
shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "weight", "width"]), + check_required(inner["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_value_bounds_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].value", + name="bounds", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "weight", "width"]), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_required_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_enum_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def 
_access_restrictions_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="required", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), check_required(inner["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit", + name="enum", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum(inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _connectors_min_length_check() -> Check: + return Check( + field="connectors_min_length", + name="array_min_length", + expr=check_array_min_length(F.col("connectors"), 2), + shape=CheckShape.SCALAR, + root_field="connectors", + ) + + +def _connectors_unique_check() -> Check: + return Check( + field="connectors_unique", + name="struct_unique", + expr=check_struct_unique(F.col("connectors")), + shape=CheckShape.SCALAR, + root_field="connectors", + ) + + +def _connectors_connector_id_required_check() -> Check: + return Check( + field="connectors[].connector_id", + name="required", + expr=array_check("connectors", lambda el: check_required(el["connector_id"])), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_connector_id_string_min_length_check() -> Check: + return Check( + field="connectors[].connector_id", + name="string_min_length", + expr=array_check( + "connectors", lambda el: check_string_min_length(el["connector_id"], 1) + ), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def 
_connectors_connector_id_no_whitespace_check() -> Check: + return Check( + field="connectors[].connector_id", + name="no_whitespace", + expr=array_check( + "connectors", + lambda el: check_pattern( + el["connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_at_bounds_check() -> Check: + return Check( + field="connectors[].at", + name="bounds", + expr=array_check("connectors", lambda el: check_bounds(el["at"], ge=0.0)), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _connectors_at_bounds_check_1() -> Check: + return Check( + field="connectors[].at", + name="bounds", + expr=array_check("connectors", lambda el: check_bounds(el["at"], le=1.0)), + shape=CheckShape.ARRAY, + root_field="connectors", + ) + + +def _level_rules_value_check() -> Check: + return Check( + field="level_rules[].value", + name="required", + expr=array_check("level_rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_length_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_length", + expr=array_check( + "level_rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_bounds", + expr=array_check( + "level_rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def _level_rules_between_linear_range_order_check() -> Check: + return Check( + field="level_rules[].between", + name="linear_range_order", + expr=array_check( + "level_rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="level_rules", + ) + + +def 
_routes_name_string_min_length_check() -> Check: + return Check( + field="routes[].name", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["name"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_name_stripped_check() -> Check: + return Check( + field="routes[].name", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["name"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_network_string_min_length_check() -> Check: + return Check( + field="routes[].network", + name="string_min_length", + expr=array_check( + "routes", lambda el: check_string_min_length(el["network"], 1) + ), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_network_stripped_check() -> Check: + return Check( + field="routes[].network", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["network"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_ref_string_min_length_check() -> Check: + return Check( + field="routes[].ref", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["ref"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_ref_stripped_check() -> Check: + return Check( + field="routes[].ref", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["ref"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_symbol_string_min_length_check() -> Check: + return Check( + field="routes[].symbol", + name="string_min_length", + expr=array_check("routes", lambda el: check_string_min_length(el["symbol"], 1)), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_symbol_stripped_check() -> Check: + return Check( + field="routes[].symbol", + name="stripped", + expr=array_check("routes", lambda el: check_stripped(el["symbol"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def 
_routes_wikidata_check() -> Check: + return Check( + field="routes[].wikidata", + name="wikidata_id", + expr=array_check( + "routes", + lambda el: check_pattern( + el["wikidata"], + "^Q\\d+\\z", + label="Wikidata identifier (Q followed by digits)", + ), + ), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_length_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_length", + expr=array_check("routes", lambda el: check_linear_range_length(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_bounds_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_bounds", + expr=array_check("routes", lambda el: check_linear_range_bounds(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _routes_between_linear_range_order_check() -> Check: + return Check( + field="routes[].between", + name="linear_range_order", + expr=array_check("routes", lambda el: check_linear_range_order(el["between"])), + shape=CheckShape.ARRAY, + root_field="routes", + ) + + +def _subclass_rules_value_required_check() -> Check: + return Check( + field="subclass_rules[].value", + name="required", + expr=array_check("subclass_rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_value_enum_check() -> Check: + return Check( + field="subclass_rules[].value", + name="enum", + expr=array_check( + "subclass_rules", + lambda el: check_enum( + el["value"], + [ + "link", + "sidewalk", + "crosswalk", + "parking_aisle", + "driveway", + "alley", + "cycle_crossing", + ], + ), + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_length_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_length", + expr=array_check( + "subclass_rules", lambda el: 
check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_bounds", + expr=array_check( + "subclass_rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _subclass_rules_between_linear_range_order_check() -> Check: + return Check( + field="subclass_rules[].between", + name="linear_range_order", + expr=array_check( + "subclass_rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="subclass_rules", + ) + + +def _names_primary_required_check() -> Check: + return Check( + field="names.primary", + name="required", + expr=F.when(F.col("names").isNotNull(), check_required(F.col("names.primary"))), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_string_min_length_check() -> Check: + return Check( + field="names.primary", + name="string_min_length", + expr=check_string_min_length(F.col("names.primary"), 1), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_primary_stripped_check() -> Check: + return Check( + field="names.primary", + name="stripped", + expr=check_stripped(F.col("names.primary")), + shape=CheckShape.SCALAR, + root_field="names", + ) + + +def _names_rules_value_required_check() -> Check: + return Check( + field="names.rules[].value", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_value_string_min_length_check() -> Check: + return Check( + field="names.rules[].value", + name="string_min_length", + expr=array_check( + "names.rules", lambda el: check_string_min_length(el["value"], 1) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_value_stripped_check() -> Check: + return Check( + field="names.rules[].value", + name="stripped", + expr=array_check("names.rules", lambda el: check_stripped(el["value"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_required_check() -> Check: + return Check( + field="names.rules[].variant", + name="required", + expr=array_check("names.rules", lambda el: check_required(el["variant"])), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_variant_enum_check() -> Check: + return Check( + field="names.rules[].variant", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["variant"], ["common", "official", "alternate", "short"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_language_check() -> Check: + return Check( + field="names.rules[].language", + name="language_tag", + expr=array_check( + "names.rules", + lambda el: check_pattern( + el["language"], + "^(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}?)|(?:[A-Za-z]{4,8}))(?:-[A-Za-z]{4})?(?:-[A-Za-z]{2}|[0-9]{3})?(?:-(?:[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(?:-[A-WY-Za-wy-z0-9](?:-[A-Za-z0-9]{2,8})+)*\\z", + label="IETF BCP-47 language tag", + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_required_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_mode_enum_check() -> Check: + return Check( + field="names.rules[].perspectives.mode", + name="enum", + expr=array_check( + "names.rules", + lambda el: check_enum( + el["perspectives"]["mode"], ["accepted_by", "disputed_by"] + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def 
_names_rules_perspectives_countries_check() -> Check: + return Check( + field="names.rules[].perspectives.countries", + name="required", + expr=array_check( + "names.rules", + lambda el: F.when( + el["perspectives"].isNotNull(), + check_required(el["perspectives"]["countries"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_min_length_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_min_length", + name="array_min_length", + expr=array_check( + "names.rules", + lambda el: check_array_min_length(el["perspectives"]["countries"], 1), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_unique_check() -> Check: + return Check( + field="names.rules[].perspectives.countries_unique", + name="struct_unique", + expr=array_check( + "names.rules", + lambda el: check_struct_unique(el["perspectives"]["countries"]), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_perspectives_countries_check_1() -> Check: + return Check( + field="names.rules[].perspectives.countries[]", + name="country_code_alpha2", + expr=nested_array_check( + "names.rules", + lambda el: array_check( + el["perspectives"]["countries"], + lambda inner: check_pattern( + inner, "^[A-Z]{2}\\z", label="ISO 3166-1 alpha-2 country code" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_length_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_length", + expr=array_check( + "names.rules", lambda el: check_linear_range_length(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_bounds", + expr=array_check( + "names.rules", lambda el: check_linear_range_bounds(el["between"]) + ), + shape=CheckShape.ARRAY, 
+ root_field="names", + ) + + +def _names_rules_between_linear_range_order_check() -> Check: + return Check( + field="names.rules[].between", + name="linear_range_order", + expr=array_check( + "names.rules", lambda el: check_linear_range_order(el["between"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _names_rules_side_check() -> Check: + return Check( + field="names.rules[].side", + name="enum", + expr=array_check( + "names.rules", lambda el: check_enum(el["side"], ["left", "right"]) + ), + shape=CheckShape.ARRAY, + root_field="names", + ) + + +def _class_required_check() -> Check: + return Check( + field="class", + name="required", + expr=F.when(F.col("subtype").isin(["road"]), check_required(F.col("class"))), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check() -> Check: + return Check( + field="class", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + check_enum( + F.col("class"), + [ + "motorway", + "primary", + "secondary", + "tertiary", + "residential", + "living_street", + "trunk", + "unclassified", + "service", + "pedestrian", + "footway", + "steps", + "path", + "track", + "cycleway", + "bridleway", + "unknown", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _destinations_from_connector_id_required_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_required(el["from_connector_id"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_from_connector_id_string_min_length_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["from_connector_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="destinations", + ) + + +def _destinations_from_connector_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].from_connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["from_connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_required_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_required(el["to_connector_id"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_string_min_length_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["to_connector_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_connector_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].to_connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["to_connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_segment_id_required_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_required(el["to_segment_id"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def 
_destinations_to_segment_id_string_min_length_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_string_min_length(el["to_segment_id"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_to_segment_id_no_whitespace_check() -> Check: + return Check( + field="destinations[].to_segment_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_pattern( + el["to_segment_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_final_heading_required_check() -> Check: + return Check( + field="destinations[].final_heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_required(el["final_heading"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_final_heading_enum_check() -> Check: + return Check( + field="destinations[].final_heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_enum(el["final_heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_min_length_check() -> Check: + return Check( + field="destinations[].labels_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", lambda el: check_array_min_length(el["labels"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_unique_check() -> Check: + return Check( + field="destinations[].labels_unique", + name="struct_unique", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_struct_unique(el["labels"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_required_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_required(inner["value"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_string_min_length_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], + lambda inner: check_string_min_length(inner["value"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_value_stripped_check() -> Check: + return Check( + field="destinations[].labels[].value", + name="stripped", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_stripped(inner["value"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_type_required_check() -> Check: + return Check( + field="destinations[].labels[].type", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["labels"], lambda inner: check_required(inner["type"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_labels_type_enum_check() -> Check: + return Check( + field="destinations[].labels[].type", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: 
array_check( + el["labels"], + lambda inner: check_enum( + inner["type"], + [ + "street", + "country", + "route_ref", + "toward_route_ref", + "unknown", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_symbols_unique_check() -> Check: + return Check( + field="destinations[].symbols_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("destinations", lambda el: check_struct_unique(el["symbols"])), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_symbols_check() -> Check: + return Check( + field="destinations[].symbols[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "destinations", + lambda el: array_check( + el["symbols"], + lambda inner: check_enum( + inner, + [ + "motorway", + "airport", + "hospital", + "center", + "industrial", + "parking", + "bus", + "train_station", + "rest_area", + "ferry", + "motorroad", + "fuel", + "viewpoint", + "fuel_diesel", + "food", + "lodging", + "info", + "camp_site", + "interchange", + "restrooms", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_when_heading_required_check() -> Check: + return Check( + field="destinations[].when.heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: F.when( + el["when"].isNotNull(), check_required(el["when"]["heading"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _destinations_when_heading_enum_check() -> Check: + return Check( + field="destinations[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "destinations", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _prohibited_transitions_sequence_check() -> 
Check: + return Check( + field="prohibited_transitions[].sequence", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_required(el["sequence"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["sequence"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_unique_check() -> Check: + return Check( + field="prohibited_transitions[].sequence_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_struct_unique(el["sequence"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_required_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], lambda inner: check_required(inner["connector_id"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_string_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_string_min_length(inner["connector_id"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_connector_id_no_whitespace_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].connector_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_pattern( + inner["connector_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_required_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], lambda inner: check_required(inner["segment_id"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_string_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="string_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_string_min_length(inner["segment_id"], 1), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_sequence_segment_id_no_whitespace_check() -> Check: + return Check( + field="prohibited_transitions[].sequence[].segment_id", + name="no_whitespace", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["sequence"], + lambda inner: check_pattern( + inner["segment_id"], + "^\\S+\\z", + label="String without whitespace characters", + ), + ), + ), + ), + 
shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_final_heading_required_check() -> Check: + return Check( + field="prohibited_transitions[].final_heading", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", lambda el: check_required(el["final_heading"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_final_heading_enum_check() -> Check: + return Check( + field="prohibited_transitions[].final_heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_enum(el["final_heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_length_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_length(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_bounds_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_bounds(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_between_linear_range_order_check() -> Check: + return Check( + field="prohibited_transitions[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_linear_range_order(el["between"]), + ), + ), + shape=CheckShape.ARRAY, + 
root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_heading_check() -> Check: + return Check( + field="prohibited_transitions[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["mode"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["mode"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_mode_check() -> Check: + return Check( + field="prohibited_transitions[].when.mode[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.using_min_length", + name="array_min_length", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.using_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["using"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_using_check() -> Check: + return Check( + field="prohibited_transitions[].when.using[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.recognized_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.recognized_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["recognized"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_recognized_check() 
-> Check: + return Check( + field="prohibited_transitions[].when.recognized[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_min_length_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unique_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "prohibited_transitions", + lambda el: check_struct_unique(el["when"]["vehicle"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_dimension_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].dimension", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["dimension"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].dimension", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + 
"prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_comparison_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].comparison", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["comparison"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_comparison_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].comparison", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="required", + expr=F.when( + 
F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_value_bounds_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_required_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_enum_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def 
_prohibited_transitions_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum( + inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _road_flags_min_length_check() -> Check: + return Check( + field="road_flags_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("road_flags"), 1), + ), + shape=CheckShape.SCALAR, + root_field="road_flags", + ) + + +def _road_flags_unique_check() -> Check: + return Check( + field="road_flags_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_flags")) + ), + shape=CheckShape.SCALAR, + root_field="road_flags", + ) + + +def _road_flags_values_check() -> Check: + return Check( + field="road_flags[].values", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_flags", lambda el: check_required(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_min_length_check() -> Check: + return Check( + 
field="road_flags[].values_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_array_min_length(el["values"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_unique_check() -> Check: + return Check( + field="road_flags[].values_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_flags", lambda el: check_struct_unique(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_values_check_1() -> Check: + return Check( + field="road_flags[].values[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "road_flags", + lambda el: array_check( + el["values"], + lambda inner: check_enum( + inner, + [ + "is_bridge", + "is_link", + "is_tunnel", + "is_under_construction", + "is_abandoned", + "is_covered", + "is_indoor", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_length_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_bounds_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_flags_between_linear_range_order_check() -> Check: + return Check( + field="road_flags[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_flags", lambda el: 
check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_flags", + ) + + +def _road_surface_min_length_check() -> Check: + return Check( + field="road_surface_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("road_surface"), 1), + ), + shape=CheckShape.SCALAR, + root_field="road_surface", + ) + + +def _road_surface_unique_check() -> Check: + return Check( + field="road_surface_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("road_surface")) + ), + shape=CheckShape.SCALAR, + root_field="road_surface", + ) + + +def _road_surface_value_required_check() -> Check: + return Check( + field="road_surface[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("road_surface", lambda el: check_required(el["value"])), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_value_enum_check() -> Check: + return Check( + field="road_surface[].value", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", + lambda el: check_enum( + el["value"], + [ + "unknown", + "paved", + "unpaved", + "gravel", + "dirt", + "paving_stones", + "metal", + ], + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_length_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_bounds_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: 
check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _road_surface_between_linear_range_order_check() -> Check: + return Check( + field="road_surface[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "road_surface", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="road_surface", + ) + + +def _speed_limits_min_length_check() -> Check: + return Check( + field="speed_limits_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("speed_limits"), 1), + ), + shape=CheckShape.SCALAR, + root_field="speed_limits", + ) + + +def _speed_limits_unique_check() -> Check: + return Check( + field="speed_limits_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("speed_limits")) + ), + shape=CheckShape.SCALAR, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_required_check() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["max_speed"].isNotNull(), + check_required(el["max_speed"]["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_bounds_check() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_bounds(el["max_speed"]["value"], ge=1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_value_bounds_check_1() -> Check: + return Check( + field="speed_limits[].max_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + 
"speed_limits", + lambda el: check_bounds(el["max_speed"]["value"], le=350), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_unit_required_check() -> Check: + return Check( + field="speed_limits[].max_speed.unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["max_speed"].isNotNull(), check_required(el["max_speed"]["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_max_speed_unit_enum_check() -> Check: + return Check( + field="speed_limits[].max_speed.unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_enum(el["max_speed"]["unit"], ["mph", "km/h"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_required_check() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["min_speed"].isNotNull(), + check_required(el["min_speed"]["value"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_bounds_check() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_bounds(el["min_speed"]["value"], ge=1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_value_bounds_check_1() -> Check: + return Check( + field="speed_limits[].min_speed.value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_bounds(el["min_speed"]["value"], le=350), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def 
_speed_limits_min_speed_unit_required_check() -> Check: + return Check( + field="speed_limits[].min_speed.unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: F.when( + el["min_speed"].isNotNull(), check_required(el["min_speed"]["unit"]) + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_min_speed_unit_enum_check() -> Check: + return Check( + field="speed_limits[].min_speed.unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_enum(el["min_speed"]["unit"], ["mph", "km/h"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_length_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_bounds_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_between_linear_range_order_check() -> Check: + return Check( + field="speed_limits[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_heading_check() -> Check: + return Check( + field="speed_limits[].when.heading", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + 
lambda el: check_enum(el["when"]["heading"], ["forward", "backward"]), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_min_length_check() -> Check: + return Check( + field="speed_limits[].when.mode_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_array_min_length(el["when"]["mode"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_unique_check() -> Check: + return Check( + field="speed_limits[].when.mode_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["mode"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_mode_check() -> Check: + return Check( + field="speed_limits[].when.mode[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["mode"], + lambda inner: check_enum( + inner, + [ + "vehicle", + "motor_vehicle", + "car", + "truck", + "motorcycle", + "foot", + "bicycle", + "bus", + "hgv", + "hov", + "emergency", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_min_length_check() -> Check: + return Check( + field="speed_limits[].when.using_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["using"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_unique_check() -> Check: + return Check( + field="speed_limits[].when.using_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: 
check_struct_unique(el["when"]["using"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_using_check() -> Check: + return Check( + field="speed_limits[].when.using[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["using"], + lambda inner: check_enum( + inner, + [ + "as_customer", + "at_destination", + "to_deliver", + "to_farm", + "for_forestry", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_min_length_check() -> Check: + return Check( + field="speed_limits[].when.recognized_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["recognized"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_unique_check() -> Check: + return Check( + field="speed_limits[].when.recognized_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["recognized"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_recognized_check() -> Check: + return Check( + field="speed_limits[].when.recognized[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["recognized"], + lambda inner: check_enum( + inner, + [ + "as_permitted", + "as_private", + "as_disabled", + "as_employee", + "as_student", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_min_length_check() -> Check: + return Check( + field="speed_limits[].when.vehicle_min_length", + name="array_min_length", + expr=F.when( + 
F.col("subtype").isin(["road"]), + array_check( + "speed_limits", + lambda el: check_array_min_length(el["when"]["vehicle"], 1), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unique_check() -> Check: + return Check( + field="speed_limits[].when.vehicle_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "speed_limits", lambda el: check_struct_unique(el["when"]["vehicle"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_dimension_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].dimension", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["dimension"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_dimension_enum_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].dimension", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["dimension"], + ["axle_count", "height", "length", "weight", "width"], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_comparison_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].comparison", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_required(inner["comparison"]), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_comparison_enum_check() -> Check: + return Check( + 
field="speed_limits[].when.vehicle[].comparison", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_enum( + inner["comparison"], + [ + "greater_than", + "greater_than_equal", + "equal", + "less_than", + "less_than_equal", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["axle_count"]), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_required_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_required(inner["value"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_value_bounds_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin( + ["height", "length", "weight", "width"] + ), + check_bounds(inner["value"], ge=0.0), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_required_check() -> Check: + return Check( + 
field="speed_limits[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_enum_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["height", "length", "width"]), + check_enum( + inner["unit"], ["in", "ft", "yd", "mi", "cm", "m", "km"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_required_check_1() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_required(inner["unit"]), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_unit_enum_check_1() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit", + name="enum", + expr=F.when( + F.col("subtype").isin(["road"]), + nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: F.when( + inner["dimension"].isin(["weight"]), + check_enum( + inner["unit"], ["oz", "lb", "st", "lt", "g", "kg", "t"] + ), + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _subclass_check() -> Check: + return Check( + field="subclass", + name="enum", + expr=F.when( + 
F.col("subtype").isin(["road"]), + check_enum( + F.col("subclass"), + [ + "link", + "sidewalk", + "crosswalk", + "parking_aisle", + "driveway", + "alley", + "cycle_crossing", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="subclass", + ) + + +def _width_rules_min_length_check() -> Check: + return Check( + field="width_rules_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["road"]), + check_array_min_length(F.col("width_rules"), 1), + ), + shape=CheckShape.SCALAR, + root_field="width_rules", + ) + + +def _width_rules_unique_check() -> Check: + return Check( + field="width_rules_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["road"]), check_struct_unique(F.col("width_rules")) + ), + shape=CheckShape.SCALAR, + root_field="width_rules", + ) + + +def _width_rules_value_required_check() -> Check: + return Check( + field="width_rules[].value", + name="required", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("width_rules", lambda el: check_required(el["value"])), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_value_bounds_check() -> Check: + return Check( + field="width_rules[].value", + name="bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check("width_rules", lambda el: check_bounds(el["value"], gt=0.0)), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_length_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_bounds_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", 
lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _width_rules_between_linear_range_order_check() -> Check: + return Check( + field="width_rules[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["road"]), + array_check( + "width_rules", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="width_rules", + ) + + +def _class_required_check_1() -> Check: + return Check( + field="class", + name="required", + expr=F.when(F.col("subtype").isin(["rail"]), check_required(F.col("class"))), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _class_enum_check_1() -> Check: + return Check( + field="class", + name="enum", + expr=F.when( + F.col("subtype").isin(["rail"]), + check_enum( + F.col("class"), + [ + "funicular", + "light_rail", + "monorail", + "narrow_gauge", + "standard_gauge", + "subway", + "tram", + "unknown", + ], + ), + ), + shape=CheckShape.SCALAR, + root_field="class", + ) + + +def _rail_flags_min_length_check() -> Check: + return Check( + field="rail_flags_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + check_array_min_length(F.col("rail_flags"), 1), + ), + shape=CheckShape.SCALAR, + root_field="rail_flags", + ) + + +def _rail_flags_unique_check() -> Check: + return Check( + field="rail_flags_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["rail"]), check_struct_unique(F.col("rail_flags")) + ), + shape=CheckShape.SCALAR, + root_field="rail_flags", + ) + + +def _rail_flags_values_check() -> Check: + return Check( + field="rail_flags[].values", + name="required", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check("rail_flags", lambda el: check_required(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_min_length_check() -> Check: + return Check( + 
field="rail_flags[].values_min_length", + name="array_min_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_array_min_length(el["values"], 1) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_unique_check() -> Check: + return Check( + field="rail_flags[].values_unique", + name="struct_unique", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check("rail_flags", lambda el: check_struct_unique(el["values"])), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_values_check_1() -> Check: + return Check( + field="rail_flags[].values[]", + name="enum", + expr=F.when( + F.col("subtype").isin(["rail"]), + nested_array_check( + "rail_flags", + lambda el: array_check( + el["values"], + lambda inner: check_enum( + inner, + [ + "is_bridge", + "is_tunnel", + "is_under_construction", + "is_abandoned", + "is_covered", + "is_passenger", + "is_freight", + "is_disused", + ], + ), + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_length_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_length", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_linear_range_length(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_bounds_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_bounds", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + "rail_flags", lambda el: check_linear_range_bounds(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _rail_flags_between_linear_range_order_check() -> Check: + return Check( + field="rail_flags[].between", + name="linear_range_order", + expr=F.when( + F.col("subtype").isin(["rail"]), + array_check( + 
"rail_flags", lambda el: check_linear_range_order(el["between"]) + ), + ), + shape=CheckShape.ARRAY, + root_field="rail_flags", + ) + + +def _access_restrictions_when_vehicle_check_forbid_if_0_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_1_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_2_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_3_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, 
+ root_field="access_restrictions", + ) + + +def _access_restrictions_when_vehicle_check_require_if_4_check() -> Check: + return Check( + field="access_restrictions[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "access_restrictions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _access_restrictions_when_check_require_any_of_5_check() -> Check: + return Check( + field="access_restrictions[].when", + name="require_any_of", + expr=array_check( + "access_restrictions", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="access_restrictions", + ) + + +def _destinations_check_require_any_of_6_check() -> Check: + return Check( + field="destinations[]", + name="require_any_of", + expr=array_check( + "destinations", + lambda el: check_require_any_of( + [el["labels"], el["symbols"]], ["labels", "symbols"] + ), + ), + shape=CheckShape.ARRAY, + root_field="destinations", + ) + + +def _prohibited_transitions_when_vehicle_check_forbid_if_7_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_8_check() -> Check: + return Check( + 
field="prohibited_transitions[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_9_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_10_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_vehicle_check_require_if_11_check() -> Check: + return Check( + field="prohibited_transitions[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "prohibited_transitions", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _prohibited_transitions_when_check_require_any_of_12_check() -> Check: + return Check( + field="prohibited_transitions[].when", + 
name="require_any_of", + expr=array_check( + "prohibited_transitions", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="prohibited_transitions", + ) + + +def _speed_limits_when_vehicle_check_forbid_if_13_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_forbidden", + name="forbid_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_forbid_if( + inner["unit"], + inner["dimension"] == "axle_count", + "dimension = 'axle_count'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_14_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_0", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "height", + "dimension = 'height'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_15_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_1", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "length", + "dimension = 'length'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_16_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_2", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + 
el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], + inner["dimension"] == "weight", + "dimension = 'weight'", + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_vehicle_check_require_if_17_check() -> Check: + return Check( + field="speed_limits[].when.vehicle[].unit_required_3", + name="require_if", + expr=nested_array_check( + "speed_limits", + lambda el: array_check( + el["when"]["vehicle"], + lambda inner: check_require_if( + inner["unit"], inner["dimension"] == "width", "dimension = 'width'" + ), + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_when_check_require_any_of_18_check() -> Check: + return Check( + field="speed_limits[].when", + name="require_any_of", + expr=array_check( + "speed_limits", + lambda el: check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _speed_limits_check_require_any_of_19_check() -> Check: + return Check( + field="speed_limits[]", + name="require_any_of", + expr=array_check( + "speed_limits", + lambda el: check_require_any_of( + [el["max_speed"]["value"], el["min_speed"]["value"]], + ["max_speed.value", "min_speed.value"], + ), + ), + shape=CheckShape.ARRAY, + root_field="speed_limits", + ) + + +def _check_forbid_if_20_check() -> Check: + return Check( + field="class_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("class"), F.col("subtype") == "water", "subtype = 'water'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_21_check() -> Check: + return Check( + field="class_required_0", + name="require_if", + expr=check_require_if( + F.col("class"), F.col("subtype") == "rail", "subtype = 'rail'" + ), + 
shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_require_if_22_check() -> Check: + return Check( + field="class_required_1", + name="require_if", + expr=check_require_if( + F.col("class"), F.col("subtype") == "road", "subtype = 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_23_check() -> Check: + return Check( + field="destinations_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("destinations"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_24_check() -> Check: + return Check( + field="prohibited_transitions_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("prohibited_transitions"), + F.col("subtype") != "road", + "subtype != 'road'", + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_25_check() -> Check: + return Check( + field="road_flags_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("road_flags"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_26_check() -> Check: + return Check( + field="road_surface_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("road_surface"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_27_check() -> Check: + return Check( + field="speed_limits_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("speed_limits"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_28_check() -> Check: + return Check( + field="subclass_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("subclass"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_29_check() -> Check: + return Check( + 
field="width_rules_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("width_rules"), F.col("subtype") != "road", "subtype != 'road'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def _check_forbid_if_30_check() -> Check: + return Check( + field="rail_flags_forbidden", + name="forbid_if", + expr=check_forbid_if( + F.col("rail_flags"), F.col("subtype") != "rail", "subtype != 'rail'" + ), + shape=CheckShape.SCALAR, + root_field=None, + ) + + +def segment_checks() -> list[Check]: + """All validation checks for segment.""" + return [ + _id_required_check(), + _id_string_min_length_check(), + _id_no_whitespace_check(), + _bbox_bbox_completeness_check(), + _bbox_bbox_lat_ordering_check(), + _bbox_bbox_lat_range_check(), + _geometry_required_check(), + _geometry_geometry_type_check(), + _theme_required_check(), + _theme_enum_check(), + _type_required_check(), + _type_enum_check(), + _version_required_check(), + _version_bounds_check(), + _sources_min_length_check(), + _sources_unique_check(), + _sources_property_required_check(), + _sources_property_json_pointer_check(), + _sources_dataset_check(), + _sources_license_check(), + _sources_confidence_bounds_check(), + _sources_confidence_bounds_check_1(), + _sources_between_linear_range_length_check(), + _sources_between_linear_range_bounds_check(), + _sources_between_linear_range_order_check(), + _subtype_required_check(), + _subtype_enum_check(), + _access_restrictions_min_length_check(), + _access_restrictions_unique_check(), + _access_restrictions_access_type_required_check(), + _access_restrictions_access_type_enum_check(), + _access_restrictions_between_linear_range_length_check(), + _access_restrictions_between_linear_range_bounds_check(), + _access_restrictions_between_linear_range_order_check(), + _access_restrictions_when_heading_check(), + _access_restrictions_when_mode_min_length_check(), + _access_restrictions_when_mode_unique_check(), + _access_restrictions_when_mode_check(), + 
_access_restrictions_when_using_min_length_check(), + _access_restrictions_when_using_unique_check(), + _access_restrictions_when_using_check(), + _access_restrictions_when_recognized_min_length_check(), + _access_restrictions_when_recognized_unique_check(), + _access_restrictions_when_recognized_check(), + _access_restrictions_when_vehicle_min_length_check(), + _access_restrictions_when_vehicle_unique_check(), + _access_restrictions_when_vehicle_dimension_required_check(), + _access_restrictions_when_vehicle_dimension_enum_check(), + _access_restrictions_when_vehicle_comparison_required_check(), + _access_restrictions_when_vehicle_comparison_enum_check(), + _access_restrictions_when_vehicle_value_check(), + _access_restrictions_when_vehicle_value_required_check(), + _access_restrictions_when_vehicle_value_bounds_check(), + _access_restrictions_when_vehicle_unit_required_check(), + _access_restrictions_when_vehicle_unit_enum_check(), + _access_restrictions_when_vehicle_unit_required_check_1(), + _access_restrictions_when_vehicle_unit_enum_check_1(), + _connectors_min_length_check(), + _connectors_unique_check(), + _connectors_connector_id_required_check(), + _connectors_connector_id_string_min_length_check(), + _connectors_connector_id_no_whitespace_check(), + _connectors_at_bounds_check(), + _connectors_at_bounds_check_1(), + _level_rules_value_check(), + _level_rules_between_linear_range_length_check(), + _level_rules_between_linear_range_bounds_check(), + _level_rules_between_linear_range_order_check(), + _routes_name_string_min_length_check(), + _routes_name_stripped_check(), + _routes_network_string_min_length_check(), + _routes_network_stripped_check(), + _routes_ref_string_min_length_check(), + _routes_ref_stripped_check(), + _routes_symbol_string_min_length_check(), + _routes_symbol_stripped_check(), + _routes_wikidata_check(), + _routes_between_linear_range_length_check(), + _routes_between_linear_range_bounds_check(), + 
_routes_between_linear_range_order_check(), + _subclass_rules_value_required_check(), + _subclass_rules_value_enum_check(), + _subclass_rules_between_linear_range_length_check(), + _subclass_rules_between_linear_range_bounds_check(), + _subclass_rules_between_linear_range_order_check(), + _names_primary_required_check(), + _names_primary_string_min_length_check(), + _names_primary_stripped_check(), + _names_rules_value_required_check(), + _names_rules_value_string_min_length_check(), + _names_rules_value_stripped_check(), + _names_rules_variant_required_check(), + _names_rules_variant_enum_check(), + _names_rules_language_check(), + _names_rules_perspectives_mode_required_check(), + _names_rules_perspectives_mode_enum_check(), + _names_rules_perspectives_countries_check(), + _names_rules_perspectives_countries_min_length_check(), + _names_rules_perspectives_countries_unique_check(), + _names_rules_perspectives_countries_check_1(), + _names_rules_between_linear_range_length_check(), + _names_rules_between_linear_range_bounds_check(), + _names_rules_between_linear_range_order_check(), + _names_rules_side_check(), + _class_required_check(), + _class_enum_check(), + _destinations_from_connector_id_required_check(), + _destinations_from_connector_id_string_min_length_check(), + _destinations_from_connector_id_no_whitespace_check(), + _destinations_to_connector_id_required_check(), + _destinations_to_connector_id_string_min_length_check(), + _destinations_to_connector_id_no_whitespace_check(), + _destinations_to_segment_id_required_check(), + _destinations_to_segment_id_string_min_length_check(), + _destinations_to_segment_id_no_whitespace_check(), + _destinations_final_heading_required_check(), + _destinations_final_heading_enum_check(), + _destinations_labels_min_length_check(), + _destinations_labels_unique_check(), + _destinations_labels_value_required_check(), + _destinations_labels_value_string_min_length_check(), + _destinations_labels_value_stripped_check(), + 
_destinations_labels_type_required_check(), + _destinations_labels_type_enum_check(), + _destinations_symbols_unique_check(), + _destinations_symbols_check(), + _destinations_when_heading_required_check(), + _destinations_when_heading_enum_check(), + _prohibited_transitions_sequence_check(), + _prohibited_transitions_sequence_min_length_check(), + _prohibited_transitions_sequence_unique_check(), + _prohibited_transitions_sequence_connector_id_required_check(), + _prohibited_transitions_sequence_connector_id_string_min_length_check(), + _prohibited_transitions_sequence_connector_id_no_whitespace_check(), + _prohibited_transitions_sequence_segment_id_required_check(), + _prohibited_transitions_sequence_segment_id_string_min_length_check(), + _prohibited_transitions_sequence_segment_id_no_whitespace_check(), + _prohibited_transitions_final_heading_required_check(), + _prohibited_transitions_final_heading_enum_check(), + _prohibited_transitions_between_linear_range_length_check(), + _prohibited_transitions_between_linear_range_bounds_check(), + _prohibited_transitions_between_linear_range_order_check(), + _prohibited_transitions_when_heading_check(), + _prohibited_transitions_when_mode_min_length_check(), + _prohibited_transitions_when_mode_unique_check(), + _prohibited_transitions_when_mode_check(), + _prohibited_transitions_when_using_min_length_check(), + _prohibited_transitions_when_using_unique_check(), + _prohibited_transitions_when_using_check(), + _prohibited_transitions_when_recognized_min_length_check(), + _prohibited_transitions_when_recognized_unique_check(), + _prohibited_transitions_when_recognized_check(), + _prohibited_transitions_when_vehicle_min_length_check(), + _prohibited_transitions_when_vehicle_unique_check(), + _prohibited_transitions_when_vehicle_dimension_required_check(), + _prohibited_transitions_when_vehicle_dimension_enum_check(), + _prohibited_transitions_when_vehicle_comparison_required_check(), + 
_prohibited_transitions_when_vehicle_comparison_enum_check(), + _prohibited_transitions_when_vehicle_value_check(), + _prohibited_transitions_when_vehicle_value_required_check(), + _prohibited_transitions_when_vehicle_value_bounds_check(), + _prohibited_transitions_when_vehicle_unit_required_check(), + _prohibited_transitions_when_vehicle_unit_enum_check(), + _prohibited_transitions_when_vehicle_unit_required_check_1(), + _prohibited_transitions_when_vehicle_unit_enum_check_1(), + _road_flags_min_length_check(), + _road_flags_unique_check(), + _road_flags_values_check(), + _road_flags_values_min_length_check(), + _road_flags_values_unique_check(), + _road_flags_values_check_1(), + _road_flags_between_linear_range_length_check(), + _road_flags_between_linear_range_bounds_check(), + _road_flags_between_linear_range_order_check(), + _road_surface_min_length_check(), + _road_surface_unique_check(), + _road_surface_value_required_check(), + _road_surface_value_enum_check(), + _road_surface_between_linear_range_length_check(), + _road_surface_between_linear_range_bounds_check(), + _road_surface_between_linear_range_order_check(), + _speed_limits_min_length_check(), + _speed_limits_unique_check(), + _speed_limits_max_speed_value_required_check(), + _speed_limits_max_speed_value_bounds_check(), + _speed_limits_max_speed_value_bounds_check_1(), + _speed_limits_max_speed_unit_required_check(), + _speed_limits_max_speed_unit_enum_check(), + _speed_limits_min_speed_value_required_check(), + _speed_limits_min_speed_value_bounds_check(), + _speed_limits_min_speed_value_bounds_check_1(), + _speed_limits_min_speed_unit_required_check(), + _speed_limits_min_speed_unit_enum_check(), + _speed_limits_between_linear_range_length_check(), + _speed_limits_between_linear_range_bounds_check(), + _speed_limits_between_linear_range_order_check(), + _speed_limits_when_heading_check(), + _speed_limits_when_mode_min_length_check(), + _speed_limits_when_mode_unique_check(), + 
_speed_limits_when_mode_check(), + _speed_limits_when_using_min_length_check(), + _speed_limits_when_using_unique_check(), + _speed_limits_when_using_check(), + _speed_limits_when_recognized_min_length_check(), + _speed_limits_when_recognized_unique_check(), + _speed_limits_when_recognized_check(), + _speed_limits_when_vehicle_min_length_check(), + _speed_limits_when_vehicle_unique_check(), + _speed_limits_when_vehicle_dimension_required_check(), + _speed_limits_when_vehicle_dimension_enum_check(), + _speed_limits_when_vehicle_comparison_required_check(), + _speed_limits_when_vehicle_comparison_enum_check(), + _speed_limits_when_vehicle_value_check(), + _speed_limits_when_vehicle_value_required_check(), + _speed_limits_when_vehicle_value_bounds_check(), + _speed_limits_when_vehicle_unit_required_check(), + _speed_limits_when_vehicle_unit_enum_check(), + _speed_limits_when_vehicle_unit_required_check_1(), + _speed_limits_when_vehicle_unit_enum_check_1(), + _subclass_check(), + _width_rules_min_length_check(), + _width_rules_unique_check(), + _width_rules_value_required_check(), + _width_rules_value_bounds_check(), + _width_rules_between_linear_range_length_check(), + _width_rules_between_linear_range_bounds_check(), + _width_rules_between_linear_range_order_check(), + _class_required_check_1(), + _class_enum_check_1(), + _rail_flags_min_length_check(), + _rail_flags_unique_check(), + _rail_flags_values_check(), + _rail_flags_values_min_length_check(), + _rail_flags_values_unique_check(), + _rail_flags_values_check_1(), + _rail_flags_between_linear_range_length_check(), + _rail_flags_between_linear_range_bounds_check(), + _rail_flags_between_linear_range_order_check(), + _access_restrictions_when_vehicle_check_forbid_if_0_check(), + _access_restrictions_when_vehicle_check_require_if_1_check(), + _access_restrictions_when_vehicle_check_require_if_2_check(), + _access_restrictions_when_vehicle_check_require_if_3_check(), + 
_access_restrictions_when_vehicle_check_require_if_4_check(), + _access_restrictions_when_check_require_any_of_5_check(), + _destinations_check_require_any_of_6_check(), + _prohibited_transitions_when_vehicle_check_forbid_if_7_check(), + _prohibited_transitions_when_vehicle_check_require_if_8_check(), + _prohibited_transitions_when_vehicle_check_require_if_9_check(), + _prohibited_transitions_when_vehicle_check_require_if_10_check(), + _prohibited_transitions_when_vehicle_check_require_if_11_check(), + _prohibited_transitions_when_check_require_any_of_12_check(), + _speed_limits_when_vehicle_check_forbid_if_13_check(), + _speed_limits_when_vehicle_check_require_if_14_check(), + _speed_limits_when_vehicle_check_require_if_15_check(), + _speed_limits_when_vehicle_check_require_if_16_check(), + _speed_limits_when_vehicle_check_require_if_17_check(), + _speed_limits_when_check_require_any_of_18_check(), + _speed_limits_check_require_any_of_19_check(), + _check_forbid_if_20_check(), + _check_require_if_21_check(), + _check_require_if_22_check(), + _check_forbid_if_23_check(), + _check_forbid_if_24_check(), + _check_forbid_if_25_check(), + _check_forbid_if_26_check(), + _check_forbid_if_27_check(), + _check_forbid_if_28_check(), + _check_forbid_if_29_check(), + _check_forbid_if_30_check(), + ] + + +SEGMENT_SCHEMA = StructType( + [ + StructField("id", StringType(), True), + StructField("bbox", BBOX_STRUCT, True), + StructField("geometry", BinaryType(), True), + StructField("theme", StringType(), True), + StructField("type", StringType(), True), + StructField("version", IntegerType(), True), + StructField( + "sources", + ArrayType( + StructType( + [ + StructField("property", StringType(), True), + StructField("dataset", StringType(), True), + StructField("license", StringType(), True), + StructField("record_id", StringType(), True), + StructField("update_time", StringType(), True), + StructField("confidence", DoubleType(), True), + StructField("between", 
ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField("subtype", StringType(), True), + StructField( + "access_restrictions", + ArrayType( + StructType( + [ + StructField("access_type", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True + ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + ), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "connectors", + ArrayType( + StructType( + [ + StructField("connector_id", StringType(), True), + StructField("at", DoubleType(), True), + ] + ), + True, + ), + True, + ), + StructField( + "level_rules", + ArrayType( + StructType( + [ + StructField("value", IntegerType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "routes", + ArrayType( + StructType( + [ + StructField("name", StringType(), True), + StructField("network", StringType(), True), + StructField("ref", StringType(), True), + StructField("symbol", StringType(), True), + StructField("wikidata", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "subclass_rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + 
StructField( + "names", + StructType( + [ + StructField("primary", StringType(), True), + StructField( + "common", MapType(StringType(), StringType(), True), True + ), + StructField( + "rules", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("variant", StringType(), True), + StructField("language", StringType(), True), + StructField( + "perspectives", + StructType( + [ + StructField("mode", StringType(), True), + StructField( + "countries", + ArrayType(StringType(), True), + True, + ), + ] + ), + True, + ), + StructField( + "between", ArrayType(DoubleType(), True), True + ), + StructField("side", StringType(), True), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + StructField("class", StringType(), True), + StructField( + "destinations", + ArrayType( + StructType( + [ + StructField("from_connector_id", StringType(), True), + StructField("to_connector_id", StringType(), True), + StructField("to_segment_id", StringType(), True), + StructField("final_heading", StringType(), True), + StructField( + "labels", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("type", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("symbols", ArrayType(StringType(), True), True), + StructField( + "when", + StructType([StructField("heading", StringType(), True)]), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "prohibited_transitions", + ArrayType( + StructType( + [ + StructField( + "sequence", + ArrayType( + StructType( + [ + StructField("connector_id", StringType(), True), + StructField("segment_id", StringType(), True), + ] + ), + True, + ), + True, + ), + StructField("final_heading", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True 
+ ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + ), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField( + "road_flags", + ArrayType( + StructType( + [ + StructField("values", ArrayType(StringType(), True), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "road_surface", + ArrayType( + StructType( + [ + StructField("value", StringType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "speed_limits", + ArrayType( + StructType( + [ + StructField( + "max_speed", + StructType( + [ + StructField("value", IntegerType(), True), + StructField("unit", StringType(), True), + ] + ), + True, + ), + StructField( + "min_speed", + StructType( + [ + StructField("value", IntegerType(), True), + StructField("unit", StringType(), True), + ] + ), + True, + ), + StructField("is_max_speed_variable", BooleanType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + StructField( + "when", + StructType( + [ + StructField("heading", StringType(), True), + StructField("during", StringType(), True), + StructField( + "mode", ArrayType(StringType(), True), True + ), + StructField( + "using", ArrayType(StringType(), True), True + ), + StructField( + "recognized", + ArrayType(StringType(), True), + True, + ), + StructField( + "vehicle", + ArrayType( + StructType( + [ + StructField( + "dimension", StringType(), True + ), + StructField( + "comparison", StringType(), True + ), + StructField( + "value", DoubleType(), True + 
), + StructField( + "unit", StringType(), True + ), + ] + ), + True, + ), + True, + ), + ] + ), + True, + ), + ] + ), + True, + ), + True, + ), + StructField("subclass", StringType(), True), + StructField( + "width_rules", + ArrayType( + StructType( + [ + StructField("value", DoubleType(), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + StructField( + "rail_flags", + ArrayType( + StructType( + [ + StructField("values", ArrayType(StringType(), True), True), + StructField("between", ArrayType(DoubleType(), True), True), + ] + ), + True, + ), + True, + ), + ] +) + +GEOMETRY_TYPES: tuple[GeometryType, ...] = (GeometryType.LINE_STRING,) + +ENTRY_POINT = "overture.schema.transportation:Segment" + +PARTITIONS: dict[str, str] = {"theme": "transportation"} + +FEATURE_VALIDATION = FeatureValidation( + schema=SEGMENT_SCHEMA, + checks=segment_checks, + geometry_types=GEOMETRY_TYPES, +) diff --git a/packages/overture-schema-pyspark/tests/generated/__init__.py b/packages/overture-schema-pyspark/tests/generated/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py new file mode 
100644 index 000000000..b8da5893d --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -0,0 +1,462 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for address.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.addresses.address import ( + ADDRESS_SCHEMA, + address_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "9b51bb94-b26f-5f88-ad00-affc1e8f1935", + "geometry": "POINT (0 0)", + "theme": "addresses", + "type": "address", + "version": 0, + "country": "US", +} + + +BASE_ROW_POPULATED: dict = { + "id": "9b51bb94-b26f-5f88-ad00-affc1e8f1935", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "addresses", + "type": "address", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "address_levels": [{"value": "a"}], + "country": "US", + "number": "a", + "postal_city": "a", + "postcode": "a", + "street": "a", + "unit": "a", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="address::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="address::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="address::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + 
expected_check="no_whitespace", + ), + Scenario( + id="address::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="address::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="address::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="address::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="address::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="address::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="address::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="address::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="address::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="address::version:required", + scaffold={}, + mutate=set_at_path("version", None), + 
expected_field="version", + expected_check="required", + ), + Scenario( + id="address::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="address::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="address::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="address::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="address::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="address::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="address::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="address::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + 
mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="address::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="address::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="address::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="address::address_levels_min_length:array_min_length", + scaffold={"address_levels": [{}]}, + mutate=set_at_path("address_levels", []), + expected_field="address_levels_min_length", + expected_check="array_min_length", + ), + Scenario( + id="address::address_levels_max_length:array_max_length", + scaffold={"address_levels": [{}]}, + mutate=set_at_path("address_levels", [{}, {}, {}, {}, {}, {}]), + expected_field="address_levels_max_length", + expected_check="array_max_length", + ), + Scenario( + id="address::address_levels[].value:string_min_length", + scaffold={"address_levels": [{"value": "a"}]}, + mutate=set_at_path("address_levels[].value", ""), + expected_field="address_levels[].value", + expected_check="string_min_length", + ), + Scenario( + id="address::address_levels[].value:stripped", + scaffold={"address_levels": [{"value": "a"}]}, + mutate=set_at_path("address_levels[].value", " has spaces "), + 
expected_field="address_levels[].value", + expected_check="stripped", + ), + Scenario( + id="address::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="address::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="address::number:string_min_length", + scaffold={"number": "a"}, + mutate=set_at_path("number", ""), + expected_field="number", + expected_check="string_min_length", + ), + Scenario( + id="address::number:stripped", + scaffold={"number": "a"}, + mutate=set_at_path("number", " has spaces "), + expected_field="number", + expected_check="stripped", + ), + Scenario( + id="address::postal_city:string_min_length", + scaffold={"postal_city": "a"}, + mutate=set_at_path("postal_city", ""), + expected_field="postal_city", + expected_check="string_min_length", + ), + Scenario( + id="address::postal_city:stripped", + scaffold={"postal_city": "a"}, + mutate=set_at_path("postal_city", " has spaces "), + expected_field="postal_city", + expected_check="stripped", + ), + Scenario( + id="address::postcode:string_min_length", + scaffold={"postcode": "a"}, + mutate=set_at_path("postcode", ""), + expected_field="postcode", + expected_check="string_min_length", + ), + Scenario( + id="address::postcode:stripped", + scaffold={"postcode": "a"}, + mutate=set_at_path("postcode", " has spaces "), + expected_field="postcode", + expected_check="stripped", + ), + Scenario( + id="address::street:string_min_length", + scaffold={"street": "a"}, + mutate=set_at_path("street", ""), + expected_field="street", + expected_check="string_min_length", + ), + Scenario( + id="address::street:stripped", + scaffold={"street": "a"}, + mutate=set_at_path("street", " has spaces "), + expected_field="street", + expected_check="stripped", + ), + Scenario( + 
id="address::unit:string_min_length", + scaffold={"unit": "a"}, + mutate=set_at_path("unit", ""), + expected_field="unit", + expected_check="string_min_length", + ), + Scenario( + id="address::unit:stripped", + scaffold={"unit": "a"}, + mutate=set_at_path("unit", " has spaces "), + expected_field="unit", + expected_check="stripped", + ), + Scenario( + id="address::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return address_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + ADDRESS_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="address", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + ADDRESS_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="address", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("address::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. 
+ + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("address::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py new file mode 100644 index 000000000..168630de6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/annex/test_sources.py @@ -0,0 +1,843 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for sources.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.annex.sources import ( + SOURCES_SCHEMA, + sources_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ], + "license_priority": {}, +} + + +BASE_ROW_POPULATED: dict = { + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "inception_date": "2024-01-01", + "url": "https://example.com/", + "url_archived": "https://example.com/", + "data_download_url": ["https://example.com/"], + "countries": ["US"], + "coverage_description": "", + "data_layer_name": "", + "oa_path": [""], + "address_levels": [""], + "file_format": "", + "update_frequency": "", + "build_source": "OpenAddresses", + "update_type": "continuous", + "update_schedule": [""], + "known_issues": "", + "notes": "", + "requires_attribution": "", + } + ], + "license_priority": {}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="sources::datasets:required", + scaffold={}, + mutate=set_at_path("datasets", None), + expected_field="datasets", + expected_check="required", + 
), + Scenario( + id="sources::datasets[].source_name:required", + scaffold={ + "datasets": [ + { + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "source_name": "", + } + ] + }, + mutate=set_at_path("datasets[].source_name", None), + expected_field="datasets[].source_name", + expected_check="required", + ), + Scenario( + id="sources::datasets[].source_dataset_name:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "source_dataset_name": "", + } + ] + }, + mutate=set_at_path("datasets[].source_dataset_name", None), + expected_field="datasets[].source_dataset_name", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url", None), + expected_field="datasets[].data_url", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + 
"license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url", "not-a-url"), + expected_field="datasets[].data_url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].data_url", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].data_url_archived:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url_archived", None), + expected_field="datasets[].data_url_archived", + expected_check="required", + ), + Scenario( + id="sources::datasets[].data_url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 
0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].data_url_archived", "not-a-url"), + expected_field="datasets[].data_url_archived", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].data_url_archived", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_url:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url", None), + expected_field="datasets[].license_url", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url", "not-a-url"), + 
expected_field="datasets[].license_url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].license_url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].license_url", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].license_url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_url_archived:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url_archived", None), + expected_field="datasets[].license_url_archived", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].license_url_archived", "not-a-url"), + expected_field="datasets[].license_url_archived", + expected_check="url_format", + ), + Scenario( + 
id="sources::datasets[].license_url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].license_url_archived", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].license_url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].license_type:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_type": "", + } + ] + }, + mutate=set_at_path("datasets[].license_type", None), + expected_field="datasets[].license_type", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_text:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_text": "", + } + ] + }, + mutate=set_at_path("datasets[].license_text", None), + expected_field="datasets[].license_text", + expected_check="required", + ), + Scenario( + id="sources::datasets[].license_attribution:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", 
+ "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "license_attribution": "", + } + ] + }, + mutate=set_at_path("datasets[].license_attribution", None), + expected_field="datasets[].license_attribution", + expected_check="required", + ), + Scenario( + id="sources::datasets[].coverage_bbox:required", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", None), + expected_field="datasets[].coverage_bbox", + expected_check="required", + ), + Scenario( + id="sources::datasets[].coverage_bbox_min_length:array_min_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", []), + expected_field="datasets[].coverage_bbox_min_length", + expected_check="array_min_length", + ), + Scenario( + id="sources::datasets[].coverage_bbox_max_length:array_max_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + 
"license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + } + ] + }, + mutate=set_at_path("datasets[].coverage_bbox", [{}, {}, {}, {}, {}]), + expected_field="datasets[].coverage_bbox_max_length", + expected_check="array_max_length", + ), + Scenario( + id="sources::datasets[].url:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].url", "not-a-url"), + expected_field="datasets[].url", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].url:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].url", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].url", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].url_archived:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path("datasets[].url_archived", "not-a-url"), + expected_field="datasets[].url_archived", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].url_archived:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "url_archived": "https://example.com/", + } + ] + }, + mutate=set_at_path( + "datasets[].url_archived", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].url_archived", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].data_download_url[]:url_format", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": ["https://example.com/"], + } + ] + }, + mutate=set_at_path("datasets[].data_download_url[]", "not-a-url"), + expected_field="datasets[].data_download_url[]", + expected_check="url_format", + ), + Scenario( + id="sources::datasets[].data_download_url[]:url_length", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "data_download_url": ["https://example.com/"], + } + ] + }, + mutate=set_at_path( + "datasets[].data_download_url[]", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="datasets[].data_download_url[]", + expected_check="url_length", + ), + Scenario( + id="sources::datasets[].countries[]:country_code_alpha2", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "countries": ["US"], + } + ] + }, + mutate=set_at_path("datasets[].countries[]", "99"), + expected_field="datasets[].countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="sources::datasets[].build_source:enum", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "build_source": "OpenAddresses", + } + ] + }, + mutate=set_at_path("datasets[].build_source", "__INVALID__"), + expected_field="datasets[].build_source", + expected_check="enum", + ), + Scenario( + id="sources::datasets[].update_type:enum", + scaffold={ + "datasets": [ + { + "source_name": "", + "source_dataset_name": "", + "data_url": "https://example.com/", + "data_url_archived": "https://example.com/", + "license_url": "https://example.com/", + "license_url_archived": "https://example.com/", + "license_type": "", + "license_text": "", + "license_attribution": "", + "coverage_bbox": [0.0, 0.0, 0.0, 0.0], + "update_type": "continuous", + } + ] + }, + mutate=set_at_path("datasets[].update_type", "__INVALID__"), + expected_field="datasets[].update_type", 
+ expected_check="enum", + ), + Scenario( + id="sources::license_priority:required", + scaffold={}, + mutate=set_at_path("license_priority", None), + expected_field="license_priority", + expected_check="required", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return sources_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SOURCES_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="sources", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SOURCES_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="sources", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("sources::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("sources::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py new file mode 100644 index 000000000..eddc5ff2a --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -0,0 +1,401 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for bathymetry.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.bathymetry import ( + BATHYMETRY_SCHEMA, + bathymetry_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "e1c02779-55d2-5d7e-8673-b7de1642ae68", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "bathymetry", + "version": 0, + "depth": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "e1c02779-55d2-5d7e-8673-b7de1642ae68", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "bathymetry", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "depth": 0, + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="bathymetry::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="bathymetry::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="bathymetry::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="bathymetry::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 
0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="bathymetry::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="bathymetry::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="bathymetry::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="bathymetry::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="bathymetry::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="bathymetry::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="bathymetry::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="bathymetry::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="bathymetry::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="bathymetry::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + 
expected_check="bounds", + ), + Scenario( + id="bathymetry::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="bathymetry::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="bathymetry::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="bathymetry::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="bathymetry::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="bathymetry::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + 
id="bathymetry::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="bathymetry::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="bathymetry::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="bathymetry::depth:required", + scaffold={}, + mutate=set_at_path("depth", None), + expected_field="depth", + expected_check="required", + ), + Scenario( + id="bathymetry::depth:bounds", + scaffold={}, + mutate=set_at_path("depth", -1), + expected_field="depth", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + 
id="bathymetry::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="bathymetry::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return bathymetry_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BATHYMETRY_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="bathymetry", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BATHYMETRY_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="bathymetry", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("bathymetry::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("bathymetry::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py new file mode 100644 index 000000000..ff98049f6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -0,0 +1,650 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for infrastructure.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.infrastructure import ( + INFRASTRUCTURE_SCHEMA, + infrastructure_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "e6cc8648-6bf9-5147-994f-621f86c9f103", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "infrastructure", + "version": 0, + "class": "aerialway_station", + "subtype": "aerialway", +} + + +BASE_ROW_POPULATED: dict = { + "id": "e6cc8648-6bf9-5147-994f-621f86c9f103", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "infrastructure", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "aerialway_station", + "subtype": "aerialway", + "height": 1.0, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="infrastructure::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="infrastructure::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + 
Scenario( + id="infrastructure::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="infrastructure::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="infrastructure::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="infrastructure::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="infrastructure::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="infrastructure::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="infrastructure::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="infrastructure::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="infrastructure::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="infrastructure::type:enum", + scaffold={}, + 
mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="infrastructure::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="infrastructure::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="infrastructure::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="infrastructure::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="infrastructure::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="infrastructure::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="infrastructure::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 
-1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="infrastructure::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="infrastructure::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="infrastructure::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="infrastructure::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="infrastructure::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + 
Scenario( + id="infrastructure::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="infrastructure::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + expected_field="surface", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="infrastructure::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="infrastructure::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="infrastructure::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="infrastructure::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="infrastructure::names.rules[].variant:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + 
expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="infrastructure::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + 
expected_check="linear_range_order", + ), + Scenario( + id="infrastructure::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="infrastructure::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="infrastructure::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return infrastructure_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + INFRASTRUCTURE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="infrastructure", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + INFRASTRUCTURE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="infrastructure", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse 
base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("infrastructure::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("infrastructure::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py new file mode 100644 index 000000000..6b07a4fdc --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -0,0 +1,634 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for land.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land import ( + LAND_SCHEMA, + land_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "52a8b331-e001-5c79-8dab-dba632af0028", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "52a8b331-e001-5c79-8dab-dba632af0028", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "archipelago", + "subtype": "crater", + "elevation": 9000, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land::id:required", + scaffold={}, + mutate=set_at_path("id", 
None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land::type:required", + scaffold={}, + mutate=set_at_path("type", None), + 
expected_field="type", + expected_check="required", + ), + Scenario( + id="land::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="land::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="land::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + 
mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land::class:enum", + scaffold={"class": "archipelago"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="land::subtype:enum", + scaffold={"subtype": "crater"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land::elevation:bounds", + scaffold={"elevation": 9000}, + mutate=set_at_path("elevation", 9001), + expected_field="elevation", + expected_check="bounds", + ), + Scenario( + id="land::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + 
expected_field="surface", + expected_check="enum", + ), + Scenario( + id="land::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="land::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="land::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="land::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="land::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="land::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="land::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="land::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + 
mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="land::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="land::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="land::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="land::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="land::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), 
+ expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="land::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="land::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + 
expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="land::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="land::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("land::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py new file mode 100644 index 000000000..c2783e05c --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ -0,0 +1,401 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for land_cover.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land_cover import ( + LAND_COVER_SCHEMA, + land_cover_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "b03200e9-9f2f-52ac-bae8-e562b3fd26cc", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "land_cover", + "version": 0, + "subtype": "barren", +} + + +BASE_ROW_POPULATED: dict = { + "id": "b03200e9-9f2f-52ac-bae8-e562b3fd26cc", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "base", + "type": "land_cover", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "barren", + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land_cover::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land_cover::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land_cover::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land_cover::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + 
"bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land_cover::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land_cover::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land_cover::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land_cover::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land_cover::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land_cover::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land_cover::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="land_cover::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="land_cover::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land_cover::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + 
expected_check="bounds", + ), + Scenario( + id="land_cover::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_cover::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land_cover::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land_cover::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land_cover::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land_cover::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_cover::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + 
id="land_cover::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_cover::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_cover::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_cover::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="land_cover::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land_cover::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + 
id="land_cover::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="land_cover::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_cover_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_COVER_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land_cover", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_COVER_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land_cover", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("land_cover::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land_cover::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py new file mode 100644 index 000000000..f19165178 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -0,0 +1,650 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for land_use.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.land_use import ( + LAND_USE_SCHEMA, + land_use_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "fe1e5b5f-3ae6-5c23-ba83-444a90ccd659", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land_use", + "version": 0, + "class": "aboriginal_land", + "subtype": "agriculture", +} + + +BASE_ROW_POPULATED: dict = { + "id": "fe1e5b5f-3ae6-5c23-ba83-444a90ccd659", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "land_use", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "aboriginal_land", + "subtype": "agriculture", + "elevation": 9000, + "surface": "asphalt", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="land_use::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="land_use::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="land_use::id:no_whitespace", + 
scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="land_use::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="land_use::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="land_use::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="land_use::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="land_use::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="land_use::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="land_use::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="land_use::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="land_use::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + 
id="land_use::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="land_use::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="land_use::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_use::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="land_use::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="land_use::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="land_use::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="land_use::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_use::sources[].confidence:bounds_1", + scaffold={ + 
"sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="land_use::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_use::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_use::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_use::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="land_use::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="land_use::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="land_use::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="land_use::elevation:bounds", + scaffold={"elevation": 9000}, + mutate=set_at_path("elevation", 9001), + expected_field="elevation", + expected_check="bounds", + ), + Scenario( + 
id="land_use::surface:enum", + scaffold={"surface": "asphalt"}, + mutate=set_at_path("surface", "__INVALID__"), + expected_field="surface", + expected_check="enum", + ), + Scenario( + id="land_use::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="land_use::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="land_use::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="land_use::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="land_use::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="land_use::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + 
id="land_use::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="land_use::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="land_use::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + 
"variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="land_use::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="land_use::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + 
expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="land_use::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="land_use::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="land_use::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return land_use_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_USE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="land_use", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + LAND_USE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="land_use", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. 
a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("land_use::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("land_use::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py new file mode 100644 index 000000000..1c460c47f --- /dev/null +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -0,0 +1,620 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for water.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.base.water import ( + WATER_SCHEMA, + water_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "a7a5e73a-79c0-55d7-ab4d-5f9fc65fe915", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "water", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "a7a5e73a-79c0-55d7-ab4d-5f9fc65fe915", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "base", + "type": "water", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "class": "basin", + "subtype": "canal", + "is_intermittent": False, + "is_salt": False, + "level": 0, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "source_tags": {}, + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="water::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="water::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + 
Scenario( + id="water::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="water::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="water::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="water::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="water::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="water::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "GEOMETRYCOLLECTION EMPTY"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="water::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="water::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="water::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="water::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( 
+ id="water::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="water::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="water::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="water::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="water::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="water::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="water::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="water::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="water::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": 
"/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="water::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="water::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="water::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="water::class:enum", + scaffold={"class": "basin"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="water::subtype:enum", + scaffold={"subtype": "canal"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="water::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="water::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="water::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + 
mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="water::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="water::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="water::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="water::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="water::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="water::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="water::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", 
+ "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="water::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="water::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="water::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="water::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + 
id="water::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="water::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="water::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="water::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="water::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="water::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="water::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + 
} + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return water_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + WATER_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="water", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + WATER_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="water", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("water::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("water::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py new file mode 100644 index 000000000..ebfd4a131 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -0,0 +1,708 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for building.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.buildings.building import ( + BUILDING_SCHEMA, + building_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "f59ea25f-5910-56e0-b595-25dd9d65ef4b", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "f59ea25f-5910-56e0-b595-25dd9d65ef4b", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "agricultural", + "class": "agricultural", + "has_parts": False, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "height": 1.0, + "is_underground": False, + "num_floors": 1, + "num_floors_underground": 1, + "min_height": 0.0, + "min_floor": 1, + "facade_color": "#aabbcc", + "facade_material": "brick", + "roof_material": "concrete", + "roof_shape": "dome", + "roof_direction": 0.0, + "roof_orientation": "across", + "roof_color": "#aabbcc", + "roof_height": 0.0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="building::id:required", + scaffold={}, + 
mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="building::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="building::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="building::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="building::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="building::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="building::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="building::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="building::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="building::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="building::type:required", + 
scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="building::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="building::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="building::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="building::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="building::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="building::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="building::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="building::sources[].confidence:bounds", + scaffold={ + "sources": [ + 
{"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building::subtype:enum", + scaffold={"subtype": "agricultural"}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="building::class:enum", + scaffold={"class": "agricultural"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="building::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + 
Scenario( + id="building::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="building::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="building::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="building::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="building::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="building::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="building::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="building::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": 
"a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="building::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="building::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="building::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="building::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", 
+ "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="building::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="building::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="building::num_floors:bounds", + scaffold={"num_floors": 1}, + mutate=set_at_path("num_floors", 0), + expected_field="num_floors", + expected_check="bounds", + ), + Scenario( + 
id="building::num_floors_underground:bounds", + scaffold={"num_floors_underground": 1}, + mutate=set_at_path("num_floors_underground", 0), + expected_field="num_floors_underground", + expected_check="bounds", + ), + Scenario( + id="building::min_floor:bounds", + scaffold={"min_floor": 1}, + mutate=set_at_path("min_floor", 0), + expected_field="min_floor", + expected_check="bounds", + ), + Scenario( + id="building::facade_color:hex_color", + scaffold={"facade_color": "#aabbcc"}, + mutate=set_at_path("facade_color", "not-hex"), + expected_field="facade_color", + expected_check="hex_color", + ), + Scenario( + id="building::facade_material:enum", + scaffold={"facade_material": "brick"}, + mutate=set_at_path("facade_material", "__INVALID__"), + expected_field="facade_material", + expected_check="enum", + ), + Scenario( + id="building::roof_material:enum", + scaffold={"roof_material": "concrete"}, + mutate=set_at_path("roof_material", "__INVALID__"), + expected_field="roof_material", + expected_check="enum", + ), + Scenario( + id="building::roof_shape:enum", + scaffold={"roof_shape": "dome"}, + mutate=set_at_path("roof_shape", "__INVALID__"), + expected_field="roof_shape", + expected_check="enum", + ), + Scenario( + id="building::roof_direction:bounds", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", -1.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building::roof_direction:bounds_1", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", 360.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building::roof_orientation:enum", + scaffold={"roof_orientation": "across"}, + mutate=set_at_path("roof_orientation", "__INVALID__"), + expected_field="roof_orientation", + expected_check="enum", + ), + Scenario( + id="building::roof_color:hex_color", + scaffold={"roof_color": "#aabbcc"}, + mutate=set_at_path("roof_color", "not-hex"), + 
expected_field="roof_color", + expected_check="hex_color", + ), + Scenario( + id="building::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="building::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return building_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="building", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="building", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. 
+ """ + baseline = sparse_results.violations.get("building::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("building::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py new file mode 100644 index 000000000..73ab44863 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -0,0 +1,714 @@ +# Auto-generated — do not 
edit. + +"""Generated conformance tests for building_part.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.buildings.building_part import ( + BUILDING_PART_SCHEMA, + building_part_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "c039cf20-2e1c-5116-a393-4d834e447d46", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building_part", + "version": 0, + "building_id": "a", +} + + +BASE_ROW_POPULATED: dict = { + "id": "c039cf20-2e1c-5116-a393-4d834e447d46", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "buildings", + "type": "building_part", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "building_id": "a", + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "level": 0, + "height": 1.0, + "is_underground": False, + "num_floors": 1, + "num_floors_underground": 1, + "min_height": 0.0, + "min_floor": 1, + "facade_color": "#aabbcc", + "facade_material": "brick", + "roof_material": "concrete", + "roof_shape": "dome", + "roof_direction": 0.0, + "roof_orientation": "across", + "roof_color": "#aabbcc", + "roof_height": 0.0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="building_part::id:required", + scaffold={}, + 
mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="building_part::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="building_part::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="building_part::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="building_part::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="building_part::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="building_part::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="building_part::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="building_part::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="building_part::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + 
Scenario( + id="building_part::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="building_part::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="building_part::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="building_part::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="building_part::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building_part::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="building_part::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="building_part::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="building_part::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + 
Scenario( + id="building_part::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building_part::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="building_part::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building_part::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building_part::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building_part::building_id:required", + scaffold={}, + mutate=set_at_path("building_id", None), + expected_field="building_id", + expected_check="required", + ), + Scenario( + id="building_part::building_id:string_min_length", + scaffold={}, + mutate=set_at_path("building_id", ""), + expected_field="building_id", + expected_check="string_min_length", + ), + Scenario( + id="building_part::building_id:no_whitespace", + scaffold={}, + 
mutate=set_at_path("building_id", "has whitespace"), + expected_field="building_id", + expected_check="no_whitespace", + ), + Scenario( + id="building_part::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="building_part::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="building_part::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="building_part::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="building_part::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="building_part::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + 
id="building_part::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="building_part::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="building_part::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + 
{ + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="building_part::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="building_part::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + 
mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="building_part::height:bounds", + scaffold={"height": 1.0}, + mutate=set_at_path("height", 0.0), + expected_field="height", + expected_check="bounds", + ), + Scenario( + id="building_part::num_floors:bounds", + scaffold={"num_floors": 1}, + mutate=set_at_path("num_floors", 0), + expected_field="num_floors", + expected_check="bounds", + ), + Scenario( + id="building_part::num_floors_underground:bounds", + scaffold={"num_floors_underground": 1}, + mutate=set_at_path("num_floors_underground", 0), + expected_field="num_floors_underground", + expected_check="bounds", + ), + Scenario( + id="building_part::min_floor:bounds", + scaffold={"min_floor": 1}, + mutate=set_at_path("min_floor", 0), + expected_field="min_floor", + expected_check="bounds", + ), + Scenario( + id="building_part::facade_color:hex_color", + scaffold={"facade_color": "#aabbcc"}, + mutate=set_at_path("facade_color", "not-hex"), + expected_field="facade_color", + expected_check="hex_color", + ), + Scenario( + id="building_part::facade_material:enum", + scaffold={"facade_material": "brick"}, + mutate=set_at_path("facade_material", "__INVALID__"), + expected_field="facade_material", + expected_check="enum", + ), + Scenario( + id="building_part::roof_material:enum", + scaffold={"roof_material": "concrete"}, + mutate=set_at_path("roof_material", "__INVALID__"), + expected_field="roof_material", + expected_check="enum", + ), + Scenario( + id="building_part::roof_shape:enum", + scaffold={"roof_shape": "dome"}, + mutate=set_at_path("roof_shape", "__INVALID__"), + expected_field="roof_shape", + expected_check="enum", + ), + Scenario( + id="building_part::roof_direction:bounds", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", -1.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + 
id="building_part::roof_direction:bounds_1", + scaffold={"roof_direction": 0.0}, + mutate=set_at_path("roof_direction", 360.0), + expected_field="roof_direction", + expected_check="bounds", + ), + Scenario( + id="building_part::roof_orientation:enum", + scaffold={"roof_orientation": "across"}, + mutate=set_at_path("roof_orientation", "__INVALID__"), + expected_field="roof_orientation", + expected_check="enum", + ), + Scenario( + id="building_part::roof_color:hex_color", + scaffold={"roof_color": "#aabbcc"}, + mutate=set_at_path("roof_color", "not-hex"), + expected_field="roof_color", + expected_check="hex_color", + ), + Scenario( + id="building_part::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="building_part::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return building_part_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_PART_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="building_part", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + BUILDING_PART_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="building_part", + ) + + +def 
test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("building_part::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("building_part::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations 
diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py new file mode 100644 index 000000000..399495474 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -0,0 +1,1049 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for division.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division import ( + DIVISION_SCHEMA, + division_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "names": {"primary": "a"}, + "id": "97a2a97d-1eb8-5161-9ae5-bfb82594ed67", + "geometry": "POINT (0 0)", + "theme": "divisions", + "type": "division", + "version": 0, + "subtype": "country", + "country": "US", + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]], + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "cartography": {"prominence": 1, "min_zoom": 0, "max_zoom": 0, "sort_key": 0}, + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "97a2a97d-1eb8-5161-9ae5-bfb82594ed67", + "bbox": {"xmin": 0.0, "xmax": 
1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "divisions", + "type": "division", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "country", + "country": "US", + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]], + "admin_level": 0, + "class": "megacity", + "local_type": {}, + "region": "US-CA", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "norms": {"driving_side": "left"}, + "population": 0, + "capital_division_ids": ["a"], + "capital_of_divisions": [{"division_id": "a", "subtype": "country"}], + "wikidata": "Q42", +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division::cartography.prominence:bounds", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 0), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="division::cartography.prominence:bounds_1", + scaffold={"cartography": {"prominence": 1}}, + mutate=set_at_path("cartography.prominence", 101), + expected_field="cartography.prominence", + expected_check="bounds", + ), + Scenario( + id="division::cartography.min_zoom:bounds", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", -1), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="division::cartography.min_zoom:bounds_1", + scaffold={"cartography": {"min_zoom": 0}}, + mutate=set_at_path("cartography.min_zoom", 24), + expected_field="cartography.min_zoom", + expected_check="bounds", + ), + Scenario( + id="division::cartography.max_zoom:bounds", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", -1), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + 
id="division::cartography.max_zoom:bounds_1", + scaffold={"cartography": {"max_zoom": 0}}, + mutate=set_at_path("cartography.max_zoom", 24), + expected_field="cartography.max_zoom", + expected_check="bounds", + ), + Scenario( + id="division::names:required", + scaffold={}, + mutate=set_at_path("names", None), + expected_field="names", + expected_check="required", + ), + Scenario( + id="division::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="division::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="division::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="division::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="division::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="division::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="division::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": 
"a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="division::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="division::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="division::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="division::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + 
Scenario( + id="division::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division::names.rules[].side:enum", + scaffold={ + "names": 
{ + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="division::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="division::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division::theme:required", + scaffold={}, + 
mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="division::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division::sources[].license:stripped", + scaffold={ + "sources": [ + 
{"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + 
expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="division::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division::hierarchies:required", + scaffold={}, + mutate=set_at_path("hierarchies", None), + expected_field="hierarchies", + expected_check="required", + ), + Scenario( + id="division::hierarchies_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("hierarchies", []), + expected_field="hierarchies_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::hierarchies[]_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("hierarchies[]", []), + expected_field="hierarchies[]_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::hierarchies[][].division_id:required", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", None), + expected_field="hierarchies[][].division_id", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].division_id:string_min_length", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", ""), + expected_field="hierarchies[][].division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::hierarchies[][].division_id:no_whitespace", + scaffold={ + "hierarchies": [[{"subtype": "country", "name": "a", "division_id": "a"}]] + }, + mutate=set_at_path("hierarchies[][].division_id", "has whitespace"), + expected_field="hierarchies[][].division_id", + expected_check="no_whitespace", + ), + Scenario( + 
id="division::hierarchies[][].subtype:required", + scaffold={ + "hierarchies": [[{"division_id": "a", "name": "a", "subtype": "country"}]] + }, + mutate=set_at_path("hierarchies[][].subtype", None), + expected_field="hierarchies[][].subtype", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].subtype:enum", + scaffold={ + "hierarchies": [[{"division_id": "a", "name": "a", "subtype": "country"}]] + }, + mutate=set_at_path("hierarchies[][].subtype", "__INVALID__"), + expected_field="hierarchies[][].subtype", + expected_check="enum", + ), + Scenario( + id="division::hierarchies[][].name:required", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", None), + expected_field="hierarchies[][].name", + expected_check="required", + ), + Scenario( + id="division::hierarchies[][].name:string_min_length", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", ""), + expected_field="hierarchies[][].name", + expected_check="string_min_length", + ), + Scenario( + id="division::hierarchies[][].name:stripped", + scaffold={ + "hierarchies": [[{"division_id": "a", "subtype": "country", "name": "a"}]] + }, + mutate=set_at_path("hierarchies[][].name", " has spaces "), + expected_field="hierarchies[][].name", + expected_check="stripped", + ), + Scenario( + id="division::parent_division_id:string_min_length", + scaffold={"parent_division_id": "a"}, + mutate=set_at_path("parent_division_id", ""), + expected_field="parent_division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::parent_division_id:no_whitespace", + scaffold={"parent_division_id": "a"}, + mutate=set_at_path("parent_division_id", "has whitespace"), + expected_field="parent_division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division::admin_level:bounds", + scaffold={"admin_level": 
0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division::class:enum", + scaffold={"class": "megacity"}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division::perspectives.mode:required", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", None), + expected_field="perspectives.mode", + expected_check="required", + ), + Scenario( + id="division::perspectives.mode:enum", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", "__INVALID__"), + expected_field="perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division::perspectives.countries:required", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", None), + expected_field="perspectives.countries", + expected_check="required", + ), + Scenario( + id="division::perspectives.countries_min_length:array_min_length", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", []), + expected_field="perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::perspectives.countries[]:country_code_alpha2", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries[]", "99"), + expected_field="perspectives.countries[]", + 
expected_check="country_code_alpha2", + ), + Scenario( + id="division::norms.driving_side:enum", + scaffold={"norms": {"driving_side": "left"}}, + mutate=set_at_path("norms.driving_side", "__INVALID__"), + expected_field="norms.driving_side", + expected_check="enum", + ), + Scenario( + id="division::population:bounds", + scaffold={"population": 0}, + mutate=set_at_path("population", -1), + expected_field="population", + expected_check="bounds", + ), + Scenario( + id="division::capital_division_ids_min_length:array_min_length", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids", []), + expected_field="capital_division_ids_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::capital_division_ids[]:string_min_length", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids[]", ""), + expected_field="capital_division_ids[]", + expected_check="string_min_length", + ), + Scenario( + id="division::capital_division_ids[]:no_whitespace", + scaffold={"capital_division_ids": ["a"]}, + mutate=set_at_path("capital_division_ids[]", "has whitespace"), + expected_field="capital_division_ids[]", + expected_check="no_whitespace", + ), + Scenario( + id="division::capital_of_divisions_min_length:array_min_length", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions", []), + expected_field="capital_of_divisions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division::capital_of_divisions[].division_id:required", + scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", None), + expected_field="capital_of_divisions[].division_id", + expected_check="required", + ), + Scenario( + id="division::capital_of_divisions[].division_id:string_min_length", + scaffold={"capital_of_divisions": [{"subtype": "country", 
"division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", ""), + expected_field="capital_of_divisions[].division_id", + expected_check="string_min_length", + ), + Scenario( + id="division::capital_of_divisions[].division_id:no_whitespace", + scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, + mutate=set_at_path("capital_of_divisions[].division_id", "has whitespace"), + expected_field="capital_of_divisions[].division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division::capital_of_divisions[].subtype:required", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions[].subtype", None), + expected_field="capital_of_divisions[].subtype", + expected_check="required", + ), + Scenario( + id="division::capital_of_divisions[].subtype:enum", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=set_at_path("capital_of_divisions[].subtype", "__INVALID__"), + expected_field="capital_of_divisions[].subtype", + expected_check="enum", + ), + Scenario( + id="division::wikidata:wikidata_id", + scaffold={"wikidata": "Q42"}, + mutate=set_at_path("wikidata", "P999"), + expected_field="wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="division::model:require_if:0", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), 
+ Scenario( + id="division::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["parent_division_id"], "subtype", "country", negate=True + ), + expected_field="parent_division_id_required", + expected_check="require_if", + ), + Scenario( + id="division::model:forbid_if:7", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["parent_division_id"], "subtype", "country" + ), + expected_field="parent_division_id_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="division::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::hierarchies_unique:struct_unique", 
+ scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies"), + expected_field="hierarchies_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::hierarchies[]_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), + expected_field="hierarchies[]_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::capital_division_ids_unique:struct_unique", + scaffold={"capital_division_ids": ["a"]}, + mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), + expected_field="capital_division_ids_unique", + expected_check="struct_unique", + ), + Scenario( + id="division::capital_of_divisions_unique:struct_unique", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), + expected_field="capital_of_divisions_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen 
produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("division::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py new file mode 100644 index 000000000..9f4d8e2f8 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -0,0 +1,759 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for division_area.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division_area import ( + DIVISION_AREA_SCHEMA, + division_area_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_radio_group, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "names": {"primary": "a"}, + "id": "4619f66f-2d01-5776-ba67-01e9f3ccd9d7", + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "divisions", + "type": "division_area", + "version": 0, + "subtype": "country", + "class": "land", + "division_id": "a", + "country": "US", + "is_land": True, + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "4619f66f-2d01-5776-ba67-01e9f3ccd9d7", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)))", + "theme": "divisions", + "type": "division_area", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + 
"subtype": "country", + "class": "land", + "is_land": True, + "is_territorial": False, + "division_id": "a", + "country": "US", + "region": "US-CA", + "admin_level": 0, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division_area::names:required", + scaffold={}, + mutate=set_at_path("names", None), + expected_field="names", + expected_check="required", + ), + Scenario( + id="division_area::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="division_area::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="division_area::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="division_area::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="division_area::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="division_area::names.rules[].variant:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="division_area::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="division_area::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + 
expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_area::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + 
expected_check="linear_range_order", + ), + Scenario( + id="division_area::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="division_area::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division_area::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="division_area::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division_area::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division_area::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division_area::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division_area::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division_area::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", 
"POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division_area::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division_area::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division_area::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="division_area::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division_area::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division_area::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division_area::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_area::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division_area::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division_area::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + 
mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division_area::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division_area::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_area::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_area::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_area::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_area::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="division_area::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division_area::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division_area::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="division_area::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division_area::division_id:required", + scaffold={}, + mutate=set_at_path("division_id", None), + expected_field="division_id", + expected_check="required", + ), + Scenario( + id="division_area::division_id:string_min_length", + scaffold={}, + mutate=set_at_path("division_id", ""), + expected_field="division_id", + expected_check="string_min_length", + ), + Scenario( + id="division_area::division_id:no_whitespace", + scaffold={}, + mutate=set_at_path("division_id", "has whitespace"), + expected_field="division_id", + expected_check="no_whitespace", + ), + Scenario( + id="division_area::country:required", + scaffold={}, + mutate=set_at_path("country", None), + expected_field="country", + expected_check="required", + ), + Scenario( + id="division_area::country:country_code_alpha2", + scaffold={}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_area::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division_area::admin_level:bounds", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + 
id="division_area::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division_area::model:radio_group:0", + scaffold={}, + mutate=lambda row: mutate_radio_group(row, ["is_land", "is_territorial"]), + expected_field="radio_group", + expected_check="radio_group", + ), + Scenario( + id="division_area::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division_area::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division_area::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + 
"variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_area::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_area_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_AREA_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division_area", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_AREA_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division_area", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division_area::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. 
+ + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("division_area::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py new file mode 100644 index 000000000..27e05e731 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -0,0 +1,574 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for division_boundary.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.divisions.division_boundary import ( + DIVISION_BOUNDARY_SCHEMA, + division_boundary_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_radio_group, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "3c9e8190-33ce-5962-9668-d467336901b4", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "divisions", + "type": "division_boundary", + "version": 0, + "subtype": "country", + "class": "land", + "division_ids": ["a", "a1"], + "is_land": True, + "admin_level": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "3c9e8190-33ce-5962-9668-d467336901b4", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "divisions", + "type": "division_boundary", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "country", + "class": "land", + "is_land": True, + "is_territorial": False, + "division_ids": ["a", "a1"], + "region": "US-CA", + "admin_level": 0, + "is_disputed": False, + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="division_boundary::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="division_boundary::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + 
expected_check="string_min_length", + ), + Scenario( + id="division_boundary::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="division_boundary::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="division_boundary::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="division_boundary::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="division_boundary::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="division_boundary::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="division_boundary::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="division_boundary::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="division_boundary::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + 
id="division_boundary::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="division_boundary::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="division_boundary::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="division_boundary::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="division_boundary::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="division_boundary::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="division_boundary::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", 
"confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="division_boundary::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="division_boundary::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="division_boundary::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="division_boundary::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="division_boundary::class:enum", + scaffold={}, + 
mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="division_boundary::division_ids:required", + scaffold={}, + mutate=set_at_path("division_ids", None), + expected_field="division_ids", + expected_check="required", + ), + Scenario( + id="division_boundary::division_ids_min_length:array_min_length", + scaffold={}, + mutate=set_at_path("division_ids", []), + expected_field="division_ids_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::division_ids_max_length:array_max_length", + scaffold={}, + mutate=set_at_path("division_ids", [{}, {}, {}]), + expected_field="division_ids_max_length", + expected_check="array_max_length", + ), + Scenario( + id="division_boundary::division_ids[]:string_min_length", + scaffold={}, + mutate=set_at_path("division_ids[]", ""), + expected_field="division_ids[]", + expected_check="string_min_length", + ), + Scenario( + id="division_boundary::division_ids[]:no_whitespace", + scaffold={}, + mutate=set_at_path("division_ids[]", "has whitespace"), + expected_field="division_ids[]", + expected_check="no_whitespace", + ), + Scenario( + id="division_boundary::country:country_code_alpha2", + scaffold={"country": "US"}, + mutate=set_at_path("country", "99"), + expected_field="country", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_boundary::region:region_code", + scaffold={"region": "US-CA"}, + mutate=set_at_path("region", "99-999"), + expected_field="region", + expected_check="region_code", + ), + Scenario( + id="division_boundary::admin_level:bounds", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", -1), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + id="division_boundary::admin_level:bounds_1", + scaffold={"admin_level": 0}, + mutate=set_at_path("admin_level", 17), + expected_field="admin_level", + expected_check="bounds", + ), + Scenario( + 
id="division_boundary::perspectives.mode:required", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", None), + expected_field="perspectives.mode", + expected_check="required", + ), + Scenario( + id="division_boundary::perspectives.mode:enum", + scaffold={"perspectives": {"countries": ["US"], "mode": "accepted_by"}}, + mutate=set_at_path("perspectives.mode", "__INVALID__"), + expected_field="perspectives.mode", + expected_check="enum", + ), + Scenario( + id="division_boundary::perspectives.countries:required", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", None), + expected_field="perspectives.countries", + expected_check="required", + ), + Scenario( + id="division_boundary::perspectives.countries_min_length:array_min_length", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries", []), + expected_field="perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="division_boundary::perspectives.countries[]:country_code_alpha2", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=set_at_path("perspectives.countries[]", "99"), + expected_field="perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="division_boundary::model:radio_group:0", + scaffold={}, + mutate=lambda row: mutate_radio_group(row, ["is_land", "is_territorial"]), + expected_field="radio_group", + expected_check="radio_group", + ), + Scenario( + id="division_boundary::model:require_if:1", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "county"), + expected_field="admin_level_required_0", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:2", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], 
"subtype", "macrocounty" + ), + expected_field="admin_level_required_1", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:3", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["admin_level"], "subtype", "region"), + expected_field="admin_level_required_2", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:4", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "macroregion" + ), + expected_field="admin_level_required_3", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:5", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "dependency" + ), + expected_field="admin_level_required_4", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:6", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["admin_level"], "subtype", "country" + ), + expected_field="admin_level_required_5", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:require_if:7", + scaffold={}, + mutate=lambda row: mutate_require_if( + row, ["country"], "subtype", "country", negate=True + ), + expected_field="country_required", + expected_check="require_if", + ), + Scenario( + id="division_boundary::model:forbid_if:8", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["country"], "subtype", "country"), + expected_field="country_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="division_boundary::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_boundary::division_ids_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "division_ids"), + 
expected_field="division_ids_unique", + expected_check="struct_unique", + ), + Scenario( + id="division_boundary::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return division_boundary_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_BOUNDARY_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="division_boundary", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + DIVISION_BOUNDARY_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="division_boundary", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("division_boundary::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("division_boundary::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py new file mode 100644 index 000000000..ad8fc0002 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -0,0 +1,1207 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for place.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.places.place import ( + PLACE_SCHEMA, + place_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "771dc733-3cd9-5ec4-a0b9-946ff01afb4e", + "geometry": "POINT (0 0)", + "theme": "places", + "type": "place", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "771dc733-3cd9-5ec4-a0b9-946ff01afb4e", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "places", + "type": "place", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "operating_status": "open", + "categories": {"primary": "snake_case", "alternate": ["snake_case"]}, + "basic_category": "snake_case", + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + }, + "confidence": 0.0, + "websites": ["https://example.com/"], + "socials": ["https://example.com/"], + "emails": ["user@example.com"], + "phones": ["+1 555-555-5555"], + "brand": { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "wikidata": "Q42", + }, + "addresses": [ + { + "freeform": "", + "locality": "", + "postcode": "", + "region": "US-CA", + "country": "US", + } + ], + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + 
"value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="place::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="place::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="place::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="place::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="place::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="place::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="place::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="place::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="place::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + 
expected_field="theme", + expected_check="required", + ), + Scenario( + id="place::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="place::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="place::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="place::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="place::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="place::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="place::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="place::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="place::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + 
] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="place::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="place::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="place::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="place::operating_status:enum", + scaffold={"operating_status": "open"}, + mutate=set_at_path("operating_status", "__INVALID__"), + expected_field="operating_status", + expected_check="enum", + ), + Scenario( + id="place::categories.primary:required", + scaffold={"categories": {"primary": "snake_case"}}, + mutate=set_at_path("categories.primary", 
None), + expected_field="categories.primary", + expected_check="required", + ), + Scenario( + id="place::categories.primary:snake_case", + scaffold={"categories": {"primary": "snake_case"}}, + mutate=set_at_path("categories.primary", "HAS SPACES"), + expected_field="categories.primary", + expected_check="snake_case", + ), + Scenario( + id="place::categories.alternate[]:snake_case", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=set_at_path("categories.alternate[]", "HAS SPACES"), + expected_field="categories.alternate[]", + expected_check="snake_case", + ), + Scenario( + id="place::basic_category:snake_case", + scaffold={"basic_category": "snake_case"}, + mutate=set_at_path("basic_category", "HAS SPACES"), + expected_field="basic_category", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.primary:required", + scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + mutate=set_at_path("taxonomy.primary", None), + expected_field="taxonomy.primary", + expected_check="required", + ), + Scenario( + id="place::taxonomy.primary:snake_case", + scaffold={"taxonomy": {"hierarchy": ["snake_case"], "primary": "snake_case"}}, + mutate=set_at_path("taxonomy.primary", "HAS SPACES"), + expected_field="taxonomy.primary", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.hierarchy:required", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy", None), + expected_field="taxonomy.hierarchy", + expected_check="required", + ), + Scenario( + id="place::taxonomy.hierarchy_min_length:array_min_length", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy", []), + expected_field="taxonomy.hierarchy_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::taxonomy.hierarchy[]:snake_case", + scaffold={"taxonomy": {"primary": 
"snake_case", "hierarchy": ["snake_case"]}}, + mutate=set_at_path("taxonomy.hierarchy[]", "HAS SPACES"), + expected_field="taxonomy.hierarchy[]", + expected_check="snake_case", + ), + Scenario( + id="place::taxonomy.alternates_min_length:array_min_length", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=set_at_path("taxonomy.alternates", []), + expected_field="taxonomy.alternates_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::taxonomy.alternates[]:snake_case", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=set_at_path("taxonomy.alternates[]", "HAS SPACES"), + expected_field="taxonomy.alternates[]", + expected_check="snake_case", + ), + Scenario( + id="place::confidence:bounds", + scaffold={"confidence": 0.0}, + mutate=set_at_path("confidence", -1.0), + expected_field="confidence", + expected_check="bounds", + ), + Scenario( + id="place::confidence:bounds_1", + scaffold={"confidence": 0.0}, + mutate=set_at_path("confidence", 2.0), + expected_field="confidence", + expected_check="bounds", + ), + Scenario( + id="place::websites_min_length:array_min_length", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path("websites", []), + expected_field="websites_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::websites[]:url_format", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path("websites[]", "not-a-url"), + expected_field="websites[]", + expected_check="url_format", + ), + Scenario( + id="place::websites[]:url_length", + scaffold={"websites": ["https://example.com/"]}, + mutate=set_at_path( + "websites[]", + 
"https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="websites[]", + expected_check="url_length", + ), + Scenario( + id="place::socials_min_length:array_min_length", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path("socials", []), + expected_field="socials_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::socials[]:url_format", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path("socials[]", "not-a-url"), + expected_field="socials[]", + expected_check="url_format", + ), + Scenario( + id="place::socials[]:url_length", + scaffold={"socials": ["https://example.com/"]}, + mutate=set_at_path( + "socials[]", + "https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + ), + expected_field="socials[]", + expected_check="url_length", + ), + Scenario( + id="place::emails_min_length:array_min_length", + scaffold={"emails": ["user@example.com"]}, + mutate=set_at_path("emails", []), + expected_field="emails_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::emails[]:email", + scaffold={"emails": ["user@example.com"]}, + mutate=set_at_path("emails[]", "not-an-email"), + expected_field="emails[]", + expected_check="email", + ), + Scenario( + id="place::phones_min_length:array_min_length", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=set_at_path("phones", []), + expected_field="phones_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::phones[]:phone_number", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=set_at_path("phones[]", "1234567890"), + expected_field="phones[]", + expected_check="phone_number", + ), + Scenario( + id="place::brand.names.primary:required", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", None), + expected_field="brand.names.primary", + expected_check="required", + ), + Scenario( + 
id="place::brand.names.primary:string_min_length", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", ""), + expected_field="brand.names.primary", + expected_check="string_min_length", + ), + Scenario( + id="place::brand.names.primary:stripped", + scaffold={"brand": {"names": {"primary": "a"}}}, + mutate=set_at_path("brand.names.primary", " has spaces "), + expected_field="brand.names.primary", + expected_check="stripped", + ), + Scenario( + id="place::brand.names.rules[].value:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", None), + expected_field="brand.names.rules[].value", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].value:string_min_length", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", ""), + expected_field="brand.names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="place::brand.names.rules[].value:stripped", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"variant": "common", "value": "a"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].value", " has spaces "), + expected_field="brand.names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="place::brand.names.rules[].variant:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].variant", None), + expected_field="brand.names.rules[].variant", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].variant:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common"}], + } + } + }, + 
mutate=set_at_path("brand.names.rules[].variant", "__INVALID__"), + expected_field="brand.names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="place::brand.names.rules[].language:language_tag", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].language", "123"), + expected_field="brand.names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="place::brand.names.rules[].perspectives.mode:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "countries": ["US"], + "mode": "accepted_by", + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.mode", None), + expected_field="brand.names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].perspectives.mode:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "countries": ["US"], + "mode": "accepted_by", + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.mode", "__INVALID__"), + expected_field="brand.names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries:required", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries", None), + expected_field="brand.names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "brand": { + "names": { + "primary": "a", 
+ "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries", []), + expected_field="brand.names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].perspectives.countries[]", "99"), + expected_field="brand.names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_length", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [0.5]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_bounds", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [1.5, 2.0]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::brand.names.rules[].between:linear_range_order", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + {"value": "a", "variant": "common", "between": [0.0, 1.0]} + ], + } + } + }, + mutate=set_at_path("brand.names.rules[].between", [0.8, 0.2]), + expected_field="brand.names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="place::brand.names.rules[].side:enum", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + } + }, + mutate=set_at_path("brand.names.rules[].side", "__INVALID__"), + expected_field="brand.names.rules[].side", + expected_check="enum", + ), + Scenario( + id="place::brand.wikidata:wikidata_id", + scaffold={"brand": {"wikidata": "Q42"}}, + mutate=set_at_path("brand.wikidata", "P999"), + expected_field="brand.wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="place::addresses_min_length:array_min_length", + scaffold={"addresses": [{}]}, + mutate=set_at_path("addresses", []), + expected_field="addresses_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::addresses[].region:region_code", + scaffold={"addresses": [{"region": "US-CA"}]}, + mutate=set_at_path("addresses[].region", "99-999"), + expected_field="addresses[].region", + expected_check="region_code", + ), + Scenario( + id="place::addresses[].country:country_code_alpha2", + scaffold={"addresses": [{"country": "US"}]}, + mutate=set_at_path("addresses[].country", "99"), + expected_field="addresses[].country", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="place::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="place::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="place::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": 
"common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="place::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="place::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="place::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="place::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="place::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="place::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + 
expected_check="required", + ), + Scenario( + id="place::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="place::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="place::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="place::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="place::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + 
expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="place::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="place::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="place::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="place::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::categories.alternate_unique:struct_unique", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "categories.alternate"), + expected_field="categories.alternate_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::taxonomy.hierarchy_unique:struct_unique", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), + expected_field="taxonomy.hierarchy_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="place::taxonomy.alternates_unique:struct_unique", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), + expected_field="taxonomy.alternates_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::websites_unique:struct_unique", + scaffold={"websites": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "websites"), + expected_field="websites_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::socials_unique:struct_unique", + scaffold={"socials": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "socials"), + expected_field="socials_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::emails_unique:struct_unique", + scaffold={"emails": ["user@example.com"]}, + mutate=lambda row: mutate_unique_items(row, "emails"), + expected_field="emails_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::phones_unique:struct_unique", + scaffold={"phones": ["+1 555-555-5555"]}, + mutate=lambda row: mutate_unique_items(row, "phones"), + expected_field="phones_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=lambda row: mutate_unique_items( + row, "brand.names.rules[].perspectives.countries" + ), + expected_field="brand.names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="place::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": 
"accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return place_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + PLACE_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="place", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + PLACE_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="place", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("place::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("place::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py new file mode 100644 index 000000000..6552a950a --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -0,0 +1,342 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for connector.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.connector import ( + CONNECTOR_SCHEMA, + connector_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import mutate_unique_items +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "257724e0-9751-53b0-9891-95a9ffa523da", + "geometry": "POINT (0 0)", + "theme": "transportation", + "type": "connector", + "version": 0, +} + + +BASE_ROW_POPULATED: dict = { + "id": "257724e0-9751-53b0-9891-95a9ffa523da", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "POINT (0 0)", + "theme": "transportation", + "type": "connector", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="connector::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="connector::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="connector::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="connector::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + 
id="connector::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="connector::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="connector::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="connector::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "LINESTRING (0 0, 1 1)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="connector::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="connector::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="connector::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="connector::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="connector::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="connector::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="connector::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", 
"dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="connector::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="connector::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="connector::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="connector::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="connector::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="connector::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="connector::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + 
mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="connector::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="connector::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="connector::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return connector_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + CONNECTOR_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="connector", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + CONNECTOR_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="connector", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. 
a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("connector::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("connector::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py new file mode 100644 index 000000000..22cfd600b --- /dev/null +++ 
b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -0,0 +1,1676 @@ +# Auto-generated — do not edit. + +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "rail", + "class": "funicular", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "rail", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", 
+ "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + "level_rules": [{"value": 0, "between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], + "class": "funicular", + "rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + 
expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + 
Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": 
"allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + 
id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + 
"access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + 
id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + 
expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + 
mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + 
id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + 
expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + 
scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + 
mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="segment::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="segment::rail_flags_min_length:array_min_length", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags", []), + expected_field="rail_flags_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::rail_flags[].values:required", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values", None), + expected_field="rail_flags[].values", + expected_check="required", + ), + Scenario( + id="segment::rail_flags[].values_min_length:array_min_length", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values", []), + expected_field="rail_flags[].values_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::rail_flags[].values[]:enum", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("rail_flags[].values[]", "__INVALID__"), + expected_field="rail_flags[].values[]", + expected_check="enum", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_length", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [0.5]), + expected_field="rail_flags[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_bounds", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [1.5, 2.0]), + expected_field="rail_flags[].between", + 
expected_check="linear_range_bounds", + ), + Scenario( + id="segment::rail_flags[].between:linear_range_order", + scaffold={"rail_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("rail_flags[].between", [0.8, 0.2]), + expected_field="rail_flags[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + 
expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + 
expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + 
id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], 
"subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::rail_flags_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags"), + expected_field="rail_flags_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::rail_flags[].values_unique:struct_unique", + scaffold={"rail_flags": [{"values": 
["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), + expected_field="rail_flags[].values_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py new file mode 100644 index 000000000..0a8d0a946 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -0,0 +1,3085 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "road", + "class": "motorway", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "road", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + 
"level_rules": [{"value": 0, "between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], + "class": "motorway", + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + "symbols": ["motorway"], + "when": {"heading": "forward"}, + } + ], + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}], + "road_surface": [{"value": "unknown", "between": [0.0, 1.0]}], + "speed_limits": [ + { + "max_speed": {"value": 1, "unit": "mph"}, + "min_speed": {"value": 1, "unit": "mph"}, + "is_max_speed_variable": False, + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "subclass": "link", + "width_rules": [{"value": 1.0, "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + 
scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + 
scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", 
"dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), 
+ Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + 
Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { 
+ "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + 
}, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", 
None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + 
expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + 
Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + 
mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": 
{"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + 
expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + 
mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::class:required", + scaffold={}, + mutate=set_at_path("class", None), + expected_field="class", + expected_check="required", + ), + Scenario( + id="segment::class:enum", + scaffold={}, + mutate=set_at_path("class", "__INVALID__"), + expected_field="class", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].from_connector_id:required", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", None), + expected_field="destinations[].from_connector_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].from_connector_id:string_min_length", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + 
"final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", ""), + expected_field="destinations[].from_connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].from_connector_id:no_whitespace", + scaffold={ + "destinations": [ + { + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "from_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].from_connector_id", "has whitespace"), + expected_field="destinations[].from_connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].to_connector_id:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", None), + expected_field="destinations[].to_connector_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].to_connector_id:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", ""), + expected_field="destinations[].to_connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].to_connector_id:no_whitespace", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "to_connector_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_connector_id", "has whitespace"), + expected_field="destinations[].to_connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].to_segment_id:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": 
"a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", None), + expected_field="destinations[].to_segment_id", + expected_check="required", + ), + Scenario( + id="segment::destinations[].to_segment_id:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", ""), + expected_field="destinations[].to_segment_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].to_segment_id:no_whitespace", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "final_heading": "forward", + "to_segment_id": "a", + } + ] + }, + mutate=set_at_path("destinations[].to_segment_id", "has whitespace"), + expected_field="destinations[].to_segment_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::destinations[].final_heading:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("destinations[].final_heading", None), + expected_field="destinations[].final_heading", + expected_check="required", + ), + Scenario( + id="segment::destinations[].final_heading:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("destinations[].final_heading", "__INVALID__"), + expected_field="destinations[].final_heading", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].labels_min_length:array_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + 
mutate=set_at_path("destinations[].labels", []), + expected_field="destinations[].labels_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::destinations[].labels[].value:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", None), + expected_field="destinations[].labels[].value", + expected_check="required", + ), + Scenario( + id="segment::destinations[].labels[].value:string_min_length", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", ""), + expected_field="destinations[].labels[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::destinations[].labels[].value:stripped", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"type": "street", "value": "a"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].value", " has spaces "), + expected_field="destinations[].labels[].value", + expected_check="stripped", + ), + Scenario( + id="segment::destinations[].labels[].type:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].type", None), + expected_field="destinations[].labels[].type", + expected_check="required", + ), + Scenario( + id="segment::destinations[].labels[].type:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + 
"to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=set_at_path("destinations[].labels[].type", "__INVALID__"), + expected_field="destinations[].labels[].type", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].symbols[]:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=set_at_path("destinations[].symbols[]", "__INVALID__"), + expected_field="destinations[].symbols[]", + expected_check="enum", + ), + Scenario( + id="segment::destinations[].when.heading:required", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("destinations[].when.heading", None), + expected_field="destinations[].when.heading", + expected_check="required", + ), + Scenario( + id="segment::destinations[].when.heading:enum", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("destinations[].when.heading", "__INVALID__"), + expected_field="destinations[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].sequence:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence", None), + expected_field="prohibited_transitions[].sequence", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].sequence_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": 
[{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence", []), + expected_field="prohibited_transitions[].sequence_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].connector_id", None), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:string_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].connector_id", ""), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].connector_id:no_whitespace", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"segment_id": "a", "connector_id": "a"}], + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].sequence[].connector_id", "has whitespace" + ), + expected_field="prohibited_transitions[].sequence[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].segment_id:required", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].segment_id", None), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="required", + ), + Scenario( + 
id="segment::prohibited_transitions[].sequence[].segment_id:string_min_length", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].sequence[].segment_id", ""), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="string_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].sequence[].segment_id:no_whitespace", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].sequence[].segment_id", "has whitespace" + ), + expected_field="prohibited_transitions[].sequence[].segment_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::prohibited_transitions[].final_heading:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("prohibited_transitions[].final_heading", None), + expected_field="prohibited_transitions[].final_heading", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].final_heading:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + } + ] + }, + mutate=set_at_path("prohibited_transitions[].final_heading", "__INVALID__"), + expected_field="prohibited_transitions[].final_heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [0.5]), + 
expected_field="prohibited_transitions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_bounds", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [1.5, 2.0]), + expected_field="prohibited_transitions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::prohibited_transitions[].between:linear_range_order", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "between": [0.0, 1.0], + } + ] + }, + mutate=set_at_path("prohibited_transitions[].between", [0.8, 0.2]), + expected_field="prohibited_transitions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::prohibited_transitions[].when.heading:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"heading": "forward"}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.heading", "__INVALID__"), + expected_field="prohibited_transitions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.mode", []), + expected_field="prohibited_transitions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": 
"forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.mode[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.using_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.using", []), + expected_field="prohibited_transitions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.using[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.using[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.recognized", []), + expected_field="prohibited_transitions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized[]:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.recognized[]", "__INVALID__"), + expected_field="prohibited_transitions[].when.recognized[]", + 
expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_min_length:array_min_length", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle", []), + expected_field="prohibited_transitions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].dimension:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].dimension", None), + expected_field="prohibited_transitions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].dimension:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].comparison:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].comparison", None), + expected_field="prohibited_transitions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + 
id="segment::prohibited_transitions[].when.vehicle[].comparison:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:required_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", None), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].value:bounds", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].value", -1.0), + expected_field="prohibited_transitions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:required", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", 
"segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:enum", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:required_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("prohibited_transitions[].when.vehicle[].unit", None), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle[].unit:enum_1", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path( + "prohibited_transitions[].when.vehicle[].unit", "__INVALID__" + ), + expected_field="prohibited_transitions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::road_flags_min_length:array_min_length", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags", []), + expected_field="road_flags_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_flags[].values:required", + 
scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values", None), + expected_field="road_flags[].values", + expected_check="required", + ), + Scenario( + id="segment::road_flags[].values_min_length:array_min_length", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values", []), + expected_field="road_flags[].values_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_flags[].values[]:enum", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=set_at_path("road_flags[].values[]", "__INVALID__"), + expected_field="road_flags[].values[]", + expected_check="enum", + ), + Scenario( + id="segment::road_flags[].between:linear_range_length", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [0.5]), + expected_field="road_flags[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::road_flags[].between:linear_range_bounds", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [1.5, 2.0]), + expected_field="road_flags[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::road_flags[].between:linear_range_order", + scaffold={"road_flags": [{"values": ["is_bridge"], "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_flags[].between", [0.8, 0.2]), + expected_field="road_flags[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::road_surface_min_length:array_min_length", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface", []), + expected_field="road_surface_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::road_surface[].value:required", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface[].value", None), + 
expected_field="road_surface[].value", + expected_check="required", + ), + Scenario( + id="segment::road_surface[].value:enum", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=set_at_path("road_surface[].value", "__INVALID__"), + expected_field="road_surface[].value", + expected_check="enum", + ), + Scenario( + id="segment::road_surface[].between:linear_range_length", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [0.5]), + expected_field="road_surface[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::road_surface[].between:linear_range_bounds", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [1.5, 2.0]), + expected_field="road_surface[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::road_surface[].between:linear_range_order", + scaffold={"road_surface": [{"value": "unknown", "between": [0.0, 1.0]}]}, + mutate=set_at_path("road_surface[].between", [0.8, 0.2]), + expected_field="road_surface[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::speed_limits_min_length:array_min_length", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits", []), + expected_field="speed_limits_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:required", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", None), + expected_field="speed_limits[].max_speed.value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:bounds", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", 0), + 
expected_field="speed_limits[].max_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].max_speed.value:bounds_1", + scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].max_speed.value", 351), + expected_field="speed_limits[].max_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].max_speed.unit:required", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].max_speed.unit", None), + expected_field="speed_limits[].max_speed.unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].max_speed.unit:enum", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].max_speed.unit", "__INVALID__"), + expected_field="speed_limits[].max_speed.unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:required", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", None), + expected_field="speed_limits[].min_speed.value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:bounds", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", 0), + expected_field="speed_limits[].min_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].min_speed.value:bounds_1", + scaffold={"speed_limits": [{"min_speed": {"unit": "mph", "value": 1}}]}, + mutate=set_at_path("speed_limits[].min_speed.value", 351), + expected_field="speed_limits[].min_speed.value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].min_speed.unit:required", + scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + 
mutate=set_at_path("speed_limits[].min_speed.unit", None), + expected_field="speed_limits[].min_speed.unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].min_speed.unit:enum", + scaffold={"speed_limits": [{"min_speed": {"value": 1, "unit": "mph"}}]}, + mutate=set_at_path("speed_limits[].min_speed.unit", "__INVALID__"), + expected_field="speed_limits[].min_speed.unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_length", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [0.5]), + expected_field="speed_limits[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_bounds", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [1.5, 2.0]), + expected_field="speed_limits[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::speed_limits[].between:linear_range_order", + scaffold={"speed_limits": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("speed_limits[].between", [0.8, 0.2]), + expected_field="speed_limits[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::speed_limits[].when.heading:enum", + scaffold={"speed_limits": [{"when": {"heading": "forward"}}]}, + mutate=set_at_path("speed_limits[].when.heading", "__INVALID__"), + expected_field="speed_limits[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.mode_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=set_at_path("speed_limits[].when.mode", []), + expected_field="speed_limits[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.mode[]:enum", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=set_at_path("speed_limits[].when.mode[]", 
"__INVALID__"), + expected_field="speed_limits[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.using_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=set_at_path("speed_limits[].when.using", []), + expected_field="speed_limits[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.using[]:enum", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=set_at_path("speed_limits[].when.using[]", "__INVALID__"), + expected_field="speed_limits[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.recognized_min_length:array_min_length", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=set_at_path("speed_limits[].when.recognized", []), + expected_field="speed_limits[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.recognized[]:enum", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=set_at_path("speed_limits[].when.recognized[]", "__INVALID__"), + expected_field="speed_limits[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle_min_length:array_min_length", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=set_at_path("speed_limits[].when.vehicle", []), + expected_field="speed_limits[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].dimension:required", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].dimension", None), + expected_field="speed_limits[].when.vehicle[].dimension", + 
expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].dimension:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].dimension", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].comparison:required", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].comparison", None), + expected_field="speed_limits[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].comparison:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].comparison", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:required", + scaffold={ + "speed_limits": [{"when": {"vehicle": [{"dimension": "axle_count"}]}}] + }, + mutate=set_at_path("speed_limits[].when.vehicle[].value", None), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:required_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].value", None), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].value:bounds", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].value", -1.0), + expected_field="speed_limits[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:required", + scaffold={"speed_limits": 
[{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:enum", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "height"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:required_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", None), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::speed_limits[].when.vehicle[].unit:enum_1", + scaffold={"speed_limits": [{"when": {"vehicle": [{"dimension": "weight"}]}}]}, + mutate=set_at_path("speed_limits[].when.vehicle[].unit", "__INVALID__"), + expected_field="speed_limits[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::subclass:enum", + scaffold={"subclass": "link"}, + mutate=set_at_path("subclass", "__INVALID__"), + expected_field="subclass", + expected_check="enum", + ), + Scenario( + id="segment::width_rules_min_length:array_min_length", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules", []), + expected_field="width_rules_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::width_rules[].value:required", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules[].value", None), + expected_field="width_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::width_rules[].value:bounds", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=set_at_path("width_rules[].value", 0.0), + expected_field="width_rules[].value", + 
expected_check="bounds", + ), + Scenario( + id="segment::width_rules[].between:linear_range_length", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [0.5]), + expected_field="width_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::width_rules[].between:linear_range_bounds", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [1.5, 2.0]), + expected_field="width_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::width_rules[].between:linear_range_order", + scaffold={"width_rules": [{"value": 1.0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("width_rules[].between", [0.8, 0.2]), + expected_field="width_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + 
expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": 
[{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + 
expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + 
["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], "subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": 
["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": 
["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::destinations[].labels_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), + expected_field="destinations[].labels_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::destinations[].symbols_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), + expected_field="destinations[].symbols_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].sequence_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].sequence" + ), + expected_field="prohibited_transitions[].sequence_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.mode_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.mode" + ), + expected_field="prohibited_transitions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + 
id="segment::prohibited_transitions[].when.using_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.using" + ), + expected_field="prohibited_transitions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.recognized" + ), + expected_field="prohibited_transitions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.vehicle" + ), + expected_field="prohibited_transitions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_flags_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags"), + expected_field="road_flags_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_flags[].values_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags[].values"), + 
expected_field="road_flags[].values_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::road_surface_unique:struct_unique", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=lambda row: mutate_unique_items(row, "road_surface"), + expected_field="road_surface_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits_unique:struct_unique", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits"), + expected_field="speed_limits_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.mode_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), + expected_field="speed_limits[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.using_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.using"), + expected_field="speed_limits[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.recognized_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), + expected_field="speed_limits[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::speed_limits[].when.vehicle_unique:struct_unique", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), + expected_field="speed_limits[].when.vehicle_unique", + 
expected_check="struct_unique", + ), + Scenario( + id="segment::width_rules_unique:struct_unique", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=lambda row: mutate_unique_items(row, "width_rules"), + expected_field="width_rules_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. + + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. 
+ """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py new file mode 100644 index 000000000..bf3e6b1d6 --- /dev/null +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -0,0 +1,1596 @@ +# Auto-generated — do not edit. 
+ +"""Generated conformance tests for segment.""" + +from __future__ import annotations + +import pytest +from overture.schema.pyspark.expressions.generated.overture.schema.transportation.segment import ( + SEGMENT_SCHEMA, + segment_checks, +) +from pyspark.sql import SparkSession + +from ....._support.harness import ( + ValidationResults, + run_validation_pipeline, +) +from ....._support.helpers import set_at_path +from ....._support.mutations import ( + mutate_forbid_if, + mutate_require_any_of, + mutate_require_if, + mutate_unique_items, +) +from ....._support.scenarios import Scenario + +BASE_ROW_SPARSE: dict = { + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "subtype": "water", +} + + +BASE_ROW_POPULATED: dict = { + "names": { + "primary": "a", + "common": {}, + "rules": [ + { + "value": "a", + "variant": "common", + "language": "en", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + "between": [0.0, 1.0], + "side": "left", + } + ], + }, + "id": "1f4d65c9-e092-52c4-b002-7c11ce69a554", + "bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}, + "geometry": "LINESTRING (0 0, 1 1)", + "theme": "transportation", + "type": "segment", + "version": 0, + "sources": [ + { + "property": "/valid/pointer", + "dataset": "", + "license": "clean", + "record_id": "", + "update_time": "2024-01-01T00:00:00Z", + "confidence": 0.0, + "between": [0.0, 1.0], + } + ], + "subtype": "water", + "access_restrictions": [ + { + "access_type": "allowed", + "between": [0.0, 1.0], + "when": { + "heading": "forward", + "during": "", + "mode": ["vehicle"], + "using": ["as_customer"], + "recognized": ["as_permitted"], + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ], + }, + } + ], + "connectors": [{"connector_id": "a", "at": 0.0}, {"connector_id": "a1", "at": 0.0}], + "level_rules": [{"value": 0, 
"between": [0.0, 1.0]}], + "routes": [ + { + "name": "a", + "network": "a", + "ref": "a", + "symbol": "a", + "wikidata": "Q42", + "between": [0.0, 1.0], + } + ], + "subclass_rules": [{"value": "link", "between": [0.0, 1.0]}], +} + + +SCENARIOS: list[Scenario] = [ + Scenario( + id="segment::id:required", + scaffold={}, + mutate=set_at_path("id", None), + expected_field="id", + expected_check="required", + ), + Scenario( + id="segment::id:string_min_length", + scaffold={}, + mutate=set_at_path("id", ""), + expected_field="id", + expected_check="string_min_length", + ), + Scenario( + id="segment::id:no_whitespace", + scaffold={}, + mutate=set_at_path("id", "has whitespace"), + expected_field="id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::bbox:bbox_completeness", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": None, "ymax": 1.0} + ), + expected_field="bbox", + expected_check="bbox_completeness", + ), + Scenario( + id="segment::bbox:bbox_lat_ordering", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": 10.0, "ymax": -10.0} + ), + expected_field="bbox", + expected_check="bbox_lat_ordering", + ), + Scenario( + id="segment::bbox:bbox_lat_range", + scaffold={"bbox": {"xmin": 0.0, "xmax": 1.0, "ymin": 0.0, "ymax": 1.0}}, + mutate=set_at_path( + "bbox", {"xmin": 0.0, "xmax": 1.0, "ymin": -100.0, "ymax": 100.0} + ), + expected_field="bbox", + expected_check="bbox_lat_range", + ), + Scenario( + id="segment::geometry:required", + scaffold={}, + mutate=set_at_path("geometry", None), + expected_field="geometry", + expected_check="required", + ), + Scenario( + id="segment::geometry:geometry_type", + scaffold={}, + mutate=set_at_path("geometry", "POINT (0 0)"), + expected_field="geometry", + expected_check="geometry_type", + ), + Scenario( + id="segment::theme:required", + 
scaffold={}, + mutate=set_at_path("theme", None), + expected_field="theme", + expected_check="required", + ), + Scenario( + id="segment::theme:enum", + scaffold={}, + mutate=set_at_path("theme", "__INVALID__"), + expected_field="theme", + expected_check="enum", + ), + Scenario( + id="segment::type:required", + scaffold={}, + mutate=set_at_path("type", None), + expected_field="type", + expected_check="required", + ), + Scenario( + id="segment::type:enum", + scaffold={}, + mutate=set_at_path("type", "__INVALID__"), + expected_field="type", + expected_check="enum", + ), + Scenario( + id="segment::version:required", + scaffold={}, + mutate=set_at_path("version", None), + expected_field="version", + expected_check="required", + ), + Scenario( + id="segment::version:bounds", + scaffold={}, + mutate=set_at_path("version", -1), + expected_field="version", + expected_check="bounds", + ), + Scenario( + id="segment::sources_min_length:array_min_length", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources", []), + expected_field="sources_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::sources[].property:required", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", None), + expected_field="sources[].property", + expected_check="required", + ), + Scenario( + id="segment::sources[].property:json_pointer", + scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, + mutate=set_at_path("sources[].property", "no-slash"), + expected_field="sources[].property", + expected_check="json_pointer", + ), + Scenario( + id="segment::sources[].dataset:required", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=set_at_path("sources[].dataset", None), + expected_field="sources[].dataset", + expected_check="required", + ), + Scenario( + id="segment::sources[].license:stripped", + scaffold={ + "sources": 
[ + {"property": "/valid/pointer", "dataset": "", "license": "clean"} + ] + }, + mutate=set_at_path("sources[].license", " has spaces "), + expected_field="sources[].license", + expected_check="stripped", + ), + Scenario( + id="segment::sources[].confidence:bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", -1.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].confidence:bounds_1", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "confidence": 0.0} + ] + }, + mutate=set_at_path("sources[].confidence", 2.0), + expected_field="sources[].confidence", + expected_check="bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_length", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.5]), + expected_field="sources[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::sources[].between:linear_range_bounds", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [1.5, 2.0]), + expected_field="sources[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::sources[].between:linear_range_order", + scaffold={ + "sources": [ + {"property": "/valid/pointer", "dataset": "", "between": [0.0, 1.0]} + ] + }, + mutate=set_at_path("sources[].between", [0.8, 0.2]), + expected_field="sources[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subtype:required", + scaffold={}, + mutate=set_at_path("subtype", None), + expected_field="subtype", + expected_check="required", + ), + Scenario( + id="segment::subtype:enum", + scaffold={}, + mutate=set_at_path("subtype", "__INVALID__"), + 
expected_field="subtype", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions_min_length:array_min_length", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions", []), + expected_field="access_restrictions_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].access_type:required", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", None), + expected_field="access_restrictions[].access_type", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].access_type:enum", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=set_at_path("access_restrictions[].access_type", "__INVALID__"), + expected_field="access_restrictions[].access_type", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_length", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.5]), + expected_field="access_restrictions[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_bounds", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [1.5, 2.0]), + expected_field="access_restrictions[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::access_restrictions[].between:linear_range_order", + scaffold={ + "access_restrictions": [{"access_type": "allowed", "between": [0.0, 1.0]}] + }, + mutate=set_at_path("access_restrictions[].between", [0.8, 0.2]), + expected_field="access_restrictions[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::access_restrictions[].when.heading:enum", + 
scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"heading": "forward"}} + ] + }, + mutate=set_at_path("access_restrictions[].when.heading", "__INVALID__"), + expected_field="access_restrictions[].when.heading", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.mode_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode", []), + expected_field="access_restrictions[].when.mode_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.mode[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.mode[]", "__INVALID__"), + expected_field="access_restrictions[].when.mode[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.using_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using", []), + expected_field="access_restrictions[].when.using_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.using[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.using[]", "__INVALID__"), + expected_field="access_restrictions[].when.using[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized", []), + 
expected_field="access_restrictions[].when.recognized_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.recognized[]:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.recognized[]", "__INVALID__"), + expected_field="access_restrictions[].when.recognized[]", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_min_length:array_min_length", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle", []), + expected_field="access_restrictions[].when.vehicle_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].dimension", None), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].dimension:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].dimension", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].dimension", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:required", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].comparison", None), + 
expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].comparison:enum", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"vehicle": [{}]}} + ] + }, + mutate=set_at_path( + "access_restrictions[].when.vehicle[].comparison", "__INVALID__" + ), + expected_field="access_restrictions[].when.vehicle[].comparison", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "axle_count"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", None), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].value:bounds", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].value", -1.0), + expected_field="access_restrictions[].when.vehicle[].value", + expected_check="bounds", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + 
expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "height"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:required_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", None), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="required", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle[].unit:enum_1", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": {"vehicle": [{"dimension": "weight"}]}, + } + ] + }, + mutate=set_at_path("access_restrictions[].when.vehicle[].unit", "__INVALID__"), + expected_field="access_restrictions[].when.vehicle[].unit", + expected_check="enum", + ), + Scenario( + id="segment::connectors_min_length:array_min_length", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=set_at_path("connectors", []), + expected_field="connectors_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::connectors[].connector_id:required", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", None), + expected_field="connectors[].connector_id", + expected_check="required", + ), + Scenario( + id="segment::connectors[].connector_id:string_min_length", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", ""), + expected_field="connectors[].connector_id", + expected_check="string_min_length", + ), + Scenario( + 
id="segment::connectors[].connector_id:no_whitespace", + scaffold={"connectors": [{"connector_id": "a"}]}, + mutate=set_at_path("connectors[].connector_id", "has whitespace"), + expected_field="connectors[].connector_id", + expected_check="no_whitespace", + ), + Scenario( + id="segment::connectors[].at:bounds", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", -1.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::connectors[].at:bounds_1", + scaffold={"connectors": [{"connector_id": "a", "at": 0.0}]}, + mutate=set_at_path("connectors[].at", 2.0), + expected_field="connectors[].at", + expected_check="bounds", + ), + Scenario( + id="segment::level_rules[].value:required", + scaffold={"level_rules": [{"value": 0}]}, + mutate=set_at_path("level_rules[].value", None), + expected_field="level_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::level_rules[].between:linear_range_length", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.5]), + expected_field="level_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::level_rules[].between:linear_range_bounds", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [1.5, 2.0]), + expected_field="level_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::level_rules[].between:linear_range_order", + scaffold={"level_rules": [{"value": 0, "between": [0.0, 1.0]}]}, + mutate=set_at_path("level_rules[].between", [0.8, 0.2]), + expected_field="level_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::routes[].name:string_min_length", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", ""), + expected_field="routes[].name", + 
expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].name:stripped", + scaffold={"routes": [{"name": "a"}]}, + mutate=set_at_path("routes[].name", " has spaces "), + expected_field="routes[].name", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].network:string_min_length", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", ""), + expected_field="routes[].network", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].network:stripped", + scaffold={"routes": [{"network": "a"}]}, + mutate=set_at_path("routes[].network", " has spaces "), + expected_field="routes[].network", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].ref:string_min_length", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", ""), + expected_field="routes[].ref", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].ref:stripped", + scaffold={"routes": [{"ref": "a"}]}, + mutate=set_at_path("routes[].ref", " has spaces "), + expected_field="routes[].ref", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].symbol:string_min_length", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", ""), + expected_field="routes[].symbol", + expected_check="string_min_length", + ), + Scenario( + id="segment::routes[].symbol:stripped", + scaffold={"routes": [{"symbol": "a"}]}, + mutate=set_at_path("routes[].symbol", " has spaces "), + expected_field="routes[].symbol", + expected_check="stripped", + ), + Scenario( + id="segment::routes[].wikidata:wikidata_id", + scaffold={"routes": [{"wikidata": "Q42"}]}, + mutate=set_at_path("routes[].wikidata", "P999"), + expected_field="routes[].wikidata", + expected_check="wikidata_id", + ), + Scenario( + id="segment::routes[].between:linear_range_length", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.5]), + 
expected_field="routes[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::routes[].between:linear_range_bounds", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [1.5, 2.0]), + expected_field="routes[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::routes[].between:linear_range_order", + scaffold={"routes": [{"between": [0.0, 1.0]}]}, + mutate=set_at_path("routes[].between", [0.8, 0.2]), + expected_field="routes[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::subclass_rules[].value:required", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", None), + expected_field="subclass_rules[].value", + expected_check="required", + ), + Scenario( + id="segment::subclass_rules[].value:enum", + scaffold={"subclass_rules": [{"value": "link"}]}, + mutate=set_at_path("subclass_rules[].value", "__INVALID__"), + expected_field="subclass_rules[].value", + expected_check="enum", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_length", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.5]), + expected_field="subclass_rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_bounds", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [1.5, 2.0]), + expected_field="subclass_rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::subclass_rules[].between:linear_range_order", + scaffold={"subclass_rules": [{"value": "link", "between": [0.0, 1.0]}]}, + mutate=set_at_path("subclass_rules[].between", [0.8, 0.2]), + expected_field="subclass_rules[].between", + expected_check="linear_range_order", + ), + Scenario( + 
id="segment::names.primary:required", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", None), + expected_field="names.primary", + expected_check="required", + ), + Scenario( + id="segment::names.primary:string_min_length", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", ""), + expected_field="names.primary", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.primary:stripped", + scaffold={"names": {"primary": "a"}}, + mutate=set_at_path("names.primary", " has spaces "), + expected_field="names.primary", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].value:required", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", None), + expected_field="names.rules[].value", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].value:string_min_length", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", ""), + expected_field="names.rules[].value", + expected_check="string_min_length", + ), + Scenario( + id="segment::names.rules[].value:stripped", + scaffold={ + "names": {"primary": "a", "rules": [{"variant": "common", "value": "a"}]} + }, + mutate=set_at_path("names.rules[].value", " has spaces "), + expected_field="names.rules[].value", + expected_check="stripped", + ), + Scenario( + id="segment::names.rules[].variant:required", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", None), + expected_field="names.rules[].variant", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].variant:enum", + scaffold={ + "names": {"primary": "a", "rules": [{"value": "a", "variant": "common"}]} + }, + mutate=set_at_path("names.rules[].variant", "__INVALID__"), + 
expected_field="names.rules[].variant", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].language:language_tag", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "language": "en"}], + } + }, + mutate=set_at_path("names.rules[].language", "123"), + expected_field="names.rules[].language", + expected_check="language_tag", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", None), + expected_field="names.rules[].perspectives.mode", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.mode:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"countries": ["US"], "mode": "accepted_by"}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.mode", "__INVALID__"), + expected_field="names.rules[].perspectives.mode", + expected_check="enum", + ), + Scenario( + id="segment::names.rules[].perspectives.countries:required", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", None), + expected_field="names.rules[].perspectives.countries", + expected_check="required", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_min_length:array_min_length", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries", []), + 
expected_field="names.rules[].perspectives.countries_min_length", + expected_check="array_min_length", + ), + Scenario( + id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=set_at_path("names.rules[].perspectives.countries[]", "99"), + expected_field="names.rules[].perspectives.countries[]", + expected_check="country_code_alpha2", + ), + Scenario( + id="segment::names.rules[].between:linear_range_length", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.5]), + expected_field="names.rules[].between", + expected_check="linear_range_length", + ), + Scenario( + id="segment::names.rules[].between:linear_range_bounds", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [1.5, 2.0]), + expected_field="names.rules[].between", + expected_check="linear_range_bounds", + ), + Scenario( + id="segment::names.rules[].between:linear_range_order", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "between": [0.0, 1.0]}], + } + }, + mutate=set_at_path("names.rules[].between", [0.8, 0.2]), + expected_field="names.rules[].between", + expected_check="linear_range_order", + ), + Scenario( + id="segment::names.rules[].side:enum", + scaffold={ + "names": { + "primary": "a", + "rules": [{"value": "a", "variant": "common", "side": "left"}], + } + }, + mutate=set_at_path("names.rules[].side", "__INVALID__"), + expected_field="names.rules[].side", + expected_check="enum", + ), + Scenario( + id="segment::model:forbid_if:0", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + 
mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:1", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:2", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:3", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:4", + scaffold={"access_restrictions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="access_restrictions", + inner_array_path="when.vehicle", + ), + expected_field="access_restrictions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:5", + scaffold={"access_restrictions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", 
"using", "recognized", "vehicle"], + array_path="access_restrictions", + struct_path="when", + ), + expected_field="access_restrictions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:6", + scaffold={"destinations": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["labels", "symbols"], array_path="destinations" + ), + expected_field="destinations[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:7", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:8", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:9", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:10", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", + 
expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:11", + scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="prohibited_transitions", + inner_array_path="when.vehicle", + ), + expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:12", + scaffold={"prohibited_transitions": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="prohibited_transitions", + struct_path="when", + ), + expected_field="prohibited_transitions[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:13", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_forbid_if( + row, + ["unit"], + "dimension", + "axle_count", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:14", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "height", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:15", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "length", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:16", + scaffold={"speed_limits": [{"when": 
{"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "weight", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_2", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:17", + scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, + mutate=lambda row: mutate_require_if( + row, + ["unit"], + "dimension", + "width", + array_path="speed_limits", + inner_array_path="when.vehicle", + ), + expected_field="speed_limits[].when.vehicle[].unit_required_3", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_any_of:18", + scaffold={"speed_limits": [{"when": {}}]}, + mutate=lambda row: mutate_require_any_of( + row, + ["heading", "during", "mode", "using", "recognized", "vehicle"], + array_path="speed_limits", + struct_path="when", + ), + expected_field="speed_limits[].when", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:require_any_of:19", + scaffold={"speed_limits": [{}]}, + mutate=lambda row: mutate_require_any_of( + row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" + ), + expected_field="speed_limits[]", + expected_check="require_any_of", + ), + Scenario( + id="segment::model:forbid_if:20", + scaffold={}, + mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), + expected_field="class_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:require_if:21", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), + expected_field="class_required_0", + expected_check="require_if", + ), + Scenario( + id="segment::model:require_if:22", + scaffold={}, + mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), + expected_field="class_required_1", + expected_check="require_if", + ), + Scenario( + id="segment::model:forbid_if:23", + scaffold={}, + mutate=lambda row: 
mutate_forbid_if( + row, + ["destinations"], + "subtype", + "road", + negate=True, + fill_values={"destinations": [{}]}, + ), + expected_field="destinations_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:24", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["prohibited_transitions"], + "subtype", + "road", + negate=True, + fill_values={"prohibited_transitions": [{}]}, + ), + expected_field="prohibited_transitions_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:25", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_flags"], + "subtype", + "road", + negate=True, + fill_values={"road_flags": [{}]}, + ), + expected_field="road_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:26", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["road_surface"], + "subtype", + "road", + negate=True, + fill_values={"road_surface": [{}]}, + ), + expected_field="road_surface_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:27", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["speed_limits"], + "subtype", + "road", + negate=True, + fill_values={"speed_limits": [{}]}, + ), + expected_field="speed_limits_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:28", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, ["subclass"], "subtype", "road", negate=True + ), + expected_field="subclass_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:29", + scaffold={}, + mutate=lambda row: mutate_forbid_if( + row, + ["width_rules"], + "subtype", + "road", + negate=True, + fill_values={"width_rules": [{}]}, + ), + expected_field="width_rules_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::model:forbid_if:30", + scaffold={}, + mutate=lambda row: mutate_forbid_if( 
+ row, + ["rail_flags"], + "subtype", + "rail", + negate=True, + fill_values={"rail_flags": [{}]}, + ), + expected_field="rail_flags_forbidden", + expected_check="forbid_if", + ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + 
"access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), +] + + +@pytest.fixture(scope="module") +def checks() -> list: + return segment_checks() + + +@pytest.fixture(scope="module") +def sparse_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_SPARSE, + SCENARIOS, + feature_name="segment", + ) + + +@pytest.fixture(scope="module") +def populated_results(spark: SparkSession, checks: list) -> ValidationResults: + return run_validation_pipeline( + spark, + SEGMENT_SCHEMA, + checks, + BASE_ROW_POPULATED, + SCENARIOS, + feature_name="segment", + ) + + +def test_baseline_sparse(sparse_results: ValidationResults) -> None: + """Sparse base row passes every check the codegen produced. 
+ + Catches drift between base_row synthesis, schema_builder, and + check_builder -- if any of those produce output inconsistent with + the others (e.g. a check that rejects values the synthesizer emits + for required-only fields), the baseline fails here before any + scenario runs. + """ + baseline = sparse_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Sparse baseline has violations: {baseline}" + + +def test_baseline_populated(populated_results: ValidationResults) -> None: + """Fully-populated base row passes every check the codegen produced. + + Mirrors `test_baseline_sparse` but with all optional fields + filled, exercising codegen paths that only fire when a value is + present. + """ + baseline = populated_results.violations.get("segment::baseline", set()) + assert baseline == set(), f"Populated baseline has violations: {baseline}" + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_sparse( + scenario: Scenario, + sparse_results: ValidationResults, +) -> None: + _assert_scenario(scenario, sparse_results) + + +@pytest.mark.parametrize("scenario", SCENARIOS, ids=lambda s: s.id) +def test_scenario_populated( + scenario: Scenario, + populated_results: ValidationResults, +) -> None: + _assert_scenario(scenario, populated_results) + + +def _assert_scenario( + scenario: Scenario, + validation_results: ValidationResults, +) -> None: + expected = (scenario.expected_field, scenario.expected_check) + if scenario.id in validation_results.skipped: + pytest.skip(validation_results.skipped[scenario.id]) + valid_violations = validation_results.violations.get(f"{scenario.id}::valid", set()) + assert expected not in valid_violations + invalid_violations = validation_results.violations.get( + f"{scenario.id}::invalid", set() + ) + assert expected in invalid_violations From 1da7d9e0256bc99e46b11b421c833652e188190b Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 13 May 2026 09:39:14 -0700 
Subject: [PATCH 6/8] chore(ci): Pin Java 17 for lowest-direct cell PySpark 3.4 (the declared floor) doesn't run on Java 21, the default JDK on ubuntu-latest runners -- it hits NoSuchMethodException on java.nio.DirectByteBuffer.<init>(long, int), removed in JDK 21. Pin the lowest-direct cell to Java 17 so the resolved pyspark==3.4.0 can actually start. The default cell (which resolves to a current pyspark 4.x) keeps the runner's default Java 21. Signed-off-by: Seth Fitzsimmons --- .github/workflows/check-python-code.yaml | 15 +- .../overture/schema/addresses/test_address.py | 14 +- .../overture/schema/base/test_bathymetry.py | 14 +- .../schema/base/test_infrastructure.py | 54 +- .../overture/schema/base/test_land.py | 54 +- .../overture/schema/base/test_land_cover.py | 14 +- .../overture/schema/base/test_land_use.py | 54 +- .../overture/schema/base/test_water.py | 54 +- .../schema/buildings/test_building.py | 54 +- .../schema/buildings/test_building_part.py | 54 +- .../schema/divisions/test_division.py | 124 ++-- .../schema/divisions/test_division_area.py | 54 +- .../divisions/test_division_boundary.py | 42 +- .../overture/schema/places/test_place.py | 214 +++--- .../schema/transportation/test_connector.py | 14 +- .../transportation/test_segment_rail.py | 434 ++++--------- .../transportation/test_segment_road.py | 612 +++++++++--------- .../transportation/test_segment_water.py | 406 ++++-------- 18 files changed, 964 insertions(+), 1317 deletions(-) diff --git a/.github/workflows/check-python-code.yaml b/.github/workflows/check-python-code.yaml index b3edac612..bf3c54aee 100644 --- a/.github/workflows/check-python-code.yaml +++ b/.github/workflows/check-python-code.yaml @@ -31,7 +31,8 @@ jobs: # Default resolution exercises the committed lock against every # supported Python minor version. The lowest-direct cell pins each # direct dependency to its declared floor (see UV_RESOLUTION below) - # and runs only on the Python floor.
+ # and runs only on the Python floor, since the resolved-low pyspark + # 3.4 wheels exist for 3.10/3.11 only. python: ["3.10", "3.11", "3.12", "3.13", "3.14"] resolution: [default] include: @@ -56,9 +57,19 @@ jobs: with: python-version: ${{ matrix.python }} + # PySpark 3.4 (the declared minimum) does not support Java 21, which is + # the default JDK on ubuntu-latest runners. Pin to Java 17 for the + # lowest-direct cell so the resolved pyspark==3.4.0 can actually start. + - name: Set up JDK 17 + if: matrix.resolution == 'lowest-direct' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + distribution: temurin + java-version: '17' + # UV_RESOLUTION=lowest-direct makes `uv sync` re-resolve every direct # dependency to the lowest version permitted by pyproject.toml. This - # exercises the declared floor (e.g. pydantic==2.12.0) instead of + # exercises the declared floor (e.g. pyspark==3.4.0) instead of # whatever the committed lock happens to point at. Failures here mean # a direct dep's minimum needs to be bumped. 
Set via GITHUB_ENV only # in the relevant cell so default cells run with no UV_RESOLUTION at diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py index b8da5893d..3c5b66709 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/addresses/test_address.py @@ -169,6 +169,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="address::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="address::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -368,13 +375,6 @@ expected_field="unit", expected_check="stripped", ), - Scenario( - id="address::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py index eddc5ff2a..9fdc1679d 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_bathymetry.py @@ -164,6 +164,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="bathymetry::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + 
mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="bathymetry::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -307,13 +314,6 @@ expected_field="cartography.max_zoom", expected_check="bounds", ), - Scenario( - id="bathymetry::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py index ff98049f6..33839d9ee 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_infrastructure.py @@ -184,6 +184,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="infrastructure::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="infrastructure::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -463,6 +470,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, 
"names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="infrastructure::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -536,33 +563,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="infrastructure::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="infrastructure::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py index 6b07a4fdc..2784a5806 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land.py @@ -182,6 +182,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="land::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -447,6 +454,26 @@ 
expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="land::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="land::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -520,33 +547,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="land::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="land::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py index c2783e05c..962268634 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_cover.py @@ -164,6 +164,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + 
id="land_cover::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land_cover::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -307,13 +314,6 @@ expected_field="cartography.max_zoom", expected_check="bounds", ), - Scenario( - id="land_cover::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py index f19165178..19bd140a9 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_land_use.py @@ -184,6 +184,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="land_use::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="land_use::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -463,6 +470,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="land_use::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": 
["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="land_use::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -536,33 +563,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="land_use::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="land_use::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py index 1c460c47f..9a1c9d57e 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/base/test_water.py @@ -182,6 +182,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="water::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="water::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": 
"/valid/pointer"}]}, @@ -433,6 +440,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="water::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="water::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -506,33 +533,6 @@ expected_field="wikidata", expected_check="wikidata_id", ), - Scenario( - id="water::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="water::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py index ebfd4a131..9102f235c 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building.py @@ -193,6 +193,13 @@ expected_field="sources_min_length", 
expected_check="array_min_length", ), + Scenario( + id="building::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="building::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -444,6 +451,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="building::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="building::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -594,33 +621,6 @@ expected_field="roof_color", expected_check="hex_color", ), - Scenario( - id="building::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="building::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git 
a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py index 73ab44863..45589511f 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/buildings/test_building_part.py @@ -192,6 +192,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="building_part::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="building_part::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -450,6 +457,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="building_part::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="building_part::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -600,33 +627,6 @@ expected_field="roof_color", expected_check="hex_color", ), - Scenario( - id="building_part::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - 
id="building_part::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py index 399495474..0c16e74d7 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division.py @@ -286,6 +286,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -463,6 +483,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="division::sources[].property:required", 
scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -592,6 +619,13 @@ expected_field="hierarchies_min_length", expected_check="array_min_length", ), + Scenario( + id="division::hierarchies_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies"), + expected_field="hierarchies_unique", + expected_check="struct_unique", + ), Scenario( id="division::hierarchies[]_min_length:array_min_length", scaffold={}, @@ -599,6 +633,13 @@ expected_field="hierarchies[]_min_length", expected_check="array_min_length", ), + Scenario( + id="division::hierarchies[]_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), + expected_field="hierarchies[]_unique", + expected_check="struct_unique", + ), Scenario( id="division::hierarchies[][].division_id:required", scaffold={ @@ -741,6 +782,13 @@ expected_field="perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division::perspectives.countries[]:country_code_alpha2", scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, @@ -769,6 +817,13 @@ expected_field="capital_division_ids_min_length", expected_check="array_min_length", ), + Scenario( + id="division::capital_division_ids_unique:struct_unique", + scaffold={"capital_division_ids": ["a"]}, + mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), + expected_field="capital_division_ids_unique", + expected_check="struct_unique", + ), Scenario( id="division::capital_division_ids[]:string_min_length", scaffold={"capital_division_ids": ["a"]}, @@ -790,6 +845,13 @@ expected_field="capital_of_divisions_min_length", 
expected_check="array_min_length", ), + Scenario( + id="division::capital_of_divisions_unique:struct_unique", + scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, + mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), + expected_field="capital_of_divisions_unique", + expected_check="struct_unique", + ), Scenario( id="division::capital_of_divisions[].division_id:required", scaffold={"capital_of_divisions": [{"subtype": "country", "division_id": "a"}]}, @@ -900,68 +962,6 @@ expected_field="parent_division_id_forbidden", expected_check="forbid_if", ), - Scenario( - id="division::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::hierarchies_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "hierarchies"), - expected_field="hierarchies_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::hierarchies[]_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "hierarchies[]"), - expected_field="hierarchies[]_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::perspectives.countries_unique:struct_unique", - scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, - mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), - 
expected_field="perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::capital_division_ids_unique:struct_unique", - scaffold={"capital_division_ids": ["a"]}, - mutate=lambda row: mutate_unique_items(row, "capital_division_ids"), - expected_field="capital_division_ids_unique", - expected_check="struct_unique", - ), - Scenario( - id="division::capital_of_divisions_unique:struct_unique", - scaffold={"capital_of_divisions": [{"division_id": "a", "subtype": "country"}]}, - mutate=lambda row: mutate_unique_items(row, "capital_of_divisions"), - expected_field="capital_of_divisions_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py index 9f4d8e2f8..d9170b23b 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_area.py @@ -240,6 +240,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division_area::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division_area::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -417,6 +437,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division_area::sources_unique:struct_unique", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="division_area::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -645,33 +672,6 @@ expected_field="admin_level_required_5", expected_check="require_if", ), - Scenario( - id="division_area::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_area::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py index 27e05e731..41f9d3d59 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/divisions/test_division_boundary.py @@ -180,6 +180,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="division_boundary::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + 
), Scenario( id="division_boundary::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -316,6 +323,13 @@ expected_field="division_ids_max_length", expected_check="array_max_length", ), + Scenario( + id="division_boundary::division_ids_unique:struct_unique", + scaffold={}, + mutate=lambda row: mutate_unique_items(row, "division_ids"), + expected_field="division_ids_unique", + expected_check="struct_unique", + ), Scenario( id="division_boundary::division_ids[]:string_min_length", scaffold={}, @@ -386,6 +400,13 @@ expected_field="perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="division_boundary::perspectives.countries_unique:struct_unique", + scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, + mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), + expected_field="perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="division_boundary::perspectives.countries[]:country_code_alpha2", scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, @@ -466,27 +487,6 @@ expected_field="country_forbidden", expected_check="forbid_if", ), - Scenario( - id="division_boundary::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_boundary::division_ids_unique:struct_unique", - scaffold={}, - mutate=lambda row: mutate_unique_items(row, "division_ids"), - expected_field="division_ids_unique", - expected_check="struct_unique", - ), - Scenario( - id="division_boundary::perspectives.countries_unique:struct_unique", - scaffold={"perspectives": {"mode": "accepted_by", "countries": ["US"]}}, - mutate=lambda row: mutate_unique_items(row, "perspectives.countries"), - 
expected_field="perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py index ad8fc0002..b8a128bb0 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/places/test_place.py @@ -214,6 +214,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="place::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="place::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -322,6 +329,13 @@ expected_field="categories.primary", expected_check="snake_case", ), + Scenario( + id="place::categories.alternate_unique:struct_unique", + scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "categories.alternate"), + expected_field="categories.alternate_unique", + expected_check="struct_unique", + ), Scenario( id="place::categories.alternate[]:snake_case", scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, @@ -364,6 +378,13 @@ expected_field="taxonomy.hierarchy_min_length", expected_check="array_min_length", ), + Scenario( + id="place::taxonomy.hierarchy_unique:struct_unique", + scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, + mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), + expected_field="taxonomy.hierarchy_unique", + expected_check="struct_unique", + ), Scenario( id="place::taxonomy.hierarchy[]:snake_case", scaffold={"taxonomy": 
{"primary": "snake_case", "hierarchy": ["snake_case"]}}, @@ -384,6 +405,19 @@ expected_field="taxonomy.alternates_min_length", expected_check="array_min_length", ), + Scenario( + id="place::taxonomy.alternates_unique:struct_unique", + scaffold={ + "taxonomy": { + "primary": "snake_case", + "hierarchy": ["snake_case"], + "alternates": ["snake_case"], + } + }, + mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), + expected_field="taxonomy.alternates_unique", + expected_check="struct_unique", + ), Scenario( id="place::taxonomy.alternates[]:snake_case", scaffold={ @@ -418,6 +452,13 @@ expected_field="websites_min_length", expected_check="array_min_length", ), + Scenario( + id="place::websites_unique:struct_unique", + scaffold={"websites": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "websites"), + expected_field="websites_unique", + expected_check="struct_unique", + ), Scenario( id="place::websites[]:url_format", scaffold={"websites": ["https://example.com/"]}, @@ -442,6 +483,13 @@ expected_field="socials_min_length", expected_check="array_min_length", ), + Scenario( + id="place::socials_unique:struct_unique", + scaffold={"socials": ["https://example.com/"]}, + mutate=lambda row: mutate_unique_items(row, "socials"), + expected_field="socials_unique", + expected_check="struct_unique", + ), Scenario( id="place::socials[]:url_format", scaffold={"socials": ["https://example.com/"]}, @@ -466,6 +514,13 @@ expected_field="emails_min_length", expected_check="array_min_length", ), + Scenario( + id="place::emails_unique:struct_unique", + scaffold={"emails": ["user@example.com"]}, + mutate=lambda row: mutate_unique_items(row, "emails"), + expected_field="emails_unique", + expected_check="struct_unique", + ), Scenario( id="place::emails[]:email", scaffold={"emails": ["user@example.com"]}, @@ -480,6 +535,13 @@ expected_field="phones_min_length", expected_check="array_min_length", ), + Scenario( + id="place::phones_unique:struct_unique", 
+ scaffold={"phones": ["+1 555-555-5555"]}, + mutate=lambda row: mutate_unique_items(row, "phones"), + expected_field="phones_unique", + expected_check="struct_unique", + ), Scenario( id="place::phones[]:phone_number", scaffold={"phones": ["+1 555-555-5555"]}, @@ -684,6 +746,31 @@ expected_field="brand.names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "brand": { + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": { + "mode": "accepted_by", + "countries": ["US"], + }, + } + ], + } + } + }, + mutate=lambda row: mutate_unique_items( + row, "brand.names.rules[].perspectives.countries" + ), + expected_field="brand.names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="place::brand.names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -947,6 +1034,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="place::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="place::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1013,113 +1120,6 @@ expected_field="names.rules[].side", expected_check="enum", ), - Scenario( - id="place::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - 
expected_check="struct_unique", - ), - Scenario( - id="place::categories.alternate_unique:struct_unique", - scaffold={"categories": {"primary": "snake_case", "alternate": ["snake_case"]}}, - mutate=lambda row: mutate_unique_items(row, "categories.alternate"), - expected_field="categories.alternate_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::taxonomy.hierarchy_unique:struct_unique", - scaffold={"taxonomy": {"primary": "snake_case", "hierarchy": ["snake_case"]}}, - mutate=lambda row: mutate_unique_items(row, "taxonomy.hierarchy"), - expected_field="taxonomy.hierarchy_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::taxonomy.alternates_unique:struct_unique", - scaffold={ - "taxonomy": { - "primary": "snake_case", - "hierarchy": ["snake_case"], - "alternates": ["snake_case"], - } - }, - mutate=lambda row: mutate_unique_items(row, "taxonomy.alternates"), - expected_field="taxonomy.alternates_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::websites_unique:struct_unique", - scaffold={"websites": ["https://example.com/"]}, - mutate=lambda row: mutate_unique_items(row, "websites"), - expected_field="websites_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::socials_unique:struct_unique", - scaffold={"socials": ["https://example.com/"]}, - mutate=lambda row: mutate_unique_items(row, "socials"), - expected_field="socials_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::emails_unique:struct_unique", - scaffold={"emails": ["user@example.com"]}, - mutate=lambda row: mutate_unique_items(row, "emails"), - expected_field="emails_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::phones_unique:struct_unique", - scaffold={"phones": ["+1 555-555-5555"]}, - mutate=lambda row: mutate_unique_items(row, "phones"), - expected_field="phones_unique", - expected_check="struct_unique", - ), - Scenario( - 
id="place::brand.names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "brand": { - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": { - "mode": "accepted_by", - "countries": ["US"], - }, - } - ], - } - } - }, - mutate=lambda row: mutate_unique_items( - row, "brand.names.rules[].perspectives.countries" - ), - expected_field="brand.names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="place::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py index 6552a950a..7fb7739ad 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_connector.py @@ -161,6 +161,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="connector::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="connector::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -248,13 +255,6 @@ expected_field="sources[].between", expected_check="linear_range_order", ), - 
Scenario( - id="connector::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py index 22cfd600b..fda44888d 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_rail.py @@ -219,6 +219,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -327,6 +334,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -390,6 +404,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + 
{"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -412,6 +437,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", scaffold={ @@ -434,6 +470,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -468,6 +517,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, 
"access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -621,6 +695,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -953,6 +1034,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1040,6 +1141,13 @@ expected_field="rail_flags_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::rail_flags_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags"), + expected_field="rail_flags_unique", + expected_check="struct_unique", + ), Scenario( id="segment::rail_flags[].values:required", scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, @@ -1054,6 +1162,13 @@ expected_field="rail_flags[].values_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::rail_flags[].values_unique:struct_unique", + scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), + expected_field="rail_flags[].values_unique", + expected_check="struct_unique", + ), Scenario( id="segment::rail_flags[].values[]:enum", scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, @@ -1165,210 +1280,28 @@ expected_check="require_any_of", ), Scenario( - id="segment::model:require_any_of:6", - scaffold={"destinations": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["labels", "symbols"], array_path="destinations" - ), - expected_field="destinations[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:7", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:8", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:9", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:10", - scaffold={"prohibited_transitions": [{"when": {"vehicle": 
[{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:11", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:12", - scaffold={"prohibited_transitions": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="prohibited_transitions", - struct_path="when", - ), - expected_field="prohibited_transitions[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:13", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:14", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:15", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - 
array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:16", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:17", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:18", - scaffold={"speed_limits": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="speed_limits", - struct_path="when", - ), - expected_field="speed_limits[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:require_any_of:19", - scaffold={"speed_limits": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" - ), - expected_field="speed_limits[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:20", + id="segment::model:forbid_if:6", scaffold={}, mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), expected_field="class_forbidden", expected_check="forbid_if", ), Scenario( - id="segment::model:require_if:21", + id="segment::model:require_if:7", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), expected_field="class_required_0", 
expected_check="require_if", ), Scenario( - id="segment::model:require_if:22", + id="segment::model:require_if:8", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), expected_field="class_required_1", expected_check="require_if", ), Scenario( - id="segment::model:forbid_if:23", + id="segment::model:forbid_if:9", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1382,7 +1315,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:24", + id="segment::model:forbid_if:10", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1396,7 +1329,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:25", + id="segment::model:forbid_if:11", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1410,7 +1343,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:26", + id="segment::model:forbid_if:12", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1424,7 +1357,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:27", + id="segment::model:forbid_if:13", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1438,7 +1371,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:28", + id="segment::model:forbid_if:14", scaffold={}, mutate=lambda row: mutate_forbid_if( row, ["subclass"], "subtype", "road", negate=True @@ -1447,7 +1380,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:29", + id="segment::model:forbid_if:15", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1461,7 +1394,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:30", + id="segment::model:forbid_if:16", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1474,121 +1407,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": 
""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - "access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - 
expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::rail_flags_unique:struct_unique", - scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "rail_flags"), - expected_field="rail_flags_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::rail_flags[].values_unique:struct_unique", - scaffold={"rail_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "rail_flags[].values"), - expected_field="rail_flags[].values_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py index 0a8d0a946..137862634 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_road.py @@ -278,6 +278,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + 
scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -386,6 +393,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -449,6 +463,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -471,6 +496,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", 
scaffold={ @@ -493,6 +529,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -527,6 +576,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -680,6 +754,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -1012,6 +1093,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": {"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1285,6 +1386,23 @@ expected_field="destinations[].labels_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::destinations[].labels_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "labels": [{"value": "a", "type": "street"}], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), + expected_field="destinations[].labels_unique", + expected_check="struct_unique", + ), Scenario( id="segment::destinations[].labels[].value:required", scaffold={ @@ -1370,6 +1488,23 @@ expected_field="destinations[].labels[].type", expected_check="enum", ), + Scenario( + id="segment::destinations[].symbols_unique:struct_unique", + scaffold={ + "destinations": [ + { + "from_connector_id": "a", + "to_connector_id": "a", + "to_segment_id": "a", + "final_heading": "forward", + "symbols": ["motorway"], + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), + expected_field="destinations[].symbols_unique", + expected_check="struct_unique", + ), Scenario( id="segment::destinations[].symbols[]:enum", scaffold={ @@ -1449,6 +1584,22 @@ expected_field="prohibited_transitions[].sequence_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].sequence_unique:struct_unique", + scaffold={ + 
"prohibited_transitions": [ + { + "final_heading": "forward", + "sequence": [{"connector_id": "a", "segment_id": "a"}], + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].sequence" + ), + expected_field="prohibited_transitions[].sequence_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].sequence[].connector_id:required", scaffold={ @@ -1640,6 +1791,23 @@ expected_field="prohibited_transitions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.mode_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"mode": ["vehicle"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.mode" + ), + expected_field="prohibited_transitions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.mode[]:enum", scaffold={ @@ -1670,6 +1838,23 @@ expected_field="prohibited_transitions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.using_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"using": ["as_customer"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.using" + ), + expected_field="prohibited_transitions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.using[]:enum", scaffold={ @@ -1700,6 +1885,23 @@ expected_field="prohibited_transitions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", + scaffold={ + 
"prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": {"recognized": ["as_permitted"]}, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.recognized" + ), + expected_field="prohibited_transitions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.recognized[]:enum", scaffold={ @@ -1739,6 +1941,32 @@ expected_field="prohibited_transitions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", + scaffold={ + "prohibited_transitions": [ + { + "sequence": [{"connector_id": "a", "segment_id": "a"}], + "final_heading": "forward", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "prohibited_transitions[].when.vehicle" + ), + expected_field="prohibited_transitions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::prohibited_transitions[].when.vehicle[].dimension:required", scaffold={ @@ -1919,6 +2147,13 @@ expected_field="road_flags_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_flags_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: mutate_unique_items(row, "road_flags"), + expected_field="road_flags_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_flags[].values:required", scaffold={"road_flags": [{"values": ["is_bridge"]}]}, @@ -1933,6 +2168,13 @@ expected_field="road_flags[].values_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_flags[].values_unique:struct_unique", + scaffold={"road_flags": [{"values": ["is_bridge"]}]}, + mutate=lambda row: 
mutate_unique_items(row, "road_flags[].values"), + expected_field="road_flags[].values_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_flags[].values[]:enum", scaffold={"road_flags": [{"values": ["is_bridge"]}]}, @@ -1968,6 +2210,13 @@ expected_field="road_surface_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::road_surface_unique:struct_unique", + scaffold={"road_surface": [{"value": "unknown"}]}, + mutate=lambda row: mutate_unique_items(row, "road_surface"), + expected_field="road_surface_unique", + expected_check="struct_unique", + ), Scenario( id="segment::road_surface[].value:required", scaffold={"road_surface": [{"value": "unknown"}]}, @@ -2010,6 +2259,13 @@ expected_field="speed_limits_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits_unique:struct_unique", + scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits"), + expected_field="speed_limits_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].max_speed.value:required", scaffold={"speed_limits": [{"max_speed": {"unit": "mph", "value": 1}}]}, @@ -2115,6 +2371,13 @@ expected_field="speed_limits[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.mode_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), + expected_field="speed_limits[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.mode[]:enum", scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, @@ -2129,6 +2392,13 @@ expected_field="speed_limits[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.using_unique:struct_unique", + scaffold={"speed_limits": [{"when": 
{"using": ["as_customer"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.using"), + expected_field="speed_limits[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.using[]:enum", scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, @@ -2143,6 +2413,13 @@ expected_field="speed_limits[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.recognized_unique:struct_unique", + scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), + expected_field="speed_limits[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.recognized[]:enum", scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, @@ -2172,6 +2449,28 @@ expected_field="speed_limits[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::speed_limits[].when.vehicle_unique:struct_unique", + scaffold={ + "speed_limits": [ + { + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + } + } + ] + }, + mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), + expected_field="speed_limits[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::speed_limits[].when.vehicle[].dimension:required", scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, @@ -2265,6 +2564,13 @@ expected_field="width_rules_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::width_rules_unique:struct_unique", + scaffold={"width_rules": [{"value": 1.0}]}, + mutate=lambda row: mutate_unique_items(row, "width_rules"), + expected_field="width_rules_unique", + expected_check="struct_unique", + ), Scenario( 
id="segment::width_rules[].value:required", scaffold={"width_rules": [{"value": 1.0}]}, @@ -2692,312 +2998,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - 
"access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::destinations[].labels_unique:struct_unique", - scaffold={ - "destinations": [ - { - "from_connector_id": "a", - "to_connector_id": "a", - "to_segment_id": "a", - "final_heading": "forward", - "labels": [{"value": "a", "type": "street"}], - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "destinations[].labels"), - expected_field="destinations[].labels_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::destinations[].symbols_unique:struct_unique", - scaffold={ - "destinations": [ - { - "from_connector_id": "a", - "to_connector_id": "a", - "to_segment_id": "a", - "final_heading": "forward", - "symbols": ["motorway"], - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "destinations[].symbols"), - expected_field="destinations[].symbols_unique", - expected_check="struct_unique", - ), - 
Scenario( - id="segment::prohibited_transitions[].sequence_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "final_heading": "forward", - "sequence": [{"connector_id": "a", "segment_id": "a"}], - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].sequence" - ), - expected_field="prohibited_transitions[].sequence_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.mode_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"mode": ["vehicle"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.mode" - ), - expected_field="prohibited_transitions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.using_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"using": ["as_customer"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.using" - ), - expected_field="prohibited_transitions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.recognized_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": "a", "segment_id": "a"}], - "final_heading": "forward", - "when": {"recognized": ["as_permitted"]}, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.recognized" - ), - expected_field="prohibited_transitions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::prohibited_transitions[].when.vehicle_unique:struct_unique", - scaffold={ - "prohibited_transitions": [ - { - "sequence": [{"connector_id": 
"a", "segment_id": "a"}], - "final_heading": "forward", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "prohibited_transitions[].when.vehicle" - ), - expected_field="prohibited_transitions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_flags_unique:struct_unique", - scaffold={"road_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "road_flags"), - expected_field="road_flags_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_flags[].values_unique:struct_unique", - scaffold={"road_flags": [{"values": ["is_bridge"]}]}, - mutate=lambda row: mutate_unique_items(row, "road_flags[].values"), - expected_field="road_flags[].values_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::road_surface_unique:struct_unique", - scaffold={"road_surface": [{"value": "unknown"}]}, - mutate=lambda row: mutate_unique_items(row, "road_surface"), - expected_field="road_surface_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits_unique:struct_unique", - scaffold={"speed_limits": [{"max_speed": {"value": 1, "unit": "mph"}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits"), - expected_field="speed_limits_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.mode_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"mode": ["vehicle"]}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.mode"), - expected_field="speed_limits[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.using_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"using": ["as_customer"]}}]}, - mutate=lambda row: mutate_unique_items(row, 
"speed_limits[].when.using"), - expected_field="speed_limits[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.recognized_unique:struct_unique", - scaffold={"speed_limits": [{"when": {"recognized": ["as_permitted"]}}]}, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.recognized"), - expected_field="speed_limits[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::speed_limits[].when.vehicle_unique:struct_unique", - scaffold={ - "speed_limits": [ - { - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - } - } - ] - }, - mutate=lambda row: mutate_unique_items(row, "speed_limits[].when.vehicle"), - expected_field="speed_limits[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::width_rules_unique:struct_unique", - scaffold={"width_rules": [{"value": 1.0}]}, - mutate=lambda row: mutate_unique_items(row, "width_rules"), - expected_field="width_rules_unique", - expected_check="struct_unique", - ), ] diff --git a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py index bf3e6b1d6..bfbe81702 100644 --- a/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py +++ b/packages/overture-schema-pyspark/tests/generated/overture/schema/transportation/test_segment_water.py @@ -216,6 +216,13 @@ expected_field="sources_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::sources_unique:struct_unique", + scaffold={"sources": [{"property": "/valid/pointer", "dataset": ""}]}, + mutate=lambda row: mutate_unique_items(row, "sources"), + expected_field="sources_unique", + expected_check="struct_unique", + ), Scenario( 
id="segment::sources[].property:required", scaffold={"sources": [{"dataset": "", "property": "/valid/pointer"}]}, @@ -324,6 +331,13 @@ expected_field="access_restrictions_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions_unique:struct_unique", + scaffold={"access_restrictions": [{"access_type": "allowed"}]}, + mutate=lambda row: mutate_unique_items(row, "access_restrictions"), + expected_field="access_restrictions_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].access_type:required", scaffold={"access_restrictions": [{"access_type": "allowed"}]}, @@ -387,6 +401,17 @@ expected_field="access_restrictions[].when.mode_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.mode_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"mode": ["vehicle"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), + expected_field="access_restrictions[].when.mode_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.mode[]:enum", scaffold={ @@ -409,6 +434,17 @@ expected_field="access_restrictions[].when.using_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.using_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"using": ["as_customer"]}} + ] + }, + mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), + expected_field="access_restrictions[].when.using_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.using[]:enum", scaffold={ @@ -431,6 +467,19 @@ expected_field="access_restrictions[].when.recognized_min_length", expected_check="array_min_length", ), + Scenario( + 
id="segment::access_restrictions[].when.recognized_unique:struct_unique", + scaffold={ + "access_restrictions": [ + {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.recognized" + ), + expected_field="access_restrictions[].when.recognized_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.recognized[]:enum", scaffold={ @@ -465,6 +514,31 @@ expected_field="access_restrictions[].when.vehicle_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::access_restrictions[].when.vehicle_unique:struct_unique", + scaffold={ + "access_restrictions": [ + { + "access_type": "allowed", + "when": { + "vehicle": [ + { + "dimension": "height", + "comparison": "greater_than", + "value": 0.0, + "unit": "in", + } + ] + }, + } + ] + }, + mutate=lambda row: mutate_unique_items( + row, "access_restrictions[].when.vehicle" + ), + expected_field="access_restrictions[].when.vehicle_unique", + expected_check="struct_unique", + ), Scenario( id="segment::access_restrictions[].when.vehicle[].dimension:required", scaffold={ @@ -618,6 +692,13 @@ expected_field="connectors_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::connectors_unique:struct_unique", + scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, + mutate=lambda row: mutate_unique_items(row, "connectors"), + expected_field="connectors_unique", + expected_check="struct_unique", + ), Scenario( id="segment::connectors[].connector_id:required", scaffold={"connectors": [{"connector_id": "a"}]}, @@ -950,6 +1031,26 @@ expected_field="names.rules[].perspectives.countries_min_length", expected_check="array_min_length", ), + Scenario( + id="segment::names.rules[].perspectives.countries_unique:struct_unique", + scaffold={ + "names": { + "primary": "a", + "rules": [ + { + "value": "a", + "variant": "common", + "perspectives": 
{"mode": "accepted_by", "countries": ["US"]}, + } + ], + } + }, + mutate=lambda row: mutate_unique_items( + row, "names.rules[].perspectives.countries" + ), + expected_field="names.rules[].perspectives.countries_unique", + expected_check="struct_unique", + ), Scenario( id="segment::names.rules[].perspectives.countries[]:country_code_alpha2", scaffold={ @@ -1099,210 +1200,28 @@ expected_check="require_any_of", ), Scenario( - id="segment::model:require_any_of:6", - scaffold={"destinations": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["labels", "symbols"], array_path="destinations" - ), - expected_field="destinations[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:7", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:8", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:9", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:10", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - 
mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:11", - scaffold={"prohibited_transitions": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="prohibited_transitions", - inner_array_path="when.vehicle", - ), - expected_field="prohibited_transitions[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:12", - scaffold={"prohibited_transitions": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="prohibited_transitions", - struct_path="when", - ), - expected_field="prohibited_transitions[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:13", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_forbid_if( - row, - ["unit"], - "dimension", - "axle_count", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_forbidden", - expected_check="forbid_if", - ), - Scenario( - id="segment::model:require_if:14", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "height", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_0", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:15", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "length", - 
array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_1", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:16", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "weight", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_2", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_if:17", - scaffold={"speed_limits": [{"when": {"vehicle": [{}]}}]}, - mutate=lambda row: mutate_require_if( - row, - ["unit"], - "dimension", - "width", - array_path="speed_limits", - inner_array_path="when.vehicle", - ), - expected_field="speed_limits[].when.vehicle[].unit_required_3", - expected_check="require_if", - ), - Scenario( - id="segment::model:require_any_of:18", - scaffold={"speed_limits": [{"when": {}}]}, - mutate=lambda row: mutate_require_any_of( - row, - ["heading", "during", "mode", "using", "recognized", "vehicle"], - array_path="speed_limits", - struct_path="when", - ), - expected_field="speed_limits[].when", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:require_any_of:19", - scaffold={"speed_limits": [{}]}, - mutate=lambda row: mutate_require_any_of( - row, ["max_speed.value", "min_speed.value"], array_path="speed_limits" - ), - expected_field="speed_limits[]", - expected_check="require_any_of", - ), - Scenario( - id="segment::model:forbid_if:20", + id="segment::model:forbid_if:6", scaffold={}, mutate=lambda row: mutate_forbid_if(row, ["class"], "subtype", "water"), expected_field="class_forbidden", expected_check="forbid_if", ), Scenario( - id="segment::model:require_if:21", + id="segment::model:require_if:7", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "rail"), expected_field="class_required_0", 
expected_check="require_if", ), Scenario( - id="segment::model:require_if:22", + id="segment::model:require_if:8", scaffold={}, mutate=lambda row: mutate_require_if(row, ["class"], "subtype", "road"), expected_field="class_required_1", expected_check="require_if", ), Scenario( - id="segment::model:forbid_if:23", + id="segment::model:forbid_if:9", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1316,7 +1235,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:24", + id="segment::model:forbid_if:10", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1330,7 +1249,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:25", + id="segment::model:forbid_if:11", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1344,7 +1263,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:26", + id="segment::model:forbid_if:12", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1358,7 +1277,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:27", + id="segment::model:forbid_if:13", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1372,7 +1291,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:28", + id="segment::model:forbid_if:14", scaffold={}, mutate=lambda row: mutate_forbid_if( row, ["subclass"], "subtype", "road", negate=True @@ -1381,7 +1300,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:29", + id="segment::model:forbid_if:15", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1395,7 +1314,7 @@ expected_check="forbid_if", ), Scenario( - id="segment::model:forbid_if:30", + id="segment::model:forbid_if:16", scaffold={}, mutate=lambda row: mutate_forbid_if( row, @@ -1408,107 +1327,6 @@ expected_field="rail_flags_forbidden", expected_check="forbid_if", ), - Scenario( - id="segment::sources_unique:struct_unique", - scaffold={"sources": [{"property": "/valid/pointer", "dataset": 
""}]}, - mutate=lambda row: mutate_unique_items(row, "sources"), - expected_field="sources_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions_unique:struct_unique", - scaffold={"access_restrictions": [{"access_type": "allowed"}]}, - mutate=lambda row: mutate_unique_items(row, "access_restrictions"), - expected_field="access_restrictions_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.mode_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"mode": ["vehicle"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.mode"), - expected_field="access_restrictions[].when.mode_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.using_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"using": ["as_customer"]}} - ] - }, - mutate=lambda row: mutate_unique_items(row, "access_restrictions[].when.using"), - expected_field="access_restrictions[].when.using_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.recognized_unique:struct_unique", - scaffold={ - "access_restrictions": [ - {"access_type": "allowed", "when": {"recognized": ["as_permitted"]}} - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.recognized" - ), - expected_field="access_restrictions[].when.recognized_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::access_restrictions[].when.vehicle_unique:struct_unique", - scaffold={ - "access_restrictions": [ - { - "access_type": "allowed", - "when": { - "vehicle": [ - { - "dimension": "height", - "comparison": "greater_than", - "value": 0.0, - "unit": "in", - } - ] - }, - } - ] - }, - mutate=lambda row: mutate_unique_items( - row, "access_restrictions[].when.vehicle" - ), - 
expected_field="access_restrictions[].when.vehicle_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::connectors_unique:struct_unique", - scaffold={"connectors": [{"connector_id": "a"}, {"connector_id": "a1"}]}, - mutate=lambda row: mutate_unique_items(row, "connectors"), - expected_field="connectors_unique", - expected_check="struct_unique", - ), - Scenario( - id="segment::names.rules[].perspectives.countries_unique:struct_unique", - scaffold={ - "names": { - "primary": "a", - "rules": [ - { - "value": "a", - "variant": "common", - "perspectives": {"mode": "accepted_by", "countries": ["US"]}, - } - ], - } - }, - mutate=lambda row: mutate_unique_items( - row, "names.rules[].perspectives.countries" - ), - expected_field="names.rules[].perspectives.countries_unique", - expected_check="struct_unique", - ), ] From 1e3143c8b23169f0f8b52715fa2d890dbab65707 Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 20 May 2026 14:02:22 -0700 Subject: [PATCH 7/8] fix(pyspark): don't crash on absent input columns validate_feature built check expressions referencing every column the schema declares, then evaluated them with an eager df.select. When the input DataFrame lacked a declared column, Spark's plan analysis raised an AnalysisException before the caller could inspect the schema mismatch, so a file missing a required column produced a Java stack trace instead of the schema-mismatch report the CLI is built to emit. Columns that compare_schemas reports as absent from the data now have their checks dropped, the same as --skip-columns columns; referencing them is what crashes Spark. The mismatch is still recorded in schema_mismatches, so the CLI reports it and exits cleanly (or, with --skip-schema-check, validates the columns that are present). The CLI also prints the --skip-columns invocation for the absent columns, so the escape hatch is discoverable from the error itself. 
Signed-off-by: Seth Fitzsimmons --- .../src/overture/schema/pyspark/cli.py | 13 ++++++++++ .../src/overture/schema/pyspark/validate.py | 21 ++++++++++++++- .../overture-schema-pyspark/tests/test_cli.py | 18 +++++++++++++ .../tests/test_validate.py | 26 +++++++++++++++++++ 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py index 1a8ada445..2be4f9aeb 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/cli.py @@ -220,6 +220,19 @@ def validate_cli( click.echo(f"Schema mismatches for {resolved}:", err=True) for m in result.schema_mismatches: click.echo(f" {m.path}: expected {m.expected}, got {m.actual}", err=True) + absent_columns = list( + dict.fromkeys( + m.path.split(".", 1)[0] + for m in result.schema_mismatches + if m.actual == "missing" + ) + ) + if absent_columns: + flags = " ".join(f"--skip-columns {c}" for c in absent_columns) + click.echo( + f" Re-run with `{flags}` to skip missing columns.", + err=True, + ) if not skip_schema_check: sys.exit(1) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py index 9b03ed34b..0274d6c18 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/validate.py @@ -311,11 +311,30 @@ def validate_feature( + "; ".join(parts) ) + # Schema columns the data lacks. A check referencing an absent column + # raises an AnalysisException during Spark plan analysis, so such checks + # are dropped before evaluation via the `excluded` filter below -- the + # same filter skip_columns feeds. 
Only that check-filtering is shared: + # a skip_columns mismatch was suppressed by the loop above, so the + # caller sees no mismatch and validation continues; an absent-column + # mismatch stays in `mismatches` and is reported, so the caller (the + # CLI) aborts unless --skip-schema-check. `--skip-columns` opts into + # that suppression -- it is not a restatement of the default. + # `Check.root_field` is column-granular, so filtering is all-or-nothing: + # if the data has the `bbox` struct but is missing only `bbox.xmin`, + # every check whose root_field is `bbox` is dropped, including checks on + # sub-fields that are present. Finer granularity would require + # sub-column awareness in Check, which it deliberately lacks. + absent_columns = { + m.path.split(".", 1)[0] for m in mismatches if m.actual == "missing" + } + # Check filtering + excluded = skip | absent_columns kept: list[Check] = [] suppressed: list[Check] = [] for chk in all_checks: - if chk.root_field is not None and chk.root_field in skip: + if chk.root_field is not None and chk.root_field in excluded: continue # structurally absent, not tracked in suppressed if chk.root_field is not None and chk.root_field in suppress_roots: suppressed.append(chk) diff --git a/packages/overture-schema-pyspark/tests/test_cli.py b/packages/overture-schema-pyspark/tests/test_cli.py index 037d6aaeb..2caaf558d 100644 --- a/packages/overture-schema-pyspark/tests/test_cli.py +++ b/packages/overture-schema-pyspark/tests/test_cli.py @@ -247,6 +247,24 @@ def test_validate_skip_columns(spark: SparkSession, tmp_path: Path) -> None: assert "0 / 1 rows with errors" in result.output +def test_validate_missing_column_suggests_skip_columns( + spark: SparkSession, tmp_path: Path +) -> None: + """A column absent from the data hints the --skip-columns flag.""" + input_path = str(tmp_path / "input.parquet") + + # Data missing the 'value' column the schema expects + spark.createDataFrame([Row(id="r1", theme="test", 
type="test_cli")]).write.parquet( + input_path + ) + + runner = CliRunner() + result = runner.invoke(validate_cli, [_TEST_TYPE, input_path]) + assert result.exit_code != 0 + assert "Schema mismatch" in result.output + assert "--skip-columns value" in result.output + + def test_validate_ignore_extra_columns(spark: SparkSession, tmp_path: Path) -> None: """--ignore-extra-columns suppresses 'expected missing' schema mismatches.""" input_path = str(tmp_path / "input.parquet") diff --git a/packages/overture-schema-pyspark/tests/test_validate.py b/packages/overture-schema-pyspark/tests/test_validate.py index c3fe3ea08..f15f4e806 100644 --- a/packages/overture-schema-pyspark/tests/test_validate.py +++ b/packages/overture-schema-pyspark/tests/test_validate.py @@ -514,3 +514,29 @@ def test_all_checks_suppressed(self, vf_df: DataFrame) -> None: ) assert result.checks == [] assert result.error_rows().count() == 0 + + def test_missing_column_does_not_raise(self, spark: SparkSession) -> None: + # A DataFrame missing a required column causes AnalysisException when + # evaluate_checks references that column. validate_feature must detect + # structurally absent columns via schema_mismatches and silently drop + # the corresponding checks before calling evaluate_checks -- mirroring + # the skip_columns path. 
+ schema_no_theme = StructType( + [f for f in _VF_SCHEMA.fields if f.name != "theme"] + ) + df = spark.createDataFrame( + [Row(id="1", type=_VF_TYPE, value="ok", sources="s")], + schema=schema_no_theme, + ) + result = validate_feature(df, _VF_TYPE) + # Must not raise -- returns normally + assert isinstance(result, ValidationResult) + # Missing column is reported as a schema mismatch + mismatch_paths = [m.path for m in result.schema_mismatches] + assert "theme" in mismatch_paths + # No check may reference the absent root field + missing_root_fields = {c.root_field for c in result.checks} + assert "theme" not in missing_root_fields + # Absent-column checks are silently dropped, not tracked in suppressed + suppressed_root_fields = {c.root_field for c in result.suppressed_checks} + assert "theme" not in suppressed_root_fields From 4b55cc0a4464112d8e75ddb61a5a311cd97abd7e Mon Sep 17 00:00:00 2001 From: Seth Fitzsimmons Date: Wed, 20 May 2026 14:02:46 -0700 Subject: [PATCH 8/8] fix(codegen): null-guard optional sub-model checks Model-level constraints (require_any_of and the like) generated for a sub-model reached through an optional field fired even when that field was null. Pydantic skips a model validator when the optional sub-model is absent, so the generated PySpark expression produced a false positive the schema itself never raises. ModelCheck now carries a gate: the optional-ancestor path that must be non-null for the constraint to apply. check_builder sets it when the constrained model is reached via an optional struct field inside an array; the renderer wraps the constraint in F.when(.isNotNull(), ...). Regenerated Segment expressions: the speed_limits[].when, access_restrictions[].when, and prohibited_transitions[].when require_any_of checks are now skipped when their when sub-model is null. 
Signed-off-by: Seth Fitzsimmons --- .../schema/codegen/pyspark/check_builder.py | 9 +- .../schema/codegen/pyspark/check_ir.py | 7 ++ .../schema/codegen/pyspark/renderer.py | 16 ++++ .../tests/test_pyspark_check_builder.py | 83 +++++++++++++++++++ .../tests/test_pyspark_renderer.py | 70 ++++++++++++++++ .../overture/schema/transportation/segment.py | 69 ++++++++------- 6 files changed, 223 insertions(+), 31 deletions(-) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py index 9e736a67c..885074ca6 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_builder.py @@ -394,11 +394,17 @@ def _recurse_into_model( model_checks.extend(sub_model_checks) if model_spec.constraints: + constraint_gate = ( + prefix + if is_optional and not field_is_list and isinstance(prefix, ArrayPath) + else None + ) sub_model_constraint_checks = _dispatch_model_constraints( model_spec.constraints, model_spec.fields, target=_model_constraint_target(prefix), arm=arm, + gate=constraint_gate, ) if sub_model_constraint_checks: _guard_struct_nested_anchor(prefix, model_spec.name) @@ -485,10 +491,11 @@ def _dispatch_model_constraints( *, target: FieldPath = ScalarPath(), arm: str | None = None, + gate: FieldPath | None = None, ) -> list[ModelCheck]: """Dispatch model constraints to ModelChecks.""" return [ - ModelCheck(descriptor=desc, target=target, arm=arm) + ModelCheck(descriptor=desc, target=target, arm=arm, gate=gate) for mc in constraints for desc in dispatch_model_constraint(mc, fields) ] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py index e9029c632..d7e769c31 100644 --- 
a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/check_ir.py @@ -76,8 +76,15 @@ class ModelCheck: value. Constraints discovered through a variant-specific field's sub-model or sub-union inherit the contributing outer arm, so they land only in that arm's test module. + + `gate` is the optional-ancestor path that must be non-null for the + constraint to apply. Set when the constrained model is reached via + an optional field (`field: Model | None`). The renderer wraps the + constraint expression in `F.when(<gate>.isNotNull(), ...)` so + the check is skipped when the optional model is absent (NULL). """ descriptor: ModelConstraintDescriptor target: FieldPath = ScalarPath() arm: str | None = None + gate: FieldPath | None = None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py index 9728a499a..dcd687610 100644 --- a/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/pyspark/renderer.py @@ -455,10 +455,26 @@ def _cols_and_names() -> tuple[str, str]: raise TypeError(f"Unhandled model constraint descriptor: {desc!r}") if isinstance(target, ArrayPath): + if check.gate is not None: + assert not target.iter_struct_paths, ( + f"gated ModelCheck with a nested-array target ({target!r}) is unsupported; " + f"the element-gate wrap assumes a single array level" + ) + element_relative = target.element_relative_gate(check.gate) + assert element_relative is not None, ( + f"ModelCheck gate={check.gate!r} is not reachable as an element-level " + f"accessor on target={target!r}; gates on ModelChecks must be ArrayPaths " + f"entering the same outer array as the target" + ) + inner_expr = _wrap_element_gate(inner_expr, var, element_relative) expr =
_wrap_in_array_iteration( target.column_path, target.iter_struct_paths, inner_expr ) else: + assert check.gate is None, ( + f"ModelCheck gate={check.gate!r} paired with non-ArrayPath target={target!r}; " + f"a gate only makes sense when the constrained model is inside an array" + ) expr = inner_expr return _check_function_context( diff --git a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py index 0c89a0367..983b4d348 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_check_builder.py @@ -1336,6 +1336,10 @@ class _ArrayOfConstrainedModel(BaseModel): items: list[_ArrayElementWithConstraint] +class _OptionalArrayOfConstrainedModel(BaseModel): + items: list[_ArrayElementWithConstraint] | None = None + + @require_any_of("a", "b") class _NestedConstrainedStruct(BaseModel): a: str | None = None @@ -1875,3 +1879,82 @@ def test_primitive_bounds_excluded(self, nodes: list[Check]) -> None: d = dict(b.kwargs) assert d.get("ge") != -(2**31) assert d.get("le") != 2**31 - 1 + + +@require_any_of("x", "y") +class _OptionalSubModelConstrained(BaseModel): + """Sub-model with require_any_of on its own fields.""" + + x: str | None = None + y: str | None = None + + +class _ElementWithOptionalConstrained(BaseModel): + nested: _OptionalSubModelConstrained | None = None + + +class _ArrayOfElementWithOptionalConstrained(BaseModel): + items: list[_ElementWithOptionalConstrained] + + +class TestOptionalSubModelModelCheckGate: + """ModelCheck for a constraint on an optional sub-model carries gate set to its path. + + When the constrained model is reached via an optional field (`field: Model | None`), + the PySpark validator must skip the constraint when the field is NULL. The + `ModelCheck.gate` carries the path to the optional field so the renderer can emit + `F.when(.isNotNull(), ...)`. 
+ """ + + def test_optional_nested_model_gate_set(self) -> None: + """items[].nested is optional -- gate == path to nested.""" + _, model_nodes = _checks_for(_ArrayOfElementWithOptionalConstrained) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[].nested") + ] + assert len(nodes) == 1 + assert nodes[0].gate == _path("items[].nested") + + def test_non_optional_sub_model_has_no_gate(self) -> None: + """Direct array element model (not optional) -- gate is None.""" + _, model_nodes = _checks_for(_ArrayOfConstrainedModel) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[]") + ] + assert len(nodes) == 1 + assert nodes[0].gate is None + + def test_optional_list_field_element_model_has_no_gate(self) -> None: + """Optional list field (list[Model] | None) -- element constraint gate is None. + + The field being optional means the list itself may be absent; but the + constrained model is reached via array iteration, not a nullable struct + field, so no element-level gate belongs. 
+ """ + _, model_nodes = _checks_for(_OptionalArrayOfConstrainedModel) + nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("items[]") + ] + assert len(nodes) == 1 + assert nodes[0].gate is None + + def test_segment_speed_limits_when_has_gate(self) -> None: + """Segment.speed_limits[].when is optional -- gate == path to when.""" + from codegen_test_support import discover_feature + + spec = discover_feature("Segment") + _, model_nodes = build_checks(spec) + when_nodes = [ + n + for n in _filter_nodes(model_nodes, "check_require_any_of") + if n.target == _path("speed_limits[].when") + ] + assert len(when_nodes) >= 1 + for node in when_nodes: + assert node.gate == _path("speed_limits[].when") diff --git a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py index 42775be41..91a63c380 100644 --- a/packages/overture-schema-codegen/tests/test_pyspark_renderer.py +++ b/packages/overture-schema-codegen/tests/test_pyspark_renderer.py @@ -33,6 +33,7 @@ ) from overture.schema.codegen.pyspark.schema_builder import build_schema from overture.schema.system.field_path import ( + ScalarPath, parse, ) from overture.schema.system.model_constraint import ( @@ -1095,3 +1096,72 @@ def test_nested_array_gate_applied_at_outermost_lambda(self) -> None: # Gate must be on el (the rule struct), not inner (the country string). 
assert 'el["perspectives"].isNotNull()' in source + assert "inner[" not in source + + +@require_any_of("a", "b") +class _OptionalSubModel(BaseModel): + a: str | None = None + b: str | None = None + + +class _ElementWithOptional(BaseModel): + nested: _OptionalSubModel | None = None + + +class _ArrayWithOptionalSubModel(BaseModel): + items: list[_ElementWithOptional] + + +class TestGatedModelConstraintRendering: + """ModelCheck with gate wraps the constraint in F.when(<gate>.isNotNull(), ...).""" + + def test_gated_model_check_wraps_in_f_when(self) -> None: + """A gated ModelCheck on items[].nested emits F.when(el['nested'].isNotNull(), ...).""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[].nested"), + gate=_path("items[].nested"), + ) + source = _render_model_node(check) + assert 'el["nested"].isNotNull()' in source + assert "check_require_any_of" in source + assert "F.when(" in source + + def test_gated_model_check_is_parseable(self) -> None: + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=_path("items[].nested"), + gate=_path("items[].nested"), + ) + source = _render_model_node(check) + ast.parse(source) + + def test_ungated_model_check_no_f_when(self) -> None: + """A ModelCheck without gate does NOT emit isNotNull wrapping.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("x", "y")), + target=_path("items[]"), + gate=None, + ) + source = _render_model_node(check) + assert "isNotNull" not in source + assert "check_require_any_of" in source + + def test_full_render_optional_sub_model_has_when_guard(self) -> None: + """End-to-end: rendering _ArrayWithOptionalSubModel emits the isNotNull guard.""" + source = _render(_ArrayWithOptionalSubModel, "arr_optional_sub") + assert 'el["nested"].isNotNull()' in source + + def test_full_render_optional_sub_model_parseable(self) -> None: + source = _render(_ArrayWithOptionalSubModel, "arr_optional_sub") + ast.parse(source) + + def
test_gated_model_check_assertion_on_non_array_target(self) -> None: + """A gate paired with a non-ArrayPath target raises AssertionError.""" + check = ModelCheck( + descriptor=RequireAnyOf(field_names=("a", "b")), + target=ScalarPath(), + gate=_path("items[].nested"), + ) + with pytest.raises(AssertionError, match="gate.*non-ArrayPath"): + _render_model_node(check) diff --git a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py index cc9fd32bc..539999f21 100644 --- a/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py +++ b/packages/overture-schema-pyspark/src/overture/schema/pyspark/expressions/generated/overture/schema/transportation/segment.py @@ -3919,16 +3919,19 @@ def _access_restrictions_when_check_require_any_of_5_check() -> Check: name="require_any_of", expr=array_check( "access_restrictions", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY, @@ -4055,16 +4058,19 @@ def _prohibited_transitions_when_check_require_any_of_12_check() -> Check: name="require_any_of", expr=array_check( "prohibited_transitions", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - 
el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY, @@ -4176,16 +4182,19 @@ def _speed_limits_when_check_require_any_of_18_check() -> Check: name="require_any_of", expr=array_check( "speed_limits", - lambda el: check_require_any_of( - [ - el["when"]["heading"], - el["when"]["during"], - el["when"]["mode"], - el["when"]["using"], - el["when"]["recognized"], - el["when"]["vehicle"], - ], - ["heading", "during", "mode", "using", "recognized", "vehicle"], + lambda el: F.when( + el["when"].isNotNull(), + check_require_any_of( + [ + el["when"]["heading"], + el["when"]["during"], + el["when"]["mode"], + el["when"]["using"], + el["when"]["recognized"], + el["when"]["vehicle"], + ], + ["heading", "during", "mode", "using", "recognized", "vehicle"], + ), ), ), shape=CheckShape.ARRAY,