device-builder/.github/workflows/test.yml at main · ApolloAutomation/device-builder · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
name: Test

# CI entry point: lint + tests for every PR targeting main and for
# every push to main. Follows the matter-server / music-assistant
# layout — a single ``lint`` job that runs the same pre-commit hooks
# contributors run locally, and a ``test`` matrix over the supported
# Python versions. The catalog smoke test
# (``script/check_catalog.py``) rides along with lint so a bad sync
# result breaks CI even if nobody ran the full sync.

permissions:
  contents: read

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_call:
    # Allows release.yml to reuse the whole lint + test matrix as a
    # preflight for the branch it is about to release from.
    inputs:
      ref:
        description: "Git ref to check out (defaults to main)."
        type: string
        required: false
        default: main

jobs:
  lint:
    name: Lint + smoke checks
    runs-on: ubuntu-latest
    steps:
      - name: Check out code from GitHub
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          # Use the caller-supplied ref when one is given; otherwise
          # fall back to the ref that triggered this run.
          ref: ${{ inputs.ref || github.ref }}

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: "3.13"

      - name: Set up uv
        # uv stands in for pip during installation: roughly an order
        # of magnitude faster on cold boots and it keeps its own
        # wheel cache. The interpreter still comes from
        # ``actions/setup-python``; that Python isn't flagged as
        # externally-managed, so ``uv pip install --system`` also
        # works on macos / windows runners, where a brew-shipped
        # Python would be rejected under PEP 668.
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
        with:
          enable-cache: true

      - name: Install package + dev tools
        # The ``[esphome]`` extra lets the catalog smoke test further
        # down build a ``ComponentCatalog`` against the esphome
        # version the dashboard ships with. ``--system`` targets the
        # runner's Python rather than a venv, keeping the shape of
        # the earlier pip-based CI so the later ``pre-commit`` /
        # ``python script/...`` steps need no ``uv run`` prefix.
        run: uv pip install --system -e '.[esphome,test]'

      - name: Cache pre-commit hook envs
        # The key combines the Python location with the hash of
        # ``.pre-commit-config.yaml``, so bumping any hook
        # invalidates the cache automatically. This is what
        # ``pre-commit/action`` does internally; it is inlined here
        # because that action's transitive ``actions/cache@v4``
        # reference isn't SHA-pinned, and the org policy blocks that
        # ("all actions must be pinned to a full-length commit SHA").
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: ~/.cache/pre-commit
          key: pre-commit|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Run pre-commit (ruff lint + format, codespell, yaml/json checks)
        # With a warm cache the per-hook envs (ruff, codespell, …)
        # are already on disk, turning the ~30s init phase into a
        # near-instant restore. ``no-commit-to-branch`` is skipped
        # via ``SKIP``: it is a local-only guard and CI always runs
        # on branches, the same way matter-server handles it.
        env:
          SKIP: no-commit-to-branch
        run: pre-commit run --all-files --show-diff-on-failure --color=always

      - name: Validate board / component manifests
        run: python script/validate_definitions.py

      - name: Verify boards.json is in sync with manifests
        # Guards against PRs that bypassed the pre-commit hook:
        # regenerate the JSON from the YAMLs, then let ``git diff
        # --exit-code`` fail the build when the committed file
        # differs from the regenerated one.
        run: |
          python script/sync_boards.py
          git diff --exit-code -- esphome_device_builder/definitions/boards.json

      - name: Smoke-test component catalog
        run: python script/check_catalog.py

      - name: Type-check (mypy)
        # Hard gate: a typing regression blocks the PR. Mypy runs
        # with the strict settings from ``pyproject.toml``
        # (``disallow_untyped_defs``, ``disallow_incomplete_defs``,
        # ``warn_return_any``). The step began as advisory (#481)
        # while the 24-error baseline was walked down to zero over
        # PRs #483-#492, and became blocking once the count hit zero.
        run: mypy esphome_device_builder

  test:
    name: Pytest (${{ matrix.os }} / Python ${{ matrix.python-version }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # Explicit OS / Python pairs rather than a full cross-product:
        # * Linux runs only 3.12, the oldest supported Python. The
        #   ``test-esphome-channels`` job already exercises 3.14 on
        #   Linux against beta and dev esphome, a strict superset of
        #   "stable esphome on 3.14 / Linux".
        # * Windows and macOS run only the newest Python — enough to
        #   surface OS-specific regressions without paying for extra
        #   runs on the slower runners.
        include:
          - os: ubuntu-latest
            python-version: "3.12"
          - os: windows-latest
            python-version: "3.14"
          - os: macos-latest
            python-version: "3.14"
    steps:
      - name: Check out code from GitHub
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          # Use the caller-supplied ref when one is given; otherwise
          # fall back to the ref that triggered this run.
          ref: ${{ inputs.ref || github.ref }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up uv
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
        with:
          enable-cache: true

      - name: Install package + test deps
        run: uv pip install --system -e '.[esphome,test]'

      - name: Run pytest
        # The command must reach the shell as ONE line: Windows
        # runners default to PowerShell, which rejects bash-style
        # ``\`` continuations. The folded ``>-`` scalar below joins
        # its lines with single spaces, so the shell still receives a
        # single-line command.
        #
        # Flags:
        # * ``-n auto`` — pytest-xdist, one worker per logical CPU
        #   (same invocation as the upstream esphome workflow).
        #   pytest-cov merges the per-worker ``.coverage`` files at
        #   the end, so the xml report still covers the whole suite.
        # * ``--maxfail=5`` — bail out early when something
        #   fundamental is broken; ``-q`` keeps the log readable
        #   without ``-vv``.
        # * ``--ignore=tests/benchmarks`` — that subtree is
        #   CodSpeed-driven, runs in a separate job, and its
        #   assertions only check chunk counts (not behaviour).
        #
        # Hang protection comes in two layers:
        # * ``--timeout=120`` (pytest-timeout plugin) fails any single
        #   test stuck for more than 2 minutes and puts the offending
        #   test name + traceback in the log.
        # * ``timeout-minutes: 5`` is the outer hard cap for cases the
        #   plugin can't recover from (deadlocked event loop, stuck
        #   subprocess): the runner kills the step. Healthy runs take
        #   ~1-2 minutes; without the cap one hung worker would burn
        #   the runner's full 6h budget.
        timeout-minutes: 5
        run: >-
          pytest -q -n auto --maxfail=5 --durations=10 --timeout=120
          --ignore=tests/benchmarks --ignore=tests/real_compile
          --cov=esphome_device_builder --cov-report=xml --cov-report=term

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354  # v6.0.1
        with:
          files: ./coverage.xml
          flags: py${{ matrix.python-version }}
          fail_ci_if_error: false

  test-esphome-channels:
    name: Pytest (esphome ${{ matrix.channel }} / ${{ matrix.os }} / Python ${{ matrix.python-version }})
    runs-on: ${{ matrix.os }}
    # Forward-looking probe: runs the suite against the next two
    # esphome release channels on Linux / Python 3.14. Stable is
    # already covered by the ``test`` matrix (it is what ``pip install
    # -e '.[esphome]'`` resolves to), so only two channels live here:
    # * ``beta`` is a strict gate — incompatibilities get caught
    #   before they ship to users.
    # * ``dev`` is advisory (allow-failure) — ESPHome's main-branch
    #   nightly can break mid-day, and we want the signal without a
    #   permanent red on every device-builder PR.
    #
    # The per-entry ``allow_failure`` flag feeds the job-level
    # ``continue-on-error``, which decides whether a failing matrix
    # entry fails the whole workflow: ``dev`` opts out, ``beta``
    # stays strict.
    continue-on-error: ${{ matrix.allow_failure }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - channel: beta
            os: ubuntu-latest
            python-version: "3.14"
            # ``--prerelease=allow`` is uv's opt-in for pre-release
            # versions (pip's ``--pre`` doesn't apply here);
            # ``--upgrade`` makes uv pick the highest version even
            # when a stable one was already installed transitively.
            install: uv pip install --system --upgrade --prerelease=allow esphome
            allow_failure: false
          - channel: dev
            os: ubuntu-latest
            python-version: "3.14"
            # ``dev`` is ESPHome's nightly working branch and may
            # break at any time — keep the signal, drop the gate.
            install: uv pip install --system --upgrade git+https://github.com/esphome/esphome.git@dev
            allow_failure: true
    steps:
      - name: Check out code from GitHub
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          # Use the caller-supplied ref when one is given; otherwise
          # fall back to the ref that triggered this run.
          ref: ${{ inputs.ref || github.ref }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up uv
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
        with:
          enable-cache: true

      - name: Install package + test deps
        run: uv pip install --system -e '.[esphome,test]'

      - name: Install esphome (${{ matrix.channel }} channel)
        # Runs the channel-specific install command from the matrix
        # on top of the base install from the previous step.
        run: ${{ matrix.install }}

      - name: Show installed esphome version
        # Prints the resolved version so a failure tied to a specific
        # release can be grepped from the log without a re-run.
        run: uv pip show --system esphome | grep -E '^(Name|Version):'

      - name: Run pytest
        # Same invocation as the OS-axis ``test`` job (``-n auto``
        # under pytest-xdist, ``--timeout=120`` per test,
        # ``timeout-minutes: 5`` as the outer hard cap) minus
        # ``--cov``: the merged coverage report comes from the
        # OS-axis job, and this run only answers "does the suite
        # still pass against upstream X". Hangs against upstream
        # beta/dev are exactly what the two timeouts guard against.
        # The folded ``>-`` scalar yields a single-line command.
        timeout-minutes: 5
        run: >-
          pytest -q -n auto --maxfail=5 --durations=10 --timeout=120
          --ignore=tests/benchmarks --ignore=tests/real_compile

  benchmarks:
    name: Run benchmarks (CodSpeed)
    runs-on: ubuntu-latest
    # Benchmarks only run on PRs to ``main`` and pushes to ``main`` —
    # ``workflow_call`` runs (release preflight) skip them since
    # CodSpeed's instrumentation harness adds non-trivial wallclock
    # to the matrix and the comparison only makes sense against the
    # historical baseline CodSpeed already has.
    #
    # ``github.event_name`` cannot detect a reusable-workflow call:
    # inside a called workflow the ``github`` context belongs to the
    # CALLER, so the value is the caller's event (``push``,
    # ``release``, ``workflow_dispatch``, …) and never
    # ``workflow_call``. Gate on ``inputs.ref`` instead — it is empty
    # for this workflow's own push / pull_request triggers and
    # defaults to ``main`` under ``workflow_call`` (the checkout
    # steps of the other jobs rely on the same distinction).
    # NOTE(review): a caller that explicitly passes an empty ``ref``
    # would be treated as a direct run — confirm no caller does that.
    if: ${{ !inputs.ref }}
    steps:
      - name: Check out code from GitHub
        # No ``ref`` override: this job never runs under
        # ``workflow_call``, so the triggering ref is always correct.
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: "3.13"

      - name: Set up uv
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
        with:
          enable-cache: true

      - name: Install package + test deps
        run: uv pip install --system -e '.[esphome,test]'

      - name: Run benchmarks
        # ``simulation`` is the new name for what used to be called
        # ``instrumentation`` — same callgrind-based runner under
        # the hood, just renamed. The action prints a deprecation
        # warning when you ask for ``instrumentation`` explicitly.
        #
        # ``--timeout=600`` overrides the 10s per-test default set
        # in ``pyproject.toml``. CodSpeed's callgrind instrumentation
        # multiplies each benchmark's wallclock by 10-50x, so a
        # microbenchmark that runs in 50ms outside the harness can
        # legitimately take a few minutes here. 10 minutes covers
        # worst-case (the catalog-load benchmark, which exercises
        # every BoardCatalogEntry's nested dataclass deserialisation)
        # with roughly 2-3x headroom over the observed 3-5 minute
        # floor.
        uses: CodSpeedHQ/action@3194d9a39c4d46684cb44bf7207fc56626aad8fd  # v4.15.1
        with:
          mode: simulation
          # ``--durations 10`` prints the 10 slowest benchmark wall
          # times at the end of the run so a regression in any
          # single bench is visible in the CI log without having
          # to dig into the CodSpeed dashboard.
          run: pytest tests/benchmarks --codspeed --no-cov --timeout=600 --durations 10