From 928de4ef7f7353efe9f09d01b18d9a03374ca984 Mon Sep 17 00:00:00 2001 From: Harsh Mishra Date: Tue, 3 Mar 2026 14:20:56 +0530 Subject: [PATCH 1/5] Automate service coverage data update for Azure --- .github/workflows/update-azure-coverage.yml | 76 +++++++++++ scripts/create_azure_coverage.py | 139 ++++++++++++++++++++ scripts/get_latest_github_metrics.sh | 65 +++++++++ 3 files changed, 280 insertions(+) create mode 100644 .github/workflows/update-azure-coverage.yml create mode 100644 scripts/create_azure_coverage.py create mode 100644 scripts/get_latest_github_metrics.sh diff --git a/.github/workflows/update-azure-coverage.yml b/.github/workflows/update-azure-coverage.yml new file mode 100644 index 00000000..4c141b1f --- /dev/null +++ b/.github/workflows/update-azure-coverage.yml @@ -0,0 +1,76 @@ +name: Update Azure Coverage Data + +on: + schedule: + - cron: 0 5 * * MON + workflow_dispatch: + inputs: + targetBranch: + required: false + type: string + default: "main" + pull_request: + types: [opened, synchronize] + +jobs: + update-azure-coverage: + name: Update Azure coverage data + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: Checkout docs + uses: actions/checkout@v4 + with: + fetch-depth: 0 + path: docs + ref: ${{ github.event.inputs.targetBranch || 'main' }} + + - name: Set up system wide dependencies + run: | + sudo apt-get install jq wget + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Download Azure implementation metrics artifact + working-directory: docs + run: bash ./scripts/get_latest_github_metrics.sh ./target main + env: + GITHUB_TOKEN: ${{ secrets.PRO_ACCESS_TOKEN }} + REPOSITORY_NAME: localstack-pro + ARTIFACT_ID: implemented_features_python-amd64.csv + WORKFLOW: "Az / Build, Test, Push" + + - name: Generate Azure coverage JSON data + working-directory: docs + run: | + python3 scripts/create_azure_coverage.py -i target/implemented_features_python-amd64.csv/implemented_features.csv -o target/updated_azure_coverage + mv -f target/updated_azure_coverage/*.json src/data/azure-coverage/ + + - name: Check for changes + id: check-for-changes + working-directory: docs + env: + TARGET_BRANCH: ${{ github.event.inputs.targetBranch || 'main' }} + run: | + mkdir -p resources + (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log + echo "diff-count=$(cat resources/diff-check.log | wc -l)" >> "$GITHUB_OUTPUT" + cat resources/diff-check.log + + - name: Create PR + uses: peter-evans/create-pull-request@v7 + if: ${{ success() && steps.check-for-changes.outputs.diff-count != '0' && steps.check-for-changes.outputs.diff-count != '' }} + with: + path: docs + title: "Update Azure coverage data" + body: "Update generated Azure coverage JSON data from the latest LocalStack Pro parity metrics artifact." + branch: "automated-azure-coverage-updates" + author: "LocalStack Bot " + committer: "LocalStack Bot " + commit-message: "update generated azure coverage data" + token: ${{ secrets.PRO_ACCESS_TOKEN }} diff --git a/scripts/create_azure_coverage.py b/scripts/create_azure_coverage.py new file mode 100644 index 00000000..339ddfb7 --- /dev/null +++ b/scripts/create_azure_coverage.py @@ -0,0 +1,139 @@ +""" +Generate Azure coverage JSON files from implementation CSV data. +""" + +import argparse +import csv +import json +from pathlib import Path +from typing import Any + + +def _as_bool(value: Any, default: bool = True) -> bool: + if value is None: + return default + if isinstance(value, bool): + return value + return str(value).strip().lower() in {"1", "true", "yes", "y"} + + +def _group_name(service_name: str, category: str) -> str: + service_name = (service_name or "").strip() + category = (category or "").strip() + if not category: + return service_name + if category.lower() in {"none", "null", "n/a"}: + return service_name + if category == service_name: + return service_name + return f"{service_name} ({category})" + + +def _normalize_provider(value: str) -> str: + return (value or "").strip().replace("_", ".") + + +def _resolve_input_csv(path: Path) -> Path: + if path.exists(): + if path.is_file(): + return path + # Support passing a directory that contains the extracted artifact. + nested_csv = path / "implemented_features.csv" + if nested_csv.exists(): + return nested_csv + matches = sorted(path.rglob("implemented_features.csv")) + if matches: + return matches[0] + raise FileNotFoundError(f"No implemented_features.csv found under: {path}") + + # Backward-compatible fallback for target/implemented_features.csv. + if path.name == "implemented_features.csv" and path.parent.exists(): + matches = sorted(path.parent.rglob("implemented_features.csv")) + if matches: + return matches[0] + + raise FileNotFoundError(f"Input CSV not found: {path}") + + +def _load_csv(path: Path) -> dict[str, dict[str, dict[str, dict[str, Any]]]]: + path = _resolve_input_csv(path) + + coverage: dict[str, dict[str, dict[str, dict[str, Any]]]] = {} + with path.open(mode="r", encoding="utf-8") as file: + reader = csv.DictReader(file) + if not reader.fieldnames: + raise ValueError("Input CSV has no headers.") + + for row in reader: + provider = _normalize_provider(row.get("resource_provider", "")) + if not provider: + continue + + feature_name = (row.get("feature") or row.get("operation") or "").strip() + if not feature_name: + continue + + group = _group_name(row.get("service", ""), row.get("category", "")) + if not group: + group = "General" + + implemented = _as_bool( + row.get("implemented", row.get("is_implemented", row.get("isImplemented"))), + default=True, + ) + pro_only = _as_bool(row.get("pro", row.get("is_pro", row.get("isPro"))), default=True) + + provider_data = coverage.setdefault(provider, {}) + group_data = provider_data.setdefault(group, {}) + group_data[feature_name] = { + "implemented": implemented, + "pro": pro_only, + } + + return coverage + + +def _sorted_details(details: dict[str, dict[str, dict[str, Any]]]) -> dict[str, dict[str, dict[str, Any]]]: + sorted_details: dict[str, dict[str, dict[str, Any]]] = {} + for group_name in sorted(details.keys()): + operations = details[group_name] + sorted_details[group_name] = dict(sorted(operations.items(), key=lambda item: item[0])) + return sorted_details + + +def write_coverage_files(coverage: dict[str, dict[str, dict[str, dict[str, Any]]]], output_dir: Path) -> None: + output_dir.mkdir(parents=True, exist_ok=True) + for provider in sorted(coverage.keys()): + payload = { + "service": provider, + "operations": [], + "details": _sorted_details(coverage[provider]), + } + file_path = output_dir / f"{provider}.json" + with file_path.open(mode="w", encoding="utf-8") as fd: + json.dump(payload, fd, indent=2) + fd.write("\n") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate Azure coverage JSON data.") + parser.add_argument( + "-i", + "--implementation-details", + required=True, + help="Path to implementation details CSV.", + ) + parser.add_argument( + "-o", + "--output-dir", + required=True, + help="Directory where generated JSON files will be written.", + ) + args = parser.parse_args() + + coverage = _load_csv(Path(args.implementation_details)) + write_coverage_files(coverage, Path(args.output_dir)) + + +if __name__ == "__main__": + main() diff --git a/scripts/get_latest_github_metrics.sh b/scripts/get_latest_github_metrics.sh new file mode 100644 index 00000000..6433b8c8 --- /dev/null +++ b/scripts/get_latest_github_metrics.sh @@ -0,0 +1,65 @@ +#!/bin/bash +set -euo pipefail + +# input params +PARENT_FOLDER=${1:-target} +METRICS_ARTIFACTS_BRANCH=${2:-main} + +# env vars +REPOSITORY_NAME=${REPOSITORY_NAME:-localstack-pro} +ARTIFACT_ID=${ARTIFACT_ID:-implemented_features_python-amd64.csv} +WORKFLOW=${WORKFLOW:-"Az / Build, Test, Push"} +PREFIX_ARTIFACT=${PREFIX_ARTIFACT:-} +FILTER_SUCCESS=${FILTER_SUCCESS:-1} +LIMIT=${LIMIT:-20} + +RESOURCE_FOLDER=${RESOURCE_FOLDER:-} +REPOSITORY_OWNER=${REPOSITORY_OWNER:-localstack} +TARGET_FOLDER="$PARENT_FOLDER/$RESOURCE_FOLDER" + +TMP_FOLDER="$PARENT_FOLDER/tmp_download" +mkdir -p "$TMP_FOLDER" + +echo "Searching for artifact '$ARTIFACT_ID' in workflow '$WORKFLOW' on branch '$METRICS_ARTIFACTS_BRANCH' in repo '$REPOSITORY_OWNER/$REPOSITORY_NAME'." + +if [ "$FILTER_SUCCESS" = "1" ]; then + echo "Filtering runs by conclusion=success" + SELECTOR='.[] | select(.conclusion=="success")' +else + echo "Filtering runs by completed status (success/failure)" + SELECTOR='.[] | select(.status=="completed" and (.conclusion=="failure" or .conclusion=="success"))' +fi + +RUN_IDS=$(gh run list --limit "$LIMIT" --branch "$METRICS_ARTIFACTS_BRANCH" --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" --workflow "$WORKFLOW" --json databaseId,conclusion,status --jq "$SELECTOR") + +if [ "$(echo "$RUN_IDS" | jq -rs '.[0].databaseId')" = "null" ]; then + echo "No matching workflow run found." + exit 1 +fi + +for ((i=0; i/dev/null | wc -l)" -gt 0 ]; then + echo "Downloaded artifact successfully." + break + fi +done + +echo "Moving artifact to $TARGET_FOLDER" +mkdir -p "$TARGET_FOLDER" +if [[ -z "${PREFIX_ARTIFACT}" ]]; then + cp -R "$TMP_FOLDER"/. "$TARGET_FOLDER"/ +else + while IFS= read -r file; do + org_file_name=$(echo "$file" | sed "s/.*\///") + mv -- "$file" "$TARGET_FOLDER/$PREFIX_ARTIFACT-$org_file_name" + done < <(find "$TMP_FOLDER" -type f -name "*.csv") +fi + +rm -rf "$TMP_FOLDER" +echo "Contents of $TARGET_FOLDER:" +ls -la "$TARGET_FOLDER" From eb14deacdc4559a32f4fe72998be87d203d0ddfa Mon Sep 17 00:00:00 2001 From: Harsh Mishra Date: Tue, 3 Mar 2026 14:23:43 +0530 Subject: [PATCH 2/5] test --- .github/workflows/update-azure-coverage.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-azure-coverage.yml b/.github/workflows/update-azure-coverage.yml index 4c141b1f..0901e82d 100644 --- a/.github/workflows/update-azure-coverage.yml +++ b/.github/workflows/update-azure-coverage.yml @@ -6,9 +6,9 @@ on: workflow_dispatch: inputs: targetBranch: - required: false + required: true type: string - default: "main" + description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)" pull_request: types: [opened, synchronize] @@ -25,7 +25,7 @@ jobs: with: fetch-depth: 0 path: docs - ref: ${{ github.event.inputs.targetBranch || 'main' }} + ref: ${{ github.event.inputs.targetBranch }} - name: Set up system wide dependencies run: | @@ -55,7 +55,7 @@ jobs: id: check-for-changes working-directory: docs env: - TARGET_BRANCH: ${{ github.event.inputs.targetBranch || 'main' }} + TARGET_BRANCH: ${{ github.event.inputs.targetBranch }} run: | mkdir -p resources (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log From 1545e20174292ef90fddc06b0895b5544c30fea3 Mon Sep 17 00:00:00 2001 From: Harsh Mishra Date: Mon, 16 Mar 2026 20:43:11 +0530 Subject: [PATCH 3/5] get the pipeline fixed --- .github/workflows/update-azure-coverage.yml | 17 +++++++----- scripts/create_azure_coverage.py | 13 +++++++++ scripts/get_latest_github_metrics.sh | 30 ++++++++++++++++----- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/.github/workflows/update-azure-coverage.yml b/.github/workflows/update-azure-coverage.yml index 0901e82d..b4293861 100644 --- a/.github/workflows/update-azure-coverage.yml +++ b/.github/workflows/update-azure-coverage.yml @@ -9,8 +9,6 @@ on: required: true type: string description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)" - pull_request: - types: [opened, synchronize] jobs: update-azure-coverage: @@ -25,7 +23,7 @@ jobs: with: fetch-depth: 0 path: docs - ref: ${{ github.event.inputs.targetBranch }} + ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} - name: Set up system wide dependencies run: | @@ -42,20 +40,25 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PRO_ACCESS_TOKEN }} REPOSITORY_NAME: localstack-pro - ARTIFACT_ID: implemented_features_python-amd64.csv + ARTIFACT_ID: implemented_features_python-amd64 WORKFLOW: "Az / Build, Test, Push" - name: Generate Azure coverage JSON data working-directory: docs run: | - python3 scripts/create_azure_coverage.py -i target/implemented_features_python-amd64.csv/implemented_features.csv -o target/updated_azure_coverage - mv -f target/updated_azure_coverage/*.json src/data/azure-coverage/ + python3 scripts/create_azure_coverage.py -i target/implemented_features.csv -o target/updated_azure_coverage + if ls target/updated_azure_coverage/*.json > /dev/null 2>&1; then + mv -f target/updated_azure_coverage/*.json src/data/azure-coverage/ + else + echo "No JSON files generated in target/updated_azure_coverage." + exit 1 + fi - name: Check for changes id: check-for-changes working-directory: docs env: - TARGET_BRANCH: ${{ github.event.inputs.targetBranch }} + TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} run: | mkdir -p resources (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log diff --git a/scripts/create_azure_coverage.py b/scripts/create_azure_coverage.py index 339ddfb7..24f33819 100644 --- a/scripts/create_azure_coverage.py +++ b/scripts/create_azure_coverage.py @@ -63,6 +63,13 @@ def _load_csv(path: Path) -> dict[str, dict[str, dict[str, dict[str, Any]]]]: reader = csv.DictReader(file) if not reader.fieldnames: raise ValueError("Input CSV has no headers.") + required_headers = {"resource_provider", "service", "feature"} + if not required_headers.issubset(set(reader.fieldnames)): + raise ValueError( + "Unexpected CSV schema. Expected headers including " + f"{sorted(required_headers)}, got {reader.fieldnames}. " + "The downloaded artifact may contain an error payload instead of CSV data." + ) for row in reader: provider = _normalize_provider(row.get("resource_provider", "")) @@ -90,6 +97,12 @@ def _load_csv(path: Path) -> dict[str, dict[str, dict[str, dict[str, Any]]]]: "pro": pro_only, } + if not coverage: + raise ValueError( + "No Azure coverage records were parsed from the input CSV. " + "Please verify the artifact content is valid and non-empty." + ) + return coverage diff --git a/scripts/get_latest_github_metrics.sh b/scripts/get_latest_github_metrics.sh index 6433b8c8..1f77f5ad 100644 --- a/scripts/get_latest_github_metrics.sh +++ b/scripts/get_latest_github_metrics.sh @@ -7,7 +7,7 @@ METRICS_ARTIFACTS_BRANCH=${2:-main} # env vars REPOSITORY_NAME=${REPOSITORY_NAME:-localstack-pro} -ARTIFACT_ID=${ARTIFACT_ID:-implemented_features_python-amd64.csv} +ARTIFACT_ID=${ARTIFACT_ID:-implemented_features_python-amd64} WORKFLOW=${WORKFLOW:-"Az / Build, Test, Push"} PREFIX_ARTIFACT=${PREFIX_ARTIFACT:-} FILTER_SUCCESS=${FILTER_SUCCESS:-1} @@ -30,15 +30,28 @@ else SELECTOR='.[] | select(.status=="completed" and (.conclusion=="failure" or .conclusion=="success"))' fi -RUN_IDS=$(gh run list --limit "$LIMIT" --branch "$METRICS_ARTIFACTS_BRANCH" --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" --workflow "$WORKFLOW" --json databaseId,conclusion,status --jq "$SELECTOR") +RUN_IDS=() +while IFS= read -r run_id; do + RUN_IDS+=("$run_id") +done < <( + gh run list \ + --limit "$LIMIT" \ + --branch "$METRICS_ARTIFACTS_BRANCH" \ + --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" \ + --workflow "$WORKFLOW" \ + --json databaseId,conclusion,status \ + --jq "$SELECTOR | .databaseId" +) -if [ "$(echo "$RUN_IDS" | jq -rs '.[0].databaseId')" = "null" ]; then - echo "No matching workflow run found." +if [ "${#RUN_IDS[@]}" -eq 0 ]; then + echo "No matching workflow runs found." exit 1 fi -for ((i=0; i/dev/null | wc -l)" -eq 0 ]; then + echo "Failed to download artifact '$ARTIFACT_ID' from the checked workflow runs." + exit 1 +fi + echo "Moving artifact to $TARGET_FOLDER" mkdir -p "$TARGET_FOLDER" if [[ -z "${PREFIX_ARTIFACT}" ]]; then From db8d294b2a064ccb95671f69b0238832910c456b Mon Sep 17 00:00:00 2001 From: Harsh Mishra Date: Mon, 16 Mar 2026 20:44:21 +0530 Subject: [PATCH 4/5] run on PRs --- .github/workflows/update-azure-coverage.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-azure-coverage.yml b/.github/workflows/update-azure-coverage.yml index b4293861..5c229fd5 100644 --- a/.github/workflows/update-azure-coverage.yml +++ b/.github/workflows/update-azure-coverage.yml @@ -9,6 +9,8 @@ on: required: true type: string description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)" + pull_request: + types: [opened, synchronize, reopened] jobs: update-azure-coverage: @@ -23,7 +25,7 @@ jobs: with: fetch-depth: 0 path: docs - ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} + ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || github.event_name == 'pull_request' && github.head_ref || 'main' }} - name: Set up system wide dependencies run: | @@ -58,7 +60,7 @@ jobs: id: check-for-changes working-directory: docs env: - TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} + TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || github.event_name == 'pull_request' && github.head_ref || 'main' }} run: | mkdir -p resources (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log From 1c252bfe81ef9e40e4ea7f6ecfd33f48fea3aa09 Mon Sep 17 00:00:00 2001 From: Harsh Mishra Date: Mon, 16 Mar 2026 20:56:19 +0530 Subject: [PATCH 5/5] get it ready --- .github/workflows/update-azure-coverage.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-azure-coverage.yml b/.github/workflows/update-azure-coverage.yml index 5c229fd5..b4293861 100644 --- a/.github/workflows/update-azure-coverage.yml +++ b/.github/workflows/update-azure-coverage.yml @@ -9,8 +9,6 @@ on: required: true type: string description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)" - pull_request: - types: [opened, synchronize, reopened] jobs: update-azure-coverage: @@ -25,7 +23,7 @@ jobs: with: fetch-depth: 0 path: docs - ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || github.event_name == 'pull_request' && github.head_ref || 'main' }} + ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} - name: Set up system wide dependencies run: | @@ -60,7 +58,7 @@ jobs: id: check-for-changes working-directory: docs env: - TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || github.event_name == 'pull_request' && github.head_ref || 'main' }} + TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }} run: | mkdir -p resources (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log