Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 247 additions & 15 deletions .github/workflows/refresh-embedded-dwarf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,24 @@ name: Refresh embedded DWARF for new Ceph releases
# Phase 1 (this file): centos-stream / el9 only. Detects newly-published
# Ceph point releases (quincy / reef / squid / tentacle) on
# download.ceph.com, generates osdtrace + radostrace DWARF JSONs for the
# missing ones inside a disposable centos:stream9 podman container, and
# opens a follow-up PR with the new files.
# missing ones inside a disposable centos:stream9 podman container,
# runtime-verifies each one against a real cephadm cluster of the matching
# version, and opens a follow-up PR with only the verified files.
#
# Three jobs:
# generate - detect + generate JSONs (one podman container per version),
# re-aggregate the embedded header as an early link gate, and
# publish the new JSONs + manifest as artifacts. Emits a JSON
# array of the generated versions for the verify matrix.
# verify - dynamic matrix, one runner per generated version (parallel).
# Each cell rebuilds osdtrace/radostrace with the new JSONs
# embedded, provisions a single-host cephadm cluster running
# quay.io/ceph/ceph:v<version> (whose build_id matches the el9
# RPM the JSON came from), drives an S3 workload, and traces a
# real OSD + radosgw via the EMBEDDED path (REQUIRE_EMBEDDED=1
# -> a fallback to live DWARF parsing is a failure).
# open-pr - assemble the verified subset (drop any version whose runtime
# verification failed; list it for retry) and open the PR.
#
# Phases 2-3 (future): mirror the same detect/generate/PR flow for
# quay.io container-image build-ids, then for Ubuntu / Cloud Archive /
Expand All @@ -24,12 +40,18 @@ permissions:
pull-requests: write

jobs:
refresh:
generate:
runs-on: ubuntu-24.04
# Worst case: 15 missing versions * ~6 min/version = 90 min for the
# generators alone, plus ~5 min for the host build + ~5 min for the
# final rebuild + PR open. 120 min leaves headroom for slow downloads.
# final rebuild (~100 min total). The 180 min timeout leaves headroom for slow downloads.
timeout-minutes: 180
outputs:
count: ${{ steps.detect.outputs.count }}
succeeded: ${{ steps.generate.outputs.succeeded }}
# JSON array of generated versions, e.g. ["17.2.7","19.2.2"]; drives
# the verify job's dynamic matrix. "[]" when nothing was generated.
versions: ${{ steps.generate.outputs.versions }}
Comment on lines +52 to +54

steps:
- name: Checkout code and submodules
Expand Down Expand Up @@ -89,6 +111,12 @@ jobs:
echo "succeeded=$S" >> "$GITHUB_OUTPUT"
echo "failed=$F" >> "$GITHUB_OUTPUT"

# Emit the generated versions as a JSON array for the verify matrix.
versions=$(awk -F'\t' 'NF>=3{print $3}' /tmp/succeeded.tsv \
| python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))")
echo "versions=$versions" >> "$GITHUB_OUTPUT"
echo "versions=$versions"

- name: Re-aggregate embedded DWARF header + relink
# This step proves the new JSONs parse cleanly through
# tools/generate_embedded_dwarf.py and that osdtrace + radostrace
Expand All @@ -100,43 +128,239 @@ jobs:
make clean
make -j"$(nproc)" osdtrace radostrace

- name: Compose pull-request body
- name: Bundle generated JSONs + manifest for downstream jobs
if: steps.generate.outputs.succeeded != '0'
run: |
# The JSON filenames embed the package epoch (e.g.
# osd-2:19.2.2-0.el9_dwarf.json) and actions/upload-artifact@v4
# rejects the ':' character outright, so ship a colon-free tarball
# (plus the colon-free manifest TSVs) rather than the raw files.
mkdir -p /tmp/artifacts
: > /tmp/artifacts/filelist.txt
while IFS=$'\t' read -r distro tools version pkgver; do
for f in "files/centos-stream/osdtrace/osd-${pkgver}_dwarf.json" \
"files/centos-stream/radostrace/rados-${pkgver}_dwarf.json"; do
[ -f "$f" ] && echo "$f" >> /tmp/artifacts/filelist.txt
done
done < /tmp/succeeded.tsv
tar -czf /tmp/artifacts/generated-jsons.tar.gz -T /tmp/artifacts/filelist.txt
cp /tmp/succeeded.tsv /tmp/failed.tsv /tmp/missing.tsv /tmp/artifacts/ 2>/dev/null || true
echo "=== tarball contents ==="; tar -tzf /tmp/artifacts/generated-jsons.tar.gz

- name: Upload generated JSONs + manifest
if: steps.generate.outputs.succeeded != '0'
uses: actions/upload-artifact@v4
with:
name: generated-jsons
path: /tmp/artifacts/
retention-days: 7

verify:
needs: generate
# Skip entirely when nothing was generated (empty matrix is invalid).
if: ${{ needs.generate.outputs.versions != '' && needs.generate.outputs.versions != '[]' }}
runs-on: ubuntu-24.04
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
version: ${{ fromJson(needs.generate.outputs.versions) }}
steps:
- name: Checkout code and submodules
uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0

- name: Install build dependencies
run: |
sudo apt-get update
sudo apt-get install -y g++ clang libelf-dev libc6-dev-i386 libdw-dev python3

- name: Pull in the freshly generated JSONs
uses: actions/download-artifact@v4
with:
name: generated-jsons
path: /tmp/gen

- name: Unpack generated JSONs into the tree
run: tar -xzf /tmp/gen/generated-jsons.tar.gz

- name: Build osdtrace + radostrace with the new JSONs embedded
# Rebuilding here (rather than shipping a binary artifact) guarantees
# the embedded header is regenerated to include the just-generated
# version, and that the binary links against this runner's libs.
run: make -j"$(nproc)" osdtrace radostrace

- name: Runtime trace verification via cephadm (v${{ matrix.version }})
  id: verify
  # Pin the exact point-release image so the running ceph-osd / radosgw
  # carry the same build_id as the el9 RPM the JSON was extracted from;
  # REQUIRE_EMBEDDED=1 makes "Using embedded DWARF data" mandatory, so a
  # silent fall-through to live DWARF parsing counts as a failure.
  #
  # The version is handed to the script through an environment variable
  # instead of being expanded by the workflow templating engine inside the
  # shell text: the value originates from an external release listing, and
  # this script runs under sudo, so it must never be parsed as shell code.
  env:
    CEPH_VERSION: ${{ matrix.version }}
  run: |
    sudo CEPH_IMAGE="quay.io/ceph/ceph:v${CEPH_VERSION}" \
      REQUIRE_EMBEDDED=1 \
      ./tests/functional-test-cephadm-rgw.sh "${CEPH_VERSION}"

- name: Record verification result
if: always()
run: |
mkdir -p /tmp/vr
echo "${{ steps.verify.outcome }}" > "/tmp/vr/${{ matrix.version }}"
echo "version ${{ matrix.version }} -> ${{ steps.verify.outcome }}"

- name: Upload verification result
if: always()
uses: actions/upload-artifact@v4
with:
name: verify-result-${{ matrix.version }}
path: /tmp/vr/
retention-days: 7

- name: Stage trace logs for failure artifact
if: failure()
run: |
mkdir -p trace-logs
for f in /tmp/osdtrace-cephadm-${{ matrix.version }}.log \
/tmp/radostrace-cephadm-${{ matrix.version }}.log \
/tmp/s3-workload-${{ matrix.version }}.log; do
[ -e "$f" ] && sudo cp "$f" trace-logs/ || true
done
sudo chown -R "$USER" trace-logs 2>/dev/null || true

- name: Upload trace logs
if: failure()
uses: actions/upload-artifact@v4
with:
name: trace-logs-${{ matrix.version }}
path: trace-logs/
retention-days: 14
if-no-files-found: ignore

open-pr:
needs: [generate, verify]
# Run even if some verify cells failed (we drop those versions); only
# skip when generation produced nothing.
if: ${{ always() && needs.generate.result == 'success' && needs.generate.outputs.succeeded != '0' }}
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Download generated JSONs + manifest
uses: actions/download-artifact@v4
with:
name: generated-jsons
path: /tmp/gen

- name: Unpack generated JSONs
run: |
mkdir -p /tmp/gen-extract
tar -xzf /tmp/gen/generated-jsons.tar.gz -C /tmp/gen-extract

- name: Download verification results
uses: actions/download-artifact@v4
with:
pattern: verify-result-*
path: /tmp/verify
merge-multiple: true

- name: Assemble verified JSONs + compose PR body
id: assemble
run: |
# Map version -> success|failure from the verify result files.
declare -A status
if [ -d /tmp/verify ]; then
for f in /tmp/verify/*; do
[ -f "$f" ] || continue
status["$(basename "$f")"]="$(tr -d '[:space:]' < "$f")"
done
fi

: > /tmp/verified.tsv
: > /tmp/verifyfailed.tsv
copied=0
if [ -f /tmp/gen/succeeded.tsv ]; then
while IFS=$'\t' read -r distro tools version pkgver; do
[ -n "$version" ] || continue
st="${status[$version]:-missing}"
if [ "$st" = "success" ]; then
for pair in "osdtrace/osd-${pkgver}_dwarf.json" \
"radostrace/rados-${pkgver}_dwarf.json"; do
src="/tmp/gen-extract/files/centos-stream/${pair}"
dst="files/centos-stream/${pair}"
if [ -f "$src" ]; then
mkdir -p "$(dirname "$dst")"
cp "$src" "$dst"
copied=$((copied + 1))
fi
done
printf '%s\t%s\t%s\t%s\n' "$distro" "$tools" "$version" "$pkgver" \
>> /tmp/verified.tsv
else
printf '%s\t%s\t%s\t%s\t%s\n' "$distro" "$tools" "$version" "$pkgver" "$st" \
>> /tmp/verifyfailed.tsv
echo "::warning::runtime verification failed for $version (status=$st); dropping from PR"
fi
done < /tmp/gen/succeeded.tsv
fi
echo "copied=$copied" >> "$GITHUB_OUTPUT"
echo "Copied $copied verified JSON file(s) into the working tree."

{
echo "## Newly added embedded DWARF JSONs"
echo "## Newly added embedded DWARF JSONs (runtime-verified)"
echo
echo "Each version below was traced against a live cephadm cluster"
echo "running \`quay.io/ceph/ceph:v<version>\`; both osdtrace and"
echo "radostrace loaded the **embedded** JSON (build_id match) and"
echo "produced valid trace output."
echo
echo "| distro | tools | version | pkgver |"
echo "|---|---|---|---|"
while IFS=$'\t' read -r d t v p; do
printf '| %s | %s | %s | `%s` |\n' "$d" "$t" "$v" "$p"
done < /tmp/succeeded.tsv
done < /tmp/verified.tsv
echo
if [ -s /tmp/failed.tsv ]; then
if [ -s /tmp/verifyfailed.tsv ]; then
echo "## Generated but failed runtime verification"
echo
echo "Excluded from this PR; will be retried by the next run."
echo
echo '```'
cat /tmp/verifyfailed.tsv
echo '```'
echo
fi
if [ -s /tmp/gen/failed.tsv ]; then
echo "## Versions that failed to generate"
echo
echo "These will be retried by the next scheduled run."
echo
echo '```'
cat /tmp/failed.tsv
cat /tmp/gen/failed.tsv
echo '```'
echo
fi
echo "## Verification"
echo "## How this was produced"
echo "- \`tools/detect_missing_dwarf.py\` identified the rows above"
echo " by probing \`download.ceph.com/rpm-X.Y.Z/el9/x86_64/\`."
echo "- Each JSON was generated inside a disposable"
echo " \`quay.io/centos/centos:stream9\` container with the"
echo " matching ceph-osd + lib*-debuginfo packages installed."
echo "- \`make -j\` re-aggregated the headers and linked"
echo " \`osdtrace\` + \`radostrace\` cleanly."
echo "- Each JSON was runtime-verified by"
echo " \`tests/functional-test-cephadm-rgw.sh\` against a cephadm"
echo " cluster of the matching version (embedded path enforced)."
echo
echo "_Generated by \`.github/workflows/refresh-embedded-dwarf.yaml\` ($(date -u +'%Y-%m-%d %H:%MZ'))._"
echo "_Generated by \`.github/workflows/refresh-embedded-dwarf.yaml\`._"
} > /tmp/pr_body.md
cat /tmp/pr_body.md

- name: Open pull request
if: steps.generate.outputs.succeeded != '0'
if: steps.assemble.outputs.copied != '0'
uses: peter-evans/create-pull-request@v6
with:
branch: chore/refresh-embedded-dwarf-${{ github.run_id }}
Expand All @@ -146,11 +370,19 @@ jobs:
chore: refresh embedded DWARF for new Ceph point releases

Auto-generated by the refresh-embedded-dwarf workflow.
See PR body for the list of versions added.
Each JSON was runtime-verified against a cephadm cluster of the
matching version via the embedded-DWARF path. See PR body for
the list of versions added.
body-path: /tmp/pr_body.md
labels: |
dwarf-refresh
automated
add-paths: |
files/centos-stream/osdtrace/*.json
files/centos-stream/radostrace/*.json

- name: Note when nothing was verified
if: steps.assemble.outputs.copied == '0'
run: |
echo "No version passed runtime verification; no PR opened."
echo "See the 'verify' job matrix for per-version failures."
47 changes: 45 additions & 2 deletions tests/functional-test-cephadm-rgw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,16 @@ fi

############################################################################
info "=== Step 2: install cephadm + resolve image for $CEPH_RELEASE ==="
CEPH_IMG=$(cephadm_image_for_release "$CEPH_RELEASE")
# CEPH_IMAGE lets a caller pin an exact point-release image
# (e.g. quay.io/ceph/ceph:v19.2.2) instead of the per-major "latest"
# default. The embedded-DWARF refresh bot uses this to run the verification
# against the precise version it just generated a JSON for, so the build_id
# matches and the embedded path actually engages.
if [ -n "${CEPH_IMAGE:-}" ]; then
CEPH_IMG="$CEPH_IMAGE"
else
CEPH_IMG=$(cephadm_image_for_release "$CEPH_RELEASE")
fi
info "image: $CEPH_IMG"
install_cephadm "$CEPH_RELEASE" /tmp/cephadm "$CEPH_IMG"

Expand All @@ -135,7 +144,7 @@ info "OSD devices: ${OSD_DEVS[*]}"
info "=== Step 4: bootstrap single-host cephadm cluster ==="
MON_IP=$(hostname -I | awk '{print $1}')
info "MON_IP=$MON_IP"
FSID=$(cephadm_bootstrap_single_host "$CEPH_IMG" "$MON_IP" /tmp/cephadm)
FSID=$(cephadm_bootstrap_single_host "$CEPH_IMG" "$MON_IP" /tmp/cephadm) || true
[ -n "$FSID" ] || { err "bootstrap failed to produce FSID"; exit 1; }
info "FSID=$FSID"

Expand Down Expand Up @@ -291,6 +300,40 @@ kill "$WL_PUT" "$WL_GET" 2>/dev/null || true
sleep 2


############################################################################
info "=== Step 13b: check embedded-DWARF boot marker ==="
# When REQUIRE_EMBEDDED=1 (the refresh bot's per-version verification), the
# whole point is to prove the *embedded* JSON works: a fallback to live DWARF
# parsing means the embedded data was never exercised, so treat it as a hard
# failure. Without the flag (the normal PR matrix) the marker is advisory --
# the major-release "latest" image may legitimately predate or postdate the
# embedded data, and live-parse fallback is an acceptable optimisation miss.
check_embedded_marker() {
    # Inspect a tracer log for the marker that shows which DWARF source
    # was used.
    #   $1  tool name, used only in the messages
    #   $2  path of the log file to scan
    # Returns 0 when the embedded marker is present, or when it is absent
    # but REQUIRE_EMBEDDED is not "1" (advisory mode). Returns 1 when the
    # embedded marker is absent and REQUIRE_EMBEDDED=1.
    local tool="$1" log="$2"
    local live_marker="Start to parse dwarf info"

    # Embedded marker found: nothing else to check.
    if grep -q "Using embedded DWARF data" "$log"; then
        info "$tool used embedded DWARF data"
        return 0
    fi

    # Advisory mode: report what the log shows, but never fail.
    if [ "${REQUIRE_EMBEDDED:-0}" != "1" ]; then
        if ! grep -q "$live_marker" "$log"; then
            info "[NOTE] $tool: no embedded/live-parse marker found"
        else
            info "[NOTE] $tool fell back to live DWARF parsing (embedded data not matched in this env)"
        fi
        return 0
    fi

    # Strict mode: a missing embedded marker is a failure; say which
    # of the two failure shapes the log shows.
    err "$tool did NOT use embedded DWARF data (REQUIRE_EMBEDDED=1)"
    if ! grep -q "$live_marker" "$log"; then
        err " -> neither embedded nor live-parse marker present; tool may have failed to start"
    else
        err " -> it fell back to live DWARF parsing; the embedded JSON for this version was not matched (build_id mismatch?)"
    fi
    return 1
}
check_embedded_marker osdtrace "$OSDTRACE_LOG"
check_embedded_marker radostrace "$RADOSTRACE_LOG"


############################################################################
info "=== Step 14: gather cluster facts for verifiers ==="
# osdtrace counts data rows targeting a single pool; pick the RGW data
Expand Down
Loading
Loading