diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fe011fc7c8..bb70b5a640 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,6 +3,7 @@ # AppSec /benchmark/sirun/appsec/ @DataDog/asm-js /benchmark/sirun/appsec-iast/ @DataDog/asm-js +/benchmark/sirun/iast/ @DataDog/asm-js /integration-tests/appsec/ @DataDog/asm-js /packages/dd-trace/src/appsec/ @DataDog/asm-js /packages/dd-trace/test/appsec/ @DataDog/asm-js @@ -36,13 +37,20 @@ /packages/dd-trace/test/plugins/util/inferred_proxy.spec.js @DataDog/serverless-aws @DataDog/apm-serverless # IDM +/benchmark/sirun/child_process/ @DataDog/apm-idm-js +/benchmark/sirun/fs/ @DataDog/apm-idm-js /benchmark/sirun/plugin-aws-sdk/ @DataDog/apm-idm-js -/benchmark/sirun/plugin-bluebird/ @DataDog/apm-idm-js /benchmark/sirun/plugin-dns/ @DataDog/apm-idm-js /benchmark/sirun/plugin-graphql/ @DataDog/apm-idm-js /benchmark/sirun/plugin-http/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-kafkajs/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-mongodb-core/ @DataDog/apm-idm-js /benchmark/sirun/plugin-net/ @DataDog/apm-idm-js -/benchmark/sirun/plugin-q/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-pg/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-pino/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-redis/ @DataDog/apm-idm-js +/benchmark/sirun/plugin-ws/ @DataDog/apm-idm-js +/benchmark/sirun/url/ @DataDog/apm-idm-js /integration-tests/electron/ @DataDog/apm-idm-js /integration-tests/esbuild/ @DataDog/apm-idm-js @@ -286,12 +294,14 @@ /benchmark/sirun/async_hooks/ @DataDog/lang-platform-js /benchmark/sirun/encoding/ @DataDog/lang-platform-js /benchmark/sirun/exporting-pipeline/ @DataDog/lang-platform-js +/benchmark/sirun/id/ @DataDog/lang-platform-js /benchmark/sirun/log/ @DataDog/lang-platform-js /benchmark/sirun/runtime-metrics/ @DataDog/lang-platform-js /benchmark/sirun/scope/ @DataDog/lang-platform-js /benchmark/sirun/shimmer-runtime/ @DataDog/lang-platform-js /benchmark/sirun/shimmer-startup/ @DataDog/lang-platform-js /benchmark/sirun/startup/ @DataDog/lang-platform-js +/benchmark/sirun/tracing-channel/ @DataDog/lang-platform-js /integration-tests/bun/ @DataDog/lang-platform-js /integration-tests/coverage/ @DataDog/lang-platform-js diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index 3866d0d941..225cf15f86 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -4,8 +4,10 @@ variables: SLS_CI_BRANCH: main # Benchmark's env variables. Modify to tweak benchmark parameters. - UNCONFIDENCE_THRESHOLD: "5.0" + UNCONFIDENCE_THRESHOLD: "2.0" MD_REPORT_ONLY_CHANGES: "1" + # Set to the dd-trace-js benchmarks dashboard URL to add a link in the PR comment. + BENCHMARK_DASHBOARD_URL: "" .benchmarks: stage: benchmarks diff --git a/benchmark/sirun/runall.sh b/benchmark/sirun/runall.sh index 40d7fd47f7..30a0bce7b6 100755 --- a/benchmark/sirun/runall.sh +++ b/benchmark/sirun/runall.sh @@ -8,12 +8,15 @@ CWD=$(pwd) # Background subshells can't share a bash variable, so failed variants # write their dir/variant name here and the parent counts lines after `wait`. FAILURES_FILE=$(mktemp) +# Variants whose latest definition failed against the older baseline source; +# tolerated there unless this PR also changes non-benchmark source (see below). +SKIPPED_FILE=$(mktemp) function cleanup { for D in "${DIRS[@]}"; do rm -f "${CWD}/${D}/meta-temp.json" done - rm -f "$FAILURES_FILE" + rm -f "$FAILURES_FILE" "$SKIPPED_FILE" } trap cleanup EXIT @@ -69,6 +72,22 @@ CPU_AFFINITY="${CPUSET_START}" # reset for each node.js version SPLITS=${SPLITS:-1} GROUP=${GROUP:-1} +# With BENCHMARKS_FROM=candidate the baseline runs this PR's benchmark code on +# the older source. Skip a baseline failure only when the same variant passed on +# the candidate run -- proof the failure is specific to the older source, not a +# broken benchmark. The candidate run records its passing variants below. +SKIP_BASELINE_FAILURES="" +RECORD_CANDIDATE_PASS="" +CANDIDATE_PASSED_FILE="${ARTIFACTS_DIR:-/tmp}/candidate-passed-variants.txt" +if [[ "${TOLERATE_NEW_BENCHMARK_FAILURES:-}" == "1" ]]; then + if [[ "${BASELINE_OR_CANDIDATE:-}" == "candidate" ]]; then + RECORD_CANDIDATE_PASS="1" + : > "$CANDIDATE_PASSED_FILE" + elif [[ "${BASELINE_OR_CANDIDATE:-}" == "baseline" ]]; then + SKIP_BASELINE_FAILURES="1" + fi +fi + BENCH_COUNT=0 for D in "${DIRS[@]}"; do cd "${D}" @@ -77,17 +96,24 @@ for D in "${DIRS[@]}"; do cd .. done +# Auto-shard from the variant count and available cores: each shard pins one variant +# per core, so the suite needs ceil(BENCH_COUNT / cores) shards. The CI matrix supplies +# SPLITS shards; fail with the exact number to configure rather than silently dropping +# variants once the suite outgrows the matrix. +SHARDS_NEEDED=$(( (BENCH_COUNT + TOTAL_CPU_CORES - 1) / TOTAL_CPU_CORES )) +if [[ ${SPLITS} -lt ${SHARDS_NEEDED} ]]; then + echo "${BENCH_COUNT} variants on ${TOTAL_CPU_CORES} cores need ${SHARDS_NEEDED} shards, but SPLITS=${SPLITS}." >&2 + echo "Set SPLITS and the GROUP rows per MAJOR_VERSION in .gitlab/benchmarks.yml to ${SHARDS_NEEDED}." >&2 + exit 1 +fi + +# Balance variants evenly across all configured shards; guaranteed <= cores each by the check above. GROUP_SIZE=$(($(($BENCH_COUNT+$SPLITS-1))/$SPLITS)) # round up BENCH_INDEX=0 BENCH_END=$(($GROUP_SIZE*$GROUP)) BENCH_START=$(($BENCH_END-$GROUP_SIZE)) -if [[ ${GROUP_SIZE} -gt ${TOTAL_CPU_CORES} ]]; then - echo "Group size ${GROUP_SIZE} exceeds available CPU cores (${TOTAL_CPU_CORES} from nproc)" - exit 1 -fi - for D in "${DIRS[@]}"; do cd "${D}" variants="$(node ../get-variants.js)" @@ -103,10 +129,14 @@ for D in "${DIRS[@]}"; do ( if time node ../run-one-variant.js >> ../results.ndjson; then echo "${D}/${V} finished." - else - echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2 + if [[ -n "${RECORD_CANDIDATE_PASS}" ]]; then echo "${D}/${V}" >> "$CANDIDATE_PASSED_FILE"; fi + elif [[ -n "${SKIP_BASELINE_FAILURES}" ]] && grep -Fqx "${D}/${V}" "$CANDIDATE_PASSED_FILE" 2>/dev/null; then + echo "${D}/${V} skipped: passed on the candidate but failed on the older baseline source." >&2 # Append-only writes to a single tempfile from parallel subshells are # atomic on Linux below PIPE_BUF (4 KiB); each line here is ~30 bytes. + echo "${D}/${V}" >> "$SKIPPED_FILE" + else + echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2 echo "${D}/${V}" >> "$FAILURES_FILE" fi ) & @@ -137,3 +167,28 @@ if [[ "${FAILED_COUNT}" -gt 0 ]]; then sed 's/^/ - /' "$FAILURES_FILE" >&2 exit 1 fi + +SKIPPED_COUNT=$(wc -l < "$SKIPPED_FILE" | tr -d ' ') +if [[ "${SKIPPED_COUNT}" -gt 0 ]]; then + echo "" >&2 + echo "${SKIPPED_COUNT} benchmark variant(s) failed on the baseline source and were skipped:" >&2 + sed 's/^/ - /' "$SKIPPED_FILE" >&2 + + # A benchmark-only change is fine -- the skipped benchmark is the work. Any other + # source change leaves the A/B comparison incomplete, so fail and ask for the + # benchmark to land on its own first. Docs, CODEOWNERS, CI config and tests do + # not count as source here. + NON_BENCH_SOURCE_CHANGED="" + if [[ -d /app/candidate/.git && -n "${COMMIT_SHA:-}" && -n "${CI_COMMIT_SHA:-}" ]]; then + NON_BENCH_SOURCE_CHANGED="$(git -C /app/candidate diff --name-only "${COMMIT_SHA}..${CI_COMMIT_SHA}" \ + | grep -vE '(^benchmark/|^docs/|^\.github/|^\.gitlab/|\.md$|(^|/)CODEOWNERS$|^test/|/test/|/__tests__/|\.spec\.[jt]s$|\.test\.[jt]s$)' || true)" + fi + + if [[ -n "${NON_BENCH_SOURCE_CHANGED}" ]]; then + echo "" >&2 + echo "This PR also changes non-benchmark source, so the A/B comparison is incomplete." >&2 + echo "Land the benchmark change separately first, then rebase. Changed source files:" >&2 + echo "${NON_BENCH_SOURCE_CHANGED}" | sed 's/^/ - /' >&2 + exit 1 + fi +fi diff --git a/benchmark/sirun/startup-guard.js b/benchmark/sirun/startup-guard.js new file mode 100644 index 0000000000..3644b3d7f3 --- /dev/null +++ b/benchmark/sirun/startup-guard.js @@ -0,0 +1,51 @@ +'use strict' + +// Startup-share guard. Require this FIRST in a loop benchmark so START captures +// the file's load time (the heavy requires that follow, especially the tracer). +// Call loopStart() right before the measured loop and done() right after it (for +// async loops, call done() from the completion callback). done() fails the run +// if load+setup grew past the allowed share of the total, which is the recurring +// way a bench rots into measuring startup instead of its hot path. +// +// const guard = require('../startup-guard') +// // ...requires, setup... +// guard.loopStart() +// for (...) { ... } +// guard.done() // default 10% ceiling +// guard.done(0.15) // relaxed ceiling when the loop legitimately can't dominate further + +const assert = require('node:assert/strict') + +const START = process.hrtime.bigint() +let loopStartedAt + +function loopStart () { + loopStartedAt = process.hrtime.bigint() +} + +function done (maxShare = 0.10) { + const end = process.hrtime.bigint() + assert.ok(loopStartedAt !== undefined, 'startup-guard: loopStart() was never called') + const total = Number(end - START) + const startup = Number(loopStartedAt - START) + const share = total === 0 ? 1 : startup / total + + // Report mode (used by the overview collector): write the share to the given + // file and skip the assertion, so a high-startup variant still reports instead + // of crashing the data run. Off in normal/CI runs, where the assertion gates. + const reportPath = process.env.STARTUP_GUARD_REPORT + if (reportPath) { + try { + require('fs').writeFileSync(reportPath, share.toFixed(4)) + } catch {} + return + } + + assert.ok( + share <= maxShare, + `startup-guard: load+setup was ${(share * 100).toFixed(1)}% of the run ` + + `(max ${(maxShare * 100).toFixed(0)}%); grow the loop or load fewer modules up front` + ) +} + +module.exports = { loopStart, done }