Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# AppSec
/benchmark/sirun/appsec/ @DataDog/asm-js
/benchmark/sirun/appsec-iast/ @DataDog/asm-js
/benchmark/sirun/iast/ @DataDog/asm-js
/integration-tests/appsec/ @DataDog/asm-js
/packages/dd-trace/src/appsec/ @DataDog/asm-js
/packages/dd-trace/test/appsec/ @DataDog/asm-js
Expand Down Expand Up @@ -36,13 +37,20 @@
/packages/dd-trace/test/plugins/util/inferred_proxy.spec.js @DataDog/serverless-aws @DataDog/apm-serverless

# IDM
/benchmark/sirun/child_process/ @DataDog/apm-idm-js
/benchmark/sirun/fs/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-aws-sdk/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-bluebird/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-dns/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-graphql/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-http/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-kafkajs/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-mongodb-core/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-net/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-q/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-pg/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-pino/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-redis/ @DataDog/apm-idm-js
/benchmark/sirun/plugin-ws/ @DataDog/apm-idm-js
/benchmark/sirun/url/ @DataDog/apm-idm-js

/integration-tests/electron/ @DataDog/apm-idm-js
/integration-tests/esbuild/ @DataDog/apm-idm-js
Expand Down Expand Up @@ -286,12 +294,14 @@
/benchmark/sirun/async_hooks/ @DataDog/lang-platform-js
/benchmark/sirun/encoding/ @DataDog/lang-platform-js
/benchmark/sirun/exporting-pipeline/ @DataDog/lang-platform-js
/benchmark/sirun/id/ @DataDog/lang-platform-js
/benchmark/sirun/log/ @DataDog/lang-platform-js
/benchmark/sirun/runtime-metrics/ @DataDog/lang-platform-js
/benchmark/sirun/scope/ @DataDog/lang-platform-js
/benchmark/sirun/shimmer-runtime/ @DataDog/lang-platform-js
/benchmark/sirun/shimmer-startup/ @DataDog/lang-platform-js
/benchmark/sirun/startup/ @DataDog/lang-platform-js
/benchmark/sirun/tracing-channel/ @DataDog/lang-platform-js

/integration-tests/bun/ @DataDog/lang-platform-js
/integration-tests/coverage/ @DataDog/lang-platform-js
Expand Down
4 changes: 3 additions & 1 deletion .gitlab/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ variables:
SLS_CI_BRANCH: main

# Benchmark's env variables. Modify to tweak benchmark parameters.
UNCONFIDENCE_THRESHOLD: "5.0"
UNCONFIDENCE_THRESHOLD: "2.0"
MD_REPORT_ONLY_CHANGES: "1"
# Set to the dd-trace-js benchmarks dashboard URL to add a link in the PR comment.
BENCHMARK_DASHBOARD_URL: ""

.benchmarks:
stage: benchmarks
Expand Down
71 changes: 63 additions & 8 deletions benchmark/sirun/runall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ CWD=$(pwd)
# Background subshells can't share a bash variable, so failed variants
# write their dir/variant name here and the parent counts lines after `wait`.
FAILURES_FILE=$(mktemp)
# Variants whose latest definition failed against the older baseline source;
# tolerated there unless this PR also changes non-benchmark source (see below).
SKIPPED_FILE=$(mktemp)

function cleanup {
for D in "${DIRS[@]}"; do
rm -f "${CWD}/${D}/meta-temp.json"
done
rm -f "$FAILURES_FILE"
rm -f "$FAILURES_FILE" "$SKIPPED_FILE"
}

trap cleanup EXIT
Expand Down Expand Up @@ -69,6 +72,22 @@ CPU_AFFINITY="${CPUSET_START}" # reset for each node.js version
SPLITS=${SPLITS:-1}
GROUP=${GROUP:-1}

# With BENCHMARKS_FROM=candidate the baseline runs this PR's benchmark code on
# the older source. Skip a baseline failure only when the same variant passed on
# the candidate run -- proof the failure is specific to the older source, not a
# broken benchmark. The candidate run records its passing variants below.
SKIP_BASELINE_FAILURES=""
RECORD_CANDIDATE_PASS=""
CANDIDATE_PASSED_FILE="${ARTIFACTS_DIR:-/tmp}/candidate-passed-variants.txt"
if [[ "${TOLERATE_NEW_BENCHMARK_FAILURES:-}" == "1" ]]; then
if [[ "${BASELINE_OR_CANDIDATE:-}" == "candidate" ]]; then
RECORD_CANDIDATE_PASS="1"
: > "$CANDIDATE_PASSED_FILE"
elif [[ "${BASELINE_OR_CANDIDATE:-}" == "baseline" ]]; then
SKIP_BASELINE_FAILURES="1"
fi
fi

BENCH_COUNT=0
for D in "${DIRS[@]}"; do
cd "${D}"
Expand All @@ -77,17 +96,24 @@ for D in "${DIRS[@]}"; do
cd ..
done

# Auto-shard from the variant count and available cores: each shard pins one variant
# per core, so the suite needs ceil(BENCH_COUNT / cores) shards. The CI matrix supplies
# SPLITS shards; fail with the exact number to configure rather than silently dropping
# variants once the suite outgrows the matrix.
SHARDS_NEEDED=$(( (BENCH_COUNT + TOTAL_CPU_CORES - 1) / TOTAL_CPU_CORES ))
if [[ ${SPLITS} -lt ${SHARDS_NEEDED} ]]; then
echo "${BENCH_COUNT} variants on ${TOTAL_CPU_CORES} cores need ${SHARDS_NEEDED} shards, but SPLITS=${SPLITS}." >&2
echo "Set SPLITS and the GROUP rows per MAJOR_VERSION in .gitlab/benchmarks.yml to ${SHARDS_NEEDED}." >&2
exit 1
fi

# Balance variants evenly across all configured shards; guaranteed <= cores each by the check above.
GROUP_SIZE=$(($(($BENCH_COUNT+$SPLITS-1))/$SPLITS)) # round up

BENCH_INDEX=0
BENCH_END=$(($GROUP_SIZE*$GROUP))
BENCH_START=$(($BENCH_END-$GROUP_SIZE))

if [[ ${GROUP_SIZE} -gt ${TOTAL_CPU_CORES} ]]; then
echo "Group size ${GROUP_SIZE} exceeds available CPU cores (${TOTAL_CPU_CORES} from nproc)"
exit 1
fi

for D in "${DIRS[@]}"; do
cd "${D}"
variants="$(node ../get-variants.js)"
Expand All @@ -103,10 +129,14 @@ for D in "${DIRS[@]}"; do
(
if time node ../run-one-variant.js >> ../results.ndjson; then
echo "${D}/${V} finished."
else
echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2
if [[ -n "${RECORD_CANDIDATE_PASS}" ]]; then echo "${D}/${V}" >> "$CANDIDATE_PASSED_FILE"; fi
elif [[ -n "${SKIP_BASELINE_FAILURES}" ]] && grep -Fqx "${D}/${V}" "$CANDIDATE_PASSED_FILE" 2>/dev/null; then
echo "${D}/${V} skipped: passed on the candidate but failed on the older baseline source." >&2
# Each echo is one small (~30 byte) append through `>>` (O_APPEND); on a local
# Linux filesystem such appends from parallel subshells land as whole lines.
# (PIPE_BUF is the pipe/FIFO atomicity limit and does not govern regular files.)
echo "${D}/${V}" >> "$SKIPPED_FILE"
else
echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2
echo "${D}/${V}" >> "$FAILURES_FILE"
fi
) &
Expand Down Expand Up @@ -137,3 +167,28 @@ if [[ "${FAILED_COUNT}" -gt 0 ]]; then
sed 's/^/ - /' "$FAILURES_FILE" >&2
exit 1
fi

# Report variants that failed only on the baseline source and were tolerated,
# then decide whether tolerating them is acceptable for this change.
SKIPPED_COUNT=$(wc -l < "$SKIPPED_FILE" | tr -d ' ')
if [[ "${SKIPPED_COUNT}" -gt 0 ]]; then
echo "" >&2
echo "${SKIPPED_COUNT} benchmark variant(s) failed on the baseline source and were skipped:" >&2
sed 's/^/ - /' "$SKIPPED_FILE" >&2

# A benchmark-only change is fine -- the skipped benchmark is the work. Any other
# source change leaves the A/B comparison incomplete, so fail and ask for the
# benchmark to land on its own first. Docs, CODEOWNERS, CI config and tests do
# not count as source here.
# NOTE(review): this check fails open. If /app/candidate/.git, COMMIT_SHA or
# CI_COMMIT_SHA is missing, or `git diff` prints nothing (including when it
# errors, because of the trailing `|| true`), the variable stays empty and the
# skipped variants are accepted. Confirm that is the intended behavior.
NON_BENCH_SOURCE_CHANGED=""
if [[ -d /app/candidate/.git && -n "${COMMIT_SHA:-}" && -n "${CI_COMMIT_SHA:-}" ]]; then
NON_BENCH_SOURCE_CHANGED="$(git -C /app/candidate diff --name-only "${COMMIT_SHA}..${CI_COMMIT_SHA}" \
| grep -vE '(^benchmark/|^docs/|^\.github/|^\.gitlab/|\.md$|(^|/)CODEOWNERS$|^test/|/test/|/__tests__/|\.spec\.[jt]s$|\.test\.[jt]s$)' || true)"
fi

# Any path that survived the filter above counts as real source: fail the run.
if [[ -n "${NON_BENCH_SOURCE_CHANGED}" ]]; then
echo "" >&2
echo "This PR also changes non-benchmark source, so the A/B comparison is incomplete." >&2
echo "Land the benchmark change separately first, then rebase. Changed source files:" >&2
echo "${NON_BENCH_SOURCE_CHANGED}" | sed 's/^/ - /' >&2
exit 1
fi
fi
51 changes: 51 additions & 0 deletions benchmark/sirun/startup-guard.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'use strict'

// Startup-share guard. Require this FIRST in a loop benchmark so START is taken
// before the benchmark file's own load cost (the heavy requires that follow,
// especially the tracer) and that cost is counted as startup.
// Call loopStart() right before the measured loop and done() right after it (for
// async loops, call done() from the completion callback). done() fails the run
// if load+setup grew past the allowed share of the total, which is the recurring
// way a bench rots into measuring startup instead of its hot path.
//
// const guard = require('../startup-guard')
// // ...requires, setup...
// guard.loopStart()
// for (...) { ... }
// guard.done() // default 10% ceiling
// guard.done(0.15) // relaxed ceiling when the loop legitimately can't dominate further

const assert = require('node:assert/strict')

// High-resolution timestamp (BigInt nanoseconds) taken when this module is first
// evaluated. done() measures both the startup span and the total run from it.
const START = process.hrtime.bigint()
// Timestamp recorded by loopStart(). It stays undefined until loopStart() runs,
// which done() reports as misuse.
let loopStartedAt

/**
 * Marks the beginning of the measured loop.
 *
 * Stores the current high-resolution time in `loopStartedAt`; done() counts
 * everything between START and this mark as load+setup. Calling it again
 * simply moves the mark.
 */
function loopStart () {
  const now = process.hrtime.bigint()
  loopStartedAt = now
}

/**
 * Ends the measurement and checks how much of the run was load+setup.
 *
 * The startup share is (loopStart() mark - START) / (now - START). In normal
 * runs the call throws when that share exceeds `maxShare`. When the
 * STARTUP_GUARD_REPORT environment variable names a file, the share is written
 * there with four decimals and the ceiling is not enforced.
 *
 * @param {number} [maxShare=0.10] Largest allowed startup share, as a fraction
 *   of the total run time.
 * @throws {AssertionError} If loopStart() was never called, or (outside report
 *   mode) if the startup share exceeds `maxShare`.
 */
function done (maxShare = 0.10) {
  const end = process.hrtime.bigint()
  assert.ok(loopStartedAt !== undefined, 'startup-guard: loopStart() was never called')
  const total = Number(end - START)
  const startup = Number(loopStartedAt - START)
  // A zero-length run has no measurable loop, so treat it as all startup.
  const share = total === 0 ? 1 : startup / total

  // Report mode (used by the overview collector): write the share to the given
  // file and skip the assertion, so a high-startup variant still reports instead
  // of crashing the data run. Off in normal/CI runs, where the assertion gates.
  const reportPath = process.env.STARTUP_GUARD_REPORT
  if (reportPath) {
    try {
      require('node:fs').writeFileSync(reportPath, share.toFixed(4))
    } catch (err) {
      // Still best-effort (never fail the data run), but say why the report is
      // missing instead of dropping the error on the floor.
      process.emitWarning(`startup-guard: could not write ${reportPath}: ${err.message}`)
    }
    return
  }

  assert.ok(
    share <= maxShare,
    `startup-guard: load+setup was ${(share * 100).toFixed(1)}% of the run ` +
      `(max ${(maxShare * 100).toFixed(0)}%); grow the loop or load fewer modules up front`
  )
}

// Public API: loopStart() marks the start of the measured loop; done() checks
// (or, in report mode, records) the startup share.
module.exports = { loopStart, done }
Loading