diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index fe011fc7c8..bb70b5a640 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -3,6 +3,7 @@
 # AppSec
 /benchmark/sirun/appsec/ @DataDog/asm-js
 /benchmark/sirun/appsec-iast/ @DataDog/asm-js
+/benchmark/sirun/iast/ @DataDog/asm-js
 /integration-tests/appsec/ @DataDog/asm-js
 /packages/dd-trace/src/appsec/ @DataDog/asm-js
 /packages/dd-trace/test/appsec/ @DataDog/asm-js
@@ -36,13 +37,20 @@
 /packages/dd-trace/test/plugins/util/inferred_proxy.spec.js @DataDog/serverless-aws @DataDog/apm-serverless
 
 # IDM
+/benchmark/sirun/child_process/ @DataDog/apm-idm-js
+/benchmark/sirun/fs/ @DataDog/apm-idm-js
 /benchmark/sirun/plugin-aws-sdk/ @DataDog/apm-idm-js
-/benchmark/sirun/plugin-bluebird/ @DataDog/apm-idm-js
 /benchmark/sirun/plugin-dns/ @DataDog/apm-idm-js
 /benchmark/sirun/plugin-graphql/ @DataDog/apm-idm-js
 /benchmark/sirun/plugin-http/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-kafkajs/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-mongodb-core/ @DataDog/apm-idm-js
 /benchmark/sirun/plugin-net/ @DataDog/apm-idm-js
-/benchmark/sirun/plugin-q/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-pg/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-pino/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-redis/ @DataDog/apm-idm-js
+/benchmark/sirun/plugin-ws/ @DataDog/apm-idm-js
+/benchmark/sirun/url/ @DataDog/apm-idm-js
 
 /integration-tests/electron/ @DataDog/apm-idm-js
 /integration-tests/esbuild/ @DataDog/apm-idm-js
@@ -286,12 +294,14 @@
 /benchmark/sirun/async_hooks/ @DataDog/lang-platform-js
 /benchmark/sirun/encoding/ @DataDog/lang-platform-js
 /benchmark/sirun/exporting-pipeline/ @DataDog/lang-platform-js
+/benchmark/sirun/id/ @DataDog/lang-platform-js
 /benchmark/sirun/log/ @DataDog/lang-platform-js
 /benchmark/sirun/runtime-metrics/ @DataDog/lang-platform-js
 /benchmark/sirun/scope/ @DataDog/lang-platform-js
 /benchmark/sirun/shimmer-runtime/ @DataDog/lang-platform-js
 /benchmark/sirun/shimmer-startup/ @DataDog/lang-platform-js
 /benchmark/sirun/startup/ @DataDog/lang-platform-js
+/benchmark/sirun/tracing-channel/ @DataDog/lang-platform-js
 
 /integration-tests/bun/ @DataDog/lang-platform-js
 /integration-tests/coverage/ @DataDog/lang-platform-js
diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 3866d0d941..225cf15f86 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -4,8 +4,10 @@ variables:
   SLS_CI_BRANCH: main
 
   # Benchmark's env variables. Modify to tweak benchmark parameters.
-  UNCONFIDENCE_THRESHOLD: "5.0"
+  UNCONFIDENCE_THRESHOLD: "2.0"
   MD_REPORT_ONLY_CHANGES: "1"
+  # Set to the dd-trace-js benchmarks dashboard URL to add a link in the PR comment.
+  BENCHMARK_DASHBOARD_URL: ""
 
 .benchmarks:
   stage: benchmarks
diff --git a/benchmark/sirun/runall.sh b/benchmark/sirun/runall.sh
index 40d7fd47f7..30a0bce7b6 100755
--- a/benchmark/sirun/runall.sh
+++ b/benchmark/sirun/runall.sh
@@ -8,12 +8,15 @@ CWD=$(pwd)
 # Background subshells can't share a bash variable, so failed variants
 # write their dir/variant name here and the parent counts lines after `wait`.
 FAILURES_FILE=$(mktemp)
+# Variants whose latest definition failed against the older baseline source;
+# tolerated there unless this PR also changes non-benchmark source (see below).
+SKIPPED_FILE=$(mktemp)
 
 function cleanup {
   for D in "${DIRS[@]}"; do
     rm -f "${CWD}/${D}/meta-temp.json"
   done
-  rm -f "$FAILURES_FILE"
+  rm -f "$FAILURES_FILE" "$SKIPPED_FILE"
 }
 
 trap cleanup EXIT
@@ -69,6 +72,22 @@ CPU_AFFINITY="${CPUSET_START}" # reset for each node.js version
 SPLITS=${SPLITS:-1}
 GROUP=${GROUP:-1}
 
+# With BENCHMARKS_FROM=candidate the baseline runs this PR's benchmark code on the
+# older source. When TOLERATE_NEW_BENCHMARK_FAILURES=1, skip a baseline failure only if
+# the same variant passed on the candidate run -- proof the failure is specific to the
+# older source, not a broken benchmark. The candidate run records its passing variants below.
+SKIP_BASELINE_FAILURES=""
+RECORD_CANDIDATE_PASS=""
+CANDIDATE_PASSED_FILE="${ARTIFACTS_DIR:-/tmp}/candidate-passed-variants.txt"
+if [[ "${TOLERATE_NEW_BENCHMARK_FAILURES:-}" == "1" ]]; then
+  if [[ "${BASELINE_OR_CANDIDATE:-}" == "candidate" ]]; then
+    RECORD_CANDIDATE_PASS="1"
+    : > "$CANDIDATE_PASSED_FILE"
+  elif [[ "${BASELINE_OR_CANDIDATE:-}" == "baseline" ]]; then
+    SKIP_BASELINE_FAILURES="1"
+  fi
+fi
+
 BENCH_COUNT=0
 for D in "${DIRS[@]}"; do
   cd "${D}"
@@ -77,17 +96,24 @@ for D in "${DIRS[@]}"; do
   cd ..
 done
 
+# Auto-shard from the variant count and available cores: each shard pins one variant
+# per core, so the suite needs ceil(BENCH_COUNT / cores) shards. The CI matrix supplies
+# SPLITS shards; fail with the exact number to configure rather than silently dropping
+# variants once the suite outgrows the matrix.
+SHARDS_NEEDED=$(( (BENCH_COUNT + TOTAL_CPU_CORES - 1) / TOTAL_CPU_CORES ))
+if [[ ${SPLITS} -lt ${SHARDS_NEEDED} ]]; then
+  echo "${BENCH_COUNT} variants on ${TOTAL_CPU_CORES} cores need ${SHARDS_NEEDED} shards, but SPLITS=${SPLITS}." >&2
+  echo "Set SPLITS and the GROUP rows per MAJOR_VERSION in .gitlab/benchmarks.yml to ${SHARDS_NEEDED}." >&2
+  exit 1
+fi
+
+# Balance variants evenly across all configured shards; guaranteed <= cores each by the check above.
 GROUP_SIZE=$(($(($BENCH_COUNT+$SPLITS-1))/$SPLITS)) # round up
 
 BENCH_INDEX=0
 BENCH_END=$(($GROUP_SIZE*$GROUP))
 BENCH_START=$(($BENCH_END-$GROUP_SIZE))
 
-if [[ ${GROUP_SIZE} -gt ${TOTAL_CPU_CORES} ]]; then
-  echo "Group size ${GROUP_SIZE} exceeds available CPU cores (${TOTAL_CPU_CORES} from nproc)"
-  exit 1
-fi
-
 for D in "${DIRS[@]}"; do
   cd "${D}"
   variants="$(node ../get-variants.js)"
@@ -103,10 +129,14 @@ for D in "${DIRS[@]}"; do
       (
         if time node ../run-one-variant.js >> ../results.ndjson; then
           echo "${D}/${V} finished."
-        else
-          echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2
+          if [[ -n "${RECORD_CANDIDATE_PASS}" ]]; then echo "${D}/${V}" >> "$CANDIDATE_PASSED_FILE"; fi
+        elif [[ -n "${SKIP_BASELINE_FAILURES}" ]] && grep -Fqx "${D}/${V}" "$CANDIDATE_PASSED_FILE" 2>/dev/null; then
+          echo "${D}/${V} skipped: passed on the candidate but failed on the older baseline source." >&2
           # Append-only writes to a single tempfile from parallel subshells are
           # atomic on Linux below PIPE_BUF (4 KiB); each line here is ~30 bytes.
+          echo "${D}/${V}" >> "$SKIPPED_FILE"
+        else
+          echo "${D}/${V} FAILED on core ${CPU_AFFINITY}" >&2
           echo "${D}/${V}" >> "$FAILURES_FILE"
         fi
       ) &
@@ -137,3 +167,28 @@ if [[ "${FAILED_COUNT}" -gt 0 ]]; then
   sed 's/^/  - /' "$FAILURES_FILE" >&2
   exit 1
 fi
+
+SKIPPED_COUNT=$(wc -l < "$SKIPPED_FILE" | tr -d ' ')
+if [[ "${SKIPPED_COUNT}" -gt 0 ]]; then
+  echo "" >&2
+  echo "${SKIPPED_COUNT} benchmark variant(s) failed on the baseline source and were skipped:" >&2
+  sed 's/^/  - /' "$SKIPPED_FILE" >&2
+
+  # A benchmark-only change is fine -- the skipped benchmark is the work. Any other source
+  # change leaves the A/B comparison incomplete, so fail and ask for the benchmark to land on
+  # its own first. Docs, CODEOWNERS, CI config and tests do not count as source here. Without
+  # a candidate checkout or both SHAs the diff is not run and the skips are tolerated.
+  NON_BENCH_SOURCE_CHANGED=""
+  if [[ -d /app/candidate/.git && -n "${COMMIT_SHA:-}" && -n "${CI_COMMIT_SHA:-}" ]]; then
+    NON_BENCH_SOURCE_CHANGED="$(git -C /app/candidate diff --name-only "${COMMIT_SHA}..${CI_COMMIT_SHA}" \
+      | grep -vE '(^benchmark/|^docs/|^\.github/|^\.gitlab/|\.md$|(^|/)CODEOWNERS$|^test/|/test/|/__tests__/|\.spec\.[jt]s$|\.test\.[jt]s$)' || true)"
+  fi
+
+  if [[ -n "${NON_BENCH_SOURCE_CHANGED}" ]]; then
+    echo "" >&2
+    echo "This PR also changes non-benchmark source, so the A/B comparison is incomplete." >&2
+    echo "Land the benchmark change separately first, then rebase. Changed source files:" >&2
+    echo "${NON_BENCH_SOURCE_CHANGED}" | sed 's/^/  - /' >&2
+    exit 1
+  fi
+fi
diff --git a/benchmark/sirun/startup-guard.js b/benchmark/sirun/startup-guard.js
new file mode 100644
index 0000000000..3644b3d7f3
--- /dev/null
+++ b/benchmark/sirun/startup-guard.js
@@ -0,0 +1,51 @@
+'use strict'
+
+// Startup-share guard. Require this FIRST in a loop benchmark: START is taken here, so the
+// load time of the heavy requires that follow (especially the tracer) counts as startup.
+// Call loopStart() right before the measured loop and done() right after it (for
+// async loops, call done() from the completion callback). done() fails the run
+// if load+setup grew past the allowed share of the total, which is the recurring
+// way a bench rots into measuring startup instead of its hot path.
+//
+//   const guard = require('../startup-guard')
+//   // ...requires, setup...
+//   guard.loopStart()
+//   for (...) { ... }
+//   guard.done()            // default 10% ceiling
+//   guard.done(0.15)        // relaxed ceiling when the loop legitimately can't dominate further
+
+const assert = require('node:assert/strict')
+
+const START = process.hrtime.bigint() // taken when this module is first loaded
+let loopStartedAt // set by loopStart(); stays undefined until then
+
+function loopStart () {
+  loopStartedAt = process.hrtime.bigint() // setup ends here; done() counts START up to this point as startup
+}
+
+function done (maxShare = 0.10) {
+  const end = process.hrtime.bigint()
+  assert.ok(loopStartedAt !== undefined, 'startup-guard: loopStart() was never called')
+  const total = Number(end - START)
+  const startup = Number(loopStartedAt - START)
+  const share = total === 0 ? 1 : startup / total
+
+  // Report mode (used by the overview collector): write the share to the given
+  // file and skip the assertion, so a high-startup variant still reports instead
+  // of crashing the data run. Off in normal/CI runs, where the assertion gates.
+  const reportPath = process.env.STARTUP_GUARD_REPORT
+  if (reportPath) {
+    try {
+      require('fs').writeFileSync(reportPath, share.toFixed(4))
+    } catch {}
+    return
+  }
+
+  assert.ok(
+    share <= maxShare,
+    `startup-guard: load+setup was ${(share * 100).toFixed(1)}% of the run ` +
+    `(max ${(maxShare * 100).toFixed(0)}%); grow the loop or load fewer modules up front`
+  )
+}
+
+module.exports = { loopStart, done }