diff --git a/src/node-tools/profiler-edit.ts b/src/node-tools/profiler-edit.ts index f6ae9032f4..7917d3af08 100644 --- a/src/node-tools/profiler-edit.ts +++ b/src/node-tools/profiler-edit.ts @@ -2,7 +2,12 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import fs from 'fs'; -import { Command, CommanderError, Option } from 'commander'; +import { + Command, + CommanderError, + InvalidArgumentError, + Option, +} from 'commander'; import { parse as parseToml } from 'smol-toml'; import { @@ -25,6 +30,7 @@ import { applyWasmSymbolication, type WasmSymbolicationSpec, } from 'firefox-profiler/profile-logic/wasm-symbolication'; +import { getThreadsWithMarkersMatchingSearchFilter } from 'firefox-profiler/profile-logic/marker-data'; import type { Profile } from 'firefox-profiler/types/profile'; import { assertExhaustiveCheck } from 'firefox-profiler/utils/types'; import { @@ -32,6 +38,7 @@ import { type LabelDescription, resolveAllLabels, } from 'firefox-profiler/utils/label-templates'; +import { mergeNonOverlappingThreadsByName } from 'firefox-profiler/profile-logic/merge-compare'; /** * A CLI tool for editing profiles. @@ -52,9 +59,13 @@ import { * * node node-tools-dist/profiler-edit.js --from-hash w1spyw917hg... -o out.json.gz \ * --insert-label-frames known-functions.toml + * + * node node-tools-dist/profiler-edit.js -i big.json.gz -o small.json.gz \ + * --only-keep-threads-with-markers-matching '-async,-sync' \ + * --merge-non-overlapping-threads-by-name */ -type ProfileSource = +export type ProfileSource = | { type: 'FILE'; path: string } | { type: 'URL'; url: string } | { type: 'HASH'; hash: string }; @@ -63,7 +74,7 @@ type ProfileSource = // supplies symbol names, plus (optionally) the URL of the stripped wasm in the // profile to which those names should be applied. If `strippedWasmUrl` is // omitted, the profile must contain exactly one .wasm source, which is used. -interface WasmSymbolicationCliSpec { +export interface WasmSymbolicationCliSpec { // Path to the local unstripped .wasm file (with a "name" custom section). unstrippedWasmPath: string; // URL of the matching stripped wasm as it appears in the profile. @@ -76,9 +87,12 @@ export interface CliOptions { symbolicateWithServer?: string; symbolicateWasm: WasmSymbolicationCliSpec[]; insertLabelFrames?: string; + onlyKeepThreadsWithMarkersMatching?: string; + mergeNonOverlappingThreadsByName?: boolean; + setName?: string; } -function loadWasmSymbolicationSpecs( +export function loadWasmSymbolicationSpecs( cliSpecs: WasmSymbolicationCliSpec[] ): WasmSymbolicationSpec[] { return cliSpecs.map((spec) => { @@ -97,7 +111,7 @@ function loadWasmSymbolicationSpecs( * (mirrors getLabelIndexForFunc in insert-stack-labels.ts), so auto-discovery * sees the same strings the labeler will compare against. */ -function collectFuncNames(profile: Profile): string[] { +export function collectFuncNames(profile: Profile): string[] { const { funcTable, sources, stringArray } = profile.shared; const result: string[] = []; for (let i = 0; i < funcTable.length; i++) { @@ -265,6 +279,32 @@ export async function run(options: CliOptions) { profile = insertStackLabels(profile, labels); } + if ( + options.onlyKeepThreadsWithMarkersMatching !== undefined && + options.onlyKeepThreadsWithMarkersMatching !== '' + ) { + const before = profile.threads.length; + const matchingThreadIndexes = getThreadsWithMarkersMatchingSearchFilter( + profile, + options.onlyKeepThreadsWithMarkersMatching + ); + const matchingThreads = profile.threads.filter((_thread, threadIndex) => + matchingThreadIndexes.has(threadIndex) + ); + profile = { ...profile, threads: matchingThreads }; + console.log( + `Kept ${profile.threads.length} of ${before} threads with markers matching ${JSON.stringify(options.onlyKeepThreadsWithMarkersMatching)}.` + ); + } + + if (options.mergeNonOverlappingThreadsByName) { + profile = mergeNonOverlappingThreadsByName(profile); + } + + if (options.setName !== undefined) { + profile.meta.product = options.setName; + } + const { profile: compactedProfile } = computeCompactedProfile(profile); const outputFilename = options.output; @@ -298,6 +338,15 @@ function collectWasm( return [...previous, { unstrippedWasmPath: value }]; } +function requireNonEmpty(flagName: string): (value: string) => string { + return (value: string) => { + if (value === '') { + throw new InvalidArgumentError(`${flagName} requires a non-empty value`); + } + return value; + }; +} + export function makeOptionsFromArgv(processArgv: string[]): CliOptions { const program = new Command(); program @@ -324,7 +373,20 @@ export function makeOptionsFromArgv(processArgv: string[]): CliOptions { .argParser(collectWasm) .default([] as WasmSymbolicationCliSpec[]) ) - .option('--insert-label-frames ', 'TOML file with label definitions'); + .option('--insert-label-frames ', 'TOML file with label definitions') + .option( + '--only-keep-threads-with-markers-matching ', + 'Keep only threads with markers matching the given search string' + ) + .option( + '--merge-non-overlapping-threads-by-name', + 'Merge same-named threads across non-overlapping process runs' + ) + .option( + '--set-name ', + 'Override the profile product name', + requireNonEmpty('--set-name') + ); program.parse(processArgv); const opts = program.opts(); @@ -376,6 +438,14 @@ export function makeOptionsFromArgv(processArgv: string[]): CliOptions { opts.insertLabelFrames !== '' ? opts.insertLabelFrames : undefined, + onlyKeepThreadsWithMarkersMatching: + typeof opts.onlyKeepThreadsWithMarkersMatching === 'string' && + opts.onlyKeepThreadsWithMarkersMatching !== '' + ? opts.onlyKeepThreadsWithMarkersMatching + : undefined, + mergeNonOverlappingThreadsByName: + opts.mergeNonOverlappingThreadsByName === true, + setName: typeof opts.setName === 'string' ? opts.setName : undefined, }; } diff --git a/src/profile-logic/marker-data.ts b/src/profile-logic/marker-data.ts index a984f89afa..779e95bf73 100644 --- a/src/profile-logic/marker-data.ts +++ b/src/profile-logic/marker-data.ts @@ -1,9 +1,17 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -import { getEmptyRawMarkerTable } from './data-structures'; -import { getFriendlyThreadName } from './profile-data'; -import { removeFilePath, removeURLs, stringsToRegExp } from '../utils/string'; +import { + getDefaultCategories, + getEmptyRawMarkerTable, +} from './data-structures'; +import { getFriendlyThreadName, getTimeRangeForThread } from './profile-data'; +import { + removeFilePath, + removeURLs, + stringsToRegExp, + splitSearchString, +} from '../utils/string'; import { StringTable } from '../utils/string-table'; import { ensureExists, assertExhaustiveCheck } from '../utils/types'; import { @@ -15,6 +23,7 @@ import { import { getSchemaFromMarker, markerPayloadMatchesSearch, + markerSchemaFrontEndOnly, } from './marker-schema'; import type { @@ -42,6 +51,8 @@ import type { MarkerDisplayLocation, Tid, LogMarkerPayload, + ThreadIndex, + Profile, } from 'firefox-profiler/types'; /** @@ -998,6 +1009,77 @@ export function deriveMarkersFromRawMarkerTable( return { markers, markerIndexToRawMarkerIndexes }; } +/** + * Return the set of threads that have at least one marker matching the given + * marker search string, using the same regular marker search syntax: comma- + * separated terms, optional `field:value` and `-field:value` qualifiers. + * + * This is a somewhat expensive operation because we call deriveMarkersFromRawMarkerTable + * for every thread. + */ +export function getThreadsWithMarkersMatchingSearchFilter( + profile: Profile, + markerSearch: string +): Set { + const searchRegExps = stringsToMarkerRegExps(splitSearchString(markerSearch)); + if (searchRegExps === null) { + return new Set(); + } + + const stringTable = StringTable.withBackingArray(profile.shared.stringArray); + const categoryList = profile.meta.categories ?? getDefaultCategories(); + + const frontEndSchemaNames = new Set( + markerSchemaFrontEndOnly.map((schema) => schema.name) + ); + const schemaList = [ + ...(profile.meta.markerSchema ?? []).filter( + (schema) => !frontEndSchemaNames.has(schema.name) + ), + ...markerSchemaFrontEndOnly, + ]; + const markerSchemaByName: MarkerSchemaByName = Object.create(null); + for (const schema of schemaList) { + markerSchemaByName[schema.name] = schema; + } + + const ipcCorrelations = correlateIPCMarkers(profile.threads, profile.shared); + + const matchingThreads = new Set(); + + for ( + let threadIndex = 0; + threadIndex < profile.threads.length; + threadIndex++ + ) { + const thread = profile.threads[threadIndex]; + const { markers } = deriveMarkersFromRawMarkerTable( + thread.markers, + profile.shared.stringArray, + thread.tid, + getTimeRangeForThread(thread, profile.meta.interval), + ipcCorrelations + ); + if (markers.length === 0) { + continue; + } + const markerIndexes = markers.map((_, i) => i); + const filtered = getSearchFilteredMarkerIndexes( + (i) => markers[i], + markerIndexes, + markerSchemaByName, + searchRegExps, + stringTable, + categoryList + ); + if (filtered.length > 0) { + matchingThreads.add(threadIndex); + } + } + + return matchingThreads; +} + /** * This function filters markers from a thread's raw marker table using the * range specified as parameter. It's not used by the normal marker filtering diff --git a/src/profile-logic/merge-compare.ts b/src/profile-logic/merge-compare.ts index 5eeecb1705..7cb504fd47 100644 --- a/src/profile-logic/merge-compare.ts +++ b/src/profile-logic/merge-compare.ts @@ -68,6 +68,8 @@ import type { Tid, RawProfileSharedData, ProfileIndexTranslationMaps, + StartEndRange, + Pid, } from 'firefox-profiler/types'; import { translateTransformStack } from './transforms'; @@ -1359,6 +1361,16 @@ function combineSamplesForMerging(threads: RawThread[]): RawSamplesTable { threadId: newThreadId, }; + // If every source thread has threadCPUDelta, carry the per-sample values + // through unchanged. For non-overlapping inputs the resulting deltas remain + // meaningful; for overlapping inputs the values are nonsensical but harmless + // (still numerically valid). + const allHaveThreadCPUDelta = samplesPerThread.every( + (s) => s.threadCPUDelta !== undefined + ); + const newThreadCPUDelta: Array | undefined = + allHaveThreadCPUDelta ? [] : undefined; + while (true) { let earliestNextSampleThreadIndex: number | null = null; let earliestNextSampleTime = Infinity; @@ -1408,11 +1420,21 @@ function combineSamplesForMerging(threads: RawThread[]): RawSamplesTable { ? sourceThreadSamples.threadId[sourceThreadSampleIndex] : threads[sourceThreadIndex].tid ); + if (newThreadCPUDelta !== undefined) { + newThreadCPUDelta.push( + ensureExists(sourceThreadSamples.threadCPUDelta)[ + sourceThreadSampleIndex + ] + ); + } newSamples.length++; nextSampleIndexPerThread[sourceThreadIndex]++; } + if (newThreadCPUDelta !== undefined) { + return { ...newSamples, threadCPUDelta: newThreadCPUDelta }; + } return newSamples; } @@ -1492,3 +1514,190 @@ function getThreadMarkersAndScreenshotMarkers( return targetMarkerTable; } + +/** + * First-fit interval coloring: partition `items` (sorted by start time) into + * subgroups such that within each subgroup no two items overlap. + */ +function partitionNonOverlapping( + itemsSortedByStart: T[], + rangeOf: (item: T) => StartEndRange +): T[][] { + const subgroups: { items: T[]; lastEnd: number }[] = []; + for (const item of itemsSortedByStart) { + const range = rangeOf(item); + let placed = false; + for (const sg of subgroups) { + if (sg.lastEnd <= range.start) { + sg.items.push(item); + sg.lastEnd = range.end; + placed = true; + break; + } + } + if (!placed) { + subgroups.push({ items: [item], lastEnd: range.end }); + } + } + return subgroups.map((sg) => sg.items); +} + +/** + * Merges threads from sequential runs of the same logical workload. + * + * Two-stage approach: + * + * 1. Group processes (i.e. all threads sharing a pid) by (processName, + * processType, mainThreadName) and partition each group into matched + * bundles of non-overlapping processes via first-fit interval coloring. + * Each non-singleton bundle represents one logical process whose + * lifetime spans multiple runs. + * + * 2. Within each matched bundle, merge same-named threads across the + * bundled processes. Same-named threads inside a single process are + * not merged (they may overlap), so we again partition by non-overlap + * before merging. + * + * Threads belonging to a singleton process bundle are passed through + * unchanged. + */ +export function mergeNonOverlappingThreadsByName(profile: Profile): Profile { + const interval = profile.meta.interval; + const threads = profile.threads; + + const threadRanges = threads.map((t) => getTimeRangeForThread(t, interval)); + + type ProcessInfo = { + pid: Pid; + threadIndices: number[]; + range: StartEndRange; + processName: string | undefined; + processType: string; + mainThreadName: string; + }; + + const processesByPid = new Map(); + for (let i = 0; i < threads.length; i++) { + const t = threads[i]; + let proc = processesByPid.get(t.pid); + if (proc === undefined) { + proc = { + pid: t.pid, + threadIndices: [], + range: { start: Infinity, end: -Infinity }, + processName: t.processName, + processType: t.processType, + mainThreadName: t.name, + }; + processesByPid.set(t.pid, proc); + } + proc.threadIndices.push(i); + if (t.isMainThread) { + proc.mainThreadName = t.name; + if (t.processName !== undefined) { + proc.processName = t.processName; + } + } + const r = threadRanges[i]; + if (r.start < proc.range.start) { + proc.range.start = r.start; + } + if (r.end > proc.range.end) { + proc.range.end = r.end; + } + } + + const processGroups = new Map(); + for (const proc of processesByPid.values()) { + const key = `${proc.processName ?? ''}\u0000${proc.processType}\u0000${proc.mainThreadName}`; + let g = processGroups.get(key); + if (g === undefined) { + g = []; + processGroups.set(key, g); + } + g.push(proc); + } + + const mergedIndexes = new Set(); + const mergeReplacements = new Map(); + let mergedProcessBundles = 0; + + for (const procs of processGroups.values()) { + if (procs.length <= 1) { + continue; + } + procs.sort((a, b) => a.range.start - b.range.start); + const bundles = partitionNonOverlapping(procs, (p) => p.range); + + for (const bundle of bundles) { + if (bundle.length <= 1) { + continue; + } + mergedProcessBundles++; + + // Group threads in this bundle by name, partition each by non-overlap, + // and merge subgroups of size > 1. + const threadsByName = new Map(); + for (const proc of bundle) { + for (const tIdx of proc.threadIndices) { + const name = threads[tIdx].name; + let arr = threadsByName.get(name); + if (arr === undefined) { + arr = []; + threadsByName.set(name, arr); + } + arr.push(tIdx); + } + } + + for (const tIndices of threadsByName.values()) { + if (tIndices.length <= 1) { + continue; + } + tIndices.sort((a, b) => threadRanges[a].start - threadRanges[b].start); + const tBundles = partitionNonOverlapping( + tIndices, + (i) => threadRanges[i] + ); + for (const tb of tBundles) { + if (tb.length <= 1) { + continue; + } + const sourceThreads = tb.map((i) => threads[i]); + const original = sourceThreads[0]; + const merged = mergeThreads(sourceThreads); + merged.name = original.name; + merged.pid = original.pid; + merged.tid = original.tid; + merged.processType = original.processType; + merged.processName = original.processName; + merged.isMainThread = original.isMainThread; + + mergeReplacements.set(tb[0], merged); + for (let k = 1; k < tb.length; k++) { + mergedIndexes.add(tb[k]); + } + } + } + } + } + + if (mergeReplacements.size === 0) { + return profile; + } + + const newThreads: RawThread[] = []; + for (let i = 0; i < threads.length; i++) { + if (mergedIndexes.has(i)) { + continue; + } + const replacement = mergeReplacements.get(i); + newThreads.push(replacement ?? threads[i]); + } + + console.log( + `Matched ${mergedProcessBundles} non-overlapping process bundles. Merged ${mergedIndexes.size + mergeReplacements.size} threads into ${mergeReplacements.size}, going from ${threads.length} to ${newThreads.length} threads.` + ); + + return { ...profile, threads: newThreads }; +}