Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
2749a44
first stab
maurycy Mar 21, 2026
f13d34c
s/ndjson/jsonl/
maurycy Mar 21, 2026
c15d318
printing to stdout isn't a great idea
maurycy Mar 22, 2026
6a0ea81
Merge remote-tracking branch 'upstream/main' into tachyon-ndjson-kole…
maurycy Mar 30, 2026
cb27fc0
even a basic test
maurycy Mar 30, 2026
59cbb4a
separate func for end record
maurycy Mar 30, 2026
25c6922
proper name
maurycy Mar 30, 2026
67cd39a
test_jsonl_collector_with_location_info
maurycy Mar 31, 2026
7c85d47
test synthetic frames
maurycy Mar 31, 2026
3eddae8
too many new lines
maurycy Mar 31, 2026
f71252e
BUG? confusing... two ways to set skip_idle?
maurycy Mar 31, 2026
9836ffa
Merge branch 'main' into tachyon-ndjson-kolektor
maurycy Mar 31, 2026
c183109
ok, thx b4fac15613a16f9cd7b2ee32840523b399f4621f
maurycy Mar 31, 2026
f20eb52
check if it works fine with (file, loc, func, op)
maurycy Mar 31, 2026
546ce90
missing new line
maurycy Mar 31, 2026
350ad99
filter out sync coordinator
maurycy Mar 31, 2026
942d821
s/collapsed_out/jsonl_out/, less copying :D
maurycy Mar 31, 2026
bd9aefe
nicer reading
maurycy Mar 31, 2026
311a4e3
typo
maurycy Mar 31, 2026
749a868
too much copying, left-over
maurycy Mar 31, 2026
85ce978
just Counter
maurycy Mar 31, 2026
820d3b9
ruff
maurycy Mar 31, 2026
aad4b18
future-proof name
maurycy Mar 31, 2026
da3e754
future-proof iter for streaming
maurycy Mar 31, 2026
cb6ed34
truth to be told, this should be layer above
maurycy Mar 31, 2026
5a59e0b
helper
maurycy Mar 31, 2026
192e54b
reorder
maurycy Mar 31, 2026
3189a8f
eh, just copy from heatmap
maurycy Mar 31, 2026
935779f
smaller chunk; matter of taste
maurycy Mar 31, 2026
e3d8aff
test actual chunking
maurycy Mar 31, 2026
d37f07a
test edge cases
maurycy Mar 31, 2026
aaaa972
ruff
maurycy Mar 31, 2026
a9b6ccd
match pep8
maurycy Mar 31, 2026
4fb3ade
style
maurycy Mar 31, 2026
a0decb5
too defensive
maurycy Mar 31, 2026
5f1704b
too many style changes
maurycy Mar 31, 2026
f2a21fb
less style
maurycy Mar 31, 2026
15b07ba
ha! even less style...
maurycy Mar 31, 2026
148f4e2
news
maurycy Mar 31, 2026
69c5768
news: proper formatting
maurycy Mar 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Lib/profiling/sampling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
from .stack_collector import CollapsedStackCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
from .jsonl_collector import JsonlCollector
from .string_table import StringTable

__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable")
__all__ = (
"Collector",
"PstatsCollector",
"CollapsedStackCollector",
"HeatmapCollector",
"GeckoCollector",
"JsonlCollector",
"StringTable",
)
3 changes: 3 additions & 0 deletions Lib/profiling/sampling/binary_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .gecko_collector import GeckoCollector
from .stack_collector import FlamegraphCollector, CollapsedStackCollector
from .jsonl_collector import JsonlCollector
from .pstats_collector import PstatsCollector


Expand Down Expand Up @@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
collector = PstatsCollector(interval)
elif output_format == 'gecko':
collector = GeckoCollector(interval)
elif output_format == "jsonl":
collector = JsonlCollector(interval)
else:
raise ValueError(f"Unknown output format: {output_format}")

Expand Down
25 changes: 21 additions & 4 deletions Lib/profiling/sampling/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
from .jsonl_collector import JsonlCollector
from .binary_collector import BinaryCollector
from .binary_reader import BinaryReader
from .constants import (
Expand Down Expand Up @@ -95,6 +96,7 @@ def __call__(self, parser, namespace, values, option_string=None):
"diff_flamegraph": "html",
"gecko": "json",
"heatmap": "html",
"jsonl": "jsonl",
"binary": "bin",
}

Expand All @@ -105,6 +107,7 @@ def __call__(self, parser, namespace, values, option_string=None):
"diff_flamegraph": DiffFlamegraphCollector,
"gecko": GeckoCollector,
"heatmap": HeatmapCollector,
"jsonl": JsonlCollector,
"binary": BinaryCollector,
}

Expand Down Expand Up @@ -482,6 +485,13 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
action=DiffFlamegraphAction,
help="Generate differential flamegraph comparing current profile to BASELINE binary file",
)
format_group.add_argument(
"--jsonl",
action="store_const",
const="jsonl",
dest="format",
help="Generate JSONL snapshot output for external consumers",
)
if include_binary:
format_group.add_argument(
"--binary",
Expand Down Expand Up @@ -560,15 +570,17 @@ def _sort_to_mode(sort_choice):
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)

def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False,
output_file=None, compression='auto', diff_baseline=None):
mode=None, output_file=None, compression='auto', diff_baseline=None):
Comment on lines 572 to +573
Copy link
Copy Markdown
Contributor Author

@maurycy maurycy Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already very complex. The collector constructor signature supports all collectors at once.

I've added mode for the purpose of meta but I don't think this scales for other meta.

(Truth be told, I think that complex signatures are also the underlying reason for the issue fixed by #145459)

"""Create the appropriate collector based on format type.

Args:
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary', 'diff_flamegraph')
format_type: The output format ('pstats', 'collapsed', 'flamegraph',
'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph')
sample_interval_usec: Sampling interval in microseconds
skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko format
for creating interval markers in Firefox Profiler)
mode: Profiling mode for collectors that expose it in metadata
output_file: Output file path (required for binary format)
compression: Compression type for binary format ('auto', 'zstd', 'none')
diff_baseline: Path to baseline binary file for differential flamegraph
Expand Down Expand Up @@ -604,6 +616,11 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
skip_idle = False
return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)

if format_type == "jsonl":
return collector_class(
sample_interval_usec, skip_idle=skip_idle, mode=mode
)

return collector_class(sample_interval_usec, skip_idle=skip_idle)


Expand Down Expand Up @@ -978,7 +995,7 @@ def _handle_attach(args):

# Create the appropriate collector
collector = _create_collector(
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
Expand Down Expand Up @@ -1057,7 +1074,7 @@ def _handle_run(args):

# Create the appropriate collector
collector = _create_collector(
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
Expand Down
5 changes: 4 additions & 1 deletion Lib/profiling/sampling/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ def normalize_location(location):
"""Normalize location to a 4-tuple format.
Args:
location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
location: tuple (lineno, end_lineno, col_offset, end_col_offset),
an integer line number, or None
Returns:
tuple: (lineno, end_lineno, col_offset, end_col_offset)
"""
if location is None:
return DEFAULT_LOCATION
if isinstance(location, int):
return (location, location, -1, -1)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return location


Expand Down
200 changes: 200 additions & 0 deletions Lib/profiling/sampling/jsonl_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
"""JSONL collector."""

from collections import Counter
import json
import uuid
from itertools import batched

from .constants import (
PROFILING_MODE_ALL,
PROFILING_MODE_CPU,
PROFILING_MODE_EXCEPTION,
PROFILING_MODE_GIL,
PROFILING_MODE_WALL,
)
from .collector import normalize_location
from .stack_collector import StackTraceCollector


_CHUNK_SIZE = 256
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


_MODE_NAMES = {
PROFILING_MODE_WALL: "wall",
PROFILING_MODE_CPU: "cpu",
PROFILING_MODE_GIL: "gil",
PROFILING_MODE_ALL: "all",
PROFILING_MODE_EXCEPTION: "exception",
}


class JsonlCollector(StackTraceCollector):
    """Collector that renders finalized profiling data as JSONL records.

    The output is a stream of newline-delimited JSON objects: a ``meta``
    header, interned string definitions (``str_def``), frame definitions
    (``frame_def``), per-frame aggregate sample counts (``agg``), and a
    closing ``end`` record.  Definition and aggregate records are chunked
    into batches of at most ``_CHUNK_SIZE`` entries.  Every record carries
    a schema version ``v`` and the ``run_id`` of this profiling run so
    consumers can correlate and future-proof their parsers.
    """

    def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
        super().__init__(sample_interval_usec, skip_idle=skip_idle)
        # Unique identifier stamped on every record of this run.
        self.run_id = uuid.uuid4().hex
        # Profiling mode (a PROFILING_MODE_* constant) or None; only
        # exported in the meta record when set.
        self._mode = mode

        # String interning: value -> 1-based id, plus export records.
        self._string_to_id = {}
        self._strings = []

        # Frame interning: identity key -> 1-based id, plus export records.
        self._frame_to_id = {}
        self._frames = []

        # Per-frame sample tallies and the overall weighted sample count.
        self._frame_self = Counter()
        self._frame_cumulative = Counter()
        self._samples_total = 0
        # Scratch set, cleared per sample, so recursive frames only
        # contribute once to the cumulative count of each sample.
        self._seen_frame_ids = set()

    def process_frames(self, frames, _thread_id, weight=1):
        """Accumulate one sampled stack (leaf frame first) with *weight*."""
        self._samples_total += weight
        self._seen_frame_ids.clear()

        for index, (filename, location, funcname, _opcode) in enumerate(frames):
            frame_id = self._get_or_create_frame_id(
                filename, location, funcname
            )

            # frames[0] is treated as the leaf: it alone earns self time.
            if index == 0:
                self._frame_self[frame_id] += weight

            # Cumulative time is counted at most once per sample per frame.
            if frame_id not in self._seen_frame_ids:
                self._seen_frame_ids.add(frame_id)
                self._frame_cumulative[frame_id] += weight

    def export(self, filename):
        """Write all collected data to *filename* as JSONL."""
        agg_header = {
            "type": "agg",
            "v": 1,
            "run_id": self.run_id,
            "kind": "frame",
            "scope": "final",
            "samples_total": self._samples_total,
        }
        with open(filename, "w", encoding="utf-8") as output:
            self._write_message(output, self._build_meta_record())
            self._write_chunked_records(
                output,
                {"type": "str_def", "v": 1, "run_id": self.run_id},
                "defs",
                self._strings,
            )
            self._write_chunked_records(
                output,
                {"type": "frame_def", "v": 1, "run_id": self.run_id},
                "defs",
                self._frames,
            )
            self._write_chunked_records(
                output, agg_header, "entries", self._iter_final_agg_entries()
            )
            self._write_message(output, self._build_end_record())

    def _build_meta_record(self):
        """Return the leading ``meta`` record describing this run."""
        record = {
            "type": "meta",
            "v": 1,
            "run_id": self.run_id,
            "sample_interval_usec": self.sample_interval_usec,
        }
        if self._mode is not None:
            # Unknown modes fall back to their raw string representation.
            record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode))
        return record

    def _build_end_record(self):
        """Return the trailing ``end`` record with the final sample count."""
        return {
            "type": "end",
            "v": 1,
            "run_id": self.run_id,
            "samples_total": self._samples_total,
        }

    def _iter_final_agg_entries(self):
        """Yield one aggregate entry per known frame, in definition order."""
        for frame in self._frames:
            fid = frame["frame_id"]
            yield {
                "frame_id": fid,
                "self": self._frame_self[fid],
                "cumulative": self._frame_cumulative[fid],
            }

    def _get_or_create_frame_id(self, filename, location, funcname):
        """Intern a (filename, location, funcname) frame; return its id.

        Ids are 1-based.  A frame with no location at all (``location is
        None``) is marked synthetic in its definition record.
        """
        synthetic = location is None
        location_fields = self._location_to_export_fields(location)
        func_str_id = self._intern_string(funcname)
        path_str_id = self._intern_string(filename)

        frame_key = (
            path_str_id,
            func_str_id,
            location_fields["line"],
            location_fields.get("end_line"),
            location_fields.get("col"),
            location_fields.get("end_col"),
            synthetic,
        )
        try:
            return self._frame_to_id[frame_key]
        except KeyError:
            pass

        frame_id = len(self._frames) + 1  # ids are 1-based
        record = {
            "frame_id": frame_id,
            "path_str_id": path_str_id,
            "func_str_id": func_str_id,
            **location_fields,
        }
        if synthetic:
            record["synthetic"] = True

        self._frame_to_id[frame_key] = frame_id
        self._frames.append(record)
        return frame_id

    def _intern_string(self, value):
        """Intern *value* (coerced to ``str``) and return its 1-based id."""
        value = str(value)
        try:
            return self._string_to_id[value]
        except KeyError:
            pass

        string_id = len(self._strings) + 1  # ids are 1-based
        self._string_to_id[value] = string_id
        self._strings.append({"str_id": string_id, "value": value})
        return string_id

    @staticmethod
    def _location_to_export_fields(location):
        """Map a location to export fields, omitting absent components."""
        lineno, end_lineno, col_offset, end_col_offset = normalize_location(
            location
        )

        # Negative/zero sentinel components (see DEFAULT_LOCATION) are
        # dropped rather than exported.
        fields = {"line": lineno}
        optional = (
            ("end_line", end_lineno, end_lineno > 0),
            ("col", col_offset, col_offset >= 0),
            ("end_col", end_col_offset, end_col_offset >= 0),
        )
        for key, component, present in optional:
            if present:
                fields[key] = component
        return fields

    def _write_chunked_records(self, output, base_record, chunk_field, entries):
        """Emit *entries* as copies of *base_record*, _CHUNK_SIZE at a time."""
        for batch in batched(entries, _CHUNK_SIZE):
            self._write_message(output, {**base_record, chunk_field: batch})

    @staticmethod
    def _write_message(output, record):
        """Serialize *record* compactly and emit it as one JSONL line."""
        output.write(json.dumps(record, separators=(",", ":")) + "\n")
Loading
Loading