perplexityai · longlho · May 22, 2026 · May 22, 2026
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -46,6 +46,55 @@ flowchart TD
 `codescythe::run_and_fix(cwd, config_path)` runs the same analysis, then applies
 supported unused-file and export removals.
 
+## Profiling
+
+Profiling is compiled behind the `profiling` Cargo feature so default release
+builds do not carry profiler timers, counters, or resolver hot-path branches.
+Build the profiling binary explicitly, then set `CODESCYTHE_PROFILE=1` to print
+stage timings and high-level counters to stderr. The profile is deliberately
+kept off stdout, so it never mixes with `--json` output and the two streams can
+be redirected independently:
+
+```sh
+bazel build -c opt //crates/codescythe_cli:codescythe_profiling
+CODESCYTHE_PROFILE=1 bazel-bin/crates/codescythe_cli/codescythe_profiling \
+  --json --directory <repo> --config <config> \
+  > report.json 2> profile.txt
+```
+
+The profiler reports discovery, entry classification, resolver setup, reachable
+graph traversal, issue construction, test scans, optional explanation work, and
+finalization. It also breaks graph traversal into frontier parse time and
+frontier inspection time, and includes resolver call counts, cache hits/misses,
+classification counts, and uncached resolver wall time. When `--json` is used,
+the CLI also reports how long JSON serialization took.
+
+The current Kibana fixture is a useful stress test because its benchmark config
+marks every configured source root as an entry. A representative local run on
+May 22, 2026 processed 85,936 project files, treated all 85,936 as entries,
+parsed all 85,936, found no unused files, and reported 50,869 unused exports.
+The same run spent 12.18s total: 1.18s in project discovery, 10.41s walking the
+reachable graph, 81ms building issue maps, and 13ms serializing JSON. Graph
+frontier parsing accounted for 4.03s, while frontier inspection accounted for
+6.38s. The resolver saw 1,715,145 calls, served 75.9% from the analysis-local
+resolution cache, and still spent 4.77s in uncached resolution.
+
+Those numbers shape the optimization priorities:
+
+- File reads and parsing are not repeated for the same file inside one analysis
+  run; `FileCache` parses each file at most once.
+- JSON serialization is not a meaningful bottleneck for the current 7 MB Kibana
+  report.
+- Since the benchmark config makes the whole project reachable, entry pruning
+  cannot help this fixture. It can still matter for real configs with narrower
+  entries.
+- Resolver work remains important even after memoization because Kibana still
+  has over 400k unique importer/specifier resolution-cache misses (24.1% of calls).
+- Small object-copy, line/column, and output-format experiments have not shown a
+  reliable memory win; the higher-value work is reducing resolver misses,
+  reducing graph inspection work, or changing the configured entry surface when
+  coverage allows it.
+
 ## Config Loading
 
 `load_config` accepts an analysis root and an optional config path.

diff --git a/crates/codescythe/BUILD.bazel b/crates/codescythe/BUILD.bazel
@@ -22,23 +22,38 @@ COMMON_DEPS = [
 
 VERSION = "0.4.12"  # x-release-please-version
 
+LIB_SRCS = [
+    "analyze/discovery.rs",
+    "analyze/doctor.rs",
+    "analyze/explain.rs",
+    "analyze/graph.rs",
+    "analyze/parse.rs",
+    "analyze/profile.rs",
+    "analyze/resolver.rs",
+    "analyze/tests.rs",
+    "analyze/util.rs",
+    "analyze.rs",
+    "config.rs",
+    "fix.rs",
+    "lib.rs",
+]
+
 rust_library(
     name = "codescythe",
-    srcs = [
-        "analyze/discovery.rs",
-        "analyze/doctor.rs",
-        "analyze/explain.rs",
-        "analyze/graph.rs",
-        "analyze/parse.rs",
-        "analyze/resolver.rs",
-        "analyze/tests.rs",
-        "analyze/util.rs",
-        "analyze.rs",
-        "config.rs",
-        "fix.rs",
-        "lib.rs",
-    ],
+    srcs = LIB_SRCS,
+    compile_data = ["//:config_schema"],
+    crate_name = "codescythe",
+    edition = "2024",
+    version = VERSION,
+    visibility = ["//visibility:public"],
+    deps = COMMON_DEPS,
+)
+
+rust_library(
+    name = "codescythe_profiling",
+    srcs = LIB_SRCS,
     compile_data = ["//:config_schema"],
+    crate_features = ["profiling"],
     crate_name = "codescythe",
     edition = "2024",
     version = VERSION,

diff --git a/crates/codescythe/Cargo.toml b/crates/codescythe/Cargo.toml
@@ -10,6 +10,10 @@ description = "Focused TypeScript dead-code analysis and removal core"
 [lib]
 path = "lib.rs"
 
+[features]
+default = []
+profiling = []
+
 [dependencies]
 anyhow.workspace = true
 globset.workspace = true

diff --git a/crates/codescythe/analyze.rs b/crates/codescythe/analyze.rs
@@ -3,6 +3,7 @@ mod doctor;
 mod explain;
 mod graph;
 mod parse;
+mod profile;
 mod resolver;
 mod util;
 
@@ -23,6 +24,9 @@ use graph::{
     mark_source_file, mark_used_export, mark_used_file,
 };
 use parse::{ExportInfo, FileCache, FileData};
+use profile::AnalysisProfile;
+#[cfg(feature = "profiling")]
+use profile::AnalysisProfileReport;
 use resolver::{
     ImportResolution, ModuleResolver, UnresolvedImportPolicy, package_import_keys,
     source_alias_ignore_warnings, source_alias_mappings,
@@ -292,26 +296,50 @@ pub fn analyze_path(
     config: &CodescytheConfig,
     options: AnalysisOptions,
 ) -> Result<Analysis> {
+    let mut profile = AnalysisProfile::new();
     let cwd = absolute_normalize_path(cwd)?;
     if !cwd.exists() {
         anyhow::bail!("analysis root does not exist: {}", cwd.display());
     }
-    let project_files = discover_project_files(&cwd, config)?;
-    let entry_files = discover_entry_files(&cwd, config, &project_files)?;
-    let test_file_indexes = discover_test_file_indexes(&cwd, config, &project_files)?;
+    let project_files = profile.time("discover project files", || {
+        discover_project_files(&cwd, config)
+    })?;
+    let entry_files = profile.time("discover entry files", || {
+        discover_entry_files(&cwd, config, &project_files)
+    })?;
+    let test_file_indexes = profile.time("classify test files", || {
+        discover_test_file_indexes(&cwd, config, &project_files)
+    })?;
     let entry_set = entry_files.iter().cloned().collect::<HashSet<_>>();
     let total_files = project_files.len();
 
-    let index_by_path = project_files
-        .iter()
-        .enumerate()
-        .map(|(index, path)| (normalize_path(path), index))
-        .collect::<HashMap<_, _>>();
-    let module_resolver = ModuleResolver::new(&cwd, &project_files, config)?;
-    let unresolved_policy = UnresolvedImportPolicy::new(config)?;
-    let alias_mappings = source_alias_mappings(&cwd, config)?;
-    let source_alias_ignore_warnings = source_alias_ignore_warnings(config, &alias_mappings)?;
-    let mut files = FileCache::new(&cwd, project_files)?;
+    let (
+        index_by_path,
+        module_resolver,
+        unresolved_policy,
+        alias_mappings,
+        source_alias_ignore_warnings,
+        mut files,
+    ) = profile.time("build indexes and resolver", || {
+        let index_by_path = project_files
+            .iter()
+            .enumerate()
+            .map(|(index, path)| (normalize_path(path), index))
+            .collect::<HashMap<_, _>>();
+        let module_resolver = ModuleResolver::new(&cwd, &project_files, config)?;
+        let unresolved_policy = UnresolvedImportPolicy::new(config)?;
+        let alias_mappings = source_alias_mappings(&cwd, config)?;
+        let source_alias_ignore_warnings = source_alias_ignore_warnings(config, &alias_mappings)?;
+        let files = FileCache::new(&cwd, project_files)?;
+        Ok::<_, anyhow::Error>((
+            index_by_path,
+            module_resolver,
+            unresolved_policy,
+            alias_mappings,
+            source_alias_ignore_warnings,
+            files,
+        ))
+    })?;
 
     let mut entry_indexes = HashSet::<usize>::new();
     let mut used_files = UsedFiles::new();
@@ -339,11 +367,16 @@ pub fn analyze_path(
         }
     }
 
+    let graph_started = profile.start();
     while !queue.is_empty() {
         let batch = queue.drain(..).collect::<Vec<_>>();
+        profile.record_frontier(batch.len());
         queued_files.clear();
+        let parse_started = profile.start();
         files.parse_many(&batch)?;
+        profile.record_frontier_parse(parse_started);
 
+        let inspect_started = profile.start();
         for index in batch {
             let file = files.get(index)?.clone();
             let public_entry = entry_indexes.contains(&index) && !config.include_entry_exports;
@@ -570,8 +603,11 @@ pub fn analyze_path(
                 }
             }
         }
+        profile.record_frontier_inspect(inspect_started);
     }
+    profile.record("walk reachable graph", graph_started);
 
+    let issue_started = profile.start();
     let mut issues = Issues::default();
     let mut unused_file_indexes = HashSet::<usize>::new();
 
@@ -630,28 +666,33 @@ pub fn analyze_path(
             }
         }
     }
+    profile.record("build unused file/export issues", issue_started);
 
-    let live_test_support_files = discover_live_test_support_files(
-        &mut files,
-        &module_resolver,
-        &test_file_indexes,
-        &unused_file_indexes,
-        &used_files,
-    )?;
+    let live_test_support_files = profile.time("scan live test support", || {
+        discover_live_test_support_files(
+            &mut files,
+            &module_resolver,
+            &test_file_indexes,
+            &unused_file_indexes,
+            &used_files,
+        )
+    })?;
     for index in &live_test_support_files {
         let relative = files.relative(*index);
         issues.files.remove(&relative);
         issues.exports.remove(&relative);
         unused_file_indexes.remove(index);
     }
 
-    let removable_test_files = discover_removable_test_files(
-        &mut files,
-        &module_resolver,
-        &test_file_indexes,
-        &unused_file_indexes,
-        &issues.exports,
-    )?;
+    let removable_test_files = profile.time("scan removable tests", || {
+        discover_removable_test_files(
+            &mut files,
+            &module_resolver,
+            &test_file_indexes,
+            &unused_file_indexes,
+            &issues.exports,
+        )
+    })?;
     for index in removable_test_files {
         let relative = files.relative(index);
         issues.files.insert(
@@ -665,17 +706,19 @@ pub fn analyze_path(
     let mut effective_used_files = used_files.clone();
     effective_used_files.extend(live_test_support_files.iter().copied());
 
-    let export_usage = if options.verbose || options.explain_export.is_some() {
-        Some(build_export_usage_index(
-            &mut files,
-            &module_resolver,
-            &effective_used_files,
-            &entry_indexes,
-            &test_file_indexes,
-        )?)
-    } else {
-        None
-    };
+    let export_usage = profile.time("build export usage explanations", || {
+        if options.verbose || options.explain_export.is_some() {
+            Ok(Some(build_export_usage_index(
+                &mut files,
+                &module_resolver,
+                &effective_used_files,
+                &entry_indexes,
+                &test_file_indexes,
+            )?))
+        } else {
+            Ok(None)
+        }
+    })?;
 
     if options.verbose {
         if let Some(export_usage) = &export_usage {
@@ -690,20 +733,23 @@ pub fn analyze_path(
         }
     }
 
-    let explain_export = if let Some(request) = &options.explain_export {
-        Some(explain_requested_export(
-            request,
-            &mut files,
-            &issues,
-            &effective_used_files,
-            export_usage.as_ref(),
-            &alias_mappings,
-            &ignored_unresolved_imports_by_pattern,
-        )?)
-    } else {
-        None
-    };
+    let explain_export = profile.time("explain requested export", || {
+        if let Some(request) = &options.explain_export {
+            Ok(Some(explain_requested_export(
+                request,
+                &mut files,
+                &issues,
+                &effective_used_files,
+                export_usage.as_ref(),
+                &alias_mappings,
+                &ignored_unresolved_imports_by_pattern,
+            )?))
+        } else {
+            Ok(None)
+        }
+    })?;
 
+    let finalize_started = profile.start();
     issues.unresolved = unresolved
         .into_iter()
         .map(|(file, imports)| {
@@ -749,6 +795,22 @@ pub fn analyze_path(
             BTreeMap::new()
         };
 
+    profile.record("finalize report", finalize_started);
+    #[cfg(feature = "profiling")]
+    profile.print(AnalysisProfileReport {
+        project_files: total_files,
+        entry_files: entry_indexes.len(),
+        test_files: test_file_indexes.len(),
+        parsed_files: files.parsed_count(),
+        used_files: effective_used_files.len(),
+        used_exports: used_exports.values().map(HashSet::len).sum(),
+        issue_files: counters.files,
+        issue_exports: counters.exports,
+        unresolved: counters.unresolved,
+        ignored_unresolved: counters.ignored_unresolved,
+        resolver: module_resolver.profile_stats(),
+    });
+
     Ok(Analysis {
         issues,
         counters,

diff --git a/crates/codescythe/analyze/parse.rs b/crates/codescythe/analyze/parse.rs
@@ -108,6 +108,11 @@ impl FileCache {
         Ok(())
     }
 
+    #[cfg(feature = "profiling")]
+    pub(super) fn parsed_count(&self) -> usize {
+        self.parsed.iter().flatten().count()
+    }
+
     pub(super) fn relative(&self, index: usize) -> String {
         relative_path(&self.cwd, &self.paths[index])
     }