Skip to content

Commit 9d98e8c

Browse files
hyperpolymath and claude committed
perf: OnceLock regex caching, release profile, BLAKE3 mmap, deferred clone
Performance optimizations:
- Add [profile.release] with LTO, codegen-units=1, strip, panic=abort
- Cache 7 Regex compilations via OnceLock statics in analyzer.rs (avoids recompiling on every file, especially analyze_cross_language which runs on every source file in the project)
- Use BLAKE3 update_mmap for memory-mapped file hashing in assemblyline
- Remove raw_bytes.clone() in UTF-8 check (use str::from_utf8 borrow)
- Defer bindings.clone() in signature engine unify_fact until after variant match (avoids wasted allocation on mismatch path)
- Use as_deref() for signal comparison instead of allocating strings
- Add missing SPDX header to diagnostics.rs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 88494b1 commit 9d98e8c

6 files changed

Lines changed: 47 additions & 30 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ encoding_rs = "0.8"
2323
crossterm = "0.26"
2424
eframe = "0.27"
2525
rayon = "1.10"
26-
blake3 = "1.5"
26+
blake3 = { version = "1.5", features = ["mmap"] }
2727
sha2 = "0.10"
2828
hex = "0.4"
2929
getrandom = "0.2"
@@ -36,6 +36,13 @@ signing = ["ed25519-dalek"]
3636
[dev-dependencies]
3737
tempfile = "3.8"
3838

39+
[profile.release]
40+
opt-level = 3
41+
lto = "thin"
42+
codegen-units = 1
43+
strip = "symbols"
44+
panic = "abort"
45+
3946
[[bin]]
4047
name = "panic-attack"
4148
path = "src/main.rs"

src/assail/analyzer.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@ use regex::Regex;
1313
use std::collections::{HashMap, HashSet};
1414
use std::fs;
1515
use std::path::{Path, PathBuf};
16+
use std::sync::OnceLock;
17+
18+
/// Pre-compiled regexes for hot-path pattern matching.
19+
/// Using OnceLock avoids recompiling on every file analyzed.
20+
static RE_UNCHECKED_MALLOC: OnceLock<Regex> = OnceLock::new();
21+
static RE_ELIXIR_APPLY: OnceLock<Regex> = OnceLock::new();
22+
static RE_PONY_FFI: OnceLock<Regex> = OnceLock::new();
23+
static RE_SHELL_UNQUOTED_VAR: OnceLock<Regex> = OnceLock::new();
24+
static RE_HTTP_URL: OnceLock<Regex> = OnceLock::new();
25+
static RE_HTTP_LOCALHOST: OnceLock<Regex> = OnceLock::new();
26+
static RE_HARDCODED_SECRET: OnceLock<Regex> = OnceLock::new();
1627

1728
pub struct Analyzer {
1829
target: PathBuf,
@@ -115,9 +126,10 @@ impl Analyzer {
115126
}
116127
};
117128

118-
// Try UTF-8 first, then Latin-1 fallback
119-
let content = match String::from_utf8(raw_bytes.clone()) {
120-
Ok(s) => s,
129+
// Try UTF-8 first, then Latin-1 fallback.
130+
// Use str::from_utf8 to borrow rather than cloning raw_bytes.
131+
let content = match std::str::from_utf8(&raw_bytes) {
132+
Ok(s) => s.to_owned(),
121133
Err(_) => {
122134
let (cow, _, had_errors) = encoding_rs::WINDOWS_1252.decode(&raw_bytes);
123135
if had_errors {
@@ -639,7 +651,7 @@ impl Analyzer {
639651
stats.threading_constructs += content.matches("pthread_").count();
640652
stats.threading_constructs += content.matches("std::thread").count();
641653

642-
let unchecked_malloc = Regex::new(r"malloc\([^)]+\)\s*;").unwrap();
654+
let unchecked_malloc = RE_UNCHECKED_MALLOC.get_or_init(|| Regex::new(r"malloc\([^)]+\)\s*;").unwrap());
643655
if unchecked_malloc.is_match(content) {
644656
weak_points.push(WeakPoint {
645657
category: WeakPointCategory::UncheckedAllocation,
@@ -1000,7 +1012,7 @@ impl Analyzer {
10001012
}
10011013

10021014
// Unsafe apply
1003-
let apply_re = Regex::new(r"apply\([^,]+,\s*[^,]+,").unwrap();
1015+
let apply_re = RE_ELIXIR_APPLY.get_or_init(|| Regex::new(r"apply\([^,]+,\s*[^,]+,").unwrap());
10041016
if apply_re.is_match(content) {
10051017
weak_points.push(WeakPoint {
10061018
category: WeakPointCategory::DynamicCodeExecution,
@@ -1787,7 +1799,7 @@ impl Analyzer {
17871799
file_path: &str,
17881800
) -> Result<()> {
17891801
// FFI calls (@ prefix)
1790-
let ffi_re = Regex::new(r"@[a-zA-Z_]\w*\[").unwrap();
1802+
let ffi_re = RE_PONY_FFI.get_or_init(|| Regex::new(r"@[a-zA-Z_]\w*\[").unwrap());
17911803
let ffi_count = ffi_re.find_iter(content).count();
17921804
stats.unsafe_blocks += ffi_count;
17931805

@@ -1914,7 +1926,7 @@ impl Analyzer {
19141926
}
19151927

19161928
// Unquoted variable expansion (potential injection)
1917-
let unquoted_var = Regex::new(r#"\$[A-Za-z_]\w*"#).unwrap();
1929+
let unquoted_var = RE_SHELL_UNQUOTED_VAR.get_or_init(|| Regex::new(r#"\$[A-Za-z_]\w*"#).unwrap());
19181930
let dollar_vars = unquoted_var.find_iter(content).count();
19191931
// Only flag if high number of unquoted vars
19201932
if dollar_vars > 20 {
@@ -2119,9 +2131,9 @@ impl Analyzer {
21192131
) -> Result<()> {
21202132
// HTTP (insecure) URLs - should be HTTPS
21212133
// Count http:// URLs that are NOT localhost/127.0.0.1 (those are fine)
2122-
let http_re = Regex::new(r#"http://[a-zA-Z0-9]"#).unwrap();
2123-
let http_localhost_re =
2124-
Regex::new(r#"http://(localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"#).unwrap();
2134+
let http_re = RE_HTTP_URL.get_or_init(|| Regex::new(r#"http://[a-zA-Z0-9]"#).unwrap());
2135+
let http_localhost_re = RE_HTTP_LOCALHOST.get_or_init(||
2136+
Regex::new(r#"http://(localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"#).unwrap());
21252137
let http_total = http_re.find_iter(content).count();
21262138
let http_local = http_localhost_re.find_iter(content).count();
21272139
let http_count = http_total.saturating_sub(http_local);
@@ -2136,9 +2148,9 @@ impl Analyzer {
21362148
}
21372149

21382150
// Hardcoded secrets patterns
2139-
let secret_re = Regex::new(
2151+
let secret_re = RE_HARDCODED_SECRET.get_or_init(|| Regex::new(
21402152
r#"(?i)(api[_-]?key|api[_-]?secret|password|passwd|secret[_-]?key|access[_-]?token|private[_-]?key)\s*[=:]\s*["'][^"']{8,}"#
2141-
).unwrap();
2153+
).unwrap());
21422154
if secret_re.is_match(content) {
21432155
weak_points.push(WeakPoint {
21442156
category: WeakPointCategory::HardcodedSecret,

src/assemblyline.rs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use rayon::prelude::*;
1616
use serde::{Deserialize, Serialize};
1717
use std::collections::HashMap;
1818
use std::fs;
19-
use std::io::Read;
2019
use std::path::{Path, PathBuf};
2120

2221
/// Configuration for an assemblyline run.
@@ -173,18 +172,10 @@ fn collect_source_hashes(
173172
Ok(())
174173
}
175174

176-
/// Hash a single file with BLAKE3
175+
/// Hash a single file with BLAKE3 using memory-mapped I/O for performance
177176
fn hash_file(path: &Path) -> Result<blake3::Hash> {
178-
let mut file = fs::File::open(path)?;
179177
let mut hasher = blake3::Hasher::new();
180-
let mut buf = [0u8; 16384];
181-
loop {
182-
let n = file.read(&mut buf)?;
183-
if n == 0 {
184-
break;
185-
}
186-
hasher.update(&buf[..n]);
187-
}
178+
hasher.update_mmap(path)?;
188179
Ok(hasher.finalize())
189180
}
190181

src/diagnostics.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// SPDX-License-Identifier: PMPL-1.0-or-later
2+
13
use crate::a2ml::Manifest;
24
use anyhow::{anyhow, Context, Result};
35
use std::env;

src/signatures/engine.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ impl SignatureEngine {
141141
/// A variable name (uppercase string like "X") unifies with any string value.
142142
/// Location 0 in a pattern acts as a wildcard matching any location.
143143
fn unify_fact(&self, pattern: &Fact, fact: &Fact, bindings: &Bindings) -> Option<Bindings> {
144-
let mut new_bindings = bindings.clone();
145-
144+
// Clone bindings only after confirming variant match to avoid
145+
// wasted allocations on the common mismatch path.
146146
match (pattern, fact) {
147147
(
148148
Fact::Alloc {
@@ -194,6 +194,7 @@ impl SignatureEngine {
194194
location: floc,
195195
},
196196
) => {
197+
let mut new_bindings = bindings.clone();
197198
self.bind_var(pvar, &BoundValue::Str(fvar.clone()), &mut new_bindings)?;
198199
self.bind_loc(*ploc, *floc, pvar, &mut new_bindings)?;
199200
Some(new_bindings)
@@ -218,6 +219,7 @@ impl SignatureEngine {
218219
location: floc,
219220
},
220221
) => {
222+
let mut new_bindings = bindings.clone();
221223
self.bind_var(pmut, &BoundValue::Str(fmut.clone()), &mut new_bindings)?;
222224
self.bind_loc(*ploc, *floc, pmut, &mut new_bindings)?;
223225
Some(new_bindings)
@@ -242,6 +244,7 @@ impl SignatureEngine {
242244
location: floc,
243245
},
244246
) => {
247+
let mut new_bindings = bindings.clone();
245248
self.bind_var(pid, &BoundValue::Str(fid.clone()), &mut new_bindings)?;
246249
self.bind_loc(*ploc, *floc, pid, &mut new_bindings)?;
247250
Some(new_bindings)
@@ -256,11 +259,12 @@ impl SignatureEngine {
256259
after: fa,
257260
},
258261
) => {
262+
let mut new_bindings = bindings.clone();
259263
self.bind_loc(*pb, *fb, "before", &mut new_bindings)?;
260264
self.bind_loc(*pa, *fa, "after", &mut new_bindings)?;
261265
Some(new_bindings)
262266
}
263-
_ => None, // Variant mismatch — pattern doesn't match this fact
267+
_ => None, // Variant mismatch — no clone wasted
264268
}
265269
}
266270

@@ -490,14 +494,14 @@ impl SignatureEngine {
490494
}
491495

492496
// Signal-based facts
493-
if crash.signal == Some("SIGSEGV".to_string()) {
497+
if crash.signal.as_deref() == Some("SIGSEGV") {
494498
facts.insert(Fact::Use {
495499
var: "null_ptr".to_string(),
496500
location: 0,
497501
});
498502
}
499503

500-
if crash.signal == Some("SIGABRT".to_string()) {
504+
if crash.signal.as_deref() == Some("SIGABRT") {
501505
facts.insert(Fact::Free {
502506
var: "abort_var".to_string(),
503507
location: 0,
@@ -568,7 +572,7 @@ impl SignatureEngine {
568572
}
569573

570574
// Null pointer dereference — SIGSEGV or explicit mention
571-
if crash.signal == Some("SIGSEGV".to_string())
575+
if crash.signal.as_deref() == Some("SIGSEGV")
572576
|| stderr.contains("null pointer")
573577
|| stderr.contains("nullptr")
574578
|| stderr.contains("nil pointer")

0 commit comments

Comments
 (0)