@@ -13,6 +13,17 @@ use regex::Regex;
1313use std:: collections:: { HashMap , HashSet } ;
1414use std:: fs;
1515use std:: path:: { Path , PathBuf } ;
16+ use std:: sync:: OnceLock ;
17+
18+ /// Lazily compiled regexes for hot-path pattern matching.
19+ /// Each OnceLock is filled on first use, so a pattern is compiled once instead of on every file analyzed.
20+ static RE_UNCHECKED_MALLOC : OnceLock < Regex > = OnceLock :: new ( ) ;
21+ static RE_ELIXIR_APPLY : OnceLock < Regex > = OnceLock :: new ( ) ;
22+ static RE_PONY_FFI : OnceLock < Regex > = OnceLock :: new ( ) ;
23+ static RE_SHELL_UNQUOTED_VAR : OnceLock < Regex > = OnceLock :: new ( ) ;
24+ static RE_HTTP_URL : OnceLock < Regex > = OnceLock :: new ( ) ;
25+ static RE_HTTP_LOCALHOST : OnceLock < Regex > = OnceLock :: new ( ) ;
26+ static RE_HARDCODED_SECRET : OnceLock < Regex > = OnceLock :: new ( ) ;
1627
1728pub struct Analyzer {
1829 target : PathBuf ,
@@ -115,9 +126,10 @@ impl Analyzer {
115126 }
116127 } ;
117128
118- // Try UTF-8 first, then Latin-1 fallback
119- let content = match String :: from_utf8 ( raw_bytes. clone ( ) ) {
120- Ok ( s) => s,
129+ // Try UTF-8 first, then fall back to Windows-1252 decoding.
130+ // str::from_utf8 validates by borrowing raw_bytes; only the success path copies (via to_owned).
131+ let content = match std:: str:: from_utf8 ( & raw_bytes) {
132+ Ok ( s) => s. to_owned ( ) ,
121133 Err ( _) => {
122134 let ( cow, _, had_errors) = encoding_rs:: WINDOWS_1252 . decode ( & raw_bytes) ;
123135 if had_errors {
@@ -639,7 +651,7 @@ impl Analyzer {
639651 stats. threading_constructs += content. matches ( "pthread_" ) . count ( ) ;
640652 stats. threading_constructs += content. matches ( "std::thread" ) . count ( ) ;
641653
642- let unchecked_malloc = Regex :: new ( r"malloc\([^)]+\)\s*;" ) . unwrap ( ) ;
654+ let unchecked_malloc = RE_UNCHECKED_MALLOC . get_or_init ( || Regex :: new ( r"malloc\([^)]+\)\s*;" ) . unwrap ( ) ) ;
643655 if unchecked_malloc. is_match ( content) {
644656 weak_points. push ( WeakPoint {
645657 category : WeakPointCategory :: UncheckedAllocation ,
@@ -1000,7 +1012,7 @@ impl Analyzer {
10001012 }
10011013
10021014 // Unsafe apply
1003- let apply_re = Regex :: new ( r"apply\([^,]+,\s*[^,]+," ) . unwrap ( ) ;
1015+ let apply_re = RE_ELIXIR_APPLY . get_or_init ( || Regex :: new ( r"apply\([^,]+,\s*[^,]+," ) . unwrap ( ) ) ;
10041016 if apply_re. is_match ( content) {
10051017 weak_points. push ( WeakPoint {
10061018 category : WeakPointCategory :: DynamicCodeExecution ,
@@ -1787,7 +1799,7 @@ impl Analyzer {
17871799 file_path : & str ,
17881800 ) -> Result < ( ) > {
17891801 // FFI calls (@ prefix)
1790- let ffi_re = Regex :: new ( r"@[a-zA-Z_]\w*\[" ) . unwrap ( ) ;
1802+ let ffi_re = RE_PONY_FFI . get_or_init ( || Regex :: new ( r"@[a-zA-Z_]\w*\[" ) . unwrap ( ) ) ;
17911803 let ffi_count = ffi_re. find_iter ( content) . count ( ) ;
17921804 stats. unsafe_blocks += ffi_count;
17931805
@@ -1914,7 +1926,7 @@ impl Analyzer {
19141926 }
19151927
19161928 // Unquoted variable expansion (potential injection)
1917- let unquoted_var = Regex :: new ( r#"\$[A-Za-z_]\w*"# ) . unwrap ( ) ;
1929+ let unquoted_var = RE_SHELL_UNQUOTED_VAR . get_or_init ( || Regex :: new ( r#"\$[A-Za-z_]\w*"# ) . unwrap ( ) ) ;
19181930 let dollar_vars = unquoted_var. find_iter ( content) . count ( ) ;
19191931 // Only flag if high number of unquoted vars
19201932 if dollar_vars > 20 {
@@ -2119,9 +2131,9 @@ impl Analyzer {
21192131 ) -> Result < ( ) > {
21202132 // HTTP (insecure) URLs - should be HTTPS
21212133 // Count http:// URLs that are NOT localhost/127.0.0.1 (those are fine)
2122- let http_re = Regex :: new ( r#"http://[a-zA-Z0-9]"# ) . unwrap ( ) ;
2123- let http_localhost_re =
2124- Regex :: new ( r#"http://(localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"# ) . unwrap ( ) ;
2134+ let http_re = RE_HTTP_URL . get_or_init ( || Regex :: new ( r#"http://[a-zA-Z0-9]"# ) . unwrap ( ) ) ;
2135+ let http_localhost_re = RE_HTTP_LOCALHOST . get_or_init ( ||
2136+ Regex :: new ( r#"http://(localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"# ) . unwrap ( ) ) ;
21252137 let http_total = http_re. find_iter ( content) . count ( ) ;
21262138 let http_local = http_localhost_re. find_iter ( content) . count ( ) ;
21272139 let http_count = http_total. saturating_sub ( http_local) ;
@@ -2136,9 +2148,9 @@ impl Analyzer {
21362148 }
21372149
21382150 // Hardcoded secrets patterns
2139- let secret_re = Regex :: new (
2151+ let secret_re = RE_HARDCODED_SECRET . get_or_init ( || Regex :: new (
21402152 r#"(?i)(api[_-]?key|api[_-]?secret|password|passwd|secret[_-]?key|access[_-]?token|private[_-]?key)\s*[=:]\s*["'][^"']{8,}"#
2141- ) . unwrap ( ) ;
2153+ ) . unwrap ( ) ) ;
21422154 if secret_re. is_match ( content) {
21432155 weak_points. push ( WeakPoint {
21442156 category : WeakPointCategory :: HardcodedSecret ,
0 commit comments