diff --git a/src/assail/analyzer.rs b/src/assail/analyzer.rs index ef6bd70..cae0097 100644 --- a/src/assail/analyzer.rs +++ b/src/assail/analyzer.rs @@ -240,6 +240,9 @@ static RE_UNCHECKED_MALLOC: OnceLock = OnceLock::new(); static RE_ELIXIR_APPLY: OnceLock = OnceLock::new(); static RE_PONY_FFI: OnceLock = OnceLock::new(); static RE_SHELL_UNQUOTED_VAR: OnceLock = OnceLock::new(); +static RE_SHELL_EVAL_BUILTIN: OnceLock = OnceLock::new(); +static RE_WORD_EVAL_CALL: OnceLock = OnceLock::new(); +static RE_WORD_EVAL_EXEC_CALL: OnceLock = OnceLock::new(); static RE_HTTP_URL: OnceLock = OnceLock::new(); static RE_HTTP_LOCALHOST: OnceLock = OnceLock::new(); static RE_HTTP_JSONLD_IDENTIFIER: OnceLock = OnceLock::new(); @@ -259,6 +262,59 @@ static RE_HARDCODED_SECRET: OnceLock = OnceLock::new(); /// Does not handle OCaml `(* *)` or Forth `\` — edge cases for later. static RE_TODO_COMMENT: OnceLock = OnceLock::new(); +/// Heuristic: is the `malloc` on `lines[idx]` NULL-checked within a short +/// window of following lines? +/// +/// An "UncheckedAllocation" finding should only fire when the result is *not* +/// guarded. This recognises the common C idioms (`if (p == NULL)`, +/// `if (!p)`, `if (p == nullptr)`) on the lvalue assigned from `malloc`. +/// +/// Deliberately conservative to avoid false negatives in a security scanner: +/// a site is treated as checked only on a clear NULL-guard signal that +/// references the allocated variable (or, when no lvalue can be parsed, any +/// NULL/nullptr token in the window). Genuinely-unchecked allocations — the +/// real finding — are still reported. +fn c_alloc_result_is_null_checked(lines: &[&str], idx: usize) -> bool { + let line = lines[idx]; + // Parse the lvalue assigned from malloc: the last identifier left of '='. + // e.g. `cache` in `LRUCache* cache = malloc(...)`, `data` in + // `buf->data = malloc(...)`. + let var = if line.contains('=') { + line.split('=') + .next() + .and_then(|lhs| { + lhs.split(|c: char| !c.is_alphanumeric() && c != '_') + .filter(|s| !s.is_empty()) + .last() + }) + .map(str::to_string) + } else { + None + }; + + let end = (idx + 7).min(lines.len()); + for l in &lines[idx..end] { + let null_token = l.contains("NULL") || l.contains("nullptr"); + match &var { + Some(v) if !v.is_empty() => { + if null_token && l.contains(v.as_str()) { + return true; + } + // `if (!p)` / `if (! p)` negation guard on the variable. + if l.trim_start().starts_with("if") && l.contains(&format!("!{}", v)) { + return true; + } + } + _ => { + if null_token { + return true; + } + } + } + } + false +} + pub struct Analyzer { target: PathBuf, language: Language, @@ -1662,20 +1718,29 @@ impl Analyzer { stats.threading_constructs += content.matches("pthread_").count(); stats.threading_constructs += content.matches("std::thread").count(); + // UncheckedAllocation: report only mallocs whose result is NOT + // NULL-checked within a short window of following lines. Scanning + // per-line (rather than the whole file) lets us (a) skip guarded + // allocations — `if (p == NULL)` / `if (!p)` — instead of flagging + // every malloc, and (b) attach a line number, which in turn lets an + // inline `// panic-attack: accepted` marker suppress a reviewed site. let unchecked_malloc = RE_UNCHECKED_MALLOC .get_or_init(|| Regex::new(r"malloc\([^)]+\)\s*;").expect("static regex is valid")); - if unchecked_malloc.is_match(content) { - weak_points.push(WeakPoint { - file: None, - line: None, - category: WeakPointCategory::UncheckedAllocation, - location: Some(file_path.to_string()), - severity: Severity::Critical, - description: format!("Unchecked malloc in {}", file_path), - recommended_attack: vec![AttackAxis::Memory], - suppressed: false, - test_context: None, - }); + let c_lines: Vec<&str> = content.lines().collect(); + for (idx, line) in c_lines.iter().enumerate() { + if unchecked_malloc.is_match(line) && !c_alloc_result_is_null_checked(&c_lines, idx) { + weak_points.push(WeakPoint { + file: None, + line: Some((idx + 1) as u32), + category: WeakPointCategory::UncheckedAllocation, + location: Some(file_path.to_string()), + severity: Severity::Critical, + description: format!("Unchecked malloc in {}", file_path), + recommended_attack: vec![AttackAxis::Memory], + suppressed: false, + test_context: None, + }); + } } // gets() — no bounds checking, classic buffer overflow vector @@ -2035,7 +2100,11 @@ impl Analyzer { }); } - if content.contains("eval(") || content.contains("exec(") { + // Word-boundary match so identifiers like `safe_eval(` / `myexec(` + // are not flagged as a Python eval/exec call. + let py_eval_exec = RE_WORD_EVAL_EXEC_CALL + .get_or_init(|| Regex::new(r"\b(?:eval|exec)\s*\(").expect("static regex is valid")); + if py_eval_exec.is_match(content) { weak_points.push(WeakPoint { file: None, line: None, @@ -2234,8 +2303,12 @@ impl Analyzer { stats.threading_constructs += content.matches("Worker(").count(); stats.threading_constructs += content.matches("new Worker").count(); - // Skip eval() check for browser extensions using DevTools API - if content.contains("eval(") && !self.browser_extension { + // Skip eval() check for browser extensions using DevTools API. + // Word-boundary match so FFI symbol names like `proven_calculator_eval(` + // are not flagged as a JS `eval()` call. + let word_eval = RE_WORD_EVAL_CALL + .get_or_init(|| Regex::new(r"\beval\s*\(").expect("static regex is valid")); + if word_eval.is_match(content) && !self.browser_extension { weak_points.push(WeakPoint { file: None, line: None, @@ -4475,8 +4548,13 @@ impl Analyzer { stats.io_operations += content.matches("curl ").count(); stats.io_operations += content.matches("wget ").count(); - // Command injection via eval - if content.contains("eval ") || content.contains("eval\t") { + // Command injection via the shell `eval` builtin. Match `eval` only in + // statement position (start of line, or after whitespace/`;`/`&`/`|`/`(`) + // so that `--eval` / `-eval` CLI flags and identifiers like `myeval` + // are not flagged. + let shell_eval = RE_SHELL_EVAL_BUILTIN + .get_or_init(|| Regex::new(r"(?m)(?:^|[\s;&|(])eval[ \t]").expect("static regex is valid")); + if shell_eval.is_match(content) { weak_points.push(WeakPoint { file: None, line: None, diff --git a/tests/analyzer_tests.rs b/tests/analyzer_tests.rs index 0a9702c..e8ea66a 100644 --- a/tests/analyzer_tests.rs +++ b/tests/analyzer_tests.rs @@ -358,3 +358,109 @@ fn main() { assert!(stats.lines > 0); Ok(()) } + +// --- Track C precision fixes (2026-06-23) --------------------------------- + +#[test] +fn test_c_analyzer_skips_null_checked_malloc() -> Result<(), Box> { + let dir = TempDir::new()?; + let file = write_test_file( + &dir, + "checked.c", + r#" +#include + +int* make(void) { + int* ptr = malloc(100); + if (ptr == NULL) { + return NULL; + } + *ptr = 42; + return ptr; +} +"#, + )?; + let report = assail::analyze(&file)?; + let unchecked = report + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::UncheckedAllocation)); + assert!(!unchecked, "NULL-checked malloc must NOT be flagged"); + Ok(()) +} + +#[test] +fn test_c_analyzer_still_flags_genuinely_unchecked_malloc( +) -> Result<(), Box> { + let dir = TempDir::new()?; + let file = write_test_file( + &dir, + "unchecked.c", + r#" +#include +int main() { + int* ptr = malloc(100); + *ptr = 42; + return 0; +} +"#, + )?; + let report = assail::analyze(&file)?; + let unchecked = report + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::UncheckedAllocation)); + assert!(unchecked, "unguarded malloc must still be flagged"); + Ok(()) +} + +#[test] +fn test_shell_eval_builtin_flagged_but_not_cli_flag( +) -> Result<(), Box> { + let dir = TempDir::new()?; + // `--eval` is a CLI flag, not the shell eval builtin. + let flag = write_test_file(&dir, "flag.sh", "#!/bin/sh\nmybinary --eval \"window.close()\"\n")?; + let report = assail::analyze(&flag)?; + let flagged = report + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::CommandInjection)); + assert!(!flagged, "--eval CLI flag must NOT be flagged as shell eval"); + + // The real eval builtin must still be flagged. + let builtin = write_test_file(&dir, "real.sh", "#!/bin/sh\neval \"$user_input\"\n")?; + let report2 = assail::analyze(&builtin)?; + let flagged2 = report2 + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::CommandInjection)); + assert!(flagged2, "shell eval builtin must still be flagged"); + Ok(()) +} + +#[test] +fn test_js_ffi_eval_symbol_not_flagged() -> Result<(), Box> { + let dir = TempDir::new()?; + // An FFI symbol named *_eval is not a JS eval() call. + let ffi = write_test_file( + &dir, + "ffi.js", + "const r = lib.proven_calculator_eval(buf, buf.length);\n", + )?; + let report = assail::analyze(&ffi)?; + let flagged = report + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::DynamicCodeExecution)); + assert!(!flagged, "FFI symbol *_eval must NOT be flagged as JS eval()"); + + // A genuine eval() call must still be flagged. + let real = write_test_file(&dir, "real.js", "const r = eval(userInput);\n")?; + let report2 = assail::analyze(&real)?; + let flagged2 = report2 + .weak_points + .iter() + .any(|wp| matches!(wp.category, WeakPointCategory::DynamicCodeExecution)); + assert!(flagged2, "genuine JS eval() must still be flagged"); + Ok(()) +}