Skip to content

Commit 82b69ed

Browse files
hyperpolymath and claude committed
fix: A2ML TOML-like format support, manifest lookup, and language detection
- Parser now handles both S-expression and TOML-like [section] A2ML formats
- Manifest lookup tries 0-AI-MANIFEST.a2ml first, falls back to AI.a2ml
- Language detection skips external_corpora/, third_party/, corpus/ directories
- Justfile renamed to capital-J (RSR standard)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b8745eb commit 82b69ed

7 files changed

Lines changed: 137 additions & 18 deletions

File tree

.claude/CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ src/
6363
│ ├── reachability.rs # Import scanning for phantom dependency detection
6464
│ ├── classify.rs # Three-way classification: Mitigable/Unmitigable/Informational
6565
│ └── registry.rs # Mitigation lifecycle registry (JSON persistence)
66-
├── a2ml/ # AI manifest protocol
66+
├── a2ml/ # AI manifest protocol (TOML-like format support, 0-AI-MANIFEST.a2ml priority lookup)
6767
├── panll/ # PanLL event-chain export
6868
├── storage/ # Filesystem + VerisimDB persistence
6969
├── i18n/ # Multi-language support (ISO 639-1, 10 languages)
@@ -90,7 +90,7 @@ cp target/release/panic-attack ~/.asdf/installs/rust/nightly/bin/
9090

9191
- **47 language analyzers**: Rust, C/C++, Go, Python, JavaScript, Ruby, Elixir, Erlang, Gleam, ReScript, OCaml, SML, Scheme, Racket, Haskell, PureScript, Idris, Lean, Agda, Prolog, Logtalk, Datalog, Zig, Ada, Odin, Nim, Pony, D, Nickel, Nix, Shell, Julia, Lua, + 12 nextgen DSLs
9292
- **20 weak point categories**: UnsafeCode, PanicPath, CommandInjection, UnsafeDeserialization, AtomExhaustion, UnsafeFFI, PathTraversal, HardcodedSecret, etc.
93-
- **Per-file language detection**: Each file analyzed with its own language-specific patterns
93+
- **Per-file language detection**: Each file analyzed with its own language-specific patterns. Skips `external_corpora/`, `third_party/`, `testdata/`, `test_fixtures/`, `fixtures/`, `corpus/`, and `corpora/` directories
9494
- **miniKanren logic engine**: Relational reasoning for taint analysis, cross-language vulnerability chains, and search strategy optimisation
9595
- **Latin-1 fallback**: Non-UTF-8 files handled gracefully
9696
- **JSON output**: Machine-readable for pipeline integration

.machine_readable/6a2/STATE.a2ml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
[metadata]
66
project = "panic-attacker"
77
version = "0.1.0"
8-
last-updated = "2026-03-15"
8+
last-updated = "2026-03-23"
99
status = "active"
1010

1111
[project-context]
1212
name = "panic-attacker"
1313
completion-percentage = 0
1414
phase = "In development"
15+
16+
[fixes-2026-03-23]
17+
a2ml-parser = "Now handles TOML-like format in addition to S-expression format"
18+
manifest-lookup = "Tries 0-AI-MANIFEST.a2ml first, then falls back to AI.a2ml"
19+
language-detection = "Skips external_corpora/, third_party/, testdata/, test_fixtures/, fixtures/, corpus/, and corpora/ directories during scanning"

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [2.0.0+] - 2026-03-23
4+
5+
### Fixed
6+
- **A2ML parser**: Now handles TOML-like format (key = "value") in addition to S-expression format
7+
- **Manifest lookup**: Tries `0-AI-MANIFEST.a2ml` first before falling back to `AI.a2ml`
8+
- **Language detection**: Skips `external_corpora/`, `third_party/`, `testdata/`, `test_fixtures/`, `fixtures/`, `corpus/`, and `corpora/` directories to avoid false positives from vendored or reference text
9+
310
## [2.0.0+] - 2026-03-01
411

512
### Added
File renamed without changes.

src/a2ml/mod.rs

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -152,26 +152,120 @@ impl Default for Manifest {
152152

153153
impl Manifest {
154154
/// Load the manifest from its default location.
///
/// Two relative paths are probed in order: `0-AI-MANIFEST.a2ml`, then
/// `AI.a2ml`. The first one that exists is handed to `Self::load`. When
/// neither exists, `Self::load` is still called with the first candidate so
/// that the returned error names that path.
///
/// NOTE(review): lines under an `NNN-` marker are the pre-commit
/// implementation shown by the diff view (a single hard-coded `AI.a2ml`
/// lookup); lines under an `NNN+` marker are the replacement.
pub fn load_default() -> Result<Self> {
155-
let path = PathBuf::from("AI.a2ml");
156-
Self::load(&path)
155+
// Try RSR standard name first, then legacy name
156+
let candidates = [
157+
PathBuf::from("0-AI-MANIFEST.a2ml"),
158+
PathBuf::from("AI.a2ml"),
159+
];
160+
for path in &candidates {
161+
if path.exists() {
162+
return Self::load(path);
163+
}
164+
}
165+
// Fall through with the primary name for a clear error message
166+
Self::load(&candidates[0])
157167
}
158168

159169
/// Read the A2ML manifest at `path` and parse it.
///
/// The whole file is read into a string; a read failure is returned with the
/// context message `reading A2ML manifest <path>`.
///
/// The format is chosen from the first line that is neither blank nor a
/// `#` comment: if it starts with `[` the text goes to
/// `Self::parse_toml_like`, otherwise it is parsed as an S-expression with
/// `Parser`. In the S-expression case the parsed tree must be a list whose
/// first element is an atom; that atom becomes `root_name` and the remaining
/// elements become `entries`. Any other shape yields the error
/// `unexpected A2ML manifest structure`.
///
/// NOTE(review): lines under an `NNN-` marker are the pre-commit
/// S-expression-only implementation shown by the diff view; lines under an
/// `NNN+` marker are the replacement.
pub fn load(path: &Path) -> Result<Self> {
160170
let raw = fs::read_to_string(path)
161171
.with_context(|| format!("reading A2ML manifest {}", path.display()))?;
162-
let mut parser = Parser::new(&raw);
163-
let tree = parser.parse_all()?;
164-
if let Sexpr::List(mut items) = tree {
165-
if let Some(Sexpr::Atom(root)) = items.first() {
166-
let root_name = root.clone();
167-
items.remove(0);
168-
return Ok(Self {
169-
root_name,
170-
entries: items,
171-
});
172+
173+
// Detect format: TOML-like ([sections]) vs S-expression ((manifest ...))
174+
let trimmed = raw.trim_start();
175+
let first_meaningful = trimmed
176+
.lines()
177+
.find(|line| {
178+
let l = line.trim();
179+
!l.is_empty() && !l.starts_with('#')
180+
})
181+
// A file with no meaningful line yields "" and so takes the S-expression branch below.
.unwrap_or("");
182+
183+
if first_meaningful.trim().starts_with('[') {
184+
// TOML-like A2ML format — parse sections into S-expression tree
185+
Self::parse_toml_like(&raw)
186+
} else {
187+
// Classic S-expression format
188+
let mut parser = Parser::new(&raw);
189+
let tree = parser.parse_all()?;
190+
if let Sexpr::List(mut items) = tree {
191+
if let Some(Sexpr::Atom(root)) = items.first() {
192+
let root_name = root.clone();
193+
items.remove(0);
194+
return Ok(Self {
195+
root_name,
196+
entries: items,
197+
});
198+
}
172199
}
200+
Err(anyhow!("unexpected A2ML manifest structure"))
173201
}
174-
Err(anyhow!("unexpected A2ML manifest structure"))
202+
}
203+
204+
/// Parse TOML-like A2ML format into the same internal structure
205+
fn parse_toml_like(raw: &str) -> Result<Self> {
206+
let mut root_name = "manifest".to_string();
207+
let mut sections: Vec<(String, Vec<(String, String)>)> = Vec::new();
208+
let mut current_section: Option<(String, Vec<(String, String)>)> = None;
209+
210+
for line in raw.lines() {
211+
let trimmed = line.trim();
212+
if trimmed.is_empty() || trimmed.starts_with('#') {
213+
continue;
214+
}
215+
216+
if trimmed.starts_with('[') && trimmed.ends_with(']') {
217+
// Section header
218+
if let Some(section) = current_section.take() {
219+
sections.push(section);
220+
}
221+
let name = trimmed[1..trimmed.len() - 1].trim().to_string();
222+
current_section = Some((name, Vec::new()));
223+
} else if let Some(eq_pos) = trimmed.find('=') {
224+
// Key = value pair
225+
let key = trimmed[..eq_pos].trim().to_string();
226+
let mut value = trimmed[eq_pos + 1..].trim().to_string();
227+
// Strip surrounding quotes
228+
if value.starts_with('"') && value.ends_with('"') && value.len() >= 2 {
229+
value = value[1..value.len() - 1].to_string();
230+
}
231+
// Strip inline comments
232+
if let Some(comment_pos) = value.find(" #") {
233+
value = value[..comment_pos].trim().to_string();
234+
}
235+
if let Some(ref mut section) = current_section {
236+
section.1.push((key, value));
237+
}
238+
}
239+
// Arrays ([...]) are skipped for now — not needed for manifest lookup
240+
}
241+
if let Some(section) = current_section {
242+
sections.push(section);
243+
}
244+
245+
// Find the root name from the first section (typically [manifest])
246+
if let Some((name, _)) = sections.first() {
247+
root_name = name.clone();
248+
}
249+
250+
// Convert sections into Sexpr tree
251+
let entries: Vec<Sexpr> = sections
252+
.into_iter()
253+
.map(|(name, pairs)| {
254+
let mut list = vec![Sexpr::Atom(name)];
255+
for (k, v) in pairs {
256+
list.push(Sexpr::List(vec![
257+
Sexpr::Atom(k),
258+
Sexpr::String(v),
259+
]));
260+
}
261+
Sexpr::List(list)
262+
})
263+
.collect();
264+
265+
Ok(Self {
266+
root_name,
267+
entries,
268+
})
175269
}
176270

177271
pub fn report_formats(&self) -> Vec<ReportOutputFormat> {

src/assail/analyzer.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,13 @@ impl Analyzer {
679679
"zig-cache",
680680
"zig-out",
681681
"ebin",
682+
"external_corpora",
683+
"third_party",
684+
"testdata",
685+
"test_fixtures",
686+
"fixtures",
687+
"corpus",
688+
"corpora",
682689
]
683690
.contains(&name_str)
684691
{

src/main.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ fn run_main() -> Result<()> {
10351035
let manifest = match Manifest::load_default() {
10361036
Ok(manifest) => manifest,
10371037
Err(err) => {
1038-
eprintln!("warning: failed to read AI.a2ml: {}", err);
1038+
eprintln!("warning: failed to read AI manifest: {}", err);
10391039
Manifest::default()
10401040
}
10411041
};
@@ -1660,7 +1660,13 @@ fn run_main() -> Result<()> {
16601660
}
16611661

16621662
Commands::Manifest { path, output } => {
1663-
let target = path.unwrap_or_else(|| PathBuf::from("AI.a2ml"));
1663+
let target = path.unwrap_or_else(|| {
1664+
if PathBuf::from("0-AI-MANIFEST.a2ml").exists() {
1665+
PathBuf::from("0-AI-MANIFEST.a2ml")
1666+
} else {
1667+
PathBuf::from("AI.a2ml")
1668+
}
1669+
});
16641670
let manifest = Manifest::load(&target).unwrap_or_default();
16651671
let nickel = manifest.to_nickel();
16661672
if let Some(output_path) = output {

0 commit comments

Comments
 (0)