Skip to content

Commit b720cdb

Browse files
Improve DDIR parsing (#743)
1 parent cc63ec0 commit b720cdb

1 file changed

Lines changed: 179 additions & 22 deletions

File tree

interactive/src/lower.rs

Lines changed: 179 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,54 @@
11
//! Lowering from AST to IR.
2+
//!
3+
//! Statement order within a scope does not affect semantics. At each scope
4+
//! level we:
5+
//!
6+
//! 1. Bucket statements (and error on duplicate names).
7+
//! 2. Pre-push `Variable` placeholders for every `var` so that anything in
8+
//! the scope can refer to them.
9+
//! 3. Topologically lower `let` bindings and child scopes by dependency:
10+
//! each item is lowered once all the names it transitively needs at this
11+
//! level are bound. A cycle among `let`s is an error (use a `var` to
12+
//! introduce recursion).
13+
//! 4. Lower the (single) `result` expression, if any.
14+
//! 5. Lower each `var`'s body and emit a `Bind` from the placeholder to the
15+
//! resulting value.
216
3-
use std::collections::BTreeMap;
4-
use std::collections::HashMap;
17+
use std::collections::{BTreeMap, BTreeSet, HashMap};
518

619
use crate::parse::*;
720
use crate::ir::{Node, LinearOp, Id, Program};
821

922
struct Lowering {
1023
nodes: BTreeMap<Id, Node>,
1124
next_id: Id,
25+
/// Stack of value-name scopes; innermost last.
1226
scopes: Vec<HashMap<String, Id>>,
27+
/// Inner environments of named scopes, keyed by scope name; the `usize`
28+
/// records the scope's nesting depth (used by `Node::Leave`).
1329
named_scopes: HashMap<String, (usize, HashMap<String, Id>)>,
1430
level: usize,
1531
}
1632

1733
impl Lowering {
18-
fn new() -> Self { Lowering { nodes: BTreeMap::new(), next_id: 0, scopes: vec![HashMap::new()], named_scopes: HashMap::new(), level: 0 } }
34+
fn new() -> Self {
35+
Lowering {
36+
nodes: BTreeMap::new(),
37+
next_id: 0,
38+
scopes: vec![HashMap::new()],
39+
named_scopes: HashMap::new(),
40+
level: 0,
41+
}
42+
}
43+
1944
/// Stores `node` under a freshly allocated id and returns that id.
///
/// Ids are handed out sequentially from `next_id`.
fn push(&mut self, node: Node) -> Id {
    let fresh = self.next_id;
    self.nodes.insert(fresh, node);
    self.next_id += 1;
    fresh
}
20-
fn resolve_name(&self, name: &str) -> Id { for scope in self.scopes.iter().rev() { if let Some(&id) = scope.get(name) { return id; } } panic!("Unresolved name: {}", name); }
2145
/// Binds `name` to `id` in the innermost scope, replacing any binding the
/// innermost scope already held for that name.
///
/// # Panics
///
/// Panics if the scope stack is empty.
fn bind_name(&mut self, name: String, id: Id) {
    let innermost = self.scopes.last_mut().unwrap();
    innermost.insert(name, id);
}
46+
/// Looks `name` up through the scope stack, innermost scope first, and
/// returns the id of the first binding found.
///
/// # Panics
///
/// Panics with `Unresolved name: <name>` when no scope binds `name`.
fn resolve_name(&self, name: &str) -> Id {
    let hit = self
        .scopes
        .iter()
        .rev()
        .find_map(|env| env.get(name).copied());
    match hit {
        Some(id) => id,
        None => panic!("Unresolved name: {}", name),
    }
}
2252

2353
fn lower_program(mut self, stmts: Vec<Stmt>) -> Program {
2454
let mut result_id = None;
@@ -27,20 +57,78 @@ impl Lowering {
2757
}
2858

2959
fn lower_stmts(&mut self, stmts: Vec<Stmt>, result_id: &mut Option<Id>) {
30-
// First pass: create Variables for all var bindings.
31-
for stmt in &stmts {
32-
if let Stmt::Var(name, _) = stmt {
33-
let var_id = self.push(Node::Variable);
34-
self.bind_name(name.clone(), var_id);
35-
}
36-
}
37-
// Second pass: lower let bindings and collect var bodies.
38-
let mut var_bindings = Vec::new();
60+
// ---- 1. Bucket statements; reject duplicate names. ----
61+
// `order` records the original textual order so the topological pass
62+
// is deterministic when several items are simultaneously ready.
63+
let mut vars: Vec<(String, Expr)> = Vec::new();
64+
let mut lets: HashMap<String, Expr> = HashMap::new();
65+
let mut scopes: HashMap<String, Vec<Stmt>> = HashMap::new();
66+
let mut order: Vec<(ItemKind, String)> = Vec::new();
67+
let mut results: Vec<Expr> = Vec::new();
68+
let mut seen: BTreeSet<String> = BTreeSet::new();
3969
for stmt in stmts {
4070
match stmt {
41-
Stmt::Let(name, expr) => { let id = self.lower_expr(expr); self.bind_name(name, id); },
42-
Stmt::Var(name, expr) => { var_bindings.push((name, expr)); },
71+
Stmt::Let(name, expr) => {
72+
if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); }
73+
order.push((ItemKind::Let, name.clone()));
74+
lets.insert(name, expr);
75+
},
76+
Stmt::Var(name, expr) => {
77+
if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); }
78+
vars.push((name, expr));
79+
},
4380
Stmt::Scope(name, body) => {
81+
if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); }
82+
order.push((ItemKind::Scope, name.clone()));
83+
scopes.insert(name, body);
84+
},
85+
Stmt::Result(expr) => results.push(expr),
86+
}
87+
}
88+
if results.len() > 1 { panic!("Multiple `result` statements at the same scope level"); }
89+
90+
// ---- 2. Pre-bind `Variable` placeholders. ----
91+
for (name, _) in &vars {
92+
let id = self.push(Node::Variable);
93+
self.bind_name(name.clone(), id);
94+
}
95+
96+
// ---- 3. Topologically lower lets and child scopes. ----
97+
// Deps for a let: free names of its expression that are themselves
98+
// defined as let/scope at this level (vars are already bound).
99+
// Deps for a scope: free names that escape the scope body, restricted
100+
// similarly.
101+
let defined_topo: BTreeSet<&str> = lets.keys().chain(scopes.keys()).map(String::as_str).collect();
102+
let mut remaining_deps: HashMap<String, BTreeSet<String>> = HashMap::new();
103+
for (name, expr) in &lets {
104+
remaining_deps.insert(name.clone(), expr_deps(expr, &defined_topo, name));
105+
}
106+
for (name, body) in &scopes {
107+
remaining_deps.insert(name.clone(), scope_body_deps(body, &defined_topo, name));
108+
}
109+
drop(defined_topo);
110+
111+
// Greedy topo: scan `order` for an item with no remaining deps; lower
112+
// it and remove it from every other item's dep set. Repeat until done.
113+
let mut pending: Vec<(ItemKind, String)> = order;
114+
while !pending.is_empty() {
115+
let pick = pending.iter().position(|(_, n)| remaining_deps[n].is_empty());
116+
let Some(idx) = pick else {
117+
let stuck: Vec<String> = pending.iter().map(|(_, n)| n.clone()).collect();
118+
panic!("Cyclic dependency among let/scope bindings: {:?}. Use `var` to introduce recursion.", stuck);
119+
};
120+
let (kind, name) = pending.remove(idx);
121+
remaining_deps.remove(&name);
122+
for deps in remaining_deps.values_mut() { deps.remove(&name); }
123+
124+
match kind {
125+
ItemKind::Let => {
126+
let expr = lets.remove(&name).unwrap();
127+
let id = self.lower_expr(expr);
128+
self.bind_name(name, id);
129+
},
130+
ItemKind::Scope => {
131+
let body = scopes.remove(&name).unwrap();
44132
self.push(Node::Scope);
45133
self.level += 1;
46134
self.scopes.push(HashMap::new());
@@ -52,11 +140,17 @@ impl Lowering {
52140
self.level -= 1;
53141
self.push(Node::EndScope);
54142
},
55-
Stmt::Result(expr) => { let id = self.lower_expr(expr); *result_id = Some(id); },
56143
}
57144
}
58-
// Third pass: bind vars.
59-
for (name, expr) in var_bindings {
145+
146+
// ---- 4. Lower the result expression (if any). ----
147+
if let Some(expr) = results.into_iter().next() {
148+
let id = self.lower_expr(expr);
149+
*result_id = Some(id);
150+
}
151+
152+
// ---- 5. Lower var bodies and emit Bind nodes. ----
153+
for (name, expr) in vars {
60154
let var_id = self.resolve_name(&name);
61155
let value_id = self.lower_expr(expr);
62156
self.push(Node::Bind { variable: var_id, value: value_id });
@@ -87,14 +181,77 @@ impl Lowering {
87181
let id = self.push(Node::Arrange(id));
88182
self.push(Node::Reduce { input: id, reducer })
89183
},
90-
Expr::Filter(input, cond) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Filter(cond)] }) },
91-
Expr::Negate(input) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Negate] }) },
92-
Expr::EnterAt(input, field) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::EnterAt(field)] }) },
93-
Expr::Inspect(input, label) => { let id = self.lower_expr(*input); self.push(Node::Inspect { input: id, label }) },
184+
Expr::Filter(input, cond) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Filter(cond)] }) },
185+
Expr::Negate(input) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Negate] }) },
186+
Expr::EnterAt(input, fld) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::EnterAt(fld)] }) },
187+
Expr::Inspect(input, lab) => { let id = self.lower_expr(*input); self.push(Node::Inspect { input: id, label: lab }) },
94188
Expr::Concat(exprs) => { let ids: Vec<Id> = exprs.into_iter().map(|e| self.lower_expr(e)).collect(); self.push(Node::Concat(ids)) },
95189
Expr::Arrange(input) => { let id = self.lower_expr(*input); self.push(Node::Arrange(id)) },
96190
}
97191
}
98192
}
99193

194+
/// Which kind of statement an entry in the topological work list stands for.
#[derive(Clone, Copy)]
enum ItemKind {
    /// A `let` binding.
    Let,
    /// A named child scope.
    Scope,
}
196+
197+
/// Free names of `expr` that are defined at this scope level, excluding the
198+
/// item's own name (a let referencing its own name is just an unresolved
199+
/// reference for the current pass — not a self-dep).
200+
fn expr_deps(expr: &Expr, defined: &BTreeSet<&str>, self_name: &str) -> BTreeSet<String> {
201+
let mut free = BTreeSet::new();
202+
expr_free_names(expr, &mut free);
203+
free.into_iter()
204+
.filter(|n| *n != self_name && defined.contains(n))
205+
.map(String::from)
206+
.collect()
207+
}
208+
209+
/// Free names of a scope body (names referenced inside that aren't bound
210+
/// inside), restricted to names defined at the enclosing level.
211+
fn scope_body_deps(body: &[Stmt], defined: &BTreeSet<&str>, self_name: &str) -> BTreeSet<String> {
212+
let mut free = BTreeSet::new();
213+
collect_body_free_names(body, &mut free);
214+
free.into_iter()
215+
.filter(|n| *n != self_name && defined.contains(n))
216+
.map(String::from)
217+
.collect()
218+
}
219+
220+
/// Names this expression refers to that the surrounding scope must resolve.
221+
/// For `name`, the name itself; for `scope::field`, the scope name (the
222+
/// field is resolved within that scope's environment, not the enclosing one).
223+
fn expr_free_names<'a>(expr: &'a Expr, out: &mut BTreeSet<&'a str>) {
224+
match expr {
225+
Expr::Input(_) => {},
226+
Expr::Name(n) => { out.insert(n.as_str()); },
227+
Expr::Qualified(scope, _) => { out.insert(scope.as_str()); },
228+
Expr::Map(e, _) | Expr::Reduce(e, _) | Expr::Filter(e, _)
229+
| Expr::Negate(e) | Expr::EnterAt(e, _) | Expr::Inspect(e, _)
230+
| Expr::Arrange(e) => expr_free_names(e, out),
231+
Expr::Join(l, r, _) => { expr_free_names(l, out); expr_free_names(r, out); },
232+
Expr::Concat(es) => { for e in es { expr_free_names(e, out); } },
233+
}
234+
}
235+
236+
/// Recursively collect names referenced in `body`'s expressions that aren't
237+
/// bound somewhere within `body` itself. The recursion descends through
238+
/// nested scopes, masking out their local bindings as it goes.
239+
fn collect_body_free_names<'a>(body: &'a [Stmt], out: &mut BTreeSet<&'a str>) {
240+
let mut local: BTreeSet<&'a str> = BTreeSet::new();
241+
for stmt in body {
242+
match stmt {
243+
Stmt::Let(n, _) | Stmt::Var(n, _) | Stmt::Scope(n, _) => { local.insert(n.as_str()); },
244+
Stmt::Result(_) => {},
245+
}
246+
}
247+
let mut inner: BTreeSet<&'a str> = BTreeSet::new();
248+
for stmt in body {
249+
match stmt {
250+
Stmt::Let(_, e) | Stmt::Var(_, e) | Stmt::Result(e) => expr_free_names(e, &mut inner),
251+
Stmt::Scope(_, b) => collect_body_free_names(b, &mut inner),
252+
}
253+
}
254+
for n in inner { if !local.contains(n) { out.insert(n); } }
255+
}
256+
100257
/// Lowers a parsed statement list into an IR [`Program`].
///
/// This is the public entry point: it creates a fresh `Lowering` state and
/// runs `lower_program` on `stmts`.
pub fn lower(stmts: Vec<Stmt>) -> Program {
    let lowering = Lowering::new();
    lowering.lower_program(stmts)
}

0 commit comments

Comments
 (0)