Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 201 additions & 1 deletion crates/perry/src/commands/compile/cjs_wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,66 @@ pub(super) fn is_commonjs(source: &str) -> bool {
|| (source.contains("require(") && !source.contains("import "))
}

/// JS reserved words that cannot be used as binding identifiers (e.g.
/// in `const X = ...`). Used by `extract_exports_from_source` to skip
/// CJS-style `module.exports.X = ...` patterns where `X` is a keyword —
/// emitting `export const <keyword> = _cjs.<keyword>;` would fail to
/// parse. `default` (pino's `module.exports.default = pino` interop
/// pattern) is the common real-world case; the rest are filtered
/// defensively. Contextual keywords (`async`, `arguments`, `eval`, `as`,
/// `from`, `of`) are legal identifiers and not included.
fn is_js_reserved_word(name: &str) -> bool {
matches!(
name,
"break"
| "case"
| "catch"
| "class"
| "const"
| "continue"
| "debugger"
| "default"
| "delete"
| "do"
| "else"
| "enum"
| "export"
| "extends"
| "false"
| "finally"
| "for"
| "function"
| "if"
| "import"
| "in"
| "instanceof"
| "new"
| "null"
| "return"
| "super"
| "switch"
| "this"
| "throw"
| "true"
| "try"
| "typeof"
| "var"
| "void"
| "while"
| "with"
| "yield"
| "let"
| "static"
| "implements"
| "interface"
| "package"
| "private"
| "protected"
| "public"
| "await"
)
}

/// Wrap CJS source as ESM. `source_path` is the absolute path of the file
/// being wrapped — used to resolve `require('./relative')` targets when
/// peeking at re-export wrappers' transitive named exports.
Expand Down Expand Up @@ -708,9 +768,24 @@ fn extract_object_literal_exports_from_require(source: &str) -> Vec<(String, Str
/// Matches `var` / `const` / `let`. Order is preserved and duplicates
/// are dropped on the alias name (the first binding wins — matches JS
/// hoisting semantics for the original source).
///
/// Issue #845: the trailing `\s*(?:;|$)` (require a semicolon or
/// end-of-line in multiline mode) is intentional. Without it,
/// `const EventEmitter = require('events').EventEmitter;` matches as
/// `const EventEmitter = require('events')` and the blanking pass at
/// line 336 above leaves `.EventEmitter;` dangling at column 0 of the
/// wrapped output, producing a TS1109 ("Expression expected") parse
/// failure 1000+ bytes past EOF. Only whole-statement aliases (those
/// whose require call is followed by `;` or end-of-line) are safe to
/// blank — anything with `.X` trailing member access binds to the
/// property, not the module object, so the alias-rename pass would
/// be wrong anyway. Same-line follow-on statements like
/// `var dep = require('./dep'); module.exports = dep.value;` still
/// match because the `;` form ends the alias matched region before
/// the follow-on.
fn extract_require_aliases_with_ranges(source: &str) -> Vec<(String, String, (usize, usize))> {
let re = regex::Regex::new(
r#"(?m)^\s*(?:var|const|let)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)\s*;?"#,
r#"(?m)^\s*(?:var|const|let)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)\s*(?:;|$)"#,
)
.unwrap();
let mut seen = Vec::new();
Expand Down Expand Up @@ -1137,6 +1212,19 @@ fn extract_exports_from_source(source: &str) -> Vec<String> {
if name == "__esModule" {
return;
}
// Issue #845: skip JS reserved words. `export const default = _cjs.default;`
// (and other reserved-word forms) is invalid syntax — the named-export
// emission emits `export const <NAME> = _cjs.<NAME>;`, which fails to
// parse if `<NAME>` isn't a valid binding identifier. `default` is the
// common real-world case (pino: `module.exports.default = pino` —
// ESM-interop convention). The default export is already covered by
// the separate `export default _cjs;` statement, so skipping `default`
// here doesn't lose any export. Reserved words other than `default`
// are extremely rare as CJS export names but would parse-fail the
// same way, so filter them all.
if is_js_reserved_word(name) {
return;
}
let owned = name.to_string();
if !names.contains(&owned) {
names.push(owned);
Expand Down Expand Up @@ -1758,4 +1846,116 @@ mod tests {
wrapped
);
}

#[test]
fn require_alias_extract_skips_trailing_member_access() {
// Issue #845 — mysql2 sub-bug 2.
//
// `const EventEmitter = require('events').EventEmitter;` binds the
// class, not the module object. The old regex matched it as
// `const EventEmitter = require('events')` (optional-`;?` stopping
// at `)`) and the blanking pass at wrap_commonjs left `.EventEmitter;`
// dangling at column 0 of the wrapped output — TS1109 parse error
// 1000+ bytes past the original-file EOF.
let src = "class B extends EventEmitter { }\n\
const EventEmitter = require('events').EventEmitter;\n\
const Readable = require('stream').Readable;\n\
const Net = require('net');\n";
let aliases = extract_require_aliases_with_ranges(src);
// Only `Net` is a whole-statement alias; the other two have
// trailing `.X` and must be skipped.
assert_eq!(
aliases.len(),
1,
"expected 1 whole-statement alias, got: {:?}",
aliases
);
assert_eq!(aliases[0].0, "Net");
assert_eq!(aliases[0].1, "net");
}

#[test]
fn wrap_does_not_dangle_member_access_after_blanking() {
// Regression test for issue #845: the wrap output must remain
// parseable when a require() has `.X` member access after it,
// even in the presence of top-level class declarations (which is
// what triggers the blanking pass).
let src = "const EventEmitter = require('events').EventEmitter;\n\
class BaseConnection extends EventEmitter {\n\
constructor() { super(); }\n\
}\n\
module.exports = BaseConnection;\n";
let wrapped = wrap_commonjs(src, &PathBuf::from("/tmp/test.js"));
// The post-wrap source must NOT contain a stray `.EventEmitter`
// sitting at column 0 (or anywhere outside a valid expression).
// The simplest invariant: every `.EventEmitter` occurrence must
// be preceded by either `_req` (the inner require dispatch) or
// a non-whitespace, non-newline byte (a valid receiver).
for (i, _) in wrapped.match_indices(".EventEmitter") {
let prev_char = wrapped[..i].chars().rev().next().unwrap_or(' ');
assert!(
prev_char.is_alphanumeric()
|| prev_char == '_'
|| prev_char == '$'
|| prev_char == ')',
".EventEmitter at byte {} has invalid receiver {:?} — would parse-fail:\n{}",
i,
prev_char,
wrapped
);
}
// And it should parse cleanly through SWC.
let parsed = perry_parser::parse_typescript(&wrapped, "test.js");
assert!(
parsed.is_ok(),
"wrap output failed to parse: {:?}\nwrapped:\n{}",
parsed.err(),
wrapped
);
}

#[test]
fn extract_exports_skips_default_reserved_word() {
// Issue #845 — pino: `module.exports.default = pino` flows into the
// named-export loop and pre-fix emitted `export const default =
// _cjs.default;` (invalid syntax — `default` is a reserved word).
// The named-export path must skip reserved words; the separate
// `export default _cjs;` machinery covers the default export.
let src = "module.exports = function pino(){};\n\
module.exports.default = function pino(){};\n\
module.exports.transport = require('./transport');\n\
module.exports.version = '1.0';\n";
let names = extract_exports_from_source(src);
assert!(
!names.contains(&"default".to_string()),
"must skip `default`, got: {:?}",
names
);
assert!(names.contains(&"transport".to_string()));
assert!(names.contains(&"version".to_string()));
}

#[test]
fn wrap_pino_shape_parses_cleanly() {
// Issue #845 — pino sub-bug: end-to-end check that a pino-shaped
// CJS module produces parseable wrap output.
let src = "function pino() { return {}; }\n\
module.exports = pino;\n\
module.exports.default = pino;\n\
module.exports.pino = pino;\n\
module.exports.version = '1.0';\n";
let wrapped = wrap_commonjs(src, &PathBuf::from("/tmp/pino.js"));
assert!(
!wrapped.contains("export const default"),
"must not emit `export const default` (reserved word), got:\n{}",
wrapped
);
let parsed = perry_parser::parse_typescript(&wrapped, "pino.js");
assert!(
parsed.is_ok(),
"pino wrap failed to parse: {:?}\nwrapped:\n{}",
parsed.err(),
wrapped
);
}
}
126 changes: 119 additions & 7 deletions crates/perry/src/commands/compile/collect_modules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ pub(super) fn collect_modules(
// rewrite the source as ESM before SWC parses it. Only fires for files
// inside a `compilePackages` target — user TypeScript and ESM-shaped
// packages skip the wrap. See `cjs_wrap.rs` for the wrap shape.
let source = if is_in_compiled_pkg && super::cjs_wrap::is_commonjs(&raw_source) {
let was_cjs_wrapped = is_in_compiled_pkg && super::cjs_wrap::is_commonjs(&raw_source);
let source = if was_cjs_wrapped {
super::cjs_wrap::wrap_commonjs(&raw_source, &canonical)
} else {
raw_source
Expand Down Expand Up @@ -175,12 +176,31 @@ pub(super) fn collect_modules(
// largest time sink in the hot rebuild path on unchanged files.
let ast_module_owned: swc_ecma_ast::Module;
let ast_module: &swc_ecma_ast::Module = match parse_cache.as_deref_mut() {
Some(cache) => parse_cached(cache, &canonical, &source, filename)?,
None => {
ast_module_owned = perry_parser::parse_typescript(&source, filename)
.map_err(|e| anyhow!("Failed to parse {}: {}", canonical.display(), e))?;
&ast_module_owned
}
Some(cache) => match parse_cached(cache, &canonical, &source, filename) {
Ok(m) => m,
Err(e) => {
return Err(annotate_parse_error(
e,
&canonical,
&source,
was_cjs_wrapped,
))
}
},
None => match perry_parser::parse_typescript(&source, filename) {
Ok(m) => {
ast_module_owned = m;
&ast_module_owned
}
Err(e) => {
return Err(annotate_parse_error(
anyhow!("Failed to parse {}: {}", canonical.display(), e),
&canonical,
&source,
was_cjs_wrapped,
));
}
},
};
let source_file_path = canonical.to_string_lossy().to_string();

Expand Down Expand Up @@ -948,3 +968,95 @@ pub(super) fn collect_modules(
ctx.native_modules.insert(canonical, hir_module);
Ok(())
}

/// Issue #845: when SWC fails to parse a CJS-wrapped source, the byte
/// offset in the error refers to the wrap output, not the on-disk file
/// — so the offset is past EOF of the original. Rewrite the message to
/// say so, and (when we can parse a `(lo..hi, ...)` span out of SWC's
/// Debug-formatted error) include an excerpt of the wrap output around
/// `lo` so the user can see what choked the re-parse. Pass-through for
/// non-wrapped sources.
fn annotate_parse_error(
e: anyhow::Error,
path: &std::path::Path,
parsed_source: &str,
was_cjs_wrapped: bool,
) -> anyhow::Error {
if !was_cjs_wrapped {
return e;
}
let msg = format!("{}", e);
let span_re = regex::Regex::new(r"\((\d+)\.\.(\d+),").ok();
let offset = span_re
.as_ref()
.and_then(|re| re.captures(&msg))
.and_then(|cap| cap.get(1)?.as_str().parse::<usize>().ok());
let excerpt = offset.and_then(|lo| excerpt_around_offset(parsed_source, lo));

let mut extra = format!(
"\nnote: this file is inside a `compilePackages` target and was rewritten by Perry's CJS-to-ESM wrap before parsing. The error offset above refers to the post-wrap source ({} bytes), NOT the {}-byte file on disk. Re-run with `PERRY_DEBUG_CJS_WRAP=1` to see the full wrap output.",
parsed_source.len(),
std::fs::metadata(path)
.map(|m| m.len().to_string())
.unwrap_or_else(|_| "original".to_string()),
);
if let Some(snippet) = excerpt {
extra.push_str("\nwrap-output excerpt around the error offset:\n");
extra.push_str(&snippet);
}
anyhow::anyhow!("{}{}", msg, extra)
}

/// Render up to 2 lines of context on either side of the byte offset
/// `lo`, with the offending line highlighted by a `>>>` prefix. Returns
/// `None` when `lo` is out of range or the source has no newlines.
fn excerpt_around_offset(source: &str, lo: usize) -> Option<String> {
let lo = lo.min(source.len().saturating_sub(1));
let line_start = source[..lo].rfind('\n').map(|i| i + 1).unwrap_or(0);
let line_end = source[lo..]
.find('\n')
.map(|i| lo + i)
.unwrap_or(source.len());
let pre_line = (0..2).fold(line_start, |acc, _| {
source[..acc.saturating_sub(1)]
.rfind('\n')
.map(|i| i + 1)
.unwrap_or(0)
});
let post_line = (0..2).fold(line_end, |acc, _| {
source
.get(acc + 1..)
.and_then(|s| s.find('\n').map(|i| acc + 1 + i))
.unwrap_or(source.len())
});
let line_number_at = |off: usize| source[..off].matches('\n').count() + 1;
let mut out = String::new();
let mut cursor = pre_line;
while cursor < post_line {
let next = source[cursor..]
.find('\n')
.map(|i| cursor + i)
.unwrap_or(post_line);
let line = &source[cursor..next];
let marker = if cursor <= lo && lo <= next {
">>>"
} else {
" "
};
out.push_str(&format!(
"{} {:>5} | {}\n",
marker,
line_number_at(cursor),
line
));
if next >= post_line {
break;
}
cursor = next + 1;
}
if out.is_empty() {
None
} else {
Some(out)
}
}
Loading