From 1e4e9c149dae84c21bb441bff82534257a5cc691 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 25 Mar 2026 12:18:22 +0100 Subject: [PATCH 1/8] gh-146416: Emscripten: Improve standard stream handling in node_entry.mjs --- Platforms/emscripten/__main__.py | 4 + Platforms/emscripten/node_entry.mjs | 6 +- Platforms/emscripten/streams.mjs | 226 ++++++++++++++++++++++++++++ configure | 2 +- configure.ac | 2 +- 5 files changed, 237 insertions(+), 3 deletions(-) create mode 100644 Platforms/emscripten/streams.mjs diff --git a/Platforms/emscripten/__main__.py b/Platforms/emscripten/__main__.py index f6d5ee82c51aa4..c1eac8005474fd 100644 --- a/Platforms/emscripten/__main__.py +++ b/Platforms/emscripten/__main__.py @@ -518,6 +518,10 @@ def configure_emscripten_python(context, working_dir): EMSCRIPTEN_DIR / "node_entry.mjs", working_dir / "node_entry.mjs" ) + shutil.copy( + EMSCRIPTEN_DIR / "streams.mjs", working_dir / "streams.mjs" + ) + node_entry = working_dir / "node_entry.mjs" exec_script = working_dir / "python.sh" exec_script.write_text( diff --git a/Platforms/emscripten/node_entry.mjs b/Platforms/emscripten/node_entry.mjs index 166df40742b7fc..9478b7714adbc8 100644 --- a/Platforms/emscripten/node_entry.mjs +++ b/Platforms/emscripten/node_entry.mjs @@ -1,5 +1,6 @@ import EmscriptenModule from "./python.mjs"; import fs from "node:fs"; +import { initializeStreams } from "./streams.mjs"; if (process?.versions?.node) { const nodeVersion = Number(process.versions.node.split(".", 1)[0]); @@ -39,6 +40,9 @@ const settings = { Object.assign(Module.ENV, process.env); delete Module.ENV.PATH; }, + onRuntimeInitialized() { + initializeStreams(Module.FS); + }, // Ensure that sys.executable, sys._base_executable, etc point to python.sh // not to this file. To properly handle symlinks, python.sh needs to compute // its own path. @@ -49,7 +53,7 @@ const settings = { try { await EmscriptenModule(settings); -} catch(e) { +} catch (e) { // Show JavaScript exception and traceback console.warn(e); // Show Python exception and traceback diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs new file mode 100644 index 00000000000000..0ad24f32207d57 --- /dev/null +++ b/Platforms/emscripten/streams.mjs @@ -0,0 +1,226 @@ +/** + * This is a pared down version of + * https://github.com/pyodide/pyodide/blob/main/src/js/streams.ts + * + * It replaces the standard streams devices that Emscripten provides with our + * own better ones. It fixes the following deficiencies: + * + * 1. The emscripten std streams always have isatty set to true. These set + * isatty to match the value for the stdin/stdout/stderr that node sees. + * 2. The emscripten std streams don't support the ttygetwinsize ioctl. If + * isatty() returns true, then these do, and it returns the actual window + * size as the OS reports it to Node. + * 3. The emscripten std streams introduce an extra layer of buffering which has + * to be flushed with fsync(). + * 4. The emscripten std streams are slow and complex because they go through a + * character-based handler layer. This is particularly awkward because both + * sides of this character based layer deal with buffers and so we need + * complex adaptors, buffering, etc on both sides. Removing this + * character-based middle layer makes everything better. + * https://github.com/emscripten-core/emscripten/blob/1aa7fb531f11e11e7ae49b75a24e1a8fe6fa4a7d/src/lib/libtty.js?plain=1#L104-L114 + * + * Ideally some version of this should go upstream to Emscripten since it is not + * in any way specific to Python. But I haven't gotten around to it yet. + */ + +import * as tty from "node:tty"; +import * as fs from "node:fs"; + +let FS; +const DEVOPS = {}; +const DEVS = {}; + +function isErrnoError(e) { + return e && typeof e === "object" && "errno" in e; +} + +const waitBuffer = new Int32Array( + new WebAssembly.Memory({ shared: true, initial: 1, maximum: 1 }).buffer, +); +function syncSleep(timeout) { + try { + Atomics.wait(waitBuffer, 0, 0, timeout); + return true; + } catch (_) { + return false; + } +} + +/** + * Calls the callback and handle node EAGAIN errors. + */ +function handleEAGAIN(cb) { + while (true) { + try { + return cb(); + } catch (e) { + if (e && e.code === "EAGAIN") { + // Presumably this means we're in node and tried to read from/write to + // an O_NONBLOCK file descriptor. Synchronously sleep for 100ms as + // requested by EAGAIN and try again. In case for some reason we fail to + // sleep, propagate the error (it will turn into an EOFError). + if (syncSleep(100)) { + continue; + } + } + throw e; + } + } +} + +function readWriteHelper(stream, cb, method) { + let nbytes; + try { + nbytes = handleEAGAIN(cb); + } catch (e) { + if (e && e.code && Module.ERRNO_CODES[e.code]) { + throw new FS.ErrnoError(Module.ERRNO_CODES[e.code]); + } + if (isErrnoError(e)) { + // the handler set an errno, propagate it + throw e; + } + console.error("Error thrown in read:"); + console.error(e); + throw new FS.ErrnoError(Module.ERRNO_CODES.EIO); + } + if (nbytes === undefined) { + // Prevent an infinite loop caused by incorrect code that doesn't return a + // value + // Maybe we should set nbytes = buffer.length here instead? + console.warn( + `${method} returned undefined; a correct implementation must return a number`, + ); + throw new FS.ErrnoError(Module.ERRNO_CODES.EIO); + } + if (nbytes !== 0) { + stream.node.timestamp = Date.now(); + } + return nbytes; +} + +function asUint8Array(arg) { + if (ArrayBuffer.isView(arg)) { + return new Uint8Array(arg.buffer, arg.byteOffset, arg.byteLength); + } else { + return new Uint8Array(arg); + } +} + +const prepareBuffer = (buffer, offset, length) => + asUint8Array(buffer).subarray(offset, offset + length); + +const TTY_OPS = { + ioctl_tiocgwinsz(stream) { + return stream.devops.ioctl_tiocgwinsz(); + }, +}; + +const stream_ops = { + open: function (stream) { + const devops = DEVOPS[stream.node.rdev]; + if (!devops) { + throw new FS.ErrnoError(Module.ERRNO_CODES.ENODEV); + } + stream.devops = devops; + stream.tty = stream.devops.isatty ? { ops: TTY_OPS, devops } : undefined; + stream.seekable = false; + }, + close: function (stream) { + // flush any pending line data + stream.stream_ops.fsync(stream); + }, + fsync: function (stream) { + const ops = stream.devops; + if (ops.fsync) { + ops.fsync(); + } + }, + read: function (stream, buffer, offset, length, pos /* ignored */) { + buffer = prepareBuffer(buffer, offset, length); + return readWriteHelper(stream, () => stream.devops.read(buffer), "read"); + }, + write: function (stream, buffer, offset, length, pos /* ignored */) { + buffer = prepareBuffer(buffer, offset, length); + return readWriteHelper(stream, () => stream.devops.write(buffer), "write"); + }, +}; + +class NodeReader { + constructor(nodeStream) { + this.nodeStream = nodeStream; + this.isatty = tty.isatty(nodeStream.fd); + } + + read(buffer) { + try { + return fs.readSync(this.nodeStream.fd, buffer); + } catch (e) { + // Platform differences: on Windows, reading EOF throws an exception, + // but on other OSes, reading EOF returns 0. Uniformize behavior by + // catching the EOF exception and returning 0. + if (e.toString().includes("EOF")) { + return 0; + } + throw e; + } + } + + fsync() { + nodeFsync(this.nodeStream.fd); + } + ioctl_tiocgwinsz() { + console.log("ioctl_tiocgwinsz"); + return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; + } +} + +class NodeWriter { + constructor(nodeStream) { + this.nodeStream = nodeStream; + this.isatty = tty.isatty(nodeStream.fd); + } + + write(buffer) { + return fs.writeSync(this.nodeStream.fd, buffer); + } + + fsync() { + nodeFsync(this.nodeStream.fd); + } + ioctl_tiocgwinsz() { + console.log("ioctl_tiocgwinsz"); + return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; + } +} + +export function initializeStreams(fsarg) { + FS = fsarg; + const major = FS.createDevice.major++; + DEVS.stdin = FS.makedev(major, 0); + DEVS.stdout = FS.makedev(major, 1); + DEVS.stderr = FS.makedev(major, 2); + + FS.registerDevice(DEVS.stdin, stream_ops); + FS.registerDevice(DEVS.stdout, stream_ops); + FS.registerDevice(DEVS.stderr, stream_ops); + + FS.unlink("/dev/stdin"); + FS.unlink("/dev/stdout"); + FS.unlink("/dev/stderr"); + + FS.mkdev("/dev/stdin", DEVS.stdin); + FS.mkdev("/dev/stdout", DEVS.stdout); + FS.mkdev("/dev/stderr", DEVS.stderr); + + DEVOPS[DEVS.stdin] = new NodeReader(process.stdin); + DEVOPS[DEVS.stdout] = new NodeWriter(process.stdout); + DEVOPS[DEVS.stderr] = new NodeWriter(process.stderr); + + FS.closeStream(0 /* stdin */); + FS.closeStream(1 /* stdout */); + FS.closeStream(2 /* stderr */); + FS.open("/dev/stdin", 0 /* O_RDONLY */); + FS.open("/dev/stdout", 1 /* O_WRONLY */); + FS.open("/dev/stderr", 1 /* O_WRONLY */); +} diff --git a/configure b/configure index 23f24d51c79e1a..45bfa768a7c673 100755 --- a/configure +++ b/configure @@ -9690,7 +9690,7 @@ fi as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY" + as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY,ERRNO_CODES" as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback,__PyEM_EMSCRIPTEN_TRAMPOLINE_OFFSET" as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" as_fn_append LINKFORSHARED " -sTEXTDECODER=2" diff --git a/configure.ac b/configure.ac index 635fce3f2e6fad..5d016d4490b5bb 100644 --- a/configure.ac +++ b/configure.ac @@ -2359,7 +2359,7 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY,ERRNO_CODES"]) AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback,__PyEM_EMSCRIPTEN_TRAMPOLINE_OFFSET"]) AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) dnl Avoid bugs in JS fallback string decoding path From 01f9307b640589f4d57f2cc70c4d4f1732165391 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 25 Mar 2026 14:48:57 +0100 Subject: [PATCH 2/8] More accurate arg name --- Platforms/emscripten/streams.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 0ad24f32207d57..d65f803d2fa669 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -111,8 +111,8 @@ const prepareBuffer = (buffer, offset, length) => asUint8Array(buffer).subarray(offset, offset + length); const TTY_OPS = { - ioctl_tiocgwinsz(stream) { - return stream.devops.ioctl_tiocgwinsz(); + ioctl_tiocgwinsz(tty) { + return tty.devops.ioctl_tiocgwinsz(); }, }; From 547310710d9f9f4c641a5f0aa6c277212ba81079 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 25 Mar 2026 15:10:05 +0100 Subject: [PATCH 3/8] Remove debug log --- Platforms/emscripten/streams.mjs | 2 -- 1 file changed, 2 deletions(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index d65f803d2fa669..4135be857de044 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -170,7 +170,6 @@ class NodeReader { nodeFsync(this.nodeStream.fd); } ioctl_tiocgwinsz() { - console.log("ioctl_tiocgwinsz"); return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; } } @@ -189,7 +188,6 @@ class NodeWriter { nodeFsync(this.nodeStream.fd); } ioctl_tiocgwinsz() { - console.log("ioctl_tiocgwinsz"); return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; } } From 34fc32aa24ecb90cf7acc3ec947a105cebe0b024 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 25 Mar 2026 15:11:23 +0100 Subject: [PATCH 4/8] More consistent whitespace --- Platforms/emscripten/streams.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 4135be857de044..83286b2148b9f5 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -169,6 +169,7 @@ class NodeReader { fsync() { nodeFsync(this.nodeStream.fd); } + ioctl_tiocgwinsz() { return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; } @@ -187,6 +188,7 @@ class NodeWriter { fsync() { nodeFsync(this.nodeStream.fd); } + ioctl_tiocgwinsz() { return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; } From 00efa5f4632d075ec4f70debb5741839e01e5ca7 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 30 Mar 2026 10:50:07 +0200 Subject: [PATCH 5/8] Add nodeFsync definition --- Platforms/emscripten/streams.mjs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 83286b2148b9f5..955127d7f678b7 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -112,7 +112,7 @@ const prepareBuffer = (buffer, offset, length) => const TTY_OPS = { ioctl_tiocgwinsz(tty) { - return tty.devops.ioctl_tiocgwinsz(); + return tty.devops.ioctl_tiocgwinsz?.(); }, }; @@ -146,6 +146,25 @@ const stream_ops = { }, }; +function nodeFsync(fd) { + try { + fs.fsyncSync(fd); + } catch (e) { + if (e?.code === "EINVAL") { + return; + } + // On Mac, calling fsync when not isatty returns ENOTSUP + // On Windows, stdin/stdout/stderr may be closed, returning EBADF or EPERM + if ( + e?.code === "ENOTSUP" || e?.code === "EBADF" || e?.code === "EPERM" + ) { + return; + } + + throw e; + } +} + class NodeReader { constructor(nodeStream) { this.nodeStream = nodeStream; @@ -169,10 +188,6 @@ class NodeReader { fsync() { nodeFsync(this.nodeStream.fd); } - - ioctl_tiocgwinsz() { - return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; - } } class NodeWriter { From 1a818342ea70648905fdb371ef335ca69ea1a106 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 30 Mar 2026 10:56:58 +0200 Subject: [PATCH 6/8] Fix: rows first then columns --- Platforms/emscripten/streams.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 955127d7f678b7..85561dc91736c6 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -205,7 +205,7 @@ class NodeWriter { } ioctl_tiocgwinsz() { - return [this.nodeStream.columns ?? 24, this.nodeStream.rows ?? 80]; + return [this.nodeStream.rows ?? 24, this.nodeStream.columns ?? 80]; } } From 245cb86ea38a3395cacf25cd7896d34107ee15a4 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 30 Mar 2026 12:05:20 +0200 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Victor Stinner --- Platforms/emscripten/streams.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 85561dc91736c6..6cb5232be45a79 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -20,7 +20,7 @@ * https://github.com/emscripten-core/emscripten/blob/1aa7fb531f11e11e7ae49b75a24e1a8fe6fa4a7d/src/lib/libtty.js?plain=1#L104-L114 * * Ideally some version of this should go upstream to Emscripten since it is not - * in any way specific to Python. But I haven't gotten around to it yet. + * in any way specific to Python. But I (Hood) haven't gotten around to it yet. */ import * as tty from "node:tty"; @@ -86,7 +86,7 @@ function readWriteHelper(stream, cb, method) { } if (nbytes === undefined) { // Prevent an infinite loop caused by incorrect code that doesn't return a - // value + // value. // Maybe we should set nbytes = buffer.length here instead? console.warn( `${method} returned undefined; a correct implementation must return a number`, From 36a307e178be395c87fe27d1ca61bea76edf4e50 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 30 Mar 2026 12:09:07 +0200 Subject: [PATCH 8/8] Sleep only 10ms --- Platforms/emscripten/streams.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Platforms/emscripten/streams.mjs b/Platforms/emscripten/streams.mjs index 6cb5232be45a79..76ad79f9247f4c 100644 --- a/Platforms/emscripten/streams.mjs +++ b/Platforms/emscripten/streams.mjs @@ -56,10 +56,10 @@ function handleEAGAIN(cb) { } catch (e) { if (e && e.code === "EAGAIN") { // Presumably this means we're in node and tried to read from/write to - // an O_NONBLOCK file descriptor. Synchronously sleep for 100ms as - // requested by EAGAIN and try again. In case for some reason we fail to - // sleep, propagate the error (it will turn into an EOFError). - if (syncSleep(100)) { + // an O_NONBLOCK file descriptor. Synchronously sleep for 10ms then try + // again. In case for some reason we fail to sleep, propagate the error + // (it will turn into an EOFError). + if (syncSleep(10)) { continue; } }