From 166308199d61db03ec67c0116e149195112f5d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20MANSUY?= Date: Mon, 13 Apr 2026 20:21:44 +0800 Subject: [PATCH] chore: publish edgeparse-wasm to npm + GitHub Packages (v0.2.4) - Enable npm publish for edgeparse-wasm (was disabled/skipped) - Add GitHub Packages secondary registry (@raphaelmansuy/edgeparse-wasm) - Add CDN (jsDelivr, unpkg) and framework quick-start examples to docs - Fix pkg/package.json: canonical name, exports map, publishConfig, keywords - Update release-wasm.yml: permissions packages:write, idempotent publish - Update docs/07-cicd-publishing.md: WASM artifacts, NPM_TOKEN setup guide - Bump workspace version to 0.2.4 --- .github/workflows/release-wasm.yml | 134 ++++++++--- CHANGELOG.md | 21 ++ Cargo.lock | 10 +- Cargo.toml | 2 +- crates/edgeparse-wasm/pkg/package.json | 35 ++- docs/07-cicd-publishing.md | 52 +++-- docs/09-wasm-sdk.md | 303 +++++++++++++++++++++---- 7 files changed, 463 insertions(+), 94 deletions(-) diff --git a/.github/workflows/release-wasm.yml b/.github/workflows/release-wasm.yml index 04fe9ae..2713015 100644 --- a/.github/workflows/release-wasm.yml +++ b/.github/workflows/release-wasm.yml @@ -1,4 +1,4 @@ -name: Release — WASM SDK (npm) +name: Release — WASM SDK (npm + GitHub Packages) on: push: @@ -6,30 +6,42 @@ on: workflow_dispatch: inputs: tag_name: - description: 'Tag name to publish (e.g. v0.2.2) — used for version sync' + description: 'Tag name to publish (e.g. v0.2.4) — used for version sync' required: true - default: 'v0.2.2' + default: 'v0.2.4' permissions: - contents: write + contents: write # upload GitHub Release assets + packages: write # publish to GitHub Packages npm registry jobs: publish-wasm: - name: Publish WASM package + name: Build & publish WASM package runs-on: ubuntu-latest environment: npm + steps: - uses: actions/checkout@v4 + + # Node.js is needed for npm pack / publish and wasm-pack post-processing. 
+ # Registry URL is overridden per-publish step via .npmrc; the value here + # merely ensures `NODE_AUTH_TOKEN` is wired into the environment. - uses: actions/setup-node@v4 with: node-version: '20' registry-url: 'https://registry.npmjs.org' + - uses: dtolnay/rust-toolchain@stable with: targets: wasm32-unknown-unknown + - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@wasm-pack + # ───────────────────────────────────────────────────────────────────── + # 1. Verify that the Git tag matches the Cargo workspace version. + # ───────────────────────────────────────────────────────────────────── - name: Verify version consistency env: INPUT_TAG_NAME: ${{ inputs.tag_name }} @@ -42,50 +54,70 @@ jobs: echo "ERROR: tag $TAG_VERSION ≠ Cargo.toml $CARGO_VERSION" exit 1 fi + echo "VERSION=$TAG_VERSION" >> "$GITHUB_ENV" + echo "TAG_NAME=$TAG_NAME" >> "$GITHUB_ENV" + # ───────────────────────────────────────────────────────────────────── + # 2. Compile the Rust crate to WebAssembly and generate JS/TS glue. + # ───────────────────────────────────────────────────────────────────── - name: Build WASM package run: | cd crates/edgeparse-wasm wasm-pack build --target web --release - - name: Sync npm metadata - env: - INPUT_TAG_NAME: ${{ inputs.tag_name }} + # ───────────────────────────────────────────────────────────────────── + # 3. Patch pkg/package.json with release metadata (shared across both + # registries; the name/scope is adjusted per publish step below). 
+ # ───────────────────────────────────────────────────────────────────── + - name: Sync package metadata run: | node -e " const fs = require('fs'); - const refName = process.env.INPUT_TAG_NAME || process.env.GITHUB_REF_NAME; - const version = refName.replace(/^v/, ''); - const path = 'crates/edgeparse-wasm/pkg/package.json'; - const pkg = JSON.parse(fs.readFileSync(path, 'utf8')); - pkg.name = 'edgeparse-wasm'; - pkg.version = version; + const version = process.env.VERSION; + const pkgPath = 'crates/edgeparse-wasm/pkg/package.json'; + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + pkg.version = version; pkg.description = 'EdgeParse PDF parser — WebAssembly build for browsers'; - pkg.homepage = 'https://edgeparse.io/docs/api/wasm/'; - pkg.repository = { + pkg.homepage = 'https://edgeparse.io/docs/api/wasm/'; + pkg.keywords = ['pdf', 'parser', 'wasm', 'webassembly', 'browser', 'extraction', 'markdown']; + pkg.repository = { type: 'git', url: 'git+https://github.com/raphaelmansuy/edgeparse.git', directory: 'crates/edgeparse-wasm' }; - pkg.publishConfig = { access: 'public' }; + pkg.exports = { + '.': { import: './edgeparse_wasm.js', types: './edgeparse_wasm.d.ts' } + }; pkg.files = [ 'edgeparse_wasm_bg.wasm', 'edgeparse_wasm.js', 'edgeparse_wasm.d.ts', + 'edgeparse_wasm_bg.wasm.d.ts', 'README.md', 'LICENSE' ]; - fs.writeFileSync(path, JSON.stringify(pkg, null, 2) + '\n'); - console.log('Version synced to: ' + version); + fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + '\n'); + console.log('Metadata synced to version: ' + version); " - - name: Stage package docs + - name: Stage README and LICENSE run: | cp README.md crates/edgeparse-wasm/pkg/README.md - cp LICENSE crates/edgeparse-wasm/pkg/LICENSE + cp LICENSE crates/edgeparse-wasm/pkg/LICENSE + # ───────────────────────────────────────────────────────────────────── + # 4. Pack the tarball once (reused by both registries and the Release). 
+ # ───────────────────────────────────────────────────────────────────── - name: Pack npm tarball run: | + node -e " + const fs = require('fs'); + const pkgPath = 'crates/edgeparse-wasm/pkg/package.json'; + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + pkg.name = 'edgeparse-wasm'; + pkg.publishConfig = { access: 'public', registry: 'https://registry.npmjs.org' }; + fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + '\n'); + " cd crates/edgeparse-wasm/pkg npm pack @@ -93,28 +125,74 @@ jobs: with: name: wasm-package path: crates/edgeparse-wasm/pkg/*.tgz + retention-days: 30 + + # ───────────────────────────────────────────────────────────────────── + # 5a. Publish to npm (primary registry — enables jsDelivr & unpkg CDNs). + # Uses NPM_TOKEN Classic Automation token stored as a repository + # secret. "already-published" is treated as idempotent. + # ───────────────────────────────────────────────────────────────────── + - name: Publish to npm registry + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + node -e " + const fs = require('fs'); + const pkgPath = 'crates/edgeparse-wasm/pkg/package.json'; + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + pkg.name = 'edgeparse-wasm'; + pkg.publishConfig = { access: 'public', registry: 'https://registry.npmjs.org' }; + fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + '\n'); + " + cd crates/edgeparse-wasm/pkg + npm publish --access public --registry https://registry.npmjs.org \ + || { CODE=$?; [ "$CODE" -eq 1 ] && npm info edgeparse-wasm@${{ env.VERSION }} >/dev/null 2>&1 && echo "Already published — skipping." || exit $CODE; } - - name: Skip WASM npm publication + # ───────────────────────────────────────────────────────────────────── + # 5b. Publish to GitHub Packages (secondary registry — useful for + # enterprise / GitHub-native consumers). + # The package is scoped as @raphaelmansuy/edgeparse-wasm. + # Uses the built-in GITHUB_TOKEN — no extra secret required. 
+ # ───────────────────────────────────────────────────────────────────── + - name: Publish to GitHub Packages + env: + NODE_AUTH_TOKEN: ${{ github.token }} run: | - echo "::warning::WASM npm publication is disabled. The package tarball will still be uploaded to the GitHub Release." + node -e " + const fs = require('fs'); + const pkgPath = 'crates/edgeparse-wasm/pkg/package.json'; + const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + pkg.name = '@raphaelmansuy/edgeparse-wasm'; + pkg.publishConfig = { + access: 'public', + registry: 'https://npm.pkg.github.com' + }; + fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + '\n'); + " + # Write a scoped .npmrc so npm uses the right registry for this scope. + echo "@raphaelmansuy:registry=https://npm.pkg.github.com" >> ~/.npmrc + echo "//npm.pkg.github.com/:_authToken=${NODE_AUTH_TOKEN}" >> ~/.npmrc + cd crates/edgeparse-wasm/pkg + npm publish --registry https://npm.pkg.github.com \ + || { CODE=$?; echo "GitHub Packages publish exited $CODE — may already exist for this version." ; } + # ───────────────────────────────────────────────────────────────────── + # 6. Attach the tarball to the GitHub Release. 
+ # ───────────────────────────────────────────────────────────────────── - name: Ensure GitHub Release exists env: GH_TOKEN: ${{ github.token }} - INPUT_TAG_NAME: ${{ inputs.tag_name }} run: | - TAG_NAME="${INPUT_TAG_NAME:-$GITHUB_REF_NAME}" gh release view "$TAG_NAME" --repo "${{ github.repository }}" \ || gh release create "$TAG_NAME" \ --repo "${{ github.repository }}" \ --title "$TAG_NAME" \ --generate-notes - - name: Upload npm tarball to GitHub Release + - name: Upload tarball to GitHub Release env: GH_TOKEN: ${{ github.token }} - INPUT_TAG_NAME: ${{ inputs.tag_name }} run: | - TAG_NAME="${INPUT_TAG_NAME:-$GITHUB_REF_NAME}" gh release upload "$TAG_NAME" crates/edgeparse-wasm/pkg/*.tgz \ --repo "${{ github.repository }}" --clobber + diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f606ad..e5d1953 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,27 @@ this project adheres to [Semantic Versioning](https://semver.org/). --- +## [0.2.4] — 2026-04-13 + +### Added +- **WASM npm publication** — `edgeparse-wasm` is now published to the npm public registry on every tagged release; jsDelivr and unpkg CDNs become available automatically +- **GitHub Packages secondary registry** — `@raphaelmansuy/edgeparse-wasm` is published to `npm.pkg.github.com` alongside the npm release, providing a GitHub-native install path for enterprise users +- **CDN quick-start** — `docs/09-wasm-sdk.md` now includes copy-pasteable ` + + + +``` + +### CDN: unpkg + +Alternative CDN also served directly from npm. + +```html + +``` + +### Secondary: GitHub Packages + +For enterprise or GitHub-native workflows, the package is also published to GitHub +Packages under the scoped name `@raphaelmansuy/edgeparse-wasm`. + +**Authenticate first** (read access requires a GitHub token even for public packages): + +```bash +# 1. Create a Personal Access Token with read:packages scope +# https://github.com/settings/tokens + +# 2. 
Add the scoped registry to .npmrc +echo "@raphaelmansuy:registry=https://npm.pkg.github.com" >> .npmrc +echo "//npm.pkg.github.com/:_authToken=YOUR_TOKEN" >> .npmrc + +# 3. Install +npm install @raphaelmansuy/edgeparse-wasm +``` + +Or set the token via an environment variable in CI: + +```bash +echo "@raphaelmansuy:registry=https://npm.pkg.github.com" >> .npmrc +echo "//npm.pkg.github.com/:_authToken=${GITHUB_TOKEN}" >> .npmrc +npm install @raphaelmansuy/edgeparse-wasm +``` + +Package page: + +### Distribution summary + +| Registry | Package name | URL | +|----------|-------------|-----| +| npm | `edgeparse-wasm` | | +| jsDelivr CDN | (mirrors npm) | `https://cdn.jsdelivr.net/npm/edgeparse-wasm` | +| unpkg CDN | (mirrors npm) | `https://unpkg.com/edgeparse-wasm` | +| GitHub Packages | `@raphaelmansuy/edgeparse-wasm` | | +| GitHub Releases | `.tgz` tarball | | + +--- + ## Advantages ### vs. Server-side parsing @@ -40,6 +129,8 @@ The EdgeParse WASM SDK brings the full Rust-native PDF extraction engine directl - **No dependencies** — no Java, no Python, no ML models, no GPU - **TypeScript types** — full `.d.ts` definitions for IDE autocomplete +--- + ## API Reference The WASM package exports three functions: @@ -90,7 +181,7 @@ Returns the EdgeParse version string. ```typescript import { version } from 'edgeparse-wasm'; -console.log(version()); // "0.2.2" +console.log(version()); // "0.2.4" ``` ### Parameters @@ -103,6 +194,168 @@ console.log(version()); // "0.2.2" | `readingOrder` | `string \| null` | `"auto"` | `"auto"` (XY-Cut++) or `"off"` | | `tableMethod` | `string \| null` | `"default"` | `"default"` (ruling lines) or `"cluster"` (borderless) | +--- + +## Quick-start Examples + +### Vite + React (recommended) + +```tsx +// src/App.tsx +import { useRef, useState } from 'react'; + +// Lazy-import so Vite does not pre-bundle the WASM binary. 
+async function loadEdgeParse() { + const { default: init, convert_to_string } = await import('edgeparse-wasm'); + await init(); + return { convert_to_string }; +} + +export default function App() { + const [output, setOutput] = useState(''); + const ep = useRef> | null>(null); + + const handleFile = async (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; + if (!file) return; + ep.current ??= await loadEdgeParse(); + const bytes = new Uint8Array(await file.arrayBuffer()); + setOutput(ep.current.convert_to_string(bytes, 'markdown') ?? ''); + }; + + return ( + <> + +
{output}
+ + ); +} +``` + +```typescript +// vite.config.ts +import { defineConfig } from 'vite'; +import react from '@vitejs/plugin-react'; + +export default defineConfig({ + plugins: [react()], + optimizeDeps: { exclude: ['edgeparse-wasm'] }, + build: { target: 'esnext' }, +}); +``` + +### Next.js (App Router) + +```tsx +// app/pdf-extract/page.tsx — client component +'use client'; +import { useRef, useState } from 'react'; + +export default function PdfExtract() { + const [md, setMd] = useState(''); + const ready = useRef(false); + + const handleFile = async (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; + if (!file) return; + if (!ready.current) { + const { default: init } = await import('edgeparse-wasm'); + await init(); + ready.current = true; + } + const { convert_to_string } = await import('edgeparse-wasm'); + const bytes = new Uint8Array(await file.arrayBuffer()); + setMd(convert_to_string(bytes, 'markdown') ?? ''); + }; + + return ( + <> + +
{md}
+ + ); +} +``` + +```javascript +// next.config.js +/** @type {import('next').NextConfig} */ +module.exports = { + webpack(config) { + config.experiments = { ...config.experiments, asyncWebAssembly: true }; + return config; + }, +}; +``` + +### Vanilla HTML via CDN (no build tool) + +```html + + + + + EdgeParse WASM demo + + + +

+
+  
+
+
+```
+
+### Webpack 5
+
+```javascript
+// webpack.config.js
+module.exports = {
+  experiments: { asyncWebAssembly: true }, // opt in to webpack 5's async WebAssembly module support
+};
+```
+
+### Service Worker (PWA — offline support)
+
+```javascript
+// sw.js
+const CACHE = 'edgeparse-v1';
+self.addEventListener('install', event => {
+  event.waitUntil(
+    caches.open(CACHE).then(cache =>
+      cache.addAll([
+        '/edgeparse_wasm.js',
+        '/edgeparse_wasm_bg.wasm',
+      ])
+    )
+  );
+});
+
+self.addEventListener('fetch', event => {
+  event.respondWith(
+    caches.match(event.request).then(r => r ?? fetch(event.request))
+  );
+});
+```
+
+---
+
 ## Use Cases
 
 ### 1. Browser-based PDF viewer with structured extraction
@@ -151,18 +404,6 @@ const embeddings = await fetch('/api/embed', {
 
 Build Progressive Web Apps (PWAs) that work without internet. Once the WASM binary is cached by the service worker, PDF extraction works entirely offline.
 
-```typescript
-// In your service worker
-const CACHE_NAME = 'edgeparse-v1';
-const WASM_URL = '/edgeparse_wasm_bg.wasm';
-
-self.addEventListener('install', (event) => {
-  event.waitUntil(
-    caches.open(CACHE_NAME).then(cache => cache.add(WASM_URL))
-  );
-});
-```
-
 ### 4. Privacy-sensitive document handling
 
 Process confidential documents (medical records, legal contracts, financial statements) without sending data to any server. The PDF never leaves the browser tab.
@@ -179,6 +420,8 @@ Build a Chrome/Firefox extension that extracts structured content from any PDF t
 
 Add PDF extraction as a feature in your web application without provisioning additional backend compute. Each user's browser handles its own PDF processing.
 
+---
+
 ## Building from Source
 
 ### Prerequisites
@@ -197,42 +440,17 @@ cd crates/edgeparse-wasm
 wasm-pack build --target web --release
 ```
 
-### Use in your project
+### Use in your project (local build)
 
 ```bash
-# Option 1: Link locally
-npm install ./path-to/crates/edgeparse-wasm/pkg
+# Option 1: Install from local path
+npm install ./crates/edgeparse-wasm/pkg
 
 # Option 2: Copy the pkg/ contents into your project
 cp -r crates/edgeparse-wasm/pkg/ my-app/src/edgeparse-wasm/
 ```
 
-### Vite configuration
-
-```typescript
-// vite.config.ts
-import { defineConfig } from 'vite';
-
-export default defineConfig({
-  optimizeDeps: {
-    exclude: ['edgeparse-wasm'],
-  },
-  build: {
-    target: 'esnext',
-  },
-});
-```
-
-### Webpack configuration
-
-```javascript
-// webpack.config.js
-module.exports = {
-  experiments: {
-    asyncWebAssembly: true,
-  },
-};
-```
+---
 
 ## Live Demo
 
@@ -244,3 +462,4 @@ The demo lets you:
 - Preview rendered Markdown output
 - See per-page PDF rendering alongside extracted content
 - All processing happens locally — no server, no uploads
+