diff --git a/docs/plans/UI_LAYOUT_REFACTOR_PLAN.md b/docs/plans/UI_LAYOUT_REFACTOR_PLAN.md new file mode 100644 index 0000000..0bfd8ce --- /dev/null +++ b/docs/plans/UI_LAYOUT_REFACTOR_PLAN.md @@ -0,0 +1,119 @@ +# Outer Navigation + Unified Settings Plan + +## Summary +Add a global app shell around the existing central card: a desktop left nav, a lightweight top app bar, and a mobile bottom nav. The current chat/audio card layouts remain the main product surface, including the in-card chat history split. The outer chrome handles workspace switching and app-level actions only. + +Locked decisions: +- Chat history stays inside the card. +- Desktop left nav has `Chat`, `Voice`, `Settings`, and `GitHub`. +- Mobile bottom nav has `Chat`, `Voice`, and `Settings`. +- `Manage Data` is removed as a separate nav action and folded into `Settings`. +- `Settings` becomes a unified surface with three sections: generation/app settings, storage/data, and model cache. +- Desktop `Settings` stays dialog-based; mobile `Settings` becomes a dedicated full-screen page. +- Clicking `Voice` restores the last-used voice/audio section after first use. +- Model switchers remain inside the card, not in the outer nav or app bar. 
+ +## Key Changes +### App shell and navigation +- Introduce a new shell around the card: + - desktop left rail outside the card + - top app bar above the content region + - centered card area that preserves the current card footprint +- Desktop left rail: + - top brand block with logo + title + - workspace actions: `Chat`, `Voice` + - utility actions: `Settings`, `GitHub` + - no profile/auth UI +- Top app bar: + - workspace/context title only + - no settings button + - no model controls +- Mobile: + - replace the left rail with a 3-item bottom nav: `Chat`, `Voice`, `Settings` + - `Settings` opens a full-screen settings page + - `GitHub` moves into the settings page as an external action/footer item + +### Workspace behavior +- Keep chat behavior as-is: + - no active thread => chat landing card + - active thread => split chat card with history on the left and messages on the right +- Keep audio/voice as one workspace with three internal sections: + - `overview` + - `transcribe` + - `speak` +- Voice entry behavior: + - first visit opens `overview` + - later visits restore the last-used voice section +- Preserve current guards: + - do not allow workspace switches during chat generation + - do not allow workspace switches during recording or running voice tasks + +### Card refactor +- Keep the existing card content and split layouts visually intact. +- Remove only duplicated global chrome from inside the card: + - workspace mode tabs + - global settings button + - app-level title/branding in card headers +- Keep in-card controls that are content-specific: + - chat model switcher/status + - audio task tabs + - audio model switcher/status + - chat history rail +- Landing screens remain, but become card content only rather than owning app-level navigation. + +### Unified settings surface +- Replace the current concept of separate app-level `Settings` and `Manage Data` entry points with a single `Settings` surface. 
+- Keep the existing logic, but present it as one unified settings experience with three sections: + - `Generation` + - `Storage & Data` + - `Model Cache` +- Desktop presentation: + - one dialog containing all three sections in a single surface +- Mobile presentation: + - one full-screen settings page with the same three sections +- Remove the separate `Manage Data` dialog/page from the plan. +- Keep destructive data/model actions in the unified settings surface. + +## State and Interface Changes +- Replace the coarse screen/mode split with explicit workspace and voice-view state: + - `Workspace = "chat" | "audio"` internally, even if the UI label says `Voice` + - `AudioView = "overview" | "transcribe" | "speak"` +- Persist `lastAudioView` instead of only `lastAudioTab`. +- Keep chat view derived from thread state instead of adding a separate chat-view enum unless implementation needs one. +- Replace settings entry state with a single app-level settings surface: + - desktop dialog open/close state + - mobile settings route/view state +- Refactor settings UI composition so the existing generation/data/model controls can render in: + - desktop unified dialog + - mobile full-screen settings page + +## Test Plan +- Navigation shell: + - desktop renders left rail + top app bar + unchanged centered card + - mobile renders bottom nav with `Chat`, `Voice`, `Settings` +- Chat workspace: + - chat history remains inside the card + - switching to voice and back preserves active thread, draft, and scroll state + - chat model switcher still works from inside the card +- Voice workspace: + - first entry opens `overview` + - returning to voice restores the last-used section + - `overview` still leads into `transcribe` and `speak` + - in-card task tabs and model controls continue to work +- Settings: + - desktop opens one unified settings dialog with all three sections visible + - mobile opens one full-screen settings page with the same sections + - storage cleanup and model-cache 
actions still work from the unified settings surface +- Guard behavior: + - workspace switch is blocked during chat generation + - workspace switch is blocked during recording or running voice tasks +- Regression checks: + - model picker remains workspace-aware + - chat model changes still trigger the current new-thread confirmation when required + - card width, split layout, and glass styling remain visually stable after the shell is added + +## Assumptions and Defaults +- UI label uses `Voice`; internal code can continue using `audio` naming to minimize churn. +- The top app bar is intentionally minimal and informational. +- `GitHub` stays visible on desktop nav and moves into the mobile settings page rather than getting its own mobile nav slot. +- The unified settings surface uses stacked sections rather than separate app-level tabs/actions, to avoid splitting “settings” and “data” into separate flows. diff --git a/docs/plans/sidebar_implementation_plan.md b/docs/plans/sidebar_implementation_plan.md new file mode 100644 index 0000000..b03b930 --- /dev/null +++ b/docs/plans/sidebar_implementation_plan.md @@ -0,0 +1,55 @@ +# Global Sidebar Navigation Implementation Plan + +We will implement a Workspace Sidebar layout ("Pattern 1") as discussed, converting the application into an AI toolkit with easy switching between "Chat" and "Voice Tools". + +## User Review Required +None of the existing inner UI components or application logic will be fundamentally rewritten; instead, they will be embedded inside a top-level layout wrapper. To achieve the "Manage Data" feature, we will open the existing `ModelPickerDialog` or construct a new dedicated clear data dialog if you prefer. Please review the "Manage Data" mapping below. 
+ +## Proposed Changes + +### Configuration / Layout +#### [NEW] `src/components/AppLayout.tsx` +- Build a new responsive wrapper component (`AppLayout`) that includes: + - **Left Sidebar** (Desktop) / **Bottom Nav or Hamburger** (Mobile): + - Top: Brand Logo and Title ("Browser LLM"). + - Primary Links: `Chat` and `Voice Tools` (highlights active state). + - Bottom Actions: `Manage Data` (database icon), `Settings` (gear icon), and `GitHub` (repo link). + - **Main Content Area**: Injects the children (the screens) cleanly into the existing `.shell > .panel` structure. + +#### [MODIFY] `src/styles.css` +- Add CSS utilities for the layout structure: + - `.app-layout`: A flex row container stretching 100vh. + - `.app-sidebar`: Glassmorphic sidebar styling with fixed width. + - `.app-sidebar-nav`, `.sidebar-link`, `.sidebar-bottom-actions`. +- Ensure `.shell` gracefully adapts to the remaining width alongside the sidebar. + +### UI Screens & State Updates +#### [MODIFY] `src/App.tsx` +- **Routing Logic**: Map the main `screen` state strictly to `"chat"` and `"voice"`. + - When `chat` and thread is active -> Render `<ChatScreen />`. + - When `chat` and no thread is active -> Render `<LandingScreen />`. + - When `voice` -> Render `<AudioScreen />`. +- Wrap the main return block with the new `<AppLayout>` component. +- **Manage Data mapping**: Map the new sidebar "Manage Data" button to clear user data logic, OR open a new dialog specifically meant for managing downloaded models. *Will ask for clarification on this in open questions.* + +#### [MODIFY] `src/components/LandingScreen.tsx` +- Remove the "Try Audio" button from the landing page. It is no longer necessary as "Voice Tools" acts as a global entry point via the Sidebar. +- Clean up the header to match a pure "Start a Chat" empty state rather than pitching both chat and audio. + +#### [MODIFY] `src/components/ChatScreen.tsx` +- Remove the "Try Audio" button/header action. +- Remove "Settings" from the `ChatScreen` header since it now lives globally in the Sidebar. 
+ +#### [MODIFY] `src/components/AudioScreen.tsx` +- Remove "Try Chat" from the `AudioScreen` header since switching to chat is managed via the global Sidebar. + +## Open Questions +1. **Manage Data Behavior**: For the new "Manage Data" sidebar icon, should this open the existing `SettingsDialog` -> "Clear All Data" section, open a new dedicated screen/modal, or open the `ModelPickerDialog` to see downloaded models? How do you envision "Manage Data" working right now? +2. **Mobile View**: For small screens, should the Sidebar convert into a hamburger menu (slide out from left), or should it become a fixed Bottom Navigation bar? + +## Verification Plan +### Manual Verification +- Resize the browser window to verify the sidebar collapses/responds nicely on smaller screens. +- Click between `Chat` and `Voice Tools` to ensure the application state is maintained (e.g., chat input doesn't clear if you switch to Voice Tools and back). +- Verify the GitHub link opens an external tab. +- Validate that the existing glassmorphic cards (`.panel`) maintain their visual integrity within the new flex container. 
diff --git a/docs/research/browser-audio-model-inventory-2026-04-01.json b/docs/research/browser-audio-model-inventory-2026-04-01.json new file mode 100644 index 0000000..beb67fa --- /dev/null +++ b/docs/research/browser-audio-model-inventory-2026-04-01.json @@ -0,0 +1,5933 @@ +{ + "inventory": [ + { + "task": "stt", + "model_id": "ihanif/pashto-asr-v4", + "publisher": "ihanif", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2-bert", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 50883996919, + "repo_used_storage_mb": 50884.0, + "minimum_runtime_download_bytes": 3988, + "minimum_runtime_download_mb": 0.0, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 0.004, + "notes": "", + "model_url": "https://huggingface.co/ihanif/pashto-asr-v4" + }, + { + "task": "stt", + "model_id": "tyanfarm/aimate-asr-ONNX", + "publisher": "tyanfarm", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1262480107, + "repo_used_storage_mb": 1262.48, + "minimum_runtime_download_bytes": 9967, + "minimum_runtime_download_mb": 0.01, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 0.01, + "notes": "", + "model_url": "https://huggingface.co/tyanfarm/aimate-asr-ONNX" + }, + { + "task": "stt", + "model_id": "Aspik101/w2v-bert-2.0-polish-CV16.0", + "publisher": "Aspik101", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "wav2vec2-bert", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 7269416494, + "repo_used_storage_mb": 7269.42, + "minimum_runtime_download_bytes": 10272, + "minimum_runtime_download_mb": 0.01, + "runtime_variant": "", + "runtime_files": 
"", + "small_overhead_mb": 0.01, + "notes": "", + "model_url": "https://huggingface.co/Aspik101/w2v-bert-2.0-polish-CV16.0" + }, + { + "task": "stt", + "model_id": "mattkimcreates/wav2vec2-lv-60-espeak-cv-ft-js", + "publisher": "mattkimcreates", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 0, + "repo_used_storage_mb": 0.0, + "minimum_runtime_download_bytes": 21217, + "minimum_runtime_download_mb": 0.02, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 0.021, + "notes": "", + "model_url": "https://huggingface.co/mattkimcreates/wav2vec2-lv-60-espeak-cv-ft-js" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-tiny-ja-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 360429850, + "repo_used_storage_mb": 360.43, + "minimum_runtime_download_bytes": 3899253, + "minimum_runtime_download_mb": 3.9, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-tiny-ja-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-tiny-zh-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 360429848, + "repo_used_storage_mb": 360.43, + "minimum_runtime_download_bytes": 3899253, + "minimum_runtime_download_mb": 3.9, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": 
"https://huggingface.co/onnx-community/moonshine-tiny-zh-ONNX" + }, + { + "task": "stt", + "model_id": "Aspik101/distil-whisper-large-v3-pl", + "publisher": "Aspik101", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 10094567845, + "repo_used_storage_mb": 10094.57, + "minimum_runtime_download_bytes": 4189460, + "minimum_runtime_download_mb": 4.19, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 4.189, + "notes": "", + "model_url": "https://huggingface.co/Aspik101/distil-whisper-large-v3-pl" + }, + { + "task": "stt", + "model_id": "tel4vn-team/distil-whisper-small-en", + "publisher": "tel4vn-team", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 332297344, + "repo_used_storage_mb": 332.3, + "minimum_runtime_download_bytes": 4237427, + "minimum_runtime_download_mb": 4.24, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 4.237, + "notes": "", + "model_url": "https://huggingface.co/tel4vn-team/distil-whisper-small-en" + }, + { + "task": "stt", + "model_id": "chrislife/whisper-tiny", + "publisher": "chrislife", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 560212274, + "repo_used_storage_mb": 560.21, + "minimum_runtime_download_bytes": 4362527, + "minimum_runtime_download_mb": 4.36, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/chrislife/whisper-tiny" + }, + { + "task": "stt", + "model_id": "trandackhoa/PhoWhisper-small", + "publisher": 
"trandackhoa", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 967102863, + "repo_used_storage_mb": 967.1, + "minimum_runtime_download_bytes": 4392961, + "minimum_runtime_download_mb": 4.39, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/trandackhoa/PhoWhisper-small" + }, + { + "task": "stt", + "model_id": "sikaro/whisper-large-v3-turbo-onnx", + "publisher": "sikaro", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 3871497319, + "repo_used_storage_mb": 3871.5, + "minimum_runtime_download_bytes": 4621082, + "minimum_runtime_download_mb": 4.62, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 4.621, + "notes": "", + "model_url": "https://huggingface.co/sikaro/whisper-large-v3-turbo-onnx" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-small-cv11-french-ONNX-fp16", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 455608392, + "repo_used_storage_mb": 455.61, + "minimum_runtime_download_bytes": 5214391, + "minimum_runtime_download_mb": 5.21, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.214, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-small-cv11-french-ONNX-fp16" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v2-fp16-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": 
"transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 6521274350, + "repo_used_storage_mb": 6521.27, + "minimum_runtime_download_bytes": 5638178, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v2-fp16-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v3-fp16-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 6522764419, + "repo_used_storage_mb": 6522.76, + "minimum_runtime_download_bytes": 5638316, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v3-fp16-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v2-int4-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 1510900403, + "repo_used_storage_mb": 1510.9, + "minimum_runtime_download_bytes": 5638643, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v2-int4-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v2-int8-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + 
"parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 2534901299, + "repo_used_storage_mb": 2534.9, + "minimum_runtime_download_bytes": 5638643, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v2-int8-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v3-int4-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 1511459927, + "repo_used_storage_mb": 1511.46, + "minimum_runtime_download_bytes": 5638781, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v3-int4-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-whisper-large-v3-int8-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 2535460823, + "repo_used_storage_mb": 2535.46, + "minimum_runtime_download_bytes": 5638781, + "minimum_runtime_download_mb": 5.64, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-whisper-large-v3-int8-ov" + }, + { + "task": "stt", + "model_id": "Xenova/tiny-random-WhisperForConditionalGeneration", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 66948131, + 
"repo_used_storage_mb": 66.95, + "minimum_runtime_download_bytes": 5651334, + "minimum_runtime_download_mb": 5.65, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.39, + "notes": "", + "model_url": "https://huggingface.co/Xenova/tiny-random-WhisperForConditionalGeneration" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-medium.en-fp16-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1580180523, + "repo_used_storage_mb": 1580.18, + "minimum_runtime_download_bytes": 5686325, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.686, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-medium.en-fp16-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-medium.en-int4-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 243633555, + "repo_used_storage_mb": 243.63, + "minimum_runtime_download_bytes": 5686325, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.686, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-medium.en-int4-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-medium.en-int8-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 403577139, + "repo_used_storage_mb": 403.58, + 
"minimum_runtime_download_bytes": 5686325, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.686, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-medium.en-int8-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-small.en-fp16-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 667301577, + "repo_used_storage_mb": 667.3, + "minimum_runtime_download_bytes": 5686557, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-small.en-fp16-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-small.en-int4-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 115215153, + "repo_used_storage_mb": 115.22, + "minimum_runtime_download_bytes": 5686557, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-small.en-int4-ov" + }, + { + "task": "stt", + "model_id": "OpenVINO/distil-small.en-int8-ov", + "publisher": "OpenVINO", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 173393361, + "repo_used_storage_mb": 173.39, + "minimum_runtime_download_bytes": 5686557, + "minimum_runtime_download_mb": 5.69, + "runtime_variant": "", + "runtime_files": "", + 
"small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/OpenVINO/distil-small.en-int8-ov" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-bnb4-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 179690911, + "repo_used_storage_mb": 179.69, + "minimum_runtime_download_bytes": 5838956, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-bnb4-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 264831874, + "repo_used_storage_mb": 264.83, + "minimum_runtime_download_bytes": 5838956, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-int8-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 227828103, + "repo_used_storage_mb": 227.83, + "minimum_runtime_download_bytes": 5838956, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 
5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-int8-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-q4-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 181238599, + "repo_used_storage_mb": 181.24, + "minimum_runtime_download_bytes": 5838956, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-q4-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-uint8-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 227828169, + "repo_used_storage_mb": 227.83, + "minimum_runtime_download_bytes": 5838956, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-uint8-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-bnb4-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 259179116, + "repo_used_storage_mb": 259.18, + "minimum_runtime_download_bytes": 5839016, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + 
"small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-bnb4-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 486238787, + "repo_used_storage_mb": 486.24, + "minimum_runtime_download_bytes": 5839016, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-int8-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 337237758, + "repo_used_storage_mb": 337.24, + "minimum_runtime_download_bytes": 5839016, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-int8-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-q4-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 263306732, + "repo_used_storage_mb": 263.31, + "minimum_runtime_download_bytes": 5839016, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + 
"small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-q4-full" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-uint8-full", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 337237826, + "repo_used_storage_mb": 337.24, + "minimum_runtime_download_bytes": 5839016, + "minimum_runtime_download_mb": 5.84, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-uint8-full" + }, + { + "task": "stt", + "model_id": "Xenova/tiny-random-WhisperForConditionalGeneration_timestamped", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 87792524, + "repo_used_storage_mb": 87.79, + "minimum_runtime_download_bytes": 6351874, + "minimum_runtime_download_mb": 6.35, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/Xenova/tiny-random-WhisperForConditionalGeneration_timestamped" + }, + { + "task": "stt", + "model_id": "bofenghuang/whisper-large-v3-distil-multi4-v0.2", + "publisher": "bofenghuang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 10605248417, + "repo_used_storage_mb": 10605.25, + "minimum_runtime_download_bytes": 7687368, + 
"minimum_runtime_download_mb": 7.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 7.687, + "notes": "", + "model_url": "https://huggingface.co/bofenghuang/whisper-large-v3-distil-multi4-v0.2" + }, + { + "task": "stt", + "model_id": "bofenghuang/whisper-large-v3-distil-multi7-v0.2", + "publisher": "bofenghuang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 10608800275, + "repo_used_storage_mb": 10608.8, + "minimum_runtime_download_bytes": 7687368, + "minimum_runtime_download_mb": 7.69, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 7.687, + "notes": "", + "model_url": "https://huggingface.co/bofenghuang/whisper-large-v3-distil-multi7-v0.2" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-tiny-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 1700366996, + "repo_used_storage_mb": 1700.37, + "minimum_runtime_download_bytes": 32008035, + "minimum_runtime_download_mb": 32.01, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-tiny-ko-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 892538778, + "repo_used_storage_mb": 892.54, + "minimum_runtime_download_bytes": 32288624, + 
"minimum_runtime_download_mb": 32.29, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-tiny-ko-ONNX" + }, + { + "task": "stt", + "model_id": "bh4/moonshine-tiny-vi-ONNX", + "publisher": "bh4", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 892538784, + "repo_used_storage_mb": 892.54, + "minimum_runtime_download_bytes": 32288625, + "minimum_runtime_download_mb": 32.29, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/bh4/moonshine-tiny-vi-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-tiny-ar-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 892538784, + "repo_used_storage_mb": 892.54, + "minimum_runtime_download_bytes": 32288625, + "minimum_runtime_download_mb": 32.29, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-tiny-ar-ONNX" + }, + { + "task": "stt", + "model_id": "tahirahmed/tahir_moonshine-ONNX", + "publisher": "tahirahmed", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", 
+ "repo_used_storage_bytes": 899537184, + "repo_used_storage_mb": 899.54, + "minimum_runtime_download_bytes": 32587900, + "minimum_runtime_download_mb": 32.59, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.069, + "notes": "", + "model_url": "https://huggingface.co/tahirahmed/tahir_moonshine-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-tiny.en", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 7651671953, + "repo_used_storage_mb": 7651.67, + "minimum_runtime_download_bytes": 44458284, + "minimum_runtime_download_mb": 44.46, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 3.651, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-tiny.en" + }, + { + "task": "stt", + "model_id": "mohdasif81/whisper-tiny.en", + "publisher": "mohdasif81", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 7651671953, + "repo_used_storage_mb": 7651.67, + "minimum_runtime_download_bytes": 44458284, + "minimum_runtime_download_mb": 44.46, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 3.651, + "notes": "", + "model_url": "https://huggingface.co/mohdasif81/whisper-tiny.en" + }, + { + "task": "stt", + "model_id": "Xenova/nb-whisper-tiny-beta", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": 
"direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 2662329786, + "repo_used_storage_mb": 2662.33, + "minimum_runtime_download_bytes": 44918584, + "minimum_runtime_download_mb": 44.92, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 4.111, + "notes": "", + "model_url": "https://huggingface.co/Xenova/nb-whisper-tiny-beta" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 3554557834, + "repo_used_storage_mb": 3554.56, + "minimum_runtime_download_bytes": 45074573, + "minimum_runtime_download_mb": 45.07, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny.en_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny.en", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1310956572, + "repo_used_storage_mb": 1310.96, + "minimum_runtime_download_bytes": 45091431, + "minimum_runtime_download_mb": 45.09, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny.en" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-tiny", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + 
"library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 4726338305, + "repo_used_storage_mb": 4726.34, + "minimum_runtime_download_bytes": 45170090, + "minimum_runtime_download_mb": 45.17, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-tiny" + }, + { + "task": "stt", + "model_id": "huuquyet/PhoWhisper-tiny", + "publisher": "huuquyet", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 2660313441, + "repo_used_storage_mb": 2660.31, + "minimum_runtime_download_bytes": 45174208, + "minimum_runtime_download_mb": 45.17, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.394, + "notes": "", + "model_url": "https://huggingface.co/huuquyet/PhoWhisper-tiny" + }, + { + "task": "stt", + "model_id": "ivanthepevt/PhoWhisper-tiny-for-L-N", + "publisher": "ivanthepevt", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 2660313441, + "repo_used_storage_mb": 2660.31, + "minimum_runtime_download_bytes": 45174208, + "minimum_runtime_download_mb": 45.17, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.394, + "notes": "", + "model_url": "https://huggingface.co/ivanthepevt/PhoWhisper-tiny-for-L-N" + }, + { + "task": "stt", + "model_id": "whalesos/whisper-tiny", + 
"publisher": "whalesos", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 560212274, + "repo_used_storage_mb": 560.21, + "minimum_runtime_download_bytes": 45215202, + "minimum_runtime_download_mb": 45.22, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/whalesos/whisper-tiny" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 3554607754, + "repo_used_storage_mb": 3554.61, + "minimum_runtime_download_bytes": 45226906, + "minimum_runtime_download_mb": 45.23, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1370568912, + "repo_used_storage_mb": 1370.57, + "minimum_runtime_download_bytes": 45243764, + "minimum_runtime_download_mb": 45.24, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": 
"https://huggingface.co/onnx-community/whisper-tiny" + }, + { + "task": "stt", + "model_id": "square-zero-labs/whisper-tiny", + "publisher": "square-zero-labs", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1370568912, + "repo_used_storage_mb": 1370.57, + "minimum_runtime_download_bytes": 45243764, + "minimum_runtime_download_mb": 45.24, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/square-zero-labs/whisper-tiny" + }, + { + "task": "stt", + "model_id": "whitphx/test-transformersjs-whisper-tiny", + "publisher": "whitphx", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1513649017, + "repo_used_storage_mb": 1513.65, + "minimum_runtime_download_bytes": 45243764, + "minimum_runtime_download_mb": 45.24, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/whitphx/test-transformersjs-whisper-tiny" + }, + { + "task": "stt", + "model_id": "astronova001/whisper-kannada-tiny-ONNX", + "publisher": "astronova001", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1926840736, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57410691, + "minimum_runtime_download_mb": 57.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; 
onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.263, + "notes": "", + "model_url": "https://huggingface.co/astronova001/whisper-kannada-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "noery/output-tiny-id-ONNX", + "publisher": "noery", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1926840674, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57413961, + "minimum_runtime_download_mb": 57.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.267, + "notes": "", + "model_url": "https://huggingface.co/noery/output-tiny-id-ONNX" + }, + { + "task": "stt", + "model_id": "PierreMesure/whisper-tiny-faroese-8k-steps-100h-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840714, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57414223, + "minimum_runtime_download_mb": 57.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.267, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/whisper-tiny-faroese-8k-steps-100h-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny-ar-tashkeel-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840672, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 
57732501, + "minimum_runtime_download_mb": 57.73, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.585, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny-ar-tashkeel-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-tiny.en-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926810746, + "repo_used_storage_mb": 1926.81, + "minimum_runtime_download_bytes": 57833521, + "minimum_runtime_download_mb": 57.83, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-tiny.en-ONNX" + }, + { + "task": "stt", + "model_id": "harisnaeem/whisper-tiny.en-ONNX", + "publisher": "harisnaeem", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926812034, + "repo_used_storage_mb": 1926.81, + "minimum_runtime_download_bytes": 57834060, + "minimum_runtime_download_mb": 57.83, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/harisnaeem/whisper-tiny.en-ONNX" + }, + { + "task": "stt", + "model_id": "geexmmo/whisper-tiny.en-ONNX", + "publisher": "geexmmo", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + 
"repo_used_storage_bytes": 1926812160, + "repo_used_storage_mb": 1926.81, + "minimum_runtime_download_bytes": 57834078, + "minimum_runtime_download_mb": 57.83, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/geexmmo/whisper-tiny.en-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny-zh-HK-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840713, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986161, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny-zh-HK-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny-ar-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840671, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986190, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny-ar-ONNX" + }, + { + "task": "stt", + "model_id": "xwdznb/whisper-tiny-ONNX", + "publisher": "xwdznb", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + 
"model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840671, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986231, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/xwdznb/whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-tiny-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840755, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986244, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-tiny-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840755, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986244, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "astronova001/whisper-tiny-hindi-ONNX", + "publisher": 
"astronova001", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840664, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986295, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/astronova001/whisper-tiny-hindi-ONNX" + }, + { + "task": "stt", + "model_id": "harisnaeem/whisper-tiny-ONNX", + "publisher": "harisnaeem", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926842001, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986777, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/harisnaeem/whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "landazur/whisper-tiny-ONNX", + "publisher": "landazur", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926842127, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986796, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/landazur/whisper-tiny-ONNX" + }, + { + "task": 
"stt", + "model_id": "PierreMesure/whisper-tiny-finnish-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840753, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57986812, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/whisper-tiny-finnish-ONNX" + }, + { + "task": "stt", + "model_id": "PierreMesure/kb-whisper-tiny-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840693, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57989058, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/kb-whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "PierreMesure/nb-whisper-tiny-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 1926840703, + "repo_used_storage_mb": 1926.84, + "minimum_runtime_download_bytes": 57989231, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + 
"notes": "", + "model_url": "https://huggingface.co/PierreMesure/nb-whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kb-whisper-tiny-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 2133770878, + "repo_used_storage_mb": 2133.77, + "minimum_runtime_download_bytes": 57991419, + "minimum_runtime_download_mb": 57.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kb-whisper-tiny-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-base-ja-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "61M", + "repo_used_storage_bytes": 1762647120, + "repo_used_storage_mb": 1762.65, + "minimum_runtime_download_bytes": 64318159, + "minimum_runtime_download_mb": 64.32, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-base-ja-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-base-zh-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "61M", + "repo_used_storage_bytes": 1762647120, + "repo_used_storage_mb": 1762.65, + "minimum_runtime_download_bytes": 64318159, + "minimum_runtime_download_mb": 64.32, + "runtime_variant": "q4f16", + 
"runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-base-zh-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-base-ko-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "61M", + "repo_used_storage_bytes": 1825608022, + "repo_used_storage_mb": 1825.61, + "minimum_runtime_download_bytes": 64318160, + "minimum_runtime_download_mb": 64.32, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-base-ko-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/wav2vec2-base-960h-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "95M", + "repo_used_storage_bytes": 998274997, + "repo_used_storage_mb": 998.27, + "minimum_runtime_download_bytes": 66442366, + "minimum_runtime_download_mb": 66.44, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wav2vec2-base-960h-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/wav2vec2-base-10k-voxpopuli-ft-pl-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 998342603, + "repo_used_storage_mb": 998.34, + 
"minimum_runtime_download_bytes": 66446569, + "minimum_runtime_download_mb": 66.45, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wav2vec2-base-10k-voxpopuli-ft-pl-ONNX" + }, + { + "task": "stt", + "model_id": "therajasekhar/swecha-gonthuka-asr-ONNX", + "publisher": "therajasekhar", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 998665625, + "repo_used_storage_mb": 998.67, + "minimum_runtime_download_bytes": 66468867, + "minimum_runtime_download_mb": 66.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.01, + "notes": "", + "model_url": "https://huggingface.co/therajasekhar/swecha-gonthuka-asr-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/wav2vec2-base-960h", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "95M", + "repo_used_storage_bytes": 1852650662, + "repo_used_storage_mb": 1852.65, + "minimum_runtime_download_bytes": 66478024, + "minimum_runtime_download_mb": 66.48, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": "https://huggingface.co/Xenova/wav2vec2-base-960h" + }, + { + "task": "stt", + "model_id": "Xenova/unispeech-sat-base-100h-libri-ft", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "unispeech-sat", + "support_level": "direct_pipeline", + "parameter_count_estimate": "95M", + "repo_used_storage_bytes": 998715341, + "repo_used_storage_mb": 998.72, + 
"minimum_runtime_download_bytes": 66497361, + "minimum_runtime_download_mb": 66.5, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/Xenova/unispeech-sat-base-100h-libri-ft" + }, + { + "task": "stt", + "model_id": "onnx-community/moonshine-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "61M", + "repo_used_storage_bytes": 1941439908, + "repo_used_storage_mb": 1941.44, + "minimum_runtime_download_bytes": 66814751, + "minimum_runtime_download_mb": 66.81, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/moonshine-base-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-base.en", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 7581809457, + "repo_used_storage_mb": 7581.81, + "minimum_runtime_download_bytes": 80491276, + "minimum_runtime_download_mb": 80.49, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 3.651, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-base.en" + }, + { + "task": "stt", + "model_id": "Xenova/nb-whisper-base-beta", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 4243906308, + 
"repo_used_storage_mb": 4243.91, + "minimum_runtime_download_bytes": 80951836, + "minimum_runtime_download_mb": 80.95, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 4.111, + "notes": "", + "model_url": "https://huggingface.co/Xenova/nb-whisper-base-beta" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 5868637080, + "repo_used_storage_mb": 5868.64, + "minimum_runtime_download_bytes": 81118852, + "minimum_runtime_download_mb": 81.12, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base.en_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base.en", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 2172400430, + "repo_used_storage_mb": 2172.4, + "minimum_runtime_download_bytes": 81141606, + "minimum_runtime_download_mb": 81.14, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base.en" + }, + { + "task": "stt", + "model_id": "Sagicc/whisper-base-sr-onnx", + "publisher": "Sagicc", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": 
"direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3562353394, + "repo_used_storage_mb": 3562.35, + "minimum_runtime_download_bytes": 81193254, + "minimum_runtime_download_mb": 81.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/Sagicc/whisper-base-sr-onnx" + }, + { + "task": "stt", + "model_id": "huuquyet/PhoWhisper-base", + "publisher": "huuquyet", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 4376734148, + "repo_used_storage_mb": 4376.73, + "minimum_runtime_download_bytes": 81193277, + "minimum_runtime_download_mb": 81.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/huuquyet/PhoWhisper-base" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-base", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 7581904155, + "repo_used_storage_mb": 7581.9, + "minimum_runtime_download_bytes": 81203344, + "minimum_runtime_download_mb": 81.2, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-base" + }, + { + "task": "stt", + "model_id": "laosix/whisper-base", + "publisher": "laosix", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + 
"model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 7581904155, + "repo_used_storage_mb": 7581.9, + "minimum_runtime_download_bytes": 81203344, + "minimum_runtime_download_mb": 81.2, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/laosix/whisper-base" + }, + { + "task": "stt", + "model_id": "bd4sur/whisper-base-fork", + "publisher": "bd4sur", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 1021389792, + "repo_used_storage_mb": 1021.39, + "minimum_runtime_download_bytes": 81270976, + "minimum_runtime_download_mb": 81.27, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/bd4sur/whisper-base-fork" + }, + { + "task": "stt", + "model_id": "whalesos/whisper-base", + "publisher": "whalesos", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 1021389792, + "repo_used_storage_mb": 1021.39, + "minimum_runtime_download_bytes": 81270976, + "minimum_runtime_download_mb": 81.27, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/whalesos/whisper-base" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base_timestamped", + "publisher": "onnx-community", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 5868703564, + "repo_used_storage_mb": 5868.7, + "minimum_runtime_download_bytes": 81271463, + "minimum_runtime_download_mb": 81.27, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 2277152799, + "repo_used_storage_mb": 2277.15, + "minimum_runtime_download_bytes": 81294217, + "minimum_runtime_download_mb": 81.29, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base" + }, + { + "task": "stt", + "model_id": "square-zero-labs/whisper-base", + "publisher": "square-zero-labs", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 2277152799, + "repo_used_storage_mb": 2277.15, + "minimum_runtime_download_bytes": 81294217, + "minimum_runtime_download_mb": 81.29, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/square-zero-labs/whisper-base" + }, + { + "task": "stt", + "model_id": 
"PierreMesure/whisper-base-faroese-8k-steps-100h-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526823, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 87683442, + "minimum_runtime_download_mb": 87.68, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.267, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/whisper-base-faroese-8k-steps-100h-ONNX" + }, + { + "task": "stt", + "model_id": "bichnhan2701/PhoWhisper-base-ONNX", + "publisher": "bichnhan2701", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526822, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 87684132, + "minimum_runtime_download_mb": 87.68, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.268, + "notes": "", + "model_url": "https://huggingface.co/bichnhan2701/PhoWhisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/PhoWhisper-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019528362, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 87684886, + "minimum_runtime_download_mb": 87.68, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 
5.268, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/PhoWhisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-base.en-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019486901, + "repo_used_storage_mb": 3019.49, + "minimum_runtime_download_bytes": 88102335, + "minimum_runtime_download_mb": 88.1, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-base.en-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base.en-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019486901, + "repo_used_storage_mb": 3019.49, + "minimum_runtime_download_bytes": 88102335, + "minimum_runtime_download_mb": 88.1, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base.en-ONNX" + }, + { + "task": "stt", + "model_id": "harisnaeem/whisper-base.en-ONNX", + "publisher": "harisnaeem", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019488441, + "repo_used_storage_mb": 3019.49, + "minimum_runtime_download_bytes": 88103088, + "minimum_runtime_download_mb": 88.1, + "runtime_variant": "q4f16", + 
"runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/harisnaeem/whisper-base.en-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/ipa-whisper-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526816, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88254521, + "minimum_runtime_download_mb": 88.25, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.838, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/ipa-whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base-ar-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526821, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88255413, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base-ar-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-base-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526818, + "repo_used_storage_mb": 3019.53, + 
"minimum_runtime_download_bytes": 88255458, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019526818, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88255458, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "harisnaeem/whisper-base-ONNX", + "publisher": "harisnaeem", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019528358, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88256211, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/harisnaeem/whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "PierreMesure/nb-whisper-base-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": 
"74M", + "repo_used_storage_bytes": 3019526799, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88258451, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/nb-whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/nb-whisper-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3019528339, + "repo_used_storage_mb": 3019.53, + "minimum_runtime_download_bytes": 88259205, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/nb-whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kb-whisper-base-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 3356127998, + "repo_used_storage_mb": 3356.13, + "minimum_runtime_download_bytes": 88263739, + "minimum_runtime_download_mb": 88.26, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kb-whisper-base-ONNX" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-bnb4-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 94632781, + "repo_used_storage_mb": 94.63, + "minimum_runtime_download_bytes": 100471737, + "minimum_runtime_download_mb": 100.47, + "runtime_variant": "bnb4", + "runtime_files": "onnx/encoder_model_bnb4.onnx; onnx/decoder_model_merged_bnb4.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-bnb4-merged" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-q4-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 95664285, + "repo_used_storage_mb": 95.66, + "minimum_runtime_download_bytes": 101503241, + "minimum_runtime_download_mb": 101.5, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-q4-merged" + }, + { + "task": "stt", + "model_id": "markusingvarsson/whisper-test", + "publisher": "markusingvarsson", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 677204967, + "repo_used_storage_mb": 677.2, + "minimum_runtime_download_bytes": 108119707, + "minimum_runtime_download_mb": 108.12, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": 
"https://huggingface.co/markusingvarsson/whisper-test" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-int8-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 119756935, + "repo_used_storage_mb": 119.76, + "minimum_runtime_download_bytes": 125595891, + "minimum_runtime_download_mb": 125.6, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-int8-merged" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-uint8-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 119756978, + "repo_used_storage_mb": 119.76, + "minimum_runtime_download_bytes": 125595934, + "minimum_runtime_download_mb": 125.6, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-uint8-merged" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-bnb4-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 139515817, + "repo_used_storage_mb": 139.52, + "minimum_runtime_download_bytes": 145354833, + 
"minimum_runtime_download_mb": 145.35, + "runtime_variant": "bnb4", + "runtime_files": "onnx/encoder_model_bnb4.onnx; onnx/decoder_model_merged_bnb4.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-bnb4-merged" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-q4-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 142267131, + "repo_used_storage_mb": 142.27, + "minimum_runtime_download_bytes": 148106147, + "minimum_runtime_download_mb": 148.11, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-q4-merged" + }, + { + "task": "stt", + "model_id": "wmoto-ai/moonshine-tiny-ja-ONNX", + "publisher": "wmoto-ai", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "moonshine", + "support_level": "direct_pipeline", + "parameter_count_estimate": "27M", + "repo_used_storage_bytes": 963652862, + "repo_used_storage_mb": 963.65, + "minimum_runtime_download_bytes": 151174085, + "minimum_runtime_download_mb": 151.17, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.899, + "notes": "", + "model_url": "https://huggingface.co/wmoto-ai/moonshine-tiny-ja-ONNX" + }, + { + "task": "stt", + "model_id": "eventhorizon0/tarteel-ai-onnx-whisper-base-ar-quran", + "publisher": "eventhorizon0", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": 
"direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 2623969861, + "repo_used_storage_mb": 2623.97, + "minimum_runtime_download_bytes": 152301236, + "minimum_runtime_download_mb": 152.3, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 10.124, + "notes": "", + "model_url": "https://huggingface.co/eventhorizon0/tarteel-ai-onnx-whisper-base-ar-quran" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "39M", + "repo_used_storage_bytes": 151388750, + "repo_used_storage_mb": 151.39, + "minimum_runtime_download_bytes": 157227706, + "minimum_runtime_download_mb": 157.23, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-tiny-ONNX-multi-merged" + }, + { + "task": "stt", + "model_id": "onnx-community/distil-small.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 10415077613, + "repo_used_storage_mb": 10415.08, + "minimum_runtime_download_bytes": 176240895, + "minimum_runtime_download_mb": 176.24, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.247, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/distil-small.en_timestamped" + }, + { + "task": "stt", + "model_id": "distil-whisper/distil-small.en", + 
"publisher": "distil-whisper", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 5589559467, + "repo_used_storage_mb": 5589.56, + "minimum_runtime_download_bytes": 176312944, + "minimum_runtime_download_mb": 176.31, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.237, + "notes": "", + "model_url": "https://huggingface.co/distil-whisper/distil-small.en" + }, + { + "task": "stt", + "model_id": "nsarang/distil-whisper-small.en", + "publisher": "nsarang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 3781701621, + "repo_used_storage_mb": 3781.7, + "minimum_runtime_download_bytes": 176315543, + "minimum_runtime_download_mb": 176.32, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.247, + "notes": "", + "model_url": "https://huggingface.co/nsarang/distil-whisper-small.en" + }, + { + "task": "stt", + "model_id": "onnx-community/distil-small.en", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3781701621, + "repo_used_storage_mb": 3781.7, + "minimum_runtime_download_bytes": 176315543, + "minimum_runtime_download_mb": 176.32, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.247, + "notes": "", + "model_url": 
"https://huggingface.co/onnx-community/distil-small.en" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-int8-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 182073479, + "repo_used_storage_mb": 182.07, + "minimum_runtime_download_bytes": 187912495, + "minimum_runtime_download_mb": 187.91, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-int8-merged" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-uint8-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 182073524, + "repo_used_storage_mb": 182.07, + "minimum_runtime_download_bytes": 187912540, + "minimum_runtime_download_mb": 187.91, + "runtime_variant": "uint8", + "runtime_files": "onnx/encoder_model_uint8.onnx; onnx/decoder_model_merged_uint8.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-uint8-merged" + }, + { + "task": "stt", + "model_id": "onnx-community/mms-300m-1130-forced-aligner-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "300M", + "repo_used_storage_bytes": 3189518033, + "repo_used_storage_mb": 3189.52, + "minimum_runtime_download_bytes": 196709898, + 
"minimum_runtime_download_mb": 196.71, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/mms-300m-1130-forced-aligner-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/wav2vec2-large-xlsr-english-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3189538041, + "repo_used_storage_mb": 3189.54, + "minimum_runtime_download_bytes": 196711009, + "minimum_runtime_download_mb": 196.71, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wav2vec2-large-xlsr-english-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/wav2vec2-large-xlsr-53-russian-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3189598092, + "repo_used_storage_mb": 3189.6, + "minimum_runtime_download_bytes": 196714875, + "minimum_runtime_download_mb": 196.71, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wav2vec2-large-xlsr-53-russian-ONNX" + }, + { + "task": "stt", + "model_id": "pteacher/wav2vec2-ky-hiva", + "publisher": "pteacher", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 4452094980, + "repo_used_storage_mb": 4452.09, + 
"minimum_runtime_download_bytes": 196717296, + "minimum_runtime_download_mb": 196.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": "https://huggingface.co/pteacher/wav2vec2-ky-hiva" + }, + { + "task": "stt", + "model_id": "PierreMesure/roest-315m-onnx", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "315M", + "repo_used_storage_bytes": 3189668160, + "repo_used_storage_mb": 3189.67, + "minimum_runtime_download_bytes": 196718214, + "minimum_runtime_download_mb": 196.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/roest-315m-onnx" + }, + { + "task": "stt", + "model_id": "onnx-community/indicwav2vec-hindi-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3189888313, + "repo_used_storage_mb": 3189.89, + "minimum_runtime_download_bytes": 196732650, + "minimum_runtime_download_mb": 196.73, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/indicwav2vec-hindi-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/hubert-large-ls960-ft", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "hubert", + "support_level": "direct_pipeline", + "parameter_count_estimate": "317M", + "repo_used_storage_bytes": 3190936990, + "repo_used_storage_mb": 3190.94, + "minimum_runtime_download_bytes": 196754636, + 
"minimum_runtime_download_mb": 196.75, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/Xenova/hubert-large-ls960-ft" + }, + { + "task": "stt", + "model_id": "Xenova/wav2vec2-large-xlsr-53-english", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "317M", + "repo_used_storage_bytes": 5402721752, + "repo_used_storage_mb": 5402.72, + "minimum_runtime_download_bytes": 196780872, + "minimum_runtime_download_mb": 196.78, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": "https://huggingface.co/Xenova/wav2vec2-large-xlsr-53-english" + }, + { + "task": "stt", + "model_id": "onnx-community/wav2vec2-lv-60-espeak-cv-ft-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3193130956, + "repo_used_storage_mb": 3193.13, + "minimum_runtime_download_bytes": 196914751, + "minimum_runtime_download_mb": 196.91, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.003, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wav2vec2-lv-60-espeak-cv-ft-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-small.en", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 17761354629, + "repo_used_storage_mb": 17761.35, + "minimum_runtime_download_bytes": 203874295, + 
"minimum_runtime_download_mb": 203.87, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 3.651, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-small.en" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 8088354811, + "repo_used_storage_mb": 8088.35, + "minimum_runtime_download_bytes": 204105144, + "minimum_runtime_download_mb": 204.11, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.39, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-small", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 19030107322, + "repo_used_storage_mb": 19030.11, + "minimum_runtime_download_bytes": 204587038, + "minimum_runtime_download_mb": 204.59, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-small" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-cv11-french-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283150, + 
"repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204849463, + "minimum_runtime_download_mb": 204.85, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.214, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-cv11-french-ONNX" + }, + { + "task": "stt", + "model_id": "Rank002/whisper-hindi-small-ONNX", + "publisher": "Rank002", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 7218283157, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204898479, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.263, + "notes": "", + "model_url": "https://huggingface.co/Rank002/whisper-hindi-small-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-hindi-small-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 7218283157, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204898479, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.263, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-hindi-small-ONNX" + }, + { + "task": "stt", + "model_id": "nzhenev/whisper-small-ru-1k-steps-ONNX", + "publisher": "nzhenev", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": 
"whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283155, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204899027, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.264, + "notes": "", + "model_url": "https://huggingface.co/nzhenev/whisper-small-ru-1k-steps-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-cantonese-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283161, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204899667, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.265, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-cantonese-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-tonga-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283155, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204902094, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.267, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-tonga-ONNX" + }, + { + "task": "stt", + "model_id": 
"PierreMesure/whisper-small-faroese-5k-steps-100h-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283153, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 204902140, + "minimum_runtime_download_mb": 204.9, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.267, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/whisper-small-faroese-5k-steps-100h-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-small.en-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218223246, + "repo_used_storage_mb": 7218.22, + "minimum_runtime_download_bytes": 205320858, + "minimum_runtime_download_mb": 205.32, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-small.en-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-ar-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283152, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205473979, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", 
+ "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-ar-ONNX" + }, + { + "task": "stt", + "model_id": "Chillarmo/whisper-small-hy-2-ONNX", + "publisher": "Chillarmo", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283151, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205474114, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/Chillarmo/whisper-small-hy-2-ONNX" + }, + { + "task": "stt", + "model_id": "willopcbeta/whisper-small-ONNX", + "publisher": "willopcbeta", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283155, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205474151, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/willopcbeta/whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "Lasisi/whisper-small-ONNX", + "publisher": "Lasisi", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283155, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205474155, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + 
"runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/Lasisi/whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-small-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283155, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205474155, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "astronova001/whisper-small-kannada-ONNX", + "publisher": "astronova001", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283150, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205474200, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/astronova001/whisper-small-kannada-ONNX" + }, + { + "task": "stt", + "model_id": "noery/whisper-small-id-cv17-ONNX", + "publisher": "noery", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283150, + "repo_used_storage_mb": 7218.28, + 
"minimum_runtime_download_bytes": 205474751, + "minimum_runtime_download_mb": 205.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/noery/whisper-small-id-cv17-ONNX" + }, + { + "task": "stt", + "model_id": "PierreMesure/nb-whisper-small-ONNX", + "publisher": "PierreMesure", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218283146, + "repo_used_storage_mb": 7218.28, + "minimum_runtime_download_bytes": 205477269, + "minimum_runtime_download_mb": 205.48, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/PierreMesure/nb-whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/nb-whisper-small-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218286849, + "repo_used_storage_mb": 7218.29, + "minimum_runtime_download_bytes": 205478864, + "minimum_runtime_download_mb": 205.48, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/nb-whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small-ita-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": 
"direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 7218762384, + "repo_used_storage_mb": 7218.76, + "minimum_runtime_download_bytes": 205489680, + "minimum_runtime_download_mb": 205.49, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.842, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small-ita-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kb-whisper-small-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 8087009773, + "repo_used_storage_mb": 8087.01, + "minimum_runtime_download_bytes": 205490568, + "minimum_runtime_download_mb": 205.49, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kb-whisper-small-ONNX" + }, + { + "task": "stt", + "model_id": "cmaree/Bagus-whisper-small-id-onnx", + "publisher": "cmaree", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 2465380725, + "repo_used_storage_mb": 2465.38, + "minimum_runtime_download_bytes": 205591829, + "minimum_runtime_download_mb": 205.59, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.957, + "notes": "", + "model_url": "https://huggingface.co/cmaree/Bagus-whisper-small-id-onnx" + }, + { + "task": "stt", + "model_id": "chiyo123/whisper-small-tonga2", + "publisher": "chiyo123", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 2687385076, + "repo_used_storage_mb": 2687.39, + "minimum_runtime_download_bytes": 252711828, + "minimum_runtime_download_mb": 252.71, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.822, + "notes": "", + "model_url": "https://huggingface.co/chiyo123/whisper-small-tonga2" + }, + { + "task": "stt", + "model_id": "chiyo123/whisper-small-bemba2", + "publisher": "chiyo123", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 2687385079, + "repo_used_storage_mb": 2687.39, + "minimum_runtime_download_bytes": 252711830, + "minimum_runtime_download_mb": 252.71, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.822, + "notes": "", + "model_url": "https://huggingface.co/chiyo123/whisper-small-bemba2" + }, + { + "task": "stt", + "model_id": "Xenova/nb-whisper-small-beta", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 6013694456, + "repo_used_storage_mb": 6013.69, + "minimum_runtime_download_bytes": 253216888, + "minimum_runtime_download_mb": 253.22, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.111, + "notes": "", + "model_url": "https://huggingface.co/Xenova/nb-whisper-small-beta" + }, + { + "task": 
"stt", + "model_id": "Sagicc/whisper-small-sr-onnx", + "publisher": "Sagicc", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 2687383943, + "repo_used_storage_mb": 2687.38, + "minimum_runtime_download_bytes": 253282110, + "minimum_runtime_download_mb": 253.28, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/Sagicc/whisper-small-sr-onnx" + }, + { + "task": "stt", + "model_id": "huuquyet/PhoWhisper-small", + "publisher": "huuquyet", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 12712005264, + "repo_used_storage_mb": 12712.01, + "minimum_runtime_download_bytes": 253282129, + "minimum_runtime_download_mb": 253.28, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/huuquyet/PhoWhisper-small" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 15534197325, + "repo_used_storage_mb": 15534.2, + "minimum_runtime_download_bytes": 253283358, + "minimum_runtime_download_mb": 253.28, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + 
"model_url": "https://huggingface.co/onnx-community/whisper-small.en_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small.en", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 5792884777, + "repo_used_storage_mb": 5792.88, + "minimum_runtime_download_bytes": 253324115, + "minimum_runtime_download_mb": 253.32, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.248, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small.en" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 15534297144, + "repo_used_storage_mb": 15534.3, + "minimum_runtime_download_bytes": 253435866, + "minimum_runtime_download_mb": 253.44, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small_timestamped" + }, + { + "task": "stt", + "model_id": "whalesos/whisper-small", + "publisher": "whalesos", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 3173298072, + "repo_used_storage_mb": 3173.3, + "minimum_runtime_download_bytes": 253468391, + "minimum_runtime_download_mb": 253.47, + "runtime_variant": "quantized", + "runtime_files": 
"onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/whalesos/whisper-small" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-small", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 6101504725, + "repo_used_storage_mb": 6101.5, + "minimum_runtime_download_bytes": 253476623, + "minimum_runtime_download_mb": 253.48, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.4, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-small" + }, + { + "task": "stt", + "model_id": "huggingworld/whisper-base", + "publisher": "huggingworld", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 290989606, + "repo_used_storage_mb": 290.99, + "minimum_runtime_download_bytes": 293759168, + "minimum_runtime_download_mb": 293.76, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 2.77, + "notes": "", + "model_url": "https://huggingface.co/huggingworld/whisper-base" + }, + { + "task": "stt", + "model_id": "EvgenyShivchenkoUIT/whisper-base-ONNX-multi-merged", + "publisher": "EvgenyShivchenkoUIT", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "74M", + "repo_used_storage_bytes": 290881921, + "repo_used_storage_mb": 290.88, + "minimum_runtime_download_bytes": 296720937, + 
"minimum_runtime_download_mb": 296.72, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/EvgenyShivchenkoUIT/whisper-base-ONNX-multi-merged" + }, + { + "task": "stt", + "model_id": "BricksDisplay/whisper-small-intel", + "publisher": "BricksDisplay", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 2503812824, + "repo_used_storage_mb": 2503.81, + "minimum_runtime_download_bytes": 327859543, + "minimum_runtime_download_mb": 327.86, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 3.925, + "notes": "", + "model_url": "https://huggingface.co/BricksDisplay/whisper-small-intel" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-turbo-fast-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 12359054722, + "repo_used_storage_mb": 12359.05, + "minimum_runtime_download_bytes": 389628465, + "minimum_runtime_download_mb": 389.63, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-turbo-fast-ONNX" + }, + { + "task": "stt", + "model_id": "JaeyeongYang/whisper-base-komixv2-onnx", + "publisher": "JaeyeongYang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + 
"parameter_count_estimate": "74M", + "repo_used_storage_bytes": 537052615, + "repo_used_storage_mb": 537.05, + "minimum_runtime_download_bytes": 402648866, + "minimum_runtime_download_mb": 402.65, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/JaeyeongYang/whisper-base-komixv2-onnx" + }, + { + "task": "stt", + "model_id": "distil-whisper/distil-medium.en", + "publisher": "distil-whisper", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 13884077762, + "repo_used_storage_mb": 13884.08, + "minimum_runtime_download_bytes": 406350133, + "minimum_runtime_download_mb": 406.35, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.036, + "notes": "", + "model_url": "https://huggingface.co/distil-whisper/distil-medium.en" + }, + { + "task": "stt", + "model_id": "onnx-community/distil-medium.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 19086756437, + "repo_used_storage_mb": 19086.76, + "minimum_runtime_download_bytes": 406491222, + "minimum_runtime_download_mb": 406.49, + "runtime_variant": "int8", + "runtime_files": "onnx/encoder_model_int8.onnx; onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 4.247, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/distil-medium.en_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-turbo-ONNX", + "publisher": "onnx-community", + 
"pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 12948632694, + "repo_used_storage_mb": 12948.63, + "minimum_runtime_download_bytes": 423467583, + "minimum_runtime_download_mb": 423.47, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-turbo-acc-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 13405668863, + "repo_used_storage_mb": 13405.67, + "minimum_runtime_download_bytes": 449626804, + "minimum_runtime_download_mb": 449.63, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-turbo-acc-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/wav2vec2-bert-CV16-en", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2-bert", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 6660405561, + "repo_used_storage_mb": 6660.41, + "minimum_runtime_download_bytes": 470749748, + "minimum_runtime_download_mb": 470.75, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": 
"https://huggingface.co/Xenova/wav2vec2-bert-CV16-en" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-medium.en", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 44707406991, + "repo_used_storage_mb": 44707.41, + "minimum_runtime_download_bytes": 521714011, + "minimum_runtime_download_mb": 521.71, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 3.652, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-medium.en" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-ben", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 20400587550, + "repo_used_storage_mb": 20400.59, + "minimum_runtime_download_bytes": 522071434, + "minimum_runtime_download_mb": 522.07, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.391, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-ben" + }, + { + "task": "stt", + "model_id": "Xenova/nb-whisper-medium-beta", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 27819108137, + "repo_used_storage_mb": 27819.11, + "minimum_runtime_download_bytes": 522175655, + "minimum_runtime_download_mb": 522.18, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 
4.111, + "notes": "", + "model_url": "https://huggingface.co/Xenova/nb-whisper-medium-beta" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-medium", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 44707558565, + "repo_used_storage_mb": 44707.56, + "minimum_runtime_download_bytes": 522427015, + "minimum_runtime_download_mb": 522.43, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-medium" + }, + { + "task": "stt", + "model_id": "urroxyz/whisper-medium.en_timestamped", + "publisher": "urroxyz", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560128821, + "repo_used_storage_mb": 18560.13, + "minimum_runtime_download_bytes": 522567225, + "minimum_runtime_download_mb": 522.57, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.687, + "notes": "", + "model_url": "https://huggingface.co/urroxyz/whisper-medium.en_timestamped" + }, + { + "task": "stt", + "model_id": "nicky48/whisper-medium-ONNX", + "publisher": "nicky48", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560201528, + "repo_used_storage_mb": 18560.2, + "minimum_runtime_download_bytes": 522717671, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": 
"onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/nicky48/whisper-medium-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-medium-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560201528, + "repo_used_storage_mb": 18560.2, + "minimum_runtime_download_bytes": 522717672, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-medium-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-medium-fr-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560201527, + "repo_used_storage_mb": 18560.2, + "minimum_runtime_download_bytes": 522717692, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-medium-fr-ONNX" + }, + { + "task": "stt", + "model_id": "urroxyz/whisper-medium_timestamped", + "publisher": "urroxyz", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560208700, + "repo_used_storage_mb": 18560.21, + 
"minimum_runtime_download_bytes": 522720904, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/urroxyz/whisper-medium_timestamped" + }, + { + "task": "stt", + "model_id": "flackzz/whisper-medium-ONNX", + "publisher": "flackzz", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560208700, + "repo_used_storage_mb": 18560.21, + "minimum_runtime_download_bytes": 522720905, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/flackzz/whisper-medium-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-medium_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 24378207777, + "repo_used_storage_mb": 24378.21, + "minimum_runtime_download_bytes": 522724683, + "minimum_runtime_download_mb": 522.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.843, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-medium_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/kb-whisper-medium-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", 
+ "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 18560399853, + "repo_used_storage_mb": 18560.4, + "minimum_runtime_download_bytes": 522749479, + "minimum_runtime_download_mb": 522.75, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kb-whisper-medium-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-medium.en_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 41175344225, + "repo_used_storage_mb": 41175.34, + "minimum_runtime_download_bytes": 526805026, + "minimum_runtime_download_mb": 526.81, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 9.925, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-medium.en_timestamped" + }, + { + "task": "stt", + "model_id": "onnx-community/distil-large-v3.5-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134917, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539409855, + "minimum_runtime_download_mb": 539.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.787, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/distil-large-v3.5-ONNX" + }, + { + "task": "stt", + "model_id": "distil-whisper/distil-large-v3.5-ONNX", + "publisher": 
"distil-whisper", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134959, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539409969, + "minimum_runtime_download_mb": 539.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.787, + "notes": "", + "model_url": "https://huggingface.co/distil-whisper/distil-large-v3.5-ONNX" + }, + { + "task": "stt", + "model_id": "mohdasif81/distil-large-v3.5-ONNX", + "publisher": "mohdasif81", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134959, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539409969, + "minimum_runtime_download_mb": 539.41, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.787, + "notes": "", + "model_url": "https://huggingface.co/mohdasif81/distil-large-v3.5-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kotoba-whisper-bilingual-v1.0-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134970, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539462169, + "minimum_runtime_download_mb": 539.46, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": 
"https://huggingface.co/onnx-community/kotoba-whisper-bilingual-v1.0-ONNX" + }, + { + "task": "stt", + "model_id": "Juanpablozarza292/whisper_spanish_add-ONNX", + "publisher": "Juanpablozarza292", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134920, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539462230, + "minimum_runtime_download_mb": 539.46, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/Juanpablozarza292/whisper_spanish_add-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kotoba-whisper-v2.2-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12977134970, + "repo_used_storage_mb": 12977.13, + "minimum_runtime_download_bytes": 539463191, + "minimum_runtime_download_mb": 539.46, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kotoba-whisper-v2.2-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-turbo", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 24167760595, + "repo_used_storage_mb": 24167.76, + "minimum_runtime_download_bytes": 567868336, + "minimum_runtime_download_mb": 567.87, + 
"runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-turbo" + }, + { + "task": "stt", + "model_id": "rodrigomt/whisper-large-v3-turbo", + "publisher": "rodrigomt", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 24167760595, + "repo_used_storage_mb": 24167.76, + "minimum_runtime_download_bytes": 567868336, + "minimum_runtime_download_mb": 567.87, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/rodrigomt/whisper-large-v3-turbo" + }, + { + "task": "stt", + "model_id": "textagent/whisper-large-v3-turbo", + "publisher": "textagent", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 24167760595, + "repo_used_storage_mb": 24167.76, + "minimum_runtime_download_bytes": 567868336, + "minimum_runtime_download_mb": 567.87, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/textagent/whisper-large-v3-turbo" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-turbo_timestamped", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 22510145143, + 
"repo_used_storage_mb": 22510.15, + "minimum_runtime_download_bytes": 567990544, + "minimum_runtime_download_mb": 567.99, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-turbo_timestamped" + }, + { + "task": "stt", + "model_id": "willopcbeta/whisper-large-v3-turbo-ONNX", + "publisher": "willopcbeta", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14450418291, + "repo_used_storage_mb": 14450.42, + "minimum_runtime_download_bytes": 568808613, + "minimum_runtime_download_mb": 568.81, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/willopcbeta/whisper-large-v3-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-turbo-korean-ggml-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848945, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 568931441, + "minimum_runtime_download_mb": 568.93, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.585, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-turbo-korean-ggml-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-podlodka-turbo-ONNX", + "publisher": "onnx-community", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 14451848946, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185137, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-podlodka-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "nicky48/whisper-large-v3-turbo-ONNX", + "publisher": "nicky48", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848943, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185146, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/nicky48/whisper-large-v3-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "Knigtmares23/whisper-large-v3-turbo-ONNX", + "publisher": "Knigtmares23", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848943, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185147, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": 
"https://huggingface.co/Knigtmares23/whisper-large-v3-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "PengZhang424242/whisper-large-v3-turbo-ONNX", + "publisher": "PengZhang424242", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848943, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185147, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/PengZhang424242/whisper-large-v3-turbo-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-turbo-german-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848946, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185206, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-turbo-german-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-turbo-swiss-german-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451848946, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185311, + 
"minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-turbo-swiss-german-ONNX" + }, + { + "task": "stt", + "model_id": "gavinfukaml/whisper-large-v3-turbo-cantonese-yue-english-ONNX", + "publisher": "gavinfukaml", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451850312, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185670, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/gavinfukaml/whisper-large-v3-turbo-cantonese-yue-english-ONNX" + }, + { + "task": "stt", + "model_id": "flackzz/whisper-large-v3-turbo-german-ONNX", + "publisher": "flackzz", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 14451850318, + "repo_used_storage_mb": 14451.85, + "minimum_runtime_download_bytes": 569185758, + "minimum_runtime_download_mb": 569.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/flackzz/whisper-large-v3-turbo-german-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/mms-1b-fl102", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": 
"direct_pipeline", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 9580493517, + "repo_used_storage_mb": 9580.49, + "minimum_runtime_download_bytes": 572032751, + "minimum_runtime_download_mb": 572.03, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.739, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-1b-fl102" + }, + { + "task": "stt", + "model_id": "Xenova/mms-1b-l1107", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 9580055700, + "repo_used_storage_mb": 9580.06, + "minimum_runtime_download_bytes": 573382999, + "minimum_runtime_download_mb": 573.38, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 2.115, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-1b-l1107" + }, + { + "task": "stt", + "model_id": "onnx-community/mms-1b-all-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 9577703212, + "repo_used_storage_mb": 9577.7, + "minimum_runtime_download_bytes": 574081668, + "minimum_runtime_download_mb": 574.08, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 2.87, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/mms-1b-all-ONNX" + }, + { + "task": "stt", + "model_id": "Xenova/mms-1b-all", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wav2vec2", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 9581444240, + 
"repo_used_storage_mb": 9581.44, + "minimum_runtime_download_bytes": 574218347, + "minimum_runtime_download_mb": 574.22, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 2.87, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-1b-all" + }, + { + "task": "stt", + "model_id": "flackzz/distil-whisper-large-v3-german_timestamped-ONNX", + "publisher": "flackzz", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 6030154083, + "repo_used_storage_mb": 6030.15, + "minimum_runtime_download_bytes": 737130267, + "minimum_runtime_download_mb": 737.13, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx; onnx/decoder_model_merged_q4.onnx", + "small_overhead_mb": 11.275, + "notes": "", + "model_url": "https://huggingface.co/flackzz/distil-whisper-large-v3-german_timestamped-ONNX" + }, + { + "task": "stt", + "model_id": "distil-whisper/distil-large-v2", + "publisher": "distil-whisper", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 26380426662, + "repo_used_storage_mb": 26380.43, + "minimum_runtime_download_bytes": 770732848, + "minimum_runtime_download_mb": 770.73, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.189, + "notes": "", + "model_url": "https://huggingface.co/distil-whisper/distil-large-v2" + }, + { + "task": "stt", + "model_id": "Crystalcareai/Whisper-Medicalv1", + "publisher": "Crystalcareai", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + 
"parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12524348407, + "repo_used_storage_mb": 12524.35, + "minimum_runtime_download_bytes": 771190944, + "minimum_runtime_download_mb": 771.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/Crystalcareai/Whisper-Medicalv1" + }, + { + "task": "stt", + "model_id": "IsGarrido/Whisper-Medicalv1", + "publisher": "IsGarrido", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 12524348407, + "repo_used_storage_mb": 12524.35, + "minimum_runtime_download_bytes": 771190944, + "minimum_runtime_download_mb": 771.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/IsGarrido/Whisper-Medicalv1" + }, + { + "task": "stt", + "model_id": "tensorlake/distil-large-v3", + "publisher": "tensorlake", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 14729035728, + "repo_used_storage_mb": 14729.04, + "minimum_runtime_download_bytes": 771190944, + "minimum_runtime_download_mb": 771.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/tensorlake/distil-large-v3" + }, + { + "task": "stt", + "model_id": "zou8944/echo-distil-large-v3", + "publisher": "zou8944", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 14729035728, + "repo_used_storage_mb": 14729.04, + "minimum_runtime_download_bytes": 771190944, + "minimum_runtime_download_mb": 771.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/zou8944/echo-distil-large-v3" + }, + { + "task": "stt", + "model_id": "distil-whisper/distil-large-v3", + "publisher": "distil-whisper", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 15472518311, + "repo_used_storage_mb": 15472.52, + "minimum_runtime_download_bytes": 771191051, + "minimum_runtime_download_mb": 771.19, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.389, + "notes": "", + "model_url": "https://huggingface.co/distil-whisper/distil-large-v3" + }, + { + "task": "stt", + "model_id": "huuquyet/PhoWhisper-medium", + "publisher": "huuquyet", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 14269417003, + "repo_used_storage_mb": 14269.42, + "minimum_runtime_download_bytes": 780070092, + "minimum_runtime_download_mb": 780.07, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.377, + "notes": "", + "model_url": "https://huggingface.co/huuquyet/PhoWhisper-medium" + }, + 
{ + "task": "stt", + "model_id": "Sagicc/whisper-medium-sr-onnx", + "publisher": "Sagicc", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 8158117672, + "repo_used_storage_mb": 8158.12, + "minimum_runtime_download_bytes": 780086307, + "minimum_runtime_download_mb": 780.09, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.393, + "notes": "", + "model_url": "https://huggingface.co/Sagicc/whisper-medium-sr-onnx" + }, + { + "task": "stt", + "model_id": "whalesos/whisper-medium", + "publisher": "whalesos", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "769M", + "repo_used_storage_bytes": 8658918579, + "repo_used_storage_mb": 8658.92, + "minimum_runtime_download_bytes": 780492147, + "minimum_runtime_download_mb": 780.49, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/whalesos/whisper-medium" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-fast-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 37101144017, + "repo_used_storage_mb": 37101.14, + "minimum_runtime_download_bytes": 803038447, + "minimum_runtime_download_mb": 803.04, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + 
"small_overhead_mb": 5.639, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-fast-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 37166217101, + "repo_used_storage_mb": 37166.22, + "minimum_runtime_download_bytes": 842062544, + "minimum_runtime_download_mb": 842.06, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/lite-whisper-large-v3-acc-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "lite-whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 37677755089, + "repo_used_storage_mb": 37677.76, + "minimum_runtime_download_bytes": 871446493, + "minimum_runtime_download_mb": 871.45, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/lite-whisper-large-v3-acc-ONNX" + }, + { + "task": "stt", + "model_id": "varsan-g/hviske-v2-onnx", + "publisher": "varsan-g", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 23315679765, + "repo_used_storage_mb": 23315.68, + "minimum_runtime_download_bytes": 
950248010, + "minimum_runtime_download_mb": 950.25, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.654, + "notes": "", + "model_url": "https://huggingface.co/varsan-g/hviske-v2-onnx" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-d-v1a-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 16241660057, + "repo_used_storage_mb": 16241.66, + "minimum_runtime_download_bytes": 981036366, + "minimum_runtime_download_mb": 981.04, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 1.905, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-d-v1a-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/CrisperWhisper-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 35098407472, + "repo_used_storage_mb": 35098.41, + "minimum_runtime_download_bytes": 984585338, + "minimum_runtime_download_mb": 984.59, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.082, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/CrisperWhisper-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-native-children-3-dutch-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + 
"parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 35094989765, + "repo_used_storage_mb": 35094.99, + "minimum_runtime_download_bytes": 984971224, + "minimum_runtime_download_mb": 984.97, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-native-children-3-dutch-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v2-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 35094989764, + "repo_used_storage_mb": 35094.99, + "minimum_runtime_download_bytes": 984971357, + "minimum_runtime_download_mb": 984.97, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v2-ONNX" + }, + { + "task": "stt", + "model_id": "a2d8a4v/Breeze-ASR-25-ONNX", + "publisher": "a2d8a4v", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 35094989764, + "repo_used_storage_mb": 35094.99, + "minimum_runtime_download_bytes": 984971722, + "minimum_runtime_download_mb": 984.97, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/a2d8a4v/Breeze-ASR-25-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/whisper-large-v3-ONNX", + "publisher": "onnx-community", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 35098407453, + "repo_used_storage_mb": 35098.41, + "minimum_runtime_download_bytes": 985342328, + "minimum_runtime_download_mb": 985.34, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/whisper-large-v3-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/kb-whisper-large-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 35098683942, + "repo_used_storage_mb": 35098.68, + "minimum_runtime_download_bytes": 985386306, + "minimum_runtime_download_mb": 985.39, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 5.84, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/kb-whisper-large-ONNX" + }, + { + "task": "stt", + "model_id": "JaeyeongYang/whisper-small-komixv2-onnx", + "publisher": "JaeyeongYang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "244M", + "repo_used_storage_bytes": 1384807951, + "repo_used_storage_mb": 1384.81, + "minimum_runtime_download_bytes": 1132719657, + "minimum_runtime_download_mb": 1132.72, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/JaeyeongYang/whisper-small-komixv2-onnx" + }, 
+ { + "task": "stt", + "model_id": "huuquyet/PhoWhisper-large", + "publisher": "huuquyet", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 12665273315, + "repo_used_storage_mb": 12665.27, + "minimum_runtime_download_bytes": 1563335474, + "minimum_runtime_download_mb": 1563.34, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 3.801, + "notes": "", + "model_url": "https://huggingface.co/huuquyet/PhoWhisper-large" + }, + { + "task": "stt", + "model_id": "Sagicc/whisper-large-v3-sr-onnx", + "publisher": "Sagicc", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 12666210245, + "repo_used_storage_mb": 12666.21, + "minimum_runtime_download_bytes": 1564059716, + "minimum_runtime_download_mb": 1564.06, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.339, + "notes": "", + "model_url": "https://huggingface.co/Sagicc/whisper-large-v3-sr-onnx" + }, + { + "task": "stt", + "model_id": "Xenova/nb-whisper-large-beta", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 38002929987, + "repo_used_storage_mb": 38002.93, + "minimum_runtime_download_bytes": 1564245784, + "minimum_runtime_download_mb": 1564.25, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + 
"small_overhead_mb": 4.111, + "notes": "", + "model_url": "https://huggingface.co/Xenova/nb-whisper-large-beta" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-large", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 48264757703, + "repo_used_storage_mb": 48264.76, + "minimum_runtime_download_bytes": 1564497244, + "minimum_runtime_download_mb": 1564.5, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-large" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-large-v2", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 68089771124, + "repo_used_storage_mb": 68089.77, + "minimum_runtime_download_bytes": 1564497769, + "minimum_runtime_download_mb": 1564.5, + "runtime_variant": "quantized", + "runtime_files": "onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.363, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-large-v2" + }, + { + "task": "stt", + "model_id": "Xenova/whisper-large-v3", + "publisher": "Xenova", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 27801759500, + "repo_used_storage_mb": 27801.76, + "minimum_runtime_download_bytes": 1564659466, + "minimum_runtime_download_mb": 1564.66, + "runtime_variant": "quantized", + "runtime_files": 
"onnx/encoder_model_quantized.onnx; onnx/decoder_model_merged_quantized.onnx", + "small_overhead_mb": 4.339, + "notes": "", + "model_url": "https://huggingface.co/Xenova/whisper-large-v3" + }, + { + "task": "stt", + "model_id": "ipsilondev/lite-whisper-large-v3-turbo-timestamped-onnx", + "publisher": "ipsilondev", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 3429396876, + "repo_used_storage_mb": 3429.4, + "minimum_runtime_download_bytes": 2196406745, + "minimum_runtime_download_mb": 2196.41, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.839, + "notes": "", + "model_url": "https://huggingface.co/ipsilondev/lite-whisper-large-v3-turbo-timestamped-onnx" + }, + { + "task": "stt", + "model_id": "JaeyeongYang/whisper-large-v3-turbo-korean-onnx", + "publisher": "JaeyeongYang", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "whisper", + "support_level": "direct_pipeline", + "parameter_count_estimate": "1.55B", + "repo_used_storage_bytes": 6425845428, + "repo_used_storage_mb": 6425.85, + "minimum_runtime_download_bytes": 3507359488, + "minimum_runtime_download_mb": 3507.36, + "runtime_variant": "fp32", + "runtime_files": "onnx/encoder_model.onnx; onnx/decoder_model_merged.onnx", + "small_overhead_mb": 5.638, + "notes": "", + "model_url": "https://huggingface.co/JaeyeongYang/whisper-large-v3-turbo-korean-onnx" + }, + { + "task": "stt", + "model_id": "huggingworld/granite-4.0-1b-speech-ONNX", + "publisher": "huggingworld", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "granite_speech", + "support_level": "custom_browser", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 20175375100, + 
"repo_used_storage_mb": 20175.38, + "minimum_runtime_download_bytes": 1492983345, + "minimum_runtime_download_mb": 1492.98, + "runtime_variant": "q4f16", + "runtime_files": "onnx/audio_encoder_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx; onnx/embed_tokens_q4f16.onnx", + "small_overhead_mb": 4.136, + "notes": "", + "model_url": "https://huggingface.co/huggingworld/granite-4.0-1b-speech-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/granite-4.0-1b-speech-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "granite_speech", + "support_level": "custom_browser", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 20182061122, + "repo_used_storage_mb": 20182.06, + "minimum_runtime_download_bytes": 1492983345, + "minimum_runtime_download_mb": 1492.98, + "runtime_variant": "q4f16", + "runtime_files": "onnx/audio_encoder_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx; onnx/embed_tokens_q4f16.onnx", + "small_overhead_mb": 4.136, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/granite-4.0-1b-speech-ONNX" + }, + { + "task": "stt", + "model_id": "hlevring/parakeet-rnnt-110m-da-dk-onnx", + "publisher": "hlevring", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "nemo-conformer-tdt", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "110M", + "repo_used_storage_bytes": 1472605447, + "repo_used_storage_mb": 1472.61, + "minimum_runtime_download_bytes": 8164943, + "minimum_runtime_download_mb": 8.16, + "runtime_variant": "fp16", + "runtime_files": "onnx/decoder_model_merged_fp16.onnx", + "small_overhead_mb": 0.004, + "notes": "", + "model_url": "https://huggingface.co/hlevring/parakeet-rnnt-110m-da-dk-onnx" + }, + { + "task": "stt", + "model_id": "ysdede/parakeet-tdt-0.6b-v2-onnx-tfjs4", + "publisher": "ysdede", + "pipeline_tag": 
"automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "nemo-conformer-tdt", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "0.6B", + "repo_used_storage_bytes": 4472936062, + "repo_used_storage_mb": 4472.94, + "minimum_runtime_download_bytes": 9051413, + "minimum_runtime_download_mb": 9.05, + "runtime_variant": "int8", + "runtime_files": "onnx/decoder_model_merged_int8.onnx", + "small_overhead_mb": 0.053, + "notes": "", + "model_url": "https://huggingface.co/ysdede/parakeet-tdt-0.6b-v2-onnx-tfjs4" + }, + { + "task": "stt", + "model_id": "onnx-community/wavlm-persian-base-plus-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "wavlm", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1058048551, + "repo_used_storage_mb": 1058.05, + "minimum_runtime_download_bytes": 76796382, + "minimum_runtime_download_mb": 76.8, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 0.005, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/wavlm-persian-base-plus-ONNX" + }, + { + "task": "stt", + "model_id": "akkikiki/VibeVoice-ASR-onnx", + "publisher": "akkikiki", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "vibevoice-asr", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 155595653096, + "repo_used_storage_mb": 155595.65, + "minimum_runtime_download_bytes": 764883022, + "minimum_runtime_download_mb": 764.88, + "runtime_variant": "q4", + "runtime_files": "onnx/encoder_model_q4.onnx", + "small_overhead_mb": 15.878, + "notes": "", + "model_url": "https://huggingface.co/akkikiki/VibeVoice-ASR-onnx" + }, + { + "task": "stt", + "model_id": "valoomba/Qwen3-ForcedAligner-0.6B-ONNX", + 
"publisher": "valoomba", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "qwen3_asr", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "0.6B", + "repo_used_storage_bytes": 4732713992, + "repo_used_storage_mb": 4732.71, + "minimum_runtime_download_bytes": 1064302643, + "minimum_runtime_download_mb": 1064.3, + "runtime_variant": "q4", + "runtime_files": "onnx/model_q4.onnx", + "small_overhead_mb": 15.905, + "notes": "", + "model_url": "https://huggingface.co/valoomba/Qwen3-ForcedAligner-0.6B-ONNX" + }, + { + "task": "stt", + "model_id": "onnx-community/cohere-transcribe-03-2026-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "automatic-speech-recognition", + "library_name": "transformers.js", + "model_type": "cohere_asr", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 19892657060, + "repo_used_storage_mb": 19892.66, + "minimum_runtime_download_bytes": 1536222202, + "minimum_runtime_download_mb": 1536.22, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx", + "small_overhead_mb": 1.163, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/cohere-transcribe-03-2026-ONNX" + }, + { + "task": "tts", + "model_id": "Xenova/tiny-random-vits", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1679360, + "repo_used_storage_mb": 1.68, + "minimum_runtime_download_bytes": 517452, + "minimum_runtime_download_mb": 0.52, + "runtime_variant": "fp16", + "runtime_files": "onnx/model_fp16.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/Xenova/tiny-random-vits" + }, + { + "task": "tts", + "model_id": 
"eligapris/microsoft-speecht5_tts-ONNX", + "publisher": "eligapris", + "pipeline_tag": "text-to-audio", + "library_name": "transformers.js", + "model_type": "speecht5", + "support_level": "direct_pipeline", + "parameter_count_estimate": "144M", + "repo_used_storage_bytes": 73922173, + "repo_used_storage_mb": 73.92, + "minimum_runtime_download_bytes": 18502865, + "minimum_runtime_download_mb": 18.5, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 0.251, + "notes": "Includes default Xenova/speecht5_hifigan quantized vocoder | Requires speaker embeddings file at inference time", + "model_url": "https://huggingface.co/eligapris/microsoft-speecht5_tts-ONNX" + }, + { + "task": "tts", + "model_id": "elloza/mms-tts-mlg-onnx", + "publisher": "elloza", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 528178079, + "repo_used_storage_mb": 528.18, + "minimum_runtime_download_bytes": 38323150, + "minimum_runtime_download_mb": 38.32, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.005, + "notes": "", + "model_url": "https://huggingface.co/elloza/mms-tts-mlg-onnx" + }, + { + "task": "tts", + "model_id": "BricksDisplay/vits-cmn", + "publisher": "BricksDisplay", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 311260734, + "repo_used_storage_mb": 311.26, + "minimum_runtime_download_bytes": 38351397, + "minimum_runtime_download_mb": 38.35, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/BricksDisplay/vits-cmn" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-kor", + 
"publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210729911, + "repo_used_storage_mb": 210.73, + "minimum_runtime_download_bytes": 38365101, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-kor" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-ron", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210742006, + "repo_used_storage_mb": 210.74, + "minimum_runtime_download_bytes": 38367380, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-ron" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-eng", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210747382, + "repo_used_storage_mb": 210.75, + "minimum_runtime_download_bytes": 38368011, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-eng" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-ara", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + 
"parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210748727, + "repo_used_storage_mb": 210.75, + "minimum_runtime_download_bytes": 38368666, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-ara" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-spa-finetuned-argentinian-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 542919680, + "repo_used_storage_mb": 542.92, + "minimum_runtime_download_bytes": 38369076, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/ylacombe/mms-spa-finetuned-argentinian-monospeaker" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-spa-finetuned-chilean-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 542919680, + "repo_used_storage_mb": 542.92, + "minimum_runtime_download_bytes": 38369076, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/ylacombe/mms-spa-finetuned-chilean-monospeaker" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-spa-finetuned-colombian-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": 
"unknown", + "repo_used_storage_bytes": 542919680, + "repo_used_storage_mb": 542.92, + "minimum_runtime_download_bytes": 38369076, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.006, + "notes": "", + "model_url": "https://huggingface.co/ylacombe/mms-spa-finetuned-colombian-monospeaker" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-por", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210754104, + "repo_used_storage_mb": 210.75, + "minimum_runtime_download_bytes": 38369303, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-por" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-yor", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210754104, + "repo_used_storage_mb": 210.75, + "minimum_runtime_download_bytes": 38369353, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-yor" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-fra", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210755448, + "repo_used_storage_mb": 210.76, + "minimum_runtime_download_bytes": 38369550, + 
"minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-fra" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-deu", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210756793, + "repo_used_storage_mb": 210.76, + "minimum_runtime_download_bytes": 38369680, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-deu" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-spa", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210756792, + "repo_used_storage_mb": 210.76, + "minimum_runtime_download_bytes": 38369688, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-spa" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-rus", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210755449, + "repo_used_storage_mb": 210.76, + "minimum_runtime_download_bytes": 38369749, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + 
"model_url": "https://huggingface.co/Xenova/mms-tts-rus" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-tam-finetuned-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 542947137, + "repo_used_storage_mb": 542.95, + "minimum_runtime_download_bytes": 38372453, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/ylacombe/mms-tam-finetuned-monospeaker" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-guj-finetuned-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 542951360, + "repo_used_storage_mb": 542.95, + "minimum_runtime_download_bytes": 38373036, + "minimum_runtime_download_mb": 38.37, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.007, + "notes": "", + "model_url": "https://huggingface.co/ylacombe/mms-guj-finetuned-monospeaker" + }, + { + "task": "tts", + "model_id": "ylacombe/mms-mar-finetuned-monospeaker", + "publisher": "ylacombe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 542978816, + "repo_used_storage_mb": 542.98, + "minimum_runtime_download_bytes": 38376042, + "minimum_runtime_download_mb": 38.38, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": 
"https://huggingface.co/ylacombe/mms-mar-finetuned-monospeaker" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-hin", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210793080, + "repo_used_storage_mb": 210.79, + "minimum_runtime_download_bytes": 38376419, + "minimum_runtime_download_mb": 38.38, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.008, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-hin" + }, + { + "task": "tts", + "model_id": "Xenova/mms-tts-vie", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "36.3M", + "repo_used_storage_bytes": 210823994, + "repo_used_storage_mb": 210.82, + "minimum_runtime_download_bytes": 38381658, + "minimum_runtime_download_mb": 38.38, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx", + "small_overhead_mb": 0.009, + "notes": "", + "model_url": "https://huggingface.co/Xenova/mms-tts-vie" + }, + { + "task": "tts", + "model_id": "BricksDisplay/vits-eng-welsh-female", + "publisher": "BricksDisplay", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 114379500, + "repo_used_storage_mb": 114.38, + "minimum_runtime_download_bytes": 114390972, + "minimum_runtime_download_mb": 114.39, + "runtime_variant": "fp32", + "runtime_files": "onnx/model.onnx", + "small_overhead_mb": 0.011, + "notes": "", + "model_url": "https://huggingface.co/BricksDisplay/vits-eng-welsh-female" + }, + { + "task": "tts", + "model_id": "BricksDisplay/vits-eng", + "publisher": 
"BricksDisplay", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vits", + "support_level": "direct_pipeline", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 450887180, + "repo_used_storage_mb": 450.89, + "minimum_runtime_download_bytes": 114404781, + "minimum_runtime_download_mb": 114.4, + "runtime_variant": "fp32", + "runtime_files": "onnx/model.onnx", + "small_overhead_mb": 0.012, + "notes": "", + "model_url": "https://huggingface.co/BricksDisplay/vits-eng" + }, + { + "task": "tts", + "model_id": "Xenova/speecht5_tts", + "publisher": "Xenova", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "speecht5", + "support_level": "direct_pipeline", + "parameter_count_estimate": "144M", + "repo_used_storage_bytes": 4412793223, + "repo_used_storage_mb": 4412.79, + "minimum_runtime_download_bytes": 131814991, + "minimum_runtime_download_mb": 131.81, + "runtime_variant": "q4f16", + "runtime_files": "onnx/encoder_model_q4f16.onnx; onnx/decoder_model_merged_q4f16.onnx; onnx/decoder_postnet_and_vocoder_q4f16.onnx", + "small_overhead_mb": 0.251, + "notes": "Includes default Xenova/speecht5_hifigan quantized vocoder | Requires speaker embeddings file at inference time", + "model_url": "https://huggingface.co/Xenova/speecht5_tts" + }, + { + "task": "tts", + "model_id": "onnx-community/Supertonic-TTS-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "supertonic", + "support_level": "direct_pipeline", + "parameter_count_estimate": "66M", + "repo_used_storage_bytes": 263287416, + "repo_used_storage_mb": 263.29, + "minimum_runtime_download_bytes": 262824412, + "minimum_runtime_download_mb": 262.82, + "runtime_variant": "onnx", + "runtime_files": "onnx/latent_denoiser.onnx; onnx/text_encoder.onnx; onnx/voice_decoder.onnx; voices/F1.bin", + "small_overhead_mb": 0.002, + "notes": "Includes one preset voice 
file", + "model_url": "https://huggingface.co/onnx-community/Supertonic-TTS-ONNX" + }, + { + "task": "tts", + "model_id": "square-zero-labs/Supertonic-TTS-ONNX", + "publisher": "square-zero-labs", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "supertonic", + "support_level": "direct_pipeline", + "parameter_count_estimate": "66M", + "repo_used_storage_bytes": 263287416, + "repo_used_storage_mb": 263.29, + "minimum_runtime_download_bytes": 262824412, + "minimum_runtime_download_mb": 262.82, + "runtime_variant": "onnx", + "runtime_files": "onnx/latent_denoiser.onnx; onnx/text_encoder.onnx; onnx/voice_decoder.onnx; voices/F1.bin", + "small_overhead_mb": 0.002, + "notes": "Includes one preset voice file", + "model_url": "https://huggingface.co/square-zero-labs/Supertonic-TTS-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/Supertonic-TTS-2-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "supertonic", + "support_level": "direct_pipeline", + "parameter_count_estimate": "66M", + "repo_used_storage_bytes": 263389857, + "repo_used_storage_mb": 263.39, + "minimum_runtime_download_bytes": 262930473, + "minimum_runtime_download_mb": 262.93, + "runtime_variant": "onnx", + "runtime_files": "onnx/latent_denoiser.onnx; onnx/text_encoder.onnx; onnx/voice_decoder.onnx; voices/F1.bin", + "small_overhead_mb": 0.006, + "notes": "Includes one preset voice file", + "model_url": "https://huggingface.co/onnx-community/Supertonic-TTS-2-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/kitten-tts-nano-0.1-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 23800684, + "repo_used_storage_mb": 23.8, + "minimum_runtime_download_bytes": 
23798194, + "minimum_runtime_download_mb": 23.8, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/expr-voice-2-f.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/onnx-community/kitten-tts-nano-0.1-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/KittenTTS-Micro-v0.8-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "~41M", + "repo_used_storage_bytes": 44663872, + "repo_used_storage_mb": 44.66, + "minimum_runtime_download_bytes": 41385487, + "minimum_runtime_download_mb": 41.39, + "runtime_variant": "fp32", + "runtime_files": "onnx/model.onnx", + "small_overhead_mb": 0.001, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/KittenTTS-Micro-v0.8-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/KittenTTS-Nano-v0.8-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "~57M", + "repo_used_storage_bytes": 60045997, + "repo_used_storage_mb": 60.05, + "minimum_runtime_download_bytes": 56767827, + "minimum_runtime_download_mb": 56.77, + "runtime_variant": "fp32", + "runtime_files": "onnx/model.onnx", + "small_overhead_mb": 0.001, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/KittenTTS-Nano-v0.8-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/KittenTTS-Mini-v0.8-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "~78M", + "repo_used_storage_bytes": 81546918, + 
"repo_used_storage_mb": 81.55, + "minimum_runtime_download_bytes": 78268530, + "minimum_runtime_download_mb": 78.27, + "runtime_variant": "fp32", + "runtime_files": "onnx/model.onnx", + "small_overhead_mb": 0.001, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/KittenTTS-Mini-v0.8-ONNX" + }, + { + "task": "tts", + "model_id": "narrate-so/kokoro-latest", + "publisher": "narrate-so", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3157797186, + "repo_used_storage_mb": 3157.8, + "minimum_runtime_download_bytes": 92886437, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/narrate-so/kokoro-latest" + }, + { + "task": "tts", + "model_id": "onnx-community/Kokoro-82M-v1.0-ONNX-timestamped", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 1447372003, + "repo_used_storage_mb": 1447.37, + "minimum_runtime_download_bytes": 92886949, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX-timestamped" + }, + { + "task": "tts", + "model_id": "anyam12/Kokoro-82M-v1.0-ONNX", + "publisher": "anyam12", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", 
+ "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 1447363167, + "repo_used_storage_mb": 1447.36, + "minimum_runtime_download_bytes": 92887010, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/anyam12/Kokoro-82M-v1.0-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/Kokoro-82M-v1.0-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 1447363167, + "repo_used_storage_mb": 1447.36, + "minimum_runtime_download_bytes": 92887010, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX" + }, + { + "task": "tts", + "model_id": "textagent/Kokoro-82M-v1.0-ONNX", + "publisher": "textagent", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 1447363167, + "repo_used_storage_mb": 1447.36, + "minimum_runtime_download_bytes": 92887010, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/textagent/Kokoro-82M-v1.0-ONNX" + }, + { + "task": "tts", + "model_id": "wide-video/Kokoro-82M-v1.0-ONNX", + "publisher": "wide-video", + "pipeline_tag": 
"text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 1459972265, + "repo_used_storage_mb": 1459.97, + "minimum_runtime_download_bytes": 92887010, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/wide-video/Kokoro-82M-v1.0-ONNX" + }, + { + "task": "tts", + "model_id": "chae12121/kokorotest", + "publisher": "chae12121", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1447363167, + "repo_used_storage_mb": 1447.36, + "minimum_runtime_download_bytes": 92888834, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/chae12121/kokorotest" + }, + { + "task": "tts", + "model_id": "cshbli/Moxin-TTS", + "publisher": "cshbli", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1447737625, + "repo_used_storage_mb": 1447.74, + "minimum_runtime_download_bytes": 92889438, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.006, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/cshbli/Moxin-TTS" + }, + { + "task": "tts", + "model_id": 
"BoyNamedShoe/Kokoro-ONNX", + "publisher": "BoyNamedShoe", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 2543706041, + "repo_used_storage_mb": 2543.71, + "minimum_runtime_download_bytes": 92889596, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/BoyNamedShoe/Kokoro-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/Kokoro-82M-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 3978533400, + "repo_used_storage_mb": 3978.53, + "minimum_runtime_download_bytes": 92889596, + "minimum_runtime_download_mb": 92.89, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/onnx-community/Kokoro-82M-ONNX" + }, + { + "task": "tts", + "model_id": "adrianlyjak/kokoro-onnx", + "publisher": "adrianlyjak", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 2562714843, + "repo_used_storage_mb": 2562.71, + "minimum_runtime_download_bytes": 93018268, + "minimum_runtime_download_mb": 93.02, + "runtime_variant": "quantized", + "runtime_files": "onnx/model_quantized.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + 
"model_url": "https://huggingface.co/adrianlyjak/kokoro-onnx" + }, + { + "task": "tts", + "model_id": "onnx-community/Kokoro-82M-v1.1-zh-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 3264515804, + "repo_used_storage_mb": 3264.52, + "minimum_runtime_download_bytes": 127883845, + "minimum_runtime_download_mb": 127.88, + "runtime_variant": "int8", + "runtime_files": "onnx/model_int8.onnx; voices/af_maple.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/onnx-community/Kokoro-82M-v1.1-zh-ONNX" + }, + { + "task": "tts", + "model_id": "textagent/Kokoro-82M-v1.1-zh-ONNX", + "publisher": "textagent", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 3264515804, + "repo_used_storage_mb": 3264.52, + "minimum_runtime_download_bytes": 127883845, + "minimum_runtime_download_mb": 127.88, + "runtime_variant": "int8", + "runtime_files": "onnx/model_int8.onnx; voices/af_maple.bin", + "small_overhead_mb": 0.005, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/textagent/Kokoro-82M-v1.1-zh-ONNX" + }, + { + "task": "tts", + "model_id": "huggingworld/Kokoro-82M-v1.0-ONNX", + "publisher": "huggingworld", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "style_text_to_speech_2", + "support_level": "custom_browser", + "parameter_count_estimate": "82M", + "repo_used_storage_bytes": 354257480, + "repo_used_storage_mb": 354.26, + "minimum_runtime_download_bytes": 326058126, + "minimum_runtime_download_mb": 326.06, + "runtime_variant": "fp32", + "runtime_files": 
"onnx/model.onnx; voices/af_alloy.bin", + "small_overhead_mb": 0.004, + "notes": "Includes one voice embedding file", + "model_url": "https://huggingface.co/huggingworld/Kokoro-82M-v1.0-ONNX" + }, + { + "task": "tts", + "model_id": "huggingworld/chatterbox-ONNX", + "publisher": "huggingworld", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "chatterbox", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3934769236, + "repo_used_storage_mb": 3934.77, + "minimum_runtime_download_bytes": 30107, + "minimum_runtime_download_mb": 0.03, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 0.03, + "notes": "", + "model_url": "https://huggingface.co/huggingworld/chatterbox-ONNX" + }, + { + "task": "tts", + "model_id": "ipsilondev/chatterbox-multilingual-ONNX-q4", + "publisher": "ipsilondev", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 828595527, + "repo_used_storage_mb": 828.6, + "minimum_runtime_download_bytes": 1992298, + "minimum_runtime_download_mb": 1.99, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 1.992, + "notes": "", + "model_url": "https://huggingface.co/ipsilondev/chatterbox-multilingual-ONNX-q4" + }, + { + "task": "tts", + "model_id": "varsan-g/plapre-pico-ONNX", + "publisher": "varsan-g", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "llama", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1100302400, + "repo_used_storage_mb": 1100.3, + "minimum_runtime_download_bytes": 3234892, + "minimum_runtime_download_mb": 3.23, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.235, + "notes": "", + "model_url": 
"https://huggingface.co/varsan-g/plapre-pico-ONNX" + }, + { + "task": "tts", + "model_id": "varsan-g/plapre-nano-ONNX", + "publisher": "varsan-g", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "llama", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 3002465601, + "repo_used_storage_mb": 3002.47, + "minimum_runtime_download_bytes": 3234897, + "minimum_runtime_download_mb": 3.23, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.235, + "notes": "", + "model_url": "https://huggingface.co/varsan-g/plapre-nano-ONNX" + }, + { + "task": "tts", + "model_id": "spacekaren/chatterbox-turbo-webgpu", + "publisher": "spacekaren", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "chatterbox", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 2563410647, + "repo_used_storage_mb": 2563.41, + "minimum_runtime_download_bytes": 3564059, + "minimum_runtime_download_mb": 3.56, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.564, + "notes": "", + "model_url": "https://huggingface.co/spacekaren/chatterbox-turbo-webgpu" + }, + { + "task": "tts", + "model_id": "ttslab/chatterbox-turbo-webgpu", + "publisher": "ttslab", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "chatterbox", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 604577464, + "repo_used_storage_mb": 604.58, + "minimum_runtime_download_bytes": 3564059, + "minimum_runtime_download_mb": 3.56, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 3.564, + "notes": "", + "model_url": "https://huggingface.co/ttslab/chatterbox-turbo-webgpu" + }, + { + "task": "tts", + "model_id": "FluffyBunnies/vibevoice-onnx-int4", + "publisher": "FluffyBunnies", 
+ "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "vibevoice_streaming", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 1161037230, + "repo_used_storage_mb": 1161.04, + "minimum_runtime_download_bytes": 7039803, + "minimum_runtime_download_mb": 7.04, + "runtime_variant": "", + "runtime_files": "", + "small_overhead_mb": 7.04, + "notes": "", + "model_url": "https://huggingface.co/FluffyBunnies/vibevoice-onnx-int4" + }, + { + "task": "tts", + "model_id": "BricksDisplay/ellie-Bert-VITS2-small-onnx", + "publisher": "BricksDisplay", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "bert_vits2", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 2653481678, + "repo_used_storage_mb": 2653.48, + "minimum_runtime_download_bytes": 338692226, + "minimum_runtime_download_mb": 338.69, + "runtime_variant": "q4", + "runtime_files": "onnx/model_q4.onnx", + "small_overhead_mb": 0.56, + "notes": "", + "model_url": "https://huggingface.co/BricksDisplay/ellie-Bert-VITS2-small-onnx" + }, + { + "task": "tts", + "model_id": "Xinyue615/ellie-Bert-VITS2-small-onnx", + "publisher": "Xinyue615", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "bert_vits2", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "unknown", + "repo_used_storage_bytes": 2653481678, + "repo_used_storage_mb": 2653.48, + "minimum_runtime_download_bytes": 338692226, + "minimum_runtime_download_mb": 338.69, + "runtime_variant": "q4", + "runtime_files": "onnx/model_q4.onnx", + "small_overhead_mb": 0.56, + "notes": "", + "model_url": "https://huggingface.co/Xinyue615/ellie-Bert-VITS2-small-onnx" + }, + { + "task": "tts", + "model_id": "onnx-community/OuteTTS-0.2-500M", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + 
"library_name": "transformers.js", + "model_type": "qwen2", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "500M", + "repo_used_storage_bytes": 6100102469, + "repo_used_storage_mb": 6100.1, + "minimum_runtime_download_bytes": 506673613, + "minimum_runtime_download_mb": 506.67, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 17.996, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/OuteTTS-0.2-500M" + }, + { + "task": "tts", + "model_id": "OuteAI/Llama-OuteTTS-1.0-1B-ONNX", + "publisher": "OuteAI", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "llama", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "1B", + "repo_used_storage_bytes": 14658454908, + "repo_used_storage_mb": 14658.45, + "minimum_runtime_download_bytes": 1128721295, + "minimum_runtime_download_mb": 1128.72, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 13.826, + "notes": "", + "model_url": "https://huggingface.co/OuteAI/Llama-OuteTTS-1.0-1B-ONNX" + }, + { + "task": "tts", + "model_id": "onnx-community/orpheus-3b-0.1-ft-ONNX", + "publisher": "onnx-community", + "pipeline_tag": "text-to-speech", + "library_name": "transformers.js", + "model_type": "llama", + "support_level": "unknown_or_not_supported", + "parameter_count_estimate": "3B", + "repo_used_storage_bytes": 58512867306, + "repo_used_storage_mb": 58512.87, + "minimum_runtime_download_bytes": 2199189595, + "minimum_runtime_download_mb": 2199.19, + "runtime_variant": "q4f16", + "runtime_files": "onnx/model_q4f16.onnx", + "small_overhead_mb": 15.729, + "notes": "", + "model_url": "https://huggingface.co/onnx-community/orpheus-3b-0.1-ft-ONNX" + } + ], + "errors": [] +} \ No newline at end of file diff --git a/docs/research/browser-audio-models-2026-04-01.md b/docs/research/browser-audio-models-2026-04-01.md new file mode 100644 index 
0000000..5cdd384 --- /dev/null +++ b/docs/research/browser-audio-models-2026-04-01.md @@ -0,0 +1,64 @@ +# Browser Audio Models Inventory + +Date: 2026-04-01 + +This research pass inventories Hugging Face speech repos that are discoverable with the Hub filter `transformers.js` and audio task tags, then estimates the minimum model payload needed to run them in-browser. + +Files: +- `browser-audio-model-inventory-2026-04-01.csv` +- `browser-audio-model-inventory-2026-04-01.json` + +Inventory scope: +- 254 STT/ASR repos +- 58 TTS repos +- 312 total repos + +Support buckets: +- `direct_pipeline`: matches the current high-level Transformers.js speech pipeline path in this repo +- `custom_browser`: browser-capable, but likely needs custom model wiring instead of the simple pipeline API +- `unknown_or_not_supported`: tagged for browser use on the Hub, but not a clean fit for the installed package or current pipeline path + +Smallest practical canonical models: + +STT: +- `onnx-community/moonshine-tiny-ONNX` + - Parameter estimate: 27M + - Minimum runtime download estimate: 32.01 MB + - Repo storage on Hub: 1700.37 MB +- `Xenova/whisper-tiny.en` + - Parameter estimate: 39M + - Minimum runtime download estimate: 44.46 MB + - Repo storage on Hub: 7651.67 MB +- `Xenova/wav2vec2-base-960h` + - Parameter estimate: 95M + - Minimum runtime download estimate: 66.48 MB + - Repo storage on Hub: 1852.65 MB + +TTS: +- `Xenova/mms-tts-eng` + - Parameter estimate: 36.3M + - Minimum runtime download estimate: 38.37 MB + - Repo storage on Hub: 210.75 MB +- `Xenova/speecht5_tts` + - Parameter estimate: 144M + - Minimum runtime download estimate: 131.81 MB + - Repo storage on Hub: 4412.79 MB + - Note: the estimate includes the default `Xenova/speecht5_hifigan` quantized vocoder; a speaker embeddings file is still required at inference time +- `onnx-community/Supertonic-TTS-ONNX` + - Parameter estimate: 66M + - Minimum runtime download estimate: 262.82 MB + - Repo storage on Hub: 263.29 MB + +Smallest browser-capable custom TTS 
models: +- `onnx-community/kitten-tts-nano-0.1-ONNX` + - Minimum runtime download estimate: 23.80 MB +- `onnx-community/KittenTTS-Micro-v0.8-ONNX` + - Parameter estimate: ~41M + - Minimum runtime download estimate: 41.39 MB +- `onnx-community/Kokoro-82M-ONNX` + - Parameter estimate: 82M + - Minimum runtime download estimate: 92.89 MB + +Notes: +- Some repos expose very large total storage because they publish many quantization variants. The `minimum_runtime_download` estimate is the more useful number for browser UX. +- A few repos are clearly tests or incomplete community exports. The CSV/JSON include them for completeness, but the canonical picks above are the safest starting points for product work. diff --git a/package-lock.json b/package-lock.json index ad96dd5..4a4f457 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,6 +7,7 @@ "": { "name": "browser-llm", "version": "0.1.0", + "license": "MIT", "dependencies": { "@huggingface/transformers": "4.0.0-next.9", "react": "19.2.4", diff --git a/src/App.tsx b/src/App.tsx index fe8edca..1604f35 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1,6 +1,10 @@ -import type { FormEvent, KeyboardEvent } from "react"; -import { useEffect, useMemo, useRef } from "react"; +import type { ChangeEvent, FormEvent, KeyboardEvent } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { + EMPTY_TRANSCRIPTION_MESSAGE, + normalizeTranscriptionText, +} from "./app/audio-helpers"; import { applyAssistantContent, computeGenerationOptions, @@ -19,31 +23,56 @@ import { UI_STATE_FLUSH_DEBOUNCE_MS, } from "./app/constants"; import { + buildAudioSections, buildCuratedSections, + buildRecentAudioModels, buildRecentModels, buildStarterModels, decorateModel, + getAudioTaskForModel, + getFallbackAudioModel, getFallbackThreadModel, getRecommendedModel, } from "./app/model-helpers"; +import { + createIdleWaveform, + createWavBlob, + decodeAudioBlob, + downloadBlob, + measureWaveformLevels, +} from "./audio"; 
import { initializeChatStore } from "./chat-store"; +import AppLayout, { type LayoutMode } from "./components/AppLayout"; +import AudioScreen from "./components/AudioScreen"; import ChatScreen from "./components/ChatScreen"; +import DataPage from "./components/DataPage"; import LandingScreen from "./components/LandingScreen"; import ModelPickerDialog from "./components/ModelPickerDialog"; -import SettingsDialog from "./components/SettingsDialog"; +import SettingsPage from "./components/SettingsPage"; import { detectDeviceCapabilities } from "./device"; import { enrichModelDescriptor, fetchHubModelDetails } from "./hf"; import { useModelSearch } from "./hooks/useModelSearch"; import { useModelWorker } from "./hooks/useModelWorker"; +import { getCanonicalCuratedModel } from "./models"; import { clearLightweightAppState, deriveStorageFeedback, getDefaultStorageMessage, loadActiveChatThreadId, + loadLastAudioTab, + loadLastAudioView, + loadLastSttModel, + loadLastTtsModel, + loadLastWorkspace, pushRecentModel, saveActiveChatThreadId, saveAppSettings, + saveLastAudioTab, + saveLastAudioView, saveLastModel, + saveLastSttModel, + saveLastTtsModel, + saveLastWorkspace, savePickerTab, saveRecentModels, saveShowExperimental, @@ -52,6 +81,9 @@ import { import { useAppStore } from "./store/app-store"; import type { AppSettings, + AudioTab, + AudioTranscriptionChunk, + AudioView, ChatStore, ChatThread, ModelDescriptor, @@ -59,13 +91,33 @@ import type { ThreadMessage, ThreadUiState, WorkerResponse, + WorkspaceMode, } from "./types"; import { DEFAULT_APP_SETTINGS } from "./types"; +const RECORDING_WAVE_BAR_COUNT = 20; +const GITHUB_URL = "https://github.com/Jefreesujit/browser-llm"; +const MOBILE_LAYOUT_QUERY = "(max-width: 720px)"; +const PREFERRED_RECORDING_MIME_TYPES = [ + "audio/webm;codecs=opus", + "audio/webm", + "audio/mp4", + "audio/ogg;codecs=opus", +] as const; + function App() { const fileInputRef = useRef(null); + const audioUploadRef = useRef(null); const chatLogRef 
= useRef(null); const chatStoreRef = useRef(null); + const recordingAudioContextRef = useRef(null); + const recordingAnimationFrameRef = useRef(null); + const mediaRecorderRef = useRef(null); + const mediaStreamRef = useRef(null); + const recordingChunksRef = useRef([]); + const audioRequestIdRef = useRef(null); + const recordingStartedAtRef = useRef(null); + const recordingTimerRef = useRef(null); const pendingScrollRestoreRef = useRef(null); const shouldStickToBottomRef = useRef(true); const pendingScrollStateRef = useRef<{ @@ -79,6 +131,50 @@ function App() { >({}); const uiStateFlushTimerRef = useRef(null); const threadOpenNonceRef = useRef(0); + const [workspace, setWorkspace] = useState(() => + loadLastWorkspace(), + ); + const [audioView, setAudioView] = useState(() => + loadLastAudioView(), + ); + const [dataOpen, setDataOpen] = useState(false); + const [audioTab, setAudioTab] = useState(loadLastAudioTab()); + const [isMobileLayout, setIsMobileLayout] = useState(() => + typeof window !== "undefined" + ? window.matchMedia(MOBILE_LAYOUT_QUERY).matches + : false, + ); + const [selectedSttModel, setSelectedSttModel] = useState( + () => loadLastSttModel() ?? getFallbackAudioModel("transcribe")!, + ); + const [selectedTtsModel, setSelectedTtsModel] = useState( + () => loadLastTtsModel() ?? 
getFallbackAudioModel("speak")!, + ); + const [audioTaskBusy, setAudioTaskBusy] = useState(false); + const [audioTaskStatus, setAudioTaskStatus] = useState(null); + const [isRecording, setIsRecording] = useState(false); + const [recordingDurationMs, setRecordingDurationMs] = useState(0); + const [recordingLevels, setRecordingLevels] = useState(() => + createIdleWaveform(RECORDING_WAVE_BAR_COUNT), + ); + const [audioInputLabel, setAudioInputLabel] = useState(null); + const [transcriptText, setTranscriptText] = useState(""); + const [transcriptChunks, setTranscriptChunks] = useState< + AudioTranscriptionChunk[] + >([]); + const [timestampsEnabled, setTimestampsEnabled] = useState(false); + const [speakText, setSpeakText] = useState(""); + const [selectedVoice, setSelectedVoice] = useState( + (loadLastTtsModel() ?? getFallbackAudioModel("speak")!)?.runtime + .defaultVoice ?? "default", + ); + const [speakSpeed, setSpeakSpeed] = useState(1); + const [generatedAudioUrl, setGeneratedAudioUrl] = useState( + null, + ); + const [generatedAudioDurationSec, setGeneratedAudioDurationSec] = useState< + number | null + >(null); const { booting, @@ -173,6 +269,74 @@ function App() { setChatPersistence(feedback.status, feedback.warning); }; + const stopRecordingVisualization = () => { + if (recordingAnimationFrameRef.current !== null) { + window.cancelAnimationFrame(recordingAnimationFrameRef.current); + recordingAnimationFrameRef.current = null; + } + + if (recordingTimerRef.current !== null) { + window.clearInterval(recordingTimerRef.current); + recordingTimerRef.current = null; + } + + recordingStartedAtRef.current = null; + setRecordingDurationMs(0); + setRecordingLevels(createIdleWaveform(RECORDING_WAVE_BAR_COUNT)); + + const audioContext = recordingAudioContextRef.current; + recordingAudioContextRef.current = null; + void audioContext?.close().catch(() => {}); + }; + + const startRecordingVisualization = async (stream: MediaStream) => { + const audioContext = new 
AudioContext(); + recordingAudioContextRef.current = audioContext; + + try { + await audioContext.resume(); + } catch { + // Some browsers already start in a running state. + } + + const source = audioContext.createMediaStreamSource(stream); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 1024; + analyser.smoothingTimeConstant = 0.82; + source.connect(analyser); + + const timeDomainData = new Uint8Array(analyser.fftSize); + const tick = () => { + analyser.getByteTimeDomainData(timeDomainData); + const nextLevels = measureWaveformLevels( + timeDomainData, + RECORDING_WAVE_BAR_COUNT, + ); + + setRecordingLevels((current) => + nextLevels.map((level, index) => { + const previous = current[index] ?? level; + return previous * 0.42 + level * 0.58; + }), + ); + recordingAnimationFrameRef.current = window.requestAnimationFrame(tick); + }; + + recordingStartedAtRef.current = performance.now(); + setRecordingDurationMs(0); + tick(); + + recordingTimerRef.current = window.setInterval(() => { + if (recordingStartedAtRef.current === null) { + return; + } + + setRecordingDurationMs( + Math.max(0, performance.now() - recordingStartedAtRef.current), + ); + }, 100); + }; + const flushThreadSnapshot = async (threadId: string) => { const store = chatStoreRef.current; const pending = pendingThreadFlushRef.current[threadId]; @@ -350,7 +514,16 @@ function App() { }); setLocalVerdicts(nextCache); setRecentModels(pushRecentModel(storedModel)); - saveLastModel(storedModel); + if ( + currentSelectedModel.task === "text" || + currentSelectedModel.task === "vision" + ) { + saveLastModel(storedModel); + } else if (currentSelectedModel.task === "stt") { + saveLastSttModel(storedModel); + } else if (currentSelectedModel.task === "tts") { + saveLastTtsModel(storedModel); + } } else { setLocalVerdicts( upsertModelVerdict(currentSelectedModel.id, { @@ -361,6 +534,64 @@ function App() { } break; } + case "TASK_STATUS": { + if ( + event.data.payload.requestId !== 
audioRequestIdRef.current || + event.data.payload.modelId !== currentSelectedModel?.id + ) { + return; + } + + setAudioTaskStatus(event.data.payload.status); + break; + } + case "TRANSCRIPTION_DONE": { + if ( + event.data.payload.requestId !== audioRequestIdRef.current || + event.data.payload.modelId !== currentSelectedModel?.id + ) { + return; + } + + const transcriptChunks = event.data.payload.chunks ?? []; + const transcriptText = normalizeTranscriptionText( + event.data.payload.text, + transcriptChunks, + ); + + setTranscriptText(transcriptText); + setTranscriptChunks(transcriptChunks); + setAudioTaskBusy(false); + setAudioTaskStatus( + transcriptText ? null : EMPTY_TRANSCRIPTION_MESSAGE, + ); + audioRequestIdRef.current = null; + break; + } + case "SPEECH_DONE": { + if ( + event.data.payload.requestId !== audioRequestIdRef.current || + event.data.payload.modelId !== currentSelectedModel?.id + ) { + return; + } + + if (generatedAudioUrl) { + URL.revokeObjectURL(generatedAudioUrl); + } + + const audioBlob = createWavBlob( + new Float32Array(event.data.payload.audioBuffer), + event.data.payload.sampleRate, + ); + + setGeneratedAudioUrl(URL.createObjectURL(audioBlob)); + setGeneratedAudioDurationSec(event.data.payload.durationSec); + setAudioTaskBusy(false); + setAudioTaskStatus(null); + audioRequestIdRef.current = null; + break; + } case "STREAM_TOKEN": { const payload = event.data.payload; if ( @@ -446,6 +677,18 @@ function App() { } case "ERROR": { const payload = event.data.payload; + if (payload.requestId && !payload.threadId) { + if (payload.requestId !== audioRequestIdRef.current) { + return; + } + + setAudioTaskBusy(false); + setAudioTaskStatus(null); + audioRequestIdRef.current = null; + setError(payload.message); + return; + } + if (payload.threadId && payload.requestId) { if ( !currentGeneration || @@ -489,6 +732,13 @@ function App() { lastLoadedAt: new Date().toISOString(), }), ); + if ( + currentSelectedModel.task === "stt" || + 
currentSelectedModel.task === "tts" + ) { + setAudioTaskBusy(false); + setAudioTaskStatus(null); + } setLoadedModelId(null); setError(payload.message); setAppState("loading"); @@ -504,6 +754,33 @@ function App() { onMessage: handleWorkerMessage, }); + useEffect(() => { + if (typeof window === "undefined") { + return; + } + + const mediaQuery = window.matchMedia(MOBILE_LAYOUT_QUERY); + const handleChange = (event: MediaQueryListEvent | MediaQueryList) => { + setIsMobileLayout(event.matches); + }; + + handleChange(mediaQuery); + + if (typeof mediaQuery.addEventListener === "function") { + mediaQuery.addEventListener("change", handleChange); + + return () => { + mediaQuery.removeEventListener("change", handleChange); + }; + } + + mediaQuery.addListener(handleChange); + + return () => { + mediaQuery.removeListener(handleChange); + }; + }, []); + useEffect(() => { detectDeviceCapabilities() .then((capabilities) => { @@ -582,6 +859,50 @@ function App() { setThreadUiStates, ]); + useEffect(() => { + if (workspace !== "audio" || audioView === "overview") { + return; + } + + const nextModel = + audioTab === "transcribe" ? selectedSttModel : selectedTtsModel; + if (selectedModel?.id === nextModel.id) { + return; + } + + setSelectedModel(nextModel); + }, [ + audioTab, + audioView, + selectedModel, + selectedSttModel, + selectedTtsModel, + setSelectedModel, + workspace, + ]); + + useEffect(() => { + saveLastAudioTab(audioTab); + }, [audioTab]); + + useEffect(() => { + saveLastAudioView(audioView); + }, [audioView]); + + useEffect(() => { + saveLastWorkspace(workspace); + }, [workspace]); + + useEffect(() => { + const voices = selectedTtsModel.runtime.voices ?? []; + const fallbackVoice = + selectedTtsModel.runtime.defaultVoice ?? voices[0]?.id ?? 
"default"; + + if (!voices.some((voice) => voice.id === selectedVoice)) { + setSelectedVoice(fallbackVoice); + } + }, [selectedTtsModel, selectedVoice]); + useEffect(() => { if (!workerReady || !selectedModel || isGenerating) { return; @@ -619,7 +940,7 @@ function App() { ]); useEffect(() => { - if (!activeThread || !workerReady || isGenerating) { + if (workspace !== "chat" || !activeThread || !workerReady || isGenerating) { return; } @@ -633,9 +954,22 @@ function App() { isGenerating, selectedModel, setSelectedModel, + workspace, workerReady, ]); + useEffect(() => { + if (workspace !== "chat" || activeThread || isGenerating) { + return; + } + + if (selectedModel === null) { + return; + } + + setSelectedModel(null); + }, [activeThread, isGenerating, selectedModel, setSelectedModel, workspace]); + useEffect(() => { if (!activeThreadId) { return; @@ -733,6 +1067,25 @@ function App() { }; }, []); + useEffect(() => { + return () => { + if (recordingAnimationFrameRef.current !== null) { + window.cancelAnimationFrame(recordingAnimationFrameRef.current); + } + if (recordingTimerRef.current !== null) { + window.clearInterval(recordingTimerRef.current); + } + void recordingAudioContextRef.current?.close().catch(() => {}); + mediaRecorderRef.current?.stream + ?.getTracks() + .forEach((track) => track.stop()); + mediaStreamRef.current?.getTracks().forEach((track) => track.stop()); + if (generatedAudioUrl) { + URL.revokeObjectURL(generatedAudioUrl); + } + }; + }, [generatedAudioUrl]); + useEffect(() => { saveRecentModels(recentModels); }, [recentModels]); @@ -750,6 +1103,27 @@ function App() { [deviceCapabilities, localVerdicts], ); + const audioCuratedSections = useMemo( + () => buildAudioSections(audioTab, deviceCapabilities, localVerdicts), + [audioTab, deviceCapabilities, localVerdicts], + ); + + const audioLandingStarterModels = useMemo( + () => ({ + transcribe: buildAudioSections( + "transcribe", + deviceCapabilities, + localVerdicts, + ) + .flatMap((section) => 
section.models) + .slice(0, 3), + speak: buildAudioSections("speak", deviceCapabilities, localVerdicts) + .flatMap((section) => section.models) + .slice(0, 3), + }), + [deviceCapabilities, localVerdicts], + ); + const starterModels = useMemo( () => buildStarterModels(deviceCapabilities, localVerdicts), [deviceCapabilities, localVerdicts], @@ -760,6 +1134,25 @@ function App() { [deviceCapabilities, localVerdicts, recentModels], ); + const recentAudioModelsWithCompatibility = useMemo( + () => + buildRecentAudioModels( + audioTab, + recentModels, + deviceCapabilities, + localVerdicts, + ), + [audioTab, deviceCapabilities, localVerdicts, recentModels], + ); + + const allRecentAudioModelsWithCompatibility = useMemo( + () => + recentModelsWithCompatibility.filter( + ({ model }) => model.task === "stt" || model.task === "tts", + ), + [recentModelsWithCompatibility], + ); + const recommendedModel = useMemo( () => getRecommendedModel(deviceCapabilities, localVerdicts), [deviceCapabilities, localVerdicts], @@ -773,6 +1166,21 @@ function App() { [deviceCapabilities, localVerdicts, selectedModel], ); + const selectedSttModelWithCompatibility = useMemo( + () => decorateModel(selectedSttModel, deviceCapabilities, localVerdicts), + [deviceCapabilities, localVerdicts, selectedSttModel], + ); + + const selectedTtsModelWithCompatibility = useMemo( + () => decorateModel(selectedTtsModel, deviceCapabilities, localVerdicts), + [deviceCapabilities, localVerdicts, selectedTtsModel], + ); + + const activeAudioModelWithCompatibility = + audioTab === "transcribe" + ? selectedSttModelWithCompatibility + : selectedTtsModelWithCompatibility; + const activeThreadModelWithCompatibility = useMemo( () => activeThread @@ -811,16 +1219,87 @@ function App() { [searchResults], ); + const pickerMode: WorkspaceMode = workspace; + const pickerCuratedSections = + pickerMode === "audio" ? audioCuratedSections : curatedSections; + const pickerRecentModels = + pickerMode === "audio" + ? 
pickerTab === "recent" + ? allRecentAudioModelsWithCompatibility + : recentAudioModelsWithCompatibility + : recentModelsWithCompatibility; + const pickerAvailableTabs = + pickerMode === "audio" + ? ([] satisfies PickerTab[]) + : (["curated", "search", "recent"] satisfies PickerTab[]); + const openPicker = (tab: PickerTab) => { setPickerTab(tab); setPickerOpen(true); }; + const openSettings = () => { + setDataOpen(false); + setPickerOpen(false); + setPendingModel(null); + setSettingsOpen(true); + }; + + const openData = () => { + setSettingsOpen(false); + setPickerOpen(false); + setPendingModel(null); + setDataOpen(true); + }; + + const switchToAudioWorkspace = (nextView: AudioView = audioView) => { + if (isGenerating || audioTaskBusy || isRecording) { + return; + } + + setWorkspace("audio"); + setSettingsOpen(false); + setDataOpen(false); + setAudioView(nextView); + if (nextView !== "overview") { + setAudioTab(nextView); + } + setPickerOpen(false); + setPendingModel(null); + setError(null); + }; + + const switchToChatWorkspace = () => { + if (audioTaskBusy || isRecording) { + return; + } + + setWorkspace("chat"); + setSettingsOpen(false); + setDataOpen(false); + setPickerOpen(false); + setPendingModel(null); + setError(null); + + if ( + activeThreadModelWithCompatibility && + selectedModel?.id !== activeThreadModelWithCompatibility.id + ) { + setSelectedModel(activeThreadModelWithCompatibility); + } else if (!activeThreadModelWithCompatibility) { + setSelectedModel(null); + } + }; + const resolveSelectedModel = async (model: ModelDescriptor) => { + const canonicalModel = getCanonicalCuratedModel(model.id); + const seedModel = canonicalModel + ? { ...canonicalModel, source: model.source } + : model; const baseModel = - model.source === "search" + model.source === "search" && !canonicalModel ? 
enrichModelDescriptor(model, await fetchHubModelDetails(model.id)) - : model; + : seedModel; const resolvedModel = decorateModel( baseModel, deviceCapabilities, @@ -837,6 +1316,53 @@ function App() { return resolvedModel; }; + const requestAudioModelLoad = async ( + model: ModelDescriptor, + task: AudioTab = audioTab, + ) => { + if (audioTaskBusy || isRecording) { + return; + } + + setLoadingModelId(model.id); + setError(null); + setAudioTab(task); + + try { + const resolvedModel = await resolveSelectedModel(model); + const storedModel = stripModelCompatibility(resolvedModel); + + if (task === "transcribe") { + setSelectedSttModel(storedModel); + } else { + setSelectedTtsModel(storedModel); + setSelectedVoice( + storedModel.runtime.defaultVoice ?? + storedModel.runtime.voices?.[0]?.id ?? + "default", + ); + } + + setSelectedModel(storedModel); + setWorkspace("audio"); + setAudioView(task); + setAudioTab(task); + setPickerOpen(false); + setLoadingModelId(null); + } catch (selectionIssue) { + setLoadingModelId(null); + setError( + selectionIssue instanceof Error + ? selectionIssue.message + : "Unable to prepare this model for loading.", + ); + } + }; + + const handleAudioPickerModelLoad = (model: ModelDescriptor) => { + void requestAudioModelLoad(model, getAudioTaskForModel(model)); + }; + const createNewThread = async (preferredModel?: ModelDescriptor) => { if (isGenerating) { return; @@ -901,51 +1427,76 @@ function App() { } }; - const activateModel = async (model: ModelDescriptor) => { + const activateModel = async ( + model: ModelDescriptor, + mode: "auto" | "same-thread" | "new-thread" = "auto", + ) => { setLoadingModelId(model.id); setError(null); + setWorkspace("chat"); try { const resolvedModel = await resolveSelectedModel(model); + const nextModel = stripModelCompatibility(resolvedModel); const shouldReuseActiveThread = Boolean( activeThread && activeMessages.length === 0 && !isGenerating, ); - const nextThread = shouldReuseActiveThread - ? 
{ - ...activeThread!, - ...createThreadRecord(stripModelCompatibility(resolvedModel)), - id: activeThread!.id, - createdAt: activeThread!.createdAt, - } - : createThreadRecord(stripModelCompatibility(resolvedModel)); - const uiState = - threadUiStates[nextThread.id] ?? - createDefaultThreadUiState(nextThread.id); - - shouldStickToBottomRef.current = true; - upsertThread(nextThread); - replaceThreadMessages(nextThread.id, []); - setThreadUiStates({ - ...useAppStore.getState().threadUiStates, - [nextThread.id]: uiState, - }); - setActiveThreadId(nextThread.id); - saveActiveChatThreadId(nextThread.id); - setSelectedModel(nextThread.model); - setPickerOpen(false); - setPendingModel(null); - clearDraftAttachment(); - setError(null); - pendingScrollRestoreRef.current = 0; - if (chatStoreRef.current) { - const threadResult = await chatStoreRef.current.putThread(nextThread); - const messagesResult = await chatStoreRef.current.putMessages( - nextThread.id, - [], - ); - const uiResult = await chatStoreRef.current.putUiState(uiState); - handleStorageWriteResults(threadResult, messagesResult, uiResult); + if (mode === "same-thread" && activeThread) { + const nextThread = { + ...activeThread, + model: nextModel, + updatedAt: new Date().toISOString(), + }; + + upsertThread(nextThread); + setSelectedModel(nextThread.model); + setPickerOpen(false); + setPendingModel(null); + setError(null); + + if (chatStoreRef.current) { + await persistThreadSnapshotNow(nextThread, activeMessages); + } + } else { + const nextThread = + mode === "new-thread" || !shouldReuseActiveThread + ? createThreadRecord(nextModel) + : { + ...activeThread!, + ...createThreadRecord(nextModel), + id: activeThread!.id, + createdAt: activeThread!.createdAt, + }; + const uiState = + threadUiStates[nextThread.id] ?? 
+ createDefaultThreadUiState(nextThread.id); + + shouldStickToBottomRef.current = true; + upsertThread(nextThread); + replaceThreadMessages(nextThread.id, []); + setThreadUiStates({ + ...useAppStore.getState().threadUiStates, + [nextThread.id]: uiState, + }); + setActiveThreadId(nextThread.id); + saveActiveChatThreadId(nextThread.id); + setSelectedModel(nextThread.model); + setPickerOpen(false); + setPendingModel(null); + clearDraftAttachment(); + setError(null); + pendingScrollRestoreRef.current = 0; + + if (chatStoreRef.current) { + const threadResult = await chatStoreRef.current.putThread(nextThread); + const messagesResult = await chatStoreRef.current.putMessages( + nextThread.id, + [], + ); + const uiResult = await chatStoreRef.current.putUiState(uiState); + handleStorageWriteResults(threadResult, messagesResult, uiResult); + } } } catch (selectionIssue) { setLoadingModelId(null); @@ -974,14 +1525,269 @@ function App() { await activateModel(model); }; + const copyPlainText = async (value: string) => { + if (!value.trim()) { + return; + } + + if (navigator.clipboard?.writeText) { + await navigator.clipboard.writeText(value); + return; + } + + const textarea = document.createElement("textarea"); + textarea.value = value; + textarea.setAttribute("readonly", "true"); + textarea.style.position = "absolute"; + textarea.style.left = "-9999px"; + document.body.append(textarea); + textarea.select(); + document.execCommand("copy"); + textarea.remove(); + }; + + const createTranscriptFilename = () => { + const baseName = (audioInputLabel ?? 
"transcript") + .replace(/\.[a-z0-9]+$/i, "") + .replace(/[^\w-]+/g, "-") + .replace(/^-+|-+$/g, ""); + + return `${baseName || "transcript"}.txt`; + }; + + const createSpeechFilename = () => { + const stem = speakText + .trim() + .slice(0, 32) + .replace(/[^\w-]+/g, "-") + .replace(/^-+|-+$/g, "") + .toLowerCase(); + + return `${stem || "speech"}.wav`; + }; + + const transcribeAudioBlob = async (blob: Blob, label: string) => { + if ( + !selectedSttModelWithCompatibility.compatibility?.canLoad || + audioTaskBusy + ) { + return; + } + + setError(null); + setAudioTaskBusy(true); + setAudioTaskStatus("Decoding audio"); + setAudioInputLabel(label); + setTranscriptText(""); + setTranscriptChunks([]); + + try { + const targetSampleRate = + selectedSttModelWithCompatibility.runtime.audioSampleRate ?? 16000; + const { samples, durationSec } = await decodeAudioBlob( + blob, + targetSampleRate, + ); + const requestId = crypto.randomUUID(); + + audioRequestIdRef.current = requestId; + setAudioTaskStatus("Sending audio to the transcription model"); + postWorkerMessage( + { + type: "TRANSCRIBE_AUDIO", + payload: { + requestId, + model: selectedSttModelWithCompatibility, + audio: samples, + returnTimestamps: timestampsEnabled, + fileName: label, + durationSec, + }, + }, + [samples.buffer], + ); + } catch (transcriptionIssue) { + setAudioTaskBusy(false); + setAudioTaskStatus(null); + setError( + transcriptionIssue instanceof Error + ? 
transcriptionIssue.message + : "Unable to read this audio file in the browser.", + ); + audioRequestIdRef.current = null; + } + }; + + const handleAudioFileChange = (event: ChangeEvent) => { + const file = event.target.files?.[0]; + event.target.value = ""; + + if (!file) { + return; + } + + void transcribeAudioBlob(file, file.name); + }; + + const handleStartRecording = async () => { + if (audioTaskBusy || appState !== "ready") { + return; + } + + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mimeType = PREFERRED_RECORDING_MIME_TYPES.find((candidate) => + typeof MediaRecorder.isTypeSupported === "function" + ? MediaRecorder.isTypeSupported(candidate) + : false, + ); + const recorder = mimeType + ? new MediaRecorder(stream, { mimeType }) + : new MediaRecorder(stream); + recordingChunksRef.current = []; + mediaStreamRef.current = stream; + mediaRecorderRef.current = recorder; + void startRecordingVisualization(stream).catch(() => { + setRecordingLevels(createIdleWaveform(RECORDING_WAVE_BAR_COUNT)); + }); + + recorder.addEventListener("dataavailable", (event) => { + if (event.data.size > 0) { + recordingChunksRef.current.push(event.data); + } + }); + + recorder.addEventListener("stop", () => { + const mimeType = recorder.mimeType || "audio/webm"; + const blob = new Blob(recordingChunksRef.current, { type: mimeType }); + recordingChunksRef.current = []; + stopRecordingVisualization(); + mediaStreamRef.current?.getTracks().forEach((track) => track.stop()); + mediaStreamRef.current = null; + mediaRecorderRef.current = null; + setIsRecording(false); + void transcribeAudioBlob(blob, "microphone recording"); + }); + + recorder.start(); + setIsRecording(true); + setError(null); + setAudioTaskStatus("Recording from microphone"); + } catch (recordingIssue) { + stopRecordingVisualization(); + setError( + recordingIssue instanceof Error + ? 
recordingIssue.message + : "Microphone access was blocked in this browser.", + ); + mediaStreamRef.current?.getTracks().forEach((track) => track.stop()); + mediaStreamRef.current = null; + mediaRecorderRef.current = null; + setIsRecording(false); + setAudioTaskStatus(null); + } + }; + + const handleStopRecording = () => { + if ( + !mediaRecorderRef.current || + mediaRecorderRef.current.state === "inactive" + ) { + return; + } + + setAudioTaskStatus("Finalizing recording"); + mediaRecorderRef.current.stop(); + }; + + const handleGenerateSpeech = () => { + if ( + !selectedTtsModelWithCompatibility.compatibility?.canLoad || + !speakText.trim() || + audioTaskBusy + ) { + return; + } + + if (generatedAudioUrl) { + URL.revokeObjectURL(generatedAudioUrl); + setGeneratedAudioUrl(null); + } + + const requestId = crypto.randomUUID(); + audioRequestIdRef.current = requestId; + setAudioTaskBusy(true); + setAudioTaskStatus("Preparing speech generation"); + setGeneratedAudioDurationSec(null); + setError(null); + + postWorkerMessage({ + type: "SYNTHESIZE_SPEECH", + payload: { + requestId, + model: selectedTtsModelWithCompatibility, + text: speakText.trim(), + voice: selectedVoice, + speed: speakSpeed, + }, + }); + }; + const handleGetStarted = async () => { if (!recommendedModel?.compatibility?.canLoad) { return; } + setWorkspace("chat"); await activateModel(recommendedModel); }; + const handleAudioGetStarted = () => { + switchToAudioWorkspace("transcribe"); + }; + + const handleSearchModels = () => { + openPicker(pickerMode === "audio" ? 
"recent" : "search"); + }; + + const handleBrowseAudio = () => { + audioUploadRef.current?.click(); + }; + + const handleCopyTranscript = () => { + void copyPlainText(transcriptText); + }; + + const handleDownloadTranscript = () => { + if (!transcriptText) { + return; + } + + downloadBlob( + new Blob([transcriptText], { type: "text/plain;charset=utf-8" }), + createTranscriptFilename(), + ); + }; + + const handleUseInSpeak = () => { + if (!transcriptText) { + return; + } + + setSpeakText(transcriptText); + switchToAudioWorkspace("speak"); + }; + + const handleDownloadAudio = async () => { + if (!generatedAudioUrl) { + return; + } + + const response = await fetch(generatedAudioUrl); + const blob = await response.blob(); + downloadBlob(blob, createSpeechFilename()); + }; + const handleInputChange = (value: string) => { if (!activeThreadId) { return; @@ -1180,6 +1986,14 @@ function App() { const clearAllData = async () => { await clearAllChats(); + stopRecordingVisualization(); + mediaRecorderRef.current?.stream + ?.getTracks() + .forEach((track) => track.stop()); + mediaStreamRef.current?.getTracks().forEach((track) => track.stop()); + mediaRecorderRef.current = null; + mediaStreamRef.current = null; + setIsRecording(false); setLocalVerdicts({}); setRecentModels([]); setAppSettings(DEFAULT_APP_SETTINGS); @@ -1189,21 +2003,60 @@ function App() { verifiedOnly: false, showExperimental: false, }); + if (generatedAudioUrl) { + URL.revokeObjectURL(generatedAudioUrl); + } + setAudioTab("transcribe"); + setSelectedSttModel(getFallbackAudioModel("transcribe")!); + setSelectedTtsModel(getFallbackAudioModel("speak")!); + setAudioTaskBusy(false); + setAudioTaskStatus(null); + audioRequestIdRef.current = null; + setAudioInputLabel(null); + setTranscriptText(""); + setTranscriptChunks([]); + setTimestampsEnabled(false); + setSpeakText(""); + setSelectedVoice( + getFallbackAudioModel("speak")!.runtime.defaultVoice ?? 
"default", + ); + setSpeakSpeed(1); + setGeneratedAudioUrl(null); + setGeneratedAudioDurationSec(null); + setWorkspace("chat"); + setAudioView("overview"); + setSettingsOpen(false); + setDataOpen(false); clearLightweightAppState(); }; + const workspaceSwitchDisabled = isGenerating || audioTaskBusy || isRecording; + if (booting) { return ( -
+ { + if (nextWorkspace === "audio") { + switchToAudioWorkspace(audioView); + return; + } + + switchToChatWorkspace(); + }} + onOpenSettings={openSettings} + onOpenData={openData} + >
-
-
+
); } @@ -1212,102 +2065,227 @@ function App() { activeThreadModelWithCompatibility?.id ? appState : "loading"; + const audioAppState = + selectedModelWithCompatibility?.id === activeAudioModelWithCompatibility.id + ? appState + : "loading"; + const layoutMode: LayoutMode = + settingsOpen || dataOpen + ? "workspace" + : workspace === "audio" + ? audioView === "overview" + ? "landing" + : "workspace" + : activeThreadModelWithCompatibility + ? "workspace" + : "landing"; return ( <> - {activeThreadModelWithCompatibility ? ( - { - void createNewThread(); - }} - onSelectThread={(threadId) => { - void openThread(threadId); - }} - onDeleteThread={(threadId) => { - void deleteThread(threadId); - }} - onChangeModel={() => openPicker("curated")} - onOpenSettings={() => setSettingsOpen(true)} - onInputChange={handleInputChange} - onSubmit={handleSubmit} - onComposerKeyDown={handleComposerKeyDown} - onFileChange={handleFileChange} - onRemoveAttachment={clearDraftAttachment} - onChatScroll={handleChatScroll} - onStopGeneration={handleStopGeneration} - stopRequested={stopRequested} - /> - ) : ( - - )} + { + if (nextWorkspace === "audio") { + switchToAudioWorkspace(audioView); + return; + } + + switchToChatWorkspace(); + }} + onOpenSettings={openSettings} + onOpenData={openData} + > + {settingsOpen ? ( + + ) : dataOpen ? ( + { + void clearAllChats(); + }} + onClearAllData={() => { + void clearAllData(); + }} + onClearAllDownloadedModels={() => { + setChatPersistence( + chatPersistenceStatus, + getDefaultStorageMessage(chatPersistenceStatus), + ); + }} + /> + ) : workspace === "audio" ? ( + audioView === "overview" ? 
( + switchToAudioWorkspace("transcribe")} + onTrySpeak={() => switchToAudioWorkspace("speak")} + onSelectChatModel={requestModelLoad} + onSelectTranscribeModel={(model) => { + void requestAudioModelLoad(model, "transcribe"); + }} + onSelectSpeakModel={(model) => { + void requestAudioModelLoad(model, "speak"); + }} + /> + ) : ( + openPicker("curated")} + onStartRecording={() => { + void handleStartRecording(); + }} + onStopRecording={handleStopRecording} + onBrowseAudio={handleBrowseAudio} + onAudioFileChange={handleAudioFileChange} + onToggleTimestamps={setTimestampsEnabled} + onCopyTranscript={handleCopyTranscript} + onDownloadTranscript={handleDownloadTranscript} + onUseInSpeak={handleUseInSpeak} + onSpeakTextChange={setSpeakText} + onVoiceChange={setSelectedVoice} + onSpeedChange={setSpeakSpeed} + onGenerateSpeech={handleGenerateSpeech} + onDownloadAudio={() => { + void handleDownloadAudio(); + }} + /> + ) + ) : activeThreadModelWithCompatibility ? ( + { + void createNewThread(); + }} + onSelectThread={(threadId) => { + void openThread(threadId); + }} + onDeleteThread={(threadId) => { + void deleteThread(threadId); + }} + onChangeModel={() => openPicker("curated")} + onInputChange={handleInputChange} + onSubmit={handleSubmit} + onComposerKeyDown={handleComposerKeyDown} + onFileChange={handleFileChange} + onRemoveAttachment={clearDraftAttachment} + onChatScroll={handleChatScroll} + onStopGeneration={handleStopGeneration} + stopRequested={stopRequested} + /> + ) : ( + switchToAudioWorkspace("transcribe")} + onTrySpeak={() => switchToAudioWorkspace("speak")} + onSelectChatModel={requestModelLoad} + onSelectTranscribeModel={(model) => { + void requestAudioModelLoad(model, "transcribe"); + }} + onSelectSpeakModel={(model) => { + void requestAudioModelLoad(model, "speak"); + }} + /> + )} + setPickerOpen(false)} onTabChange={setPickerTab} onSearchQueryChange={setSearchQuery} onToggleFilter={toggleSearchFilter} - onLoadModel={requestModelLoad} - /> - - 
setSettingsOpen(false)} - onSave={saveSettings} - onClearChatHistory={() => { - void clearAllChats(); - }} - onClearAllData={() => { - void clearAllData(); - }} - onClearAllDownloadedModels={() => { - setChatPersistence( - chatPersistenceStatus, - getDefaultStorageMessage(chatPersistenceStatus), - ); - }} /> - {pendingModel && (
Change Model

Start a new chat with this model?

- Switching models starts a new conversation with{" "} - {pendingModel.label}. + Choose whether {pendingModel.label} should keep + this conversation going or start a separate thread.

+ + + + +
+ + + + + GitHub + +
+ + +
+
{children}
+
+ + +
+
/**
 * Formats a duration in seconds as a short label.
 *
 * - Durations that display below one minute render with one decimal place,
 *   e.g. `12.3s`.
 * - Longer durations render as `m:ss`, e.g. `2:05`.
 *
 * @param seconds Duration in seconds, or `null` when no duration is known.
 * @returns The formatted label, or `null` when the input is `null`, `0`,
 *   `NaN`, or not finite.
 */
const formatDuration = (seconds: number | null): string | null => {
  // `!seconds` covers null and 0; the finite check also rejects NaN and
  // Infinity, which would otherwise render as "Infinity:NaN".
  if (!seconds || !Number.isFinite(seconds)) {
    return null;
  }

  if (seconds < 60) {
    const shortLabel = seconds.toFixed(1);
    // Values such as 59.96 round up to "60.0"; show those in m:ss form instead.
    if (shortLabel !== "60.0") {
      return `${shortLabel}s`;
    }
  }

  // Round the total first so the seconds part can never become 60
  // (119.6s used to render as "1:60").
  const totalSeconds = Math.round(seconds);
  const minutes = Math.floor(totalSeconds / 60);
  const remainder = totalSeconds % 60;
  return `${minutes}:${String(remainder).padStart(2, "0")}`;
};
appState, + progress, + error, + taskBusy, + taskStatus, + isRecording, + recordingLevels, + recordingDurationMs, + audioInputLabel, + transcriptText, + transcriptChunks, + showTimestamps, + timestampsEnabled, + speakText, + selectedVoice, + speakSpeed, + audioUrl, + audioDurationSec, + audioUploadRef, + onSwitchTab, + onChangeModel, + onStartRecording, + onStopRecording, + onBrowseAudio, + onAudioFileChange, + onToggleTimestamps, + onCopyTranscript, + onDownloadTranscript, + onUseInSpeak, + onSpeakTextChange, + onVoiceChange, + onSpeedChange, + onGenerateSpeech, + onDownloadAudio, +}: AudioScreenProps) { + const interactionLocked = taskBusy || isRecording; + const canGenerateSpeech = + appState === "ready" && !taskBusy && !isRecording && speakText.trim().length > 0; + const modelStatus = error + ? { + label: "Failed", + className: "model-switcher-status model-switcher-status-error", + } + : appState === "ready" + ? { + label: interactionLocked ? "Running" : "Live", + className: "model-switcher-status model-switcher-status-live", + } + : progress?.loaded && progress.total + ? { + label: "Loading", + className: "model-switcher-status model-switcher-status-loading", + } + : { + label: "Preparing", + className: "model-switcher-status model-switcher-status-loading", + }; + const durationLabel = formatDuration(audioDurationSec); + const recordingDurationLabel = formatDuration(recordingDurationMs / 1000); + const progressLabel = + typeof progress?.progress === "number" + ? `${Math.round(progress.progress)}%` + : null; + const isTranscriptionProcessing = taskBusy && activeTab === "transcribe"; + const isSpeechGenerating = taskBusy && activeTab === "speak"; + const transcribePlaceholder = isRecording + ? "Recording in progress. Stop recording to generate the transcript." + : taskBusy + ? progressLabel + ? `${progressLabel} downloaded` + : "Transcription is processing. The transcript will appear here once it finishes." + : taskStatus ?? "No transcript yet. 
Record audio or upload a file to get started."; + const speechOutputPlaceholder = taskBusy + ? "Speech generation is in progress. Your audio preview will appear here when it is ready." + : "No generated speech yet. Paste some text and generate audio."; + const generateSpeechLabel = taskBusy ? "Generating..." : "Generate Speech"; + const waveformHeightForLevel = (level: number) => + `${Math.min(100, Math.max(24, Math.round(level * 180)))}%`; + + return ( +
+
+
+
+

+ Audio Workspace +

+

{activeTab === "transcribe" ? "Transcribe" : "Speak"}

+

+ {activeTab === "transcribe" + ? "Record or upload audio and turn it into text in your browser." + : "Paste text, pick a voice, and generate speech locally."} +

+
+ +
+
+ +
+
+
+ +
+ + +
+ +
+ {error &&

{error}

} + + {activeTab === "transcribe" ? ( +
+
+
+
+

Input

+

Audio input

+
+
+ {isRecording ? ( + + ) : ( + <> + + + + )} +
+
+ +
+
+ {isRecording ? ( +
+
+ + + + {recordingDurationLabel ?? "0.0s"} + +
+ + + +

+ The live meter reacts to your microphone input while the + recorder is active. +

+
+ ) : audioInputLabel ? ( +
+ Audio source ready + + Record again or upload a different file whenever you + want to replace the current source. + +
+ ) : ( +
+ No audio source selected yet. Record with the microphone + or upload a file to begin. +
+ )} +
+ + + + {selectedModel.runtime.supportsTimestamps && ( + + )} +
+
+ +
+
+
+

Output

+

Audio to text

+
+
+ + +
+
+ +
+ {isRecording || isTranscriptionProcessing ? ( +
+ + ) : ( +