|
| 1 | +{ |
| 2 | + "title": "Voice Chatbot", |
| 3 | + "description": "In this coding challenge, I build a conversational voice chatbot entirely in the browser with p5.js. I combine three pieces: speech-to-text with OpenAI's Whisper model, text-to-speech with Kokoro TTS, and a \"brain\" for the bot. I also explore the transformers.js pipeline API and the Web Audio API. For the bot's brain, I start with a simple ELIZA-style therapist, then incorporate a RiveScript number-guessing game, and finally a local LLM.", |
| 4 | + "videoNumber": "188", |
| 5 | + "videoId": "s2jm2Z22ibA", |
| 6 | + "date": "2026-04-27", |
| 7 | + "nebulaSlug": "codingtrain-coding-challenge-188-voice-chatbot", |
| 8 | + "languages": ["JavaScript", "p5.js"], |
| 9 | + "topics": ["text-to-speech", "speech-to-text", "chatbot", "Rivescript", "LLMS", "Agents", "AI", "transformers.js", "Web Audio API"], |
| 10 | + "canContribute": true, |
| 11 | + "relatedChallenges": ["42-markov-chain-name-generator", "43-context-free-grammar", "80-voice-chatbot-with-p5speech"], |
| 12 | + "timestamps": [ |
| 13 | + { "time": "0:00:00", "title": "Hello!" }, |
| 14 | + { "time": "0:00:35", "title": "Mapping out the pieces: speech-to-text, text-to-speech, and the brain" }, |
| 15 | + { "time": "0:01:07", "title": "Thoughts on AI and creative exploration" }, |
| 16 | + { "time": "0:02:44", "title": "Choosing the tools: Whisper and Kokoro TTS" }, |
| 17 | + { "time": "0:04:06", "title": "Building a push-to-talk UI in p5.js" }, |
| 18 | + { "time": "0:04:51", "title": "Finding models on Hugging Face with Transformers.js" }, |
| 19 | + { "time": "0:05:36", "title": "About the Whisper model and model cards" }, |
| 20 | + { "time": "0:06:55", "title": "Loading the Whisper pipeline in p5.js" }, |
| 21 | + { "time": "0:09:04", "title": "Accessing the microphone with getUserMedia" }, |
| 22 | + { "time": "0:10:44", "title": "Capturing audio with MediaRecorder" }, |
| 23 | + { "time": "0:12:05", "title": "Processing audio chunks into a waveform" }, |
| 24 | + { "time": "0:15:55", "title": "Speech-to-text working!" }, |
| 25 | + { "time": "0:16:36", "title": "Building the chatbot brain (ELIZA-style therapist)" }, |
| 26 | + { "time": "0:18:50", "title": "Setting up Kokoro TTS for text-to-speech" }, |
| 27 | + { "time": "0:21:07", "title": "Playing synthesized audio with AudioBufferSource" }, |
| 28 | + { "time": "0:23:41", "title": "Text-to-speech working!" }, |
| 29 | + { "time": "0:25:32", "title": "Handling playback events" }, |
| 30 | + { "time": "0:26:56", "title": "Swapping in a RiveScript number-guessing brain" }, |
| 31 | + { "time": "0:31:22", "title": "Adding a language model (SmolLM2) as the brain" }, |
| 32 | + { "time": "0:38:33", "title": "Final demo: the random number chatbot" }, |
| 33 | + { "time": "0:39:03", "title": "Goodbye!" } |
| 34 | + ], |
| 35 | + "codeExamples": [ |
| 36 | + { |
| 37 | + "title": "LLM Chatbot", |
| 38 | + "description": "The final voice chatbot, prompted to reply only with random numbers.", |
| 39 | + "image": "img.jpg", |
| 40 | + "urls": { |
| 41 | + "p5": "https://editor.p5js.org/codingtrain/sketches/RHhT9I4Nm" |
| 42 | + } |
| 43 | + }, |
| 44 | + { |
| 45 | + "title": "Number Guessing Bot", |
| 46 | + "image": "number_bot.jpg", |
| 47 | + "description": "Voice chatbot that uses RiveScript to play a number-guessing game.", |
| 48 | + "urls": { |
| 49 | + "p5": "https://editor.p5js.org/codingtrain/sketches/AJw7zMN9q" |
| 50 | + } |
| 51 | + }, |
| 52 | + { |
| 53 | + "title": "Therapy Bot", |
| 54 | + "description": "Starter voice chatbot with an ELIZA-style therapist brain.", |
| 55 | + "image": "img.jpg", |
| 56 | + "urls": { |
| 57 | + "p5": "https://editor.p5js.org/codingtrain/sketches/37LFEPUVV" |
| 58 | + } |
| 59 | + }, |
| 60 | + { |
| 61 | + "title": "Model Loading Bars", |
| 62 | + "description": "The voice chatbot that displays loading bars for the models.", |
| 63 | + "image": "loading_bars.jpg", |
| 64 | + "urls": { |
| 65 | + "p5": "https://editor.p5js.org/codingtrain/sketches/E9Ob3x8eJ" |
| 66 | + } |
| 67 | + }, |
| 68 | + { |
| 69 | + "title": "Waveform of Recording", |
| 70 | + "description": "The sketch graphs a waveform from recorded mic input.", |
| 71 | + "image": "waveform.jpg", |
| 72 | + "urls": { |
| 73 | + "p5": "https://editor.p5js.org/codingtrain/sketches/cck49wDub" |
| 74 | + } |
| 75 | + }, |
| 76 | + { |
| 77 | + "title": "Real Time Waveform", |
| 78 | + "description": "The sketch graphs a waveform from mic input in real time.", |
| 79 | + "image": "realtime.jpg", |
| 80 | + "urls": { |
| 81 | + "p5": "https://editor.p5js.org/codingtrain/sketches/aaRIT-x6a" |
| 82 | + } |
| 83 | + } |
| 84 | + ], |
| 85 | + "groupLinks": [ |
| 86 | + { |
| 87 | + "title": "References", |
| 88 | + "links": [ |
| 89 | + { |
| 90 | + "icon": "📓", |
| 91 | + "title": "p5.2 Reference", |
| 92 | + "url": "https://beta.p5js.org", |
| 93 | + "description": "Reference page for the beta version of p5.js 2.0." |
| 94 | + }, |
| 95 | + { |
| 96 | + "icon": "📓", |
| 97 | + "title": "Introducing Whisper", |
| 98 | + "url": "https://cdn.openai.com/papers/whisper.pdf", |
| 99 | + "description": "Paper introducing the Whisper model." |
| 100 | + }, |
| 101 | + { |
| 102 | + "icon": "📓", |
| 103 | + "title": "Model Cards for Model Reporting", |
| 104 | + "url": "https://arxiv.org/abs/1810.03993", |
| 105 | + "description": "In this paper, the authors propose a framework for model cards to encourage transparency on model reporting." |
| 106 | + }, |
| 107 | + { |
| 108 | + "icon": "📓", |
| 109 | + "title": "Open Neural Network Exchange", |
| 110 | + "url": "https://onnx.ai", |
| 111 | + "description": "ONNX facilitates greater interoperability in the AI tools community." |
| 112 | + }, |
| 113 | + { |
| 114 | + "icon": "📓", |
| 115 | + "title": "Onnx-community Whisper-tiny.en model", |
| 116 | + "url": "https://huggingface.co/onnx-community/whisper-tiny.en", |
| 117 | + "description": "Model card for Whisper-tiny.en." |
| 118 | + }, |
| 119 | + { |
| 120 | + "icon": "📓", |
| 121 | + "title": "Xenova", |
| 122 | + "url": "https://github.com/xenova", |
| 123 | + "description": "GitHub profile of Joshua Lochner, the creator of Transformers.js." |
| 124 | + }, |
| 125 | + { |
| 126 | + "icon": "📓", |
| 127 | + "title": "Transformers.js", |
| 128 | + "url": "https://huggingface.co/docs/transformers.js/installation", |
| 129 | + "description": "Installation page for Transformers.js." |
| 130 | + }, |
| 131 | + { |
| 132 | + "icon": "📓", |
| 133 | + "title": "Announcing the new p5.sound.js library!", |
| 134 | + "url": "https://medium.com/processing-foundation/announcing-the-new-p5-sound-js-library-42efc154bed0", |
| 135 | + "description": "Article discussing the latest version of p5.sound.js." |
| 136 | + }, |
| 137 | + { |
| 138 | + "icon": "📓", |
| 139 | + "title": "getUserMedia() documentation", |
| 140 | + "url": "https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia", |
| 141 | + "description": "Documentation about getting the sound from the mic." |
| 142 | + }, |
| 143 | + { |
| 144 | + "icon": "📓", |
| 145 | + "title": "MediaRecorder() documentation", |
| 146 | + "url": "https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder", |
| 147 | + "description": "Documentation about the MediaRecorder object." |
| 148 | + }, |
| 149 | + { |
| 150 | + "icon": "📓", |
| 151 | + "title": "Kokoro Repo", |
| 152 | + "url": "https://github.com/hexgrad/kokoro", |
| 153 | + "description": "GitHub repo for the Kokoro text-to-speech model." |
| 154 | + }, |
| 155 | + { |
| 156 | + "icon": "📓", |
| 157 | + "title": "KokoroTTS Model", |
| 158 | + "url": "https://huggingface.co/hexgrad/Kokoro-82M", |
| 159 | + "description": "Model card for the Kokoro-82M model on Hugging Face." |
| 160 | + }, |
| 161 | + { |
| 162 | + "icon": "📓", |
| 163 | + "title": "ELIZA", |
| 164 | + "url": "https://en.wikipedia.org/wiki/ELIZA", |
| 165 | + "description": "Wikipedia article about the early natural language processing program ELIZA." |
| 166 | + }, |
| 167 | + { |
| 168 | + "icon": "📓", |
| 169 | + "title": "RiveScript", |
| 170 | + "url": "https://www.rivescript.com", |
| 171 | + "description": "RiveScript is a simple scripting language for chatbots." |
| 172 | + }, |
| 173 | + { |
| 174 | + "icon": "📓", |
| 175 | + "title": "SmolLM3", |
| 176 | + "url": "https://huggingface.co/HuggingFaceTB/SmolLM3-3B", |
| 177 | + "description": "Model card for SmolLM3." |
| 178 | + }, |
| 179 | + { |
| 180 | + "icon": "📓", |
| 181 | + "title": "Running models on WebGPU", |
| 182 | + "url": "https://huggingface.co/docs/transformers.js/guides/webgpu", |
| 183 | + "description": "Documentation about running models on WebGPU." |
| 184 | + }, |
| 185 | + { |
| 186 | + "icon": "📓", |
| 187 | + "title": "Using quantized models (dtypes)", |
| 188 | + "url": "https://huggingface.co/docs/transformers.js/v3.8.1/guides/dtypes", |
| 189 | + "description": "Documentation about available quantization options." |
| 190 | + } |
| 191 | + ] |
| 192 | + }, |
| 193 | + |
| 194 | + { |
| 195 | + "title": "Videos", |
| 196 | + "links": [ |
| 197 | + { |
| 198 | + "icon": "🚂", |
| 199 | + "title": "p5.js 2.0 async and await", |
| 200 | + "url": "/tracks/p5js-2.0/p5js-2.0/async-await", |
| 201 | + "description": "This video discusses loading data with async and await." |
| 202 | + }, |
| 203 | + { |
| 204 | + "icon": "🚂", |
| 205 | + "title": "What is Transformers.js", |
| 206 | + "url": "/tracks/transformers-js/transformers-js/introduction", |
| 207 | + "description": "In this video, I cover what Transformers.js is, how to load it into a p5.js sketch, explain the pipeline API, and demonstrate sentiment analysis and language detection examples." |
| 208 | + } |
| 209 | + ] |
| 210 | + }, |
| 211 | + { |
| 212 | + "title": "Live Stream Archives", |
| 213 | + "links": [ |
| 214 | + { |
| 215 | + "icon": "🔴", |
| 216 | + "title": "Full Live Stream", |
| 217 | + "url": "https://www.youtube.com/watch?v=KRDJAHArqaw", |
| 218 | + "description": "Complete unedited live stream showing the development process with all mistakes and problem-solving." |
| 219 | + } |
| 220 | + ] |
| 221 | + } |
| 222 | + ], |
| 223 | + "credits": [ |
| 224 | + { "title": "Editing", "name": "Mathieu Blanchette" }, |
| 225 | + { "title": "Animations", "name": "Jason Heglund" } |
| 226 | + ] |
| 227 | +} |
0 commit comments