Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,8 @@ The speechmarkdown-js library ships dedicated formatters for every major provide

### Usage

Note: ElevenLabs audio tags such as `[sarcastically]` are preserved only when the target engine is ElevenLabs; for every other engine they are removed from the text before conversion when `useSpeechMarkdown` is enabled.

```typescript
// Use Speech Markdown with any engine
const markdown =
Expand Down
11 changes: 11 additions & 0 deletions src/__tests__/speech-markdown-converter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ describe("SpeechMarkdown", () => {
expect(googleResult).toContain("<break");
expect(microsoftResult).toContain("<break");
});

// Audio tags are only meaningful to ElevenLabs, so neither the bracketed tag
// nor its bare word may show up in SSML produced for another platform.
it("should strip ElevenLabs audio tags for non-ElevenLabs platforms", async () => {
  const input = "Hello [sarcastically] world";
  const ssml = await SpeechMarkdown.toSSML(input, "amazon-alexa");
  for (const fragment of ["[sarcastically]", "sarcastically"]) {
    expect(ssml).not.toContain(fragment);
  }
});
});

describe("isSpeechMarkdown", () => {
Expand Down Expand Up @@ -62,6 +69,10 @@ describe("SpeechMarkdown", () => {
expect(SpeechMarkdown.isSpeechMarkdown("Hello (loud)[volume:\"loud\"] world")).toBe(true);
});

// A bracketed audio tag on its own is enough for the text to count as Speech Markdown.
it("should detect ElevenLabs audio tags", () => {
  const taggedText = "Hello [sarcastically] world";
  const detected = SpeechMarkdown.isSpeechMarkdown(taggedText);
  expect(detected).toBe(true);
});

it("should return false for plain text", () => {
expect(SpeechMarkdown.isSpeechMarkdown("Hello world")).toBe(false);
});
Expand Down
15 changes: 12 additions & 3 deletions src/markdown/converter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ function convertSpeechMarkdownFallback(markdown: string): string {
return out;
}

/**
 * Matches a bracketed run of letters, whitespace and hyphens that starts with
 * a letter, e.g. `[sarcastically]` or `[laughs softly]`.
 *
 * Bracket groups containing digits, colons or quotes (`[500ms]`,
 * `[break:"weak"]`) never match.
 */
const ELEVENLABS_AUDIO_TAG_PATTERN = /\[[A-Za-z][A-Za-z\s-]*\]/g;

/**
 * Removes standalone ElevenLabs audio tags from `text`.
 *
 * A bracket group that immediately follows `)` or `#` is left in place: those
 * positions are where Speech Markdown puts value-less modifiers
 * (`(secret)[whisper]`) and section markers (`#[dj]`), and stripping them would
 * corrupt the markup for the platforms that are supposed to interpret it.
 *
 * Surrounding whitespace is not touched, so removing a tag between two spaces
 * leaves both spaces behind.
 *
 * @param text - Raw input that may contain audio tags.
 * @returns The input with every standalone audio tag removed.
 */
function stripElevenLabsAudioTags(text: string): string {
  return text.replace(ELEVENLABS_AUDIO_TAG_PATTERN, (tag: string, offset: number) => {
    // charAt yields "" for offset 0, which correctly counts as "standalone".
    const previous = text.charAt(offset - 1);
    return previous === ")" || previous === "#" ? tag : "";
  });
}

/**
* SpeechMarkdownConverter class for converting Speech Markdown to SSML
*/
Expand Down Expand Up @@ -148,19 +154,21 @@ export class SpeechMarkdownConverter {
* @returns SSML text
*/
async toSSML(markdown: string, platform = "amazon-alexa"): Promise<string> {
// Bracketed audio tags are kept only when the platform is "elevenlabs";
// for any other platform they are stripped before conversion.
const normalized = platform === "elevenlabs" ? markdown : stripElevenLabsAudioTags(markdown);

// Speech Markdown disabled: clear the converter instance and return the
// fallback conversion wrapped in <speak>.
if (!isSpeechMarkdownEnabled()) {
this.speechMarkdownInstance = null;
// NOTE(review): the next two lines both declare `converted` (raw `markdown`
// vs. `normalized`); they look like the before/after sides of a diff —
// confirm that only the `normalized` variant should remain.
const converted = convertSpeechMarkdownFallback(markdown);
const converted = convertSpeechMarkdownFallback(normalized);
return `<speak>${converted}</speak>`;
}

// Attempt to initialize the full converter (no-op if disabled/unavailable)
await this.ensureInitialized();
if (this.speechMarkdownInstance) {
// NOTE(review): two consecutive returns — the second is unreachable as
// written; presumably the `markdown` one is the pre-change line. Confirm.
return this.speechMarkdownInstance.toSSML(markdown, { platform });
return this.speechMarkdownInstance.toSSML(normalized, { platform });
}
// Fallback: minimal conversion
// NOTE(review): duplicate `converted` declaration again (see above) — confirm
// which line is intended.
const converted = convertSpeechMarkdownFallback(markdown);
const converted = convertSpeechMarkdownFallback(normalized);
return `<speak>${converted}</speak>`;
}

Expand Down Expand Up @@ -223,6 +231,7 @@ export function isSpeechMarkdown(text: string): boolean {
const patterns = [
/\[\d+m?s\]/, // Breaks: [500ms]
/\[break:"[^"\]]+"\]/, // Breaks with quotes: [break:"weak"] or [break:"500ms"]
/\[[A-Za-z][A-Za-z\s-]*\]/, // ElevenLabs audio tags: [sarcastically]
/\+\+.*?\+\+/, // Strong emphasis: ++text++
/\+.*?\+/, // Moderate emphasis: +text+
/~.*?~/, // No emphasis: ~text~
Expand Down