diff --git a/.changeset/fix-subtext-codeblocks.md b/.changeset/fix-subtext-codeblocks.md new file mode 100644 index 000000000..0fb07c8a2 --- /dev/null +++ b/.changeset/fix-subtext-codeblocks.md @@ -0,0 +1,5 @@ +--- +default: patch +--- + +Fix small text being parsed in code blocks and not being escapable. diff --git a/src/app/plugins/markdown/bidirectional.test.ts b/src/app/plugins/markdown/bidirectional.test.ts index 216ad9d05..56c036121 100644 --- a/src/app/plugins/markdown/bidirectional.test.ts +++ b/src/app/plugins/markdown/bidirectional.test.ts @@ -107,6 +107,16 @@ describe('bidirectional round-trip', () => { expect(result).toContain('||hidden message||'); }); + it('round-trips literal line-start -# (escaped) in a paragraph', () => { + const markdown = '\\-# not small text'; + const html = markdownToHtml(markdown); + expect(html).not.toContain(' { const markdown = '$E = mc^2$'; const html = markdownToHtml(markdown); diff --git a/src/app/plugins/markdown/extensions/matrix-math.ts b/src/app/plugins/markdown/extensions/matrix-math.ts index 42abbae67..da94783fa 100644 --- a/src/app/plugins/markdown/extensions/matrix-math.ts +++ b/src/app/plugins/markdown/extensions/matrix-math.ts @@ -3,6 +3,22 @@ import type { TokenizerExtension, RendererExtension } from 'marked'; /** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). {@link shieldDollarRunsForMarked} uses U+E021–U+E022. */ export const MATH_CODE_DOLLAR_MASK = '\uE020'; +/** + * Replaces the `-` of line-start `-# …` inside markdown code so the Matrix subscript block + * extension does not match before marked's `fences` rule (custom block extensions run first). + * {@link unmaskSubscriptCodeLinePlaceholders} restores output HTML. 
+ */ +export const SUBSCRIPT_CODE_LINE_MASK = '\uE023'; + +function maskSubscriptLineStartsInCodeInner(inner: string): string { + return inner.replace(/(^|\n)-#( +)/g, `$1${SUBSCRIPT_CODE_LINE_MASK}#$2`); +} + +/** Applies {@link MATH_CODE_DOLLAR_MASK} and subscript masking inside a fence or inline-code region. */ +function maskMathAndSubscriptInCodeInner(inner: string): string { + return maskSubscriptLineStartsInCodeInner(inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK)); +} + function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number { let j = from; while (j < md.length && md[j] !== '\n') { @@ -62,11 +78,11 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } | const close = findMultilineFenceEnd(md, contentStart, tick, openLen); if (!close) { const inner = md.slice(contentStart, md.length); - const masked = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK); + const masked = maskMathAndSubscriptInCodeInner(inner); return { text: md.slice(i, contentStart) + masked, end: md.length }; } const inner = md.slice(contentStart, close.contentEnd); - const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK); + const maskedInner = maskMathAndSubscriptInCodeInner(inner); return { text: md.slice(i, contentStart) + maskedInner + md.slice(close.contentEnd, close.blockEnd), end: close.blockEnd, @@ -80,7 +96,7 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } | while (closeIdx + closeRun < md.length && md[closeIdx + closeRun] === tick) closeRun++; const inner = md.slice(afterOpen, closeIdx); - const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK); + const maskedInner = maskMathAndSubscriptInCodeInner(inner); return { text: md.slice(i, afterOpen) + maskedInner + md.slice(closeIdx, closeIdx + closeRun), end: closeIdx + closeRun, @@ -99,7 +115,7 @@ function tryConsumeInlineCode(md: string, i: number): { text: string; end: numbe while (j + cr < md.length && md[j + cr] === '`') cr++; 
if (cr === run) { const inner = md.slice(contentStart, j); - const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK); + const maskedInner = maskMathAndSubscriptInCodeInner(inner); return { text: md.slice(i, contentStart) + maskedInner + md.slice(j, j + run), end: j + run, @@ -155,6 +171,10 @@ export function unmaskMathCodeDollarPlaceholders(html: string): string { return html.replaceAll(MATH_CODE_DOLLAR_MASK, '$'); } +export function unmaskSubscriptCodeLinePlaceholders(html: string): string { + return html.replaceAll(`${SUBSCRIPT_CODE_LINE_MASK}#`, '-#'); +} + const MARKED_MATH_BLOCK_SHIELD = '\uE021'; const MARKED_MATH_BLOCK_SHIELD_END = '\uE022'; diff --git a/src/app/plugins/markdown/extensions/matrix-subscript.ts b/src/app/plugins/markdown/extensions/matrix-subscript.ts index 922f9377d..9a804f888 100644 --- a/src/app/plugins/markdown/extensions/matrix-subscript.ts +++ b/src/app/plugins/markdown/extensions/matrix-subscript.ts @@ -4,25 +4,24 @@ import type { TokenizerExtension, RendererExtension, Tokens } from 'marked'; export const matrixSubscriptExtension = { name: 'subscript', level: 'block', - start(src: string) { - return src.indexOf('-#'); - }, tokenizer( - this: { lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void } }, + this: { + lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void }; + }, src: string ) { - const match = /^-# +(.+)/.exec(src); - if (match) { - const token = { - type: 'subscript', - raw: match[0], - text: match[1], - tokens: [] as Tokens.Generic[], - }; - this.lexer.inlineTokens(token.text!, token.tokens); - return token; + const match = /^-# +([^\n]+)/.exec(src); + if (!match) { + return undefined; } - return undefined; + const token = { + type: 'subscript', + raw: match[0], + text: match[1], + tokens: [] as Tokens.Generic[], + }; + this.lexer.inlineTokens(token.text!, token.tokens); + return token; }, renderer( this: { parser: { parseInline: (tokens: Tokens.Generic[]) => string } }, diff --git 
a/src/app/plugins/markdown/markdownToHtml.test.ts b/src/app/plugins/markdown/markdownToHtml.test.ts index 0352b71b0..20ea8186e 100644 --- a/src/app/plugins/markdown/markdownToHtml.test.ts +++ b/src/app/plugins/markdown/markdownToHtml.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from 'vitest'; +import { htmlToMarkdown } from './htmlToMarkdown'; import { markdownToHtml } from './markdownToHtml'; describe('markdownToHtml', () => { @@ -89,6 +90,41 @@ describe('markdownToHtml', () => { expect(result).toContain('$$test$$'); }); + it('converts -# small/sub syntax outside code', () => { + const result = markdownToHtml('-# caption'); + expect(result).toContain(' { + expect(markdownToHtml('```\n-# not sub\n```')).not.toContain(' { + expect(markdownToHtml('`-# lit`')).not.toContain(' { + const html = markdownToHtml('-# caption\n```\nfenced\n```'); + expect(html).toContain('caption'); + expect(html).toContain('
');
+    expect(html).toContain('fenced');
+  });
+
+  it('does not parse escaped \\-# as small/sub', () => {
+    const result = markdownToHtml('\\-# literal caption');
+    expect(result).not.toContain(' {
+    expect(htmlToMarkdown('

-# plain words

')).toContain('\\-#'); + }); + it('converts block math syntax', () => { const result = markdownToHtml('$$\\frac{a}{b}$$'); expect(result).toContain('data-mx-maths'); diff --git a/src/app/plugins/markdown/markdownToHtml.ts b/src/app/plugins/markdown/markdownToHtml.ts index 8e4eb6d17..c1fe0bf07 100644 --- a/src/app/plugins/markdown/markdownToHtml.ts +++ b/src/app/plugins/markdown/markdownToHtml.ts @@ -7,6 +7,7 @@ import { maskDollarSignsInsideMarkdownCode, shieldDollarRunsForMarked, unmaskMathCodeDollarPlaceholders, + unmaskSubscriptCodeLinePlaceholders, } from './extensions/matrix-math'; import { matrixSubscriptExtension } from './extensions/matrix-subscript'; import { matrixEmoticonExtension, preprocessEmoticon } from './extensions/matrix-emoticon'; @@ -150,7 +151,7 @@ export function markdownToHtml(markdown: string): string { DOMPurify.removeHook('afterSanitizeAttributes'); - const unmasked = unmaskMathCodeDollarPlaceholders(sanitized); + const unmasked = unmaskSubscriptCodeLinePlaceholders(unmaskMathCodeDollarPlaceholders(sanitized)); // DOMPurify's Node/JSdom build can drop size attributes even when allowlisted. // For Matrix custom emojis, always emit a stable height so outgoing messages have diff --git a/src/app/plugins/markdown/utils.ts b/src/app/plugins/markdown/utils.ts index ddaf66a02..c6ea432a2 100644 --- a/src/app/plugins/markdown/utils.ts +++ b/src/app/plugins/markdown/utils.ts @@ -78,7 +78,12 @@ export const escapeMarkdownInlineSequences = (text: string): string => { (t) => t ); - return parts.join(''); + let out = parts.join(''); + out = out.replace( + /(^|\n)-(# +)/gm, + (_, lineStart: string, hashSp: string) => `${lineStart}\\-${hashSp}` + ); + return out; }; /**