Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-subtext-codeblocks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
default: patch
---

Fix small text being parsed in code blocks and not being escapable.
10 changes: 10 additions & 0 deletions src/app/plugins/markdown/bidirectional.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ describe('bidirectional round-trip', () => {
expect(result).toContain('||hidden message||');
});

it('round-trips literal line-start -# (escaped) in a paragraph', () => {
const markdown = '\\-# not small text';
const html = markdownToHtml(markdown);
expect(html).not.toContain('<sub');
const injected = injectDataMd(html);
const result = htmlToMarkdown(injected);
expect(result).toContain('\\-#');
expect(result).toContain('not small text');
});

it('round-trips inline math', () => {
const markdown = '$E = mc^2$';
const html = markdownToHtml(markdown);
Expand Down
28 changes: 24 additions & 4 deletions src/app/plugins/markdown/extensions/matrix-math.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@ import type { TokenizerExtension, RendererExtension } from 'marked';
/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). {@link shieldDollarRunsForMarked} uses U+E021–U+E022. */
export const MATH_CODE_DOLLAR_MASK = '\uE020';

/**
 * Private-use placeholder that stands in for the leading `-` of a line-start `-# …`
 * found inside markdown code. The substitution exists so the Matrix subscript block
 * extension does not match before marked's `fences` rule (custom block extensions run first).
 * {@link unmaskSubscriptCodeLinePlaceholders} converts the placeholder back in the output HTML.
 */
export const SUBSCRIPT_CODE_LINE_MASK = '\uE023';

/**
 * Masks each `-#` that opens a line of `inner` and is followed by at least one space.
 *
 * "Opens a line" means the very start of `inner` or the position right after a `\n`.
 * Only the `-` is swapped for {@link SUBSCRIPT_CODE_LINE_MASK}; the `#` and the run of
 * spaces after it are kept unchanged.
 *
 * @param inner - Text taken from inside a code region.
 * @returns `inner` with the matching dashes replaced by the mask character.
 */
function maskSubscriptLineStartsInCodeInner(inner: string): string {
  const lineStartSubscript = /(^|\n)-#( +)/g;
  return inner.replace(
    lineStartSubscript,
    (_whole: string, lead: string, gap: string) => `${lead}${SUBSCRIPT_CODE_LINE_MASK}#${gap}`
  );
}

/**
 * Masks the text of a fence or inline-code region in two passes: every `$` is first
 * replaced with {@link MATH_CODE_DOLLAR_MASK}, then the result is handed to
 * `maskSubscriptLineStartsInCodeInner` for subscript masking.
 *
 * @param inner - Text between the opening and closing code delimiters.
 * @returns The masked text.
 */
function maskMathAndSubscriptInCodeInner(inner: string): string {
  const dollarsMasked = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
  return maskSubscriptLineStartsInCodeInner(dollarsMasked);
}

function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
let j = from;
while (j < md.length && md[j] !== '\n') {
Expand Down Expand Up @@ -62,11 +78,11 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } |
const close = findMultilineFenceEnd(md, contentStart, tick, openLen);
if (!close) {
const inner = md.slice(contentStart, md.length);
const masked = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
const masked = maskMathAndSubscriptInCodeInner(inner);
return { text: md.slice(i, contentStart) + masked, end: md.length };
}
const inner = md.slice(contentStart, close.contentEnd);
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
return {
text: md.slice(i, contentStart) + maskedInner + md.slice(close.contentEnd, close.blockEnd),
end: close.blockEnd,
Expand All @@ -80,7 +96,7 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } |
while (closeIdx + closeRun < md.length && md[closeIdx + closeRun] === tick) closeRun++;

const inner = md.slice(afterOpen, closeIdx);
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
return {
text: md.slice(i, afterOpen) + maskedInner + md.slice(closeIdx, closeIdx + closeRun),
end: closeIdx + closeRun,
Expand All @@ -99,7 +115,7 @@ function tryConsumeInlineCode(md: string, i: number): { text: string; end: numbe
while (j + cr < md.length && md[j + cr] === '`') cr++;
if (cr === run) {
const inner = md.slice(contentStart, j);
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
return {
text: md.slice(i, contentStart) + maskedInner + md.slice(j, j + run),
end: j + run,
Expand Down Expand Up @@ -155,6 +171,10 @@ export function unmaskMathCodeDollarPlaceholders(html: string): string {
return html.replaceAll(MATH_CODE_DOLLAR_MASK, '$');
}

/**
 * Reverses the subscript masking in rendered output: every occurrence of
 * {@link SUBSCRIPT_CODE_LINE_MASK} immediately followed by `#` becomes the literal `-#`.
 *
 * @param html - HTML that may still contain masked `-#` markers.
 * @returns `html` with each masked marker replaced by `-#`.
 */
export function unmaskSubscriptCodeLinePlaceholders(html: string): string {
  const maskedMarker = `${SUBSCRIPT_CODE_LINE_MASK}#`;
  // Splitting on the marker and re-joining with `-#` substitutes every occurrence.
  return html.split(maskedMarker).join('-#');
}

const MARKED_MATH_BLOCK_SHIELD = '\uE021';
const MARKED_MATH_BLOCK_SHIELD_END = '\uE022';

Expand Down
29 changes: 14 additions & 15 deletions src/app/plugins/markdown/extensions/matrix-subscript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,24 @@ import type { TokenizerExtension, RendererExtension, Tokens } from 'marked';
export const matrixSubscriptExtension = {
name: 'subscript',
level: 'block',
start(src: string) {
return src.indexOf('-#');
},
tokenizer(
this: { lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void } },
this: {
lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void };
},
src: string
) {
const match = /^-# +(.+)/.exec(src);
if (match) {
const token = {
type: 'subscript',
raw: match[0],
text: match[1],
tokens: [] as Tokens.Generic[],
};
this.lexer.inlineTokens(token.text!, token.tokens);
return token;
const match = /^-# +([^\n]+)/.exec(src);
if (!match) {
return undefined;
}
return undefined;
const token = {
type: 'subscript',
raw: match[0],
text: match[1],
tokens: [] as Tokens.Generic[],
};
this.lexer.inlineTokens(token.text!, token.tokens);
return token;
},
renderer(
this: { parser: { parseInline: (tokens: Tokens.Generic[]) => string } },
Expand Down
36 changes: 36 additions & 0 deletions src/app/plugins/markdown/markdownToHtml.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { describe, expect, it } from 'vitest';
import { htmlToMarkdown } from './htmlToMarkdown';
import { markdownToHtml } from './markdownToHtml';

describe('markdownToHtml', () => {
Expand Down Expand Up @@ -89,6 +90,41 @@ describe('markdownToHtml', () => {
expect(result).toContain('$$test$$');
});

it('converts -# small/sub syntax outside code', () => {
const result = markdownToHtml('-# caption');
expect(result).toContain('<sub');
expect(result).toContain('data-md="-#"');
expect(result).toContain('caption');
});

it('does not parse -# inside fenced code as subscript', () => {
expect(markdownToHtml('```\n-# not sub\n```')).not.toContain('<sub');
expect(markdownToHtml('```\n-# not sub\n```')).toContain('-# not sub');
});

it('does not parse -# inside inline code as subscript', () => {
expect(markdownToHtml('`-# lit`')).not.toContain('<sub');
expect(markdownToHtml('`-# lit`')).toContain('-# lit');
});

it('parses -# as single-line only so fenced code below stays code', () => {
const html = markdownToHtml('-# caption\n```\nfenced\n```');
expect(html).toContain('caption');
expect(html).toContain('<pre>');
expect(html).toContain('fenced');
});

it('does not parse escaped \\-# as small/sub', () => {
const result = markdownToHtml('\\-# literal caption');
expect(result).not.toContain('<sub');
expect(result).not.toContain('data-md="-#"');
expect(result).toContain('literal caption');
});

it('escapes literal -# when converting paragraph HTML to markdown', () => {
expect(htmlToMarkdown('<p>-# plain words</p>')).toContain('\\-#');
});

it('converts block math syntax', () => {
const result = markdownToHtml('$$\\frac{a}{b}$$');
expect(result).toContain('data-mx-maths');
Expand Down
3 changes: 2 additions & 1 deletion src/app/plugins/markdown/markdownToHtml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
maskDollarSignsInsideMarkdownCode,
shieldDollarRunsForMarked,
unmaskMathCodeDollarPlaceholders,
unmaskSubscriptCodeLinePlaceholders,
} from './extensions/matrix-math';
import { matrixSubscriptExtension } from './extensions/matrix-subscript';
import { matrixEmoticonExtension, preprocessEmoticon } from './extensions/matrix-emoticon';
Expand Down Expand Up @@ -150,7 +151,7 @@ export function markdownToHtml(markdown: string): string {

DOMPurify.removeHook('afterSanitizeAttributes');

const unmasked = unmaskMathCodeDollarPlaceholders(sanitized);
const unmasked = unmaskSubscriptCodeLinePlaceholders(unmaskMathCodeDollarPlaceholders(sanitized));

// DOMPurify's Node/JSdom build can drop <img> size attributes even when allowlisted.
// For Matrix custom emojis, always emit a stable height so outgoing messages have
Expand Down
7 changes: 6 additions & 1 deletion src/app/plugins/markdown/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ export const escapeMarkdownInlineSequences = (text: string): string => {
(t) => t
);

return parts.join('');
let out = parts.join('');
out = out.replace(
/(^|\n)-(# +)/gm,
(_, lineStart: string, hashSp: string) => `${lineStart}\\-${hashSp}`
);
return out;
};

/**
Expand Down
Loading