Skip to content

Commit 770c07b

Browse files
heiskr and Copilot authored
🌎 Fix dangling heading/blockquote/bold markers in translated docs (#61107)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent de43007 commit 770c07b

2 files changed

Lines changed: 247 additions & 0 deletions

File tree

src/languages/lib/correct-translation-content.ts

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,17 @@ export function correctTranslatedContentStrings(
6767
content = content.replace(/^([ \t]*)\* ?\n[ \t]+/gm, '$1* ')
6868
content = content.replace(/^\|[ \t]*\n[ \t]+/gm, '| ')
6969

70+
// The same translator wrapping habit also strands heading markers
71+
// (`#`/`##`/...), blockquote markers (`>`), and the opening `**` of a
72+
// bold span on their own line, with the actual content pushed to the
73+
// next line as deeply indented text. This breaks heading/blockquote/
74+
// bold rendering and leaves Liquid tags and `[AUTOTITLE]` links
75+
// unexpanded. Rejoin them. Fence- and frontmatter-aware so we don't
76+
// disturb fenced markdown examples or YAML frontmatter.
77+
// ~3k headings, ~1.6k blockquotes, ~3.5k bold-after-marker cases
78+
// measured across all eight translated languages.
79+
content = joinDanglingMarkers(content)
80+
7081
// --- Per-language fixes (es, ja, pt, zh, ru, fr, ko, de) ---
7182

7283
if (context.code === 'es') {
@@ -2042,3 +2053,113 @@ export function correctTranslatedContentStrings(
20422053

20432054
return content
20442055
}
2056+
2057+
/**
 * Rejoin marker lines that the translation pipeline split from their content.
 *
 * Translators sometimes leave a heading marker (`#`/`##`/...), blockquote
 * marker (`>`), or the opening `**` of a bold span (immediately following a
 * list/heading/blockquote/table marker) on its own line, with the rest of
 * the content pushed to the next line as deeply indented text. This breaks
 * rendering (empty headings, broken blockquotes, unrendered bold, unexpanded
 * Liquid and `[AUTOTITLE]` links).
 *
 * Conservative thresholds:
 * - Marker line has 0–3 leading spaces (CommonMark heading/blockquote rule).
 * - Continuation line has 6+ leading spaces (avoids 4-space indented code).
 * - Marker line contains *only* the marker (and optional trailing whitespace).
 * - Skip fenced code blocks (``` and ~~~) and YAML frontmatter (`---`...`---`).
 *
 * Fence tracking follows the CommonMark rules that matter for staying in sync
 * with the renderer:
 * - A closing fence uses the same character as the opener, is at least as
 *   long, and is followed by nothing but whitespace. A line such as `~~~js`
 *   inside a `~~~` block is content, not a closer.
 * - A backtick fence cannot have a backtick in its info string, so a line
 *   that merely starts with an inline code span made of three backticks does
 *   not open a fence.
 *
 * @param content - Full document text with `\n` line separators.
 * @returns The text with each dangling marker merged with the line after it;
 *   all other lines are returned unchanged and in their original order.
 */
function joinDanglingMarkers(content: string): string {
  const lines = content.split('\n')
  const out: string[] = []
  let inFence = false
  let fenceChar = ''
  let fenceLen = 0
  // Frontmatter is only recognised when the very first line is exactly `---`.
  let inFrontmatter = lines[0] === '---'

  // Marker-only line patterns (run only against non-fenced, non-frontmatter lines).
  const headingOnly = /^([ \t]{0,3})(#{1,6})[ \t]*$/
  const blockquoteOnly = /^([ \t]{0,3}>)[ \t]*$/
  // Bold-open after a list/heading/blockquote/table marker (no other content).
  const markerThenBoldOnly =
    /^([ \t]{0,3}(?:[*+-]|\d+\.)[ \t]+|[ \t]{0,3}>[ \t]+|[ \t]{0,3}#{1,6}[ \t]+|\|[ \t]*)\*\*[ \t]*$/
  // Continuation: 6+ leading spaces and at least one non-whitespace character.
  const deepIndented = /^[ \t]{6,}(\S.*)$/
  // Candidate fence line: 0–3 leading spaces, then a run of 3+ ` or ~.
  const fenceStart = /^[ \t]{0,3}(`{3,}|~{3,})/

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i]

    // YAML frontmatter close: `---` or `...` after the opening `---`.
    if (inFrontmatter && i > 0 && (line === '---' || line === '...')) {
      inFrontmatter = false
      out.push(line)
      continue
    }

    // While inside frontmatter, pass lines through verbatim. Crucially,
    // do NOT run fence detection here — a frontmatter line starting with
    // ``` or ~~~ (e.g. inside a multiline scalar) would otherwise toggle
    // `inFence` and cause the rest of the document after frontmatter
    // closes to be (mis-)treated as inside a fence.
    if (inFrontmatter) {
      out.push(line)
      continue
    }

    const fenceMatch = line.match(fenceStart)

    if (inFence) {
      // Everything inside a fence is emitted verbatim. The fence only ends
      // on a line made of the same fence character, at least as long as the
      // opener, with nothing but whitespace after it.
      if (fenceMatch) {
        const marker = fenceMatch[1]
        const trailing = line.slice(fenceMatch[0].length)
        if (marker[0] === fenceChar && marker.length >= fenceLen && trailing.trim() === '') {
          inFence = false
          fenceChar = ''
          fenceLen = 0
        }
      }
      out.push(line)
      continue
    }

    if (fenceMatch) {
      const marker = fenceMatch[1]
      const infoString = line.slice(fenceMatch[0].length)
      // A backtick run followed by text that contains another backtick is an
      // inline code span, not a fence opener; treat it as an ordinary line.
      const isOpener = marker[0] !== '`' || !infoString.includes('`')
      if (isOpener) {
        inFence = true
        fenceChar = marker[0]
        fenceLen = marker.length
        out.push(line)
        continue
      }
    }

    // Only lines directly followed by a deeply indented, non-blank line are
    // candidates for joining.
    const next = i + 1 < lines.length ? lines[i + 1] : undefined
    const nextDeep = next !== undefined ? next.match(deepIndented) : null
    if (!nextDeep) {
      out.push(line)
      continue
    }
    const nextContent = nextDeep[1]

    const heading = line.match(headingOnly)
    if (heading) {
      out.push(`${heading[1]}${heading[2]} ${nextContent}`)
      i++ // the continuation line has been consumed
      continue
    }

    const bq = line.match(blockquoteOnly)
    if (bq) {
      out.push(`${bq[1]} ${nextContent}`)
      i++
      continue
    }

    const boldOpen = line.match(markerThenBoldOnly)
    if (boldOpen) {
      // No space is inserted: the text must directly follow `**` to render bold.
      out.push(`${boldOpen[1]}**${nextContent}`)
      i++
      continue
    }

    out.push(line)
  }

  return out.join('\n')
}

src/languages/tests/correct-translation-content.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,6 +1597,132 @@ describe('correctTranslatedContentStrings', () => {
15971597
// Valid table rows are not modified
15981598
expect(fix('| a | b |\n| c | d |', 'es')).toBe('| a | b |\n| c | d |')
15991599
})
1600+
1601+
test('rejoins dangling heading markers (all languages)', () => {
1602+
const broken = '### \n {% data variables.product.github %} の使用'
1603+
const expected = '### {% data variables.product.github %} の使用'
1604+
for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) {
1605+
expect(fix(broken, lang)).toBe(expected)
1606+
}
1607+
// All heading levels
1608+
expect(fix('# \n Title', 'ja')).toBe('# Title')
1609+
expect(fix('###### \n Title', 'ja')).toBe('###### Title')
1610+
// 0–3 leading spaces are accepted
1611+
expect(fix(' ### \n Title', 'ja')).toBe(' ### Title')
1612+
// Valid headings are not modified
1613+
expect(fix('### Already correct', 'ja')).toBe('### Already correct')
1614+
// 4-space indented heading-like text is not collapsed (looks like code)
1615+
expect(fix(' ###\n code', 'ja')).toBe(' ###\n code')
1616+
// Shallow next-line indent (<6) is not collapsed
1617+
expect(fix('### \n Title', 'ja')).toBe('### \n Title')
1618+
})
1619+
1620+
test('rejoins dangling blockquote markers (all languages)', () => {
1621+
const broken = '> \n {% data variables.product.github %} は preview 中です。'
1622+
const expected = '> {% data variables.product.github %} は preview 中です。'
1623+
for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) {
1624+
expect(fix(broken, lang)).toBe(expected)
1625+
}
1626+
// 0–3 leading spaces are accepted
1627+
expect(fix(' > \n Quote', 'ja')).toBe(' > Quote')
1628+
// Valid blockquotes are not modified
1629+
expect(fix('> Already correct', 'ja')).toBe('> Already correct')
1630+
expect(fix('>\n> Continued blockquote', 'ja')).toBe('>\n> Continued blockquote')
1631+
})
1632+
1633+
test('rejoins dangling bold-open after a marker (all languages)', () => {
1634+
const broken =
1635+
'* **\n {% data variables.product.prodname_copilot_short %}へのアクセス**。 More text'
1636+
const expected =
1637+
'* **{% data variables.product.prodname_copilot_short %}へのアクセス**。 More text'
1638+
for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) {
1639+
expect(fix(broken, lang)).toBe(expected)
1640+
}
1641+
// Numbered list marker
1642+
expect(fix('1. **\n Important**: text', 'ja')).toBe('1. **Important**: text')
1643+
// Heading marker
1644+
expect(fix('### **\n Bold heading**', 'ja')).toBe('### **Bold heading**')
1645+
// Blockquote marker
1646+
expect(fix('> **\n Quoted bold**', 'ja')).toBe('> **Quoted bold**')
1647+
// Table cell
1648+
expect(fix('| **\n Cell bold** | x', 'ja')).toBe('| **Cell bold** | x')
1649+
// Bare `**` (no preceding marker) is not collapsed — could be a closing
1650+
// bold marker followed by legitimate indented continuation.
1651+
expect(fix('**\n text', 'ja')).toBe('**\n text')
1652+
})
1653+
1654+
test('does not modify content inside fenced code blocks', () => {
1655+
// Markdown example inside ```md fence should be preserved verbatim
1656+
const fenced = '```md\n### \n Heading example\n```'
1657+
expect(fix(fenced, 'ja')).toBe(fenced)
1658+
// Tilde fences are also respected
1659+
const tilde = '~~~md\n> \n Quote example\n~~~'
1660+
expect(fix(tilde, 'ja')).toBe(tilde)
1661+
// Bold-open inside code fence
1662+
const boldFenced = '```md\n* **\n bold example**\n```'
1663+
expect(fix(boldFenced, 'ja')).toBe(boldFenced)
1664+
})
1665+
1666+
test('does not modify YAML frontmatter', () => {
1667+
// Multiline YAML scalars and indented values must not be joined
1668+
const fm = `---
1669+
title: Example
1670+
intro: >
1671+
Multiline
1672+
continued
1673+
versions:
1674+
fpt: '*'
1675+
---
1676+
1677+
###
1678+
Real heading after frontmatter`
1679+
const expected = `---
1680+
title: Example
1681+
intro: >
1682+
Multiline
1683+
continued
1684+
versions:
1685+
fpt: '*'
1686+
---
1687+
1688+
### Real heading after frontmatter`
1689+
expect(fix(fm, 'ja')).toBe(expected)
1690+
})
1691+
1692+
test('frontmatter containing fence-like characters does not break body fence tracking', () => {
1693+
// A multiline scalar in frontmatter that includes ``` (or ~~~) must
1694+
// NOT toggle the body's fence-tracking state. After frontmatter
1695+
// closes, dangling markers in the body should still be rejoined.
1696+
const fm = `---
1697+
title: Example
1698+
intro: |
1699+
\`\`\`
1700+
fence-like text inside frontmatter
1701+
\`\`\`
1702+
---
1703+
1704+
###
1705+
Real heading after frontmatter`
1706+
const expected = `---
1707+
title: Example
1708+
intro: |
1709+
\`\`\`
1710+
fence-like text inside frontmatter
1711+
\`\`\`
1712+
---
1713+
1714+
### Real heading after frontmatter`
1715+
expect(fix(fm, 'ja')).toBe(expected)
1716+
})
1717+
1718+
test('does not collapse nested-list indented code blocks', () => {
1719+
// A list item followed by blank line + 6-space-indented "code" should
1720+
// be left alone because the marker line itself is empty (not a
1721+
// bare `>`/`#`/`* **` form), and the previous content line is not
1722+
// a heading/blockquote/bold-open marker.
1723+
const nested = '1. Run this command:\n\n gh auth login'
1724+
expect(fix(nested, 'ja')).toBe(nested)
1725+
})
16001726
})
16011727

16021728
// ─── EDGE CASES ────────────────────────────────────────────────────

0 commit comments

Comments
 (0)