Skip to content

Commit 7d07c63

Browse files
Copilot and dwjohnston committed
Replace deprecated escape() with proper UTF-8 decoding using TextDecoder
The decodeURIComponent(escape(atob(...))) pattern works but relies on the deprecated escape() function. This commit replaces it with the modern TextDecoder API, which properly handles UTF-8 multi-byte characters such as emojis.

Why the deprecated pattern worked:
- atob() decodes base64 but treats each byte as a Latin-1 character
- escape() percent-encodes the resulting mis-decoded string
- decodeURIComponent() interprets the percent-encoded bytes as UTF-8

Modern solution:
- atob() decodes base64 to a binary string
- Convert the binary string to a Uint8Array byte array
- TextDecoder interprets the bytes as UTF-8

Added comprehensive tests verifying that both patterns give identical results.

Co-authored-by: dwjohnston <2467377+dwjohnston@users.noreply.github.com>
1 parent 98216cf commit 7d07c63

2 files changed

Lines changed: 137 additions & 1 deletion

File tree

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import { describe, it, expect } from 'vitest';
2+
3+
/**
 * Test-local copy of the `decodeBase64WithUTF8` helper from defaultFunctions.ts.
 *
 * Decodes a base64 string whose underlying bytes are UTF-8 text. `atob()` alone
 * yields one character per byte, which garbles multi-byte characters such as
 * emojis, so the bytes are re-interpreted with `TextDecoder`.
 *
 * The decoder uses its default options: malformed byte sequences become U+FFFD
 * instead of throwing, and a leading UTF-8 byte-order mark is dropped.
 *
 * NOTE(review): because this is a copy, these tests keep passing even if the
 * real helper changes — consider exporting the helper and importing it here.
 *
 * @param base64 - Base64 text; any whitespace (e.g. line breaks) is ignored.
 * @returns The decoded text.
 * @throws If `atob()` rejects the input as invalid base64.
 */
function decodeBase64WithUTF8(base64: string): string {
  const cleanedBase64 = base64.replace(/\s/g, '');
  const binaryString = atob(cleanedBase64);
  // Each character of the binary string holds exactly one byte value (0-255).
  const bytes = Uint8Array.from(binaryString, (char) => char.charCodeAt(0));
  return new TextDecoder('utf-8').decode(bytes);
}
16+
17+
describe('Base64 UTF-8 Decoding', () => {
  /**
   * Builds a fixture: the base64 encoding of `text`'s UTF-8 bytes, i.e. the
   * kind of input `decodeBase64WithUTF8` is expected to reverse.
   */
  const encodeUtf8AsBase64 = (text: string): string => {
    const utf8Bytes = new TextEncoder().encode(text);
    // btoa() needs a "binary string": one character (0-255) per byte.
    // Mapping byte-by-byte avoids spreading the whole array into one call.
    const binaryString = Array.from(utf8Bytes, (byte) => String.fromCharCode(byte)).join('');
    return btoa(binaryString);
  };

  it('should correctly decode base64 with ASCII text', () => {
    // Simple ASCII text
    const text = 'Hello World';

    const result = decodeBase64WithUTF8(encodeUtf8AsBase64(text));
    expect(result).toBe(text);
  });

  it('should correctly decode base64 with emoji', () => {
    // Text with emoji - the actual issue from GitHub
    const text = '// 👇 Check this out';

    const result = decodeBase64WithUTF8(encodeUtf8AsBase64(text));
    expect(result).toBe(text);
    expect(result).toContain('👇');
  });

  it('should correctly decode base64 with pointing up emoji', () => {
    // The specific emoji mentioned in the user's comment
    const text = '☝️';

    const result = decodeBase64WithUTF8(encodeUtf8AsBase64(text));
    expect(result).toBe(text);
  });

  it('should correctly decode base64 with multiple emojis', () => {
    const text = '👍 👎 😕 ❤️ 🎉';

    const result = decodeBase64WithUTF8(encodeUtf8AsBase64(text));
    expect(result).toBe(text);
  });

  it('should correctly decode base64 with mixed content', () => {
    const text = 'export function Example() {\n // 👇 This is a comment\n return <div>Hello</div>;\n}';

    const result = decodeBase64WithUTF8(encodeUtf8AsBase64(text));
    expect(result).toBe(text);
    expect(result).toContain('👇');
  });

  it('should handle base64 with whitespace (as GitHub API might return)', () => {
    const text = 'Test 👍';
    const base64 = encodeUtf8AsBase64(text);
    // Insert a line break inside the payload; the decoder must ignore it.
    const base64WithWhitespace = base64.slice(0, 5) + '\n' + base64.slice(5);

    const result = decodeBase64WithUTF8(base64WithWhitespace);
    expect(result).toBe(text);
  });

  it('should match the deprecated escape/unescape pattern behavior', () => {
    // Verify our solution gives the same result as the deprecated method
    const text = '// 👇 This is a comment with emoji';
    const base64 = encodeUtf8AsBase64(text);

    const modernResult = decodeBase64WithUTF8(base64);

    // The deprecated pattern: decodeURIComponent(escape(atob(base64)))
    // escape() converts the incorrectly-decoded UTF-8 bytes to percent-encoding
    // decodeURIComponent() then interprets those percent-encoded bytes as UTF-8
    const deprecatedResult = decodeURIComponent(escape(atob(base64)));

    expect(modernResult).toBe(deprecatedResult);
    expect(modernResult).toBe(text);
  });
});

src/library/config/defaultFunctions.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,34 @@ import { parseGithubIssueLink, parseGithubPermalinkUrl } from "../utils/urlParse
33
import { GithubPermalinkDataResponse } from "./GithubPermalinkContext";
44
import { ErrorResponses } from "./GithubPermalinkContext";
55

6+
/**
 * Properly decode a base64 string with UTF-8 support.
 * GitHub API returns base64-encoded content that may contain UTF-8 characters like emojis.
 *
 * The issue: atob() decodes base64 to a binary string, but treats each byte as a Latin-1 character.
 * For UTF-8 multi-byte characters (like emojis), this corrupts the data.
 *
 * The solution: convert the binary string to a byte array, then use TextDecoder to
 * interpret those bytes as UTF-8.
 *
 * The decoder uses its default options: malformed byte sequences become U+FFFD
 * instead of throwing, and a leading UTF-8 byte-order mark is dropped.
 *
 * @param base64 - Base64 text; any whitespace (e.g. line breaks) is ignored.
 * @returns The decoded text.
 * @throws If `atob()` rejects the input as invalid base64.
 */
function decodeBase64WithUTF8(base64: string): string {
  // Remove whitespace that GitHub API might include
  const cleanedBase64 = base64.replace(/\s/g, '');

  // Decode base64 to binary string (each character represents a byte)
  const binaryString = atob(cleanedBase64);

  // Convert binary string to byte array: one character -> one byte value (0-255)
  const bytes = Uint8Array.from(binaryString, (char) => char.charCodeAt(0));

  // Decode UTF-8 bytes to string
  return new TextDecoder('utf-8').decode(bytes);
}
33+
634

735
export async function defaultGetIssueFn(issueLink: string, githubToken?: string, onError?: (err: unknown) => void): Promise<GithubIssueLinkDataResponse> {
836
const config = parseGithubIssueLink(issueLink);
@@ -61,7 +89,7 @@ export async function defaultGetIssueFn(issueLink: string, githubToken?: string,
6189
}
6290

6391
const [contentJson, commitJson] = await Promise.all([contentResult.json(), commitResult.json()]);
64-
const content = decodeURIComponent(escape(atob(contentJson.content)));
92+
const content = decodeBase64WithUTF8(contentJson.content);
6593
const lines = content.split("\n");
6694

6795
return {

0 commit comments

Comments
 (0)