diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 0eb4c188..d5d8550d 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -9,10 +9,8 @@ "version": "0.0.0", "dependencies": { "@reduxjs/toolkit": "^2.11.2", - "@types/dompurify": "^3.0.5", "@xyflow/react": "^12.8.3", "antd": "^5.27.0", - "dompurify": "^3.4.10", "i18next": "^25.8.0", "jssha": "^3.3.1", "jszip": "^3.10.1", @@ -2418,15 +2416,6 @@ "@types/ms": "*" } }, - "node_modules/@types/dompurify": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-3.0.5.tgz", - "integrity": "sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==", - "license": "MIT", - "dependencies": { - "@types/trusted-types": "*" - } - }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", @@ -2555,12 +2544,6 @@ "dev": true, "license": "MIT" }, - "node_modules/@types/trusted-types": { - "version": "2.0.7", - "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", - "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", - "license": "MIT" - }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", @@ -4005,15 +3988,6 @@ "csstype": "^3.0.2" } }, - "node_modules/dompurify": { - "version": "3.4.10", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.10.tgz", - "integrity": "sha512-0xzNv0e7oYC6yyuOGZIABPM4qtg3QxLFniDNPP4ZP90wR8Yq3zgwpRbrNiT4N3IKqDbbYFEJLV+JWEs19aZ//w==", - "license": "(MPL-2.0 OR Apache-2.0)", - "optionalDependencies": { - "@types/trusted-types": "^2.0.7" - } - }, "node_modules/duck": { "version": "0.1.12", "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz", diff --git a/frontend/package.json b/frontend/package.json index d66587a1..b3be598e 
100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -12,10 +12,8 @@ }, "dependencies": { "@reduxjs/toolkit": "^2.11.2", - "@types/dompurify": "^3.0.5", "@xyflow/react": "^12.8.3", "antd": "^5.27.0", - "dompurify": "^3.4.10", "i18next": "^25.8.0", "jssha": "^3.3.1", "jszip": "^3.10.1", diff --git a/frontend/src/components/file-preview/DocxPreview.tsx b/frontend/src/components/file-preview/DocxPreview.tsx index 316d3345..e688090c 100644 --- a/frontend/src/components/file-preview/DocxPreview.tsx +++ b/frontend/src/components/file-preview/DocxPreview.tsx @@ -1,6 +1,6 @@ import React, { useEffect, useState } from 'react'; -import DOMPurify from 'dompurify'; import mammoth from 'mammoth'; +import { sanitizeDocxHtml } from './docxSanitizer'; export interface DocxPreviewProps { blob?: Blob; @@ -9,7 +9,6 @@ export interface DocxPreviewProps { export const DocxPreview: React.FC = ({ blob, - fileName }) => { const [html, setHtml] = useState(''); const [loading, setLoading] = useState(true); @@ -29,16 +28,9 @@ export const DocxPreview: React.FC = ({ const arrayBuffer = await blob.arrayBuffer(); const result = await mammoth.convertToHtml({ arrayBuffer }); - // Sanitize HTML to prevent XSS from malicious DOCX files (FCE) - const sanitized = DOMPurify.sanitize(result.value, { - ALLOWED_TAGS: ['h1','h2','h3','h4','h5','h6','p','br','hr','ul','ol','li', - 'table','thead','tbody','tr','th','td','strong','em','b','i','u','s', - 'a','img','sup','sub','pre','code','blockquote','span','div'], - ALLOWED_ATTR: ['href','target','src','alt','width','height','colspan', - 'rowspan','style','class','id','data-*'], - }); + const sanitized = sanitizeDocxHtml(result.value); setHtml(sanitized); - } catch (err) { + } catch { setError('Failed to convert Word document'); } finally { setLoading(false); diff --git a/frontend/src/components/file-preview/docxSanitizer.ts b/frontend/src/components/file-preview/docxSanitizer.ts new file mode 100644 index 00000000..bdc510e7 --- /dev/null +++ 
// frontend/src/components/file-preview/docxSanitizer.ts
//
// Allow-list HTML sanitizer for the markup produced when a DOCX file is
// converted to HTML for preview. The input is treated as untrusted: the
// output tree is rebuilt from scratch with `document.createElement`, and
// only explicitly validated attributes are copied across.

/** Tags that may appear in the output. Any other tag is unwrapped (its sanitized children are kept). */
const ALLOWED_DOCX_TAGS: ReadonlySet<string> = new Set([
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  'p', 'br', 'hr', 'ul', 'ol', 'li',
  'table', 'thead', 'tbody', 'tr', 'th', 'td',
  'strong', 'em', 'b', 'i', 'u', 's',
  'a', 'img', 'sup', 'sub', 'pre', 'code', 'blockquote', 'span', 'div',
]);

/**
 * Tags that are removed together with everything inside them.
 *
 * Besides active/embedding content (script, iframe, form controls, svg, math, ...)
 * this also lists non-rendered or fallback containers (noscript, title, template,
 * xmp, ...). Without them the "unwrap unknown tags" rule would surface their inner
 * text in the preview.
 */
const DROP_WITH_CONTENT_TAGS: ReadonlySet<string> = new Set([
  'audio', 'base', 'button', 'canvas', 'embed', 'form', 'iframe', 'input',
  'link', 'math', 'meta', 'noembed', 'noframes', 'noscript', 'object',
  'plaintext', 'script', 'select', 'style', 'svg', 'template', 'textarea',
  'title', 'video', 'xmp',
]);

/** Allowed tags that never receive children in the output. */
const VOID_TAGS: ReadonlySet<string> = new Set(['br', 'hr', 'img']);

/** URL schemes accepted for `<a href>`. */
const SAFE_LINK_PROTOCOLS: ReadonlySet<string> = new Set(['http:', 'https:', 'mailto:', 'tel:']);
/** Only base64 data URIs of raster image types are accepted for `<img src>`. */
const SAFE_IMAGE_DATA_URI_PATTERN = /^data:image\/(?:png|jpe?g|gif|webp|bmp);base64,[a-z0-9+/=\s]+$/i;
/** In-document fragment links such as `#bookmark_1`. */
const SAFE_ANCHOR_PATTERN = /^#[a-z0-9_.:-]+$/i;
/** Integer 1..10000 without leading zeros (image width/height). */
const SAFE_DIMENSION_PATTERN = /^(?:[1-9]\d{0,3}|10000)$/;
/** Integer 1..100 without leading zeros (colspan/rowspan). */
const SAFE_SPAN_PATTERN = /^(?:[1-9]|[1-9]\d|100)$/;

/**
 * Validates a link target.
 *
 * @param rawHref - The `href` attribute value as read from the source element.
 * @returns The trimmed href when it is a fragment link or an absolute URL with
 *   an allowed scheme; otherwise `null` (including relative URLs, which
 *   `new URL` rejects without a base).
 */
function normalizeSafeLink(rawHref: string | null): string | null {
  const href = rawHref?.trim();
  if (!href) {
    return null;
  }
  if (SAFE_ANCHOR_PATTERN.test(href)) {
    return href;
  }

  try {
    return SAFE_LINK_PROTOCOLS.has(new URL(href).protocol) ? href : null;
  } catch {
    // Not parseable as an absolute URL.
    return null;
  }
}

/**
 * Validates an image source.
 *
 * @param rawSrc - The `src` attribute value as read from the source element.
 * @returns The trimmed value when it matches the raster data-URI pattern, else `null`.
 */
function normalizeSafeImageSource(rawSrc: string | null): string | null {
  const src = rawSrc?.trim();
  return src && SAFE_IMAGE_DATA_URI_PATTERN.test(src) ? src : null;
}

/** Copies `attrName` verbatim from `source` to `target` when it is present and non-empty. */
function setPlainTextAttribute(target: HTMLElement, source: Element, attrName: string): void {
  const value = source.getAttribute(attrName);
  if (value) {
    target.setAttribute(attrName, value);
  }
}

/** Copies `attrName` (trimmed) from `source` to `target` only when it matches `pattern`. */
function setNumericAttribute(
  target: HTMLElement,
  source: Element,
  attrName: string,
  pattern: RegExp,
): void {
  const value = source.getAttribute(attrName)?.trim();
  if (value && pattern.test(value)) {
    target.setAttribute(attrName, value);
  }
}

/**
 * Copies the validated subset of attributes for `tagName` onto `target`.
 *
 * @returns `false` when the element must be dropped entirely (an `<img>` whose
 *   source is not an accepted data URI); `true` otherwise.
 */
function copySafeAttributes(target: HTMLElement, source: Element, tagName: string): boolean {
  setPlainTextAttribute(target, source, 'title');

  if (tagName === 'a') {
    // An anchor with an unsafe href is kept, just without the href.
    const href = normalizeSafeLink(source.getAttribute('href'));
    if (href !== null) {
      target.setAttribute('href', href);
    }
  }

  if (tagName === 'img') {
    const src = normalizeSafeImageSource(source.getAttribute('src'));
    if (src === null) {
      return false;
    }
    target.setAttribute('src', src);
    setPlainTextAttribute(target, source, 'alt');
    setNumericAttribute(target, source, 'width', SAFE_DIMENSION_PATTERN);
    setNumericAttribute(target, source, 'height', SAFE_DIMENSION_PATTERN);
  }

  if (tagName === 'td' || tagName === 'th') {
    setNumericAttribute(target, source, 'colspan', SAFE_SPAN_PATTERN);
    setNumericAttribute(target, source, 'rowspan', SAFE_SPAN_PATTERN);
  }

  return true;
}

/**
 * Appends every node in `children` to `parent`, one call per node.
 *
 * Deliberately not `parent.append(...children)`: spreading a very large array
 * into call arguments can exceed the engine's argument limit and throw a
 * `RangeError`, which would fail the preview for very large documents.
 */
function appendAll(parent: Node, children: readonly Node[]): void {
  for (const child of children) {
    parent.appendChild(child);
  }
}

/**
 * Produces the sanitized replacement for a single source node.
 *
 * @returns Zero nodes (dropped), one node (text or rebuilt element), or many
 *   nodes (the sanitized children of an unwrapped, non-allowed element).
 */
function sanitizeNode(node: Node): Node[] {
  if (node.nodeType === Node.TEXT_NODE) {
    return [document.createTextNode(node.textContent ?? '')];
  }

  // Comments, processing instructions, etc. are discarded.
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return [];
  }

  const element = node as Element;
  const tagName = element.tagName.toLowerCase();
  if (DROP_WITH_CONTENT_TAGS.has(tagName)) {
    return [];
  }

  const sanitizedChildren = Array.from(element.childNodes).flatMap(sanitizeNode);
  if (!ALLOWED_DOCX_TAGS.has(tagName)) {
    // Unknown wrapper: keep its (already sanitized) content, lose the tag.
    return sanitizedChildren;
  }

  // Rebuild the element instead of cloning so no source attribute survives by default.
  const sanitizedElement = document.createElement(tagName);
  if (!copySafeAttributes(sanitizedElement, element, tagName)) {
    return [];
  }

  if (!VOID_TAGS.has(tagName)) {
    appendAll(sanitizedElement, sanitizedChildren);
  }

  return [sanitizedElement];
}

/**
 * Sanitizes HTML generated from a DOCX file so it can be injected into the page.
 *
 * The markup is parsed inside a `<template>` element, walked recursively, and
 * re-serialized from a freshly built tree containing only allow-listed tags
 * and validated attributes.
 *
 * Requires a DOM (`document`); it throws where none is available.
 *
 * @param html - Untrusted HTML string.
 * @returns The sanitized HTML string.
 */
export function sanitizeDocxHtml(html: string): string {
  const source = document.createElement('template');
  source.innerHTML = html;

  const container = document.createElement('div');
  for (const node of Array.from(source.content.childNodes)) {
    appendAll(container, sanitizeNode(node));
  }
  return container.innerHTML;
}