// Matches a `node_modules` (or `node_modules.asar`) path segment, with an optional leading separator.
const NODE_MODULES_REGEX = /[\\\/]?(node_modules|node_modules\.asar)[\\\/]/;
// Matches file-path-looking text: optional `file://`, optional drive or root separator,
// one or more `segment<sep>` parts and a final segment. Every match is non-empty.
const FILE_REGEX_PATTERN = /(file:\/\/)?([a-zA-Z]:(\\\\|\\|\/)|(\\\\|\\|\/))?([\w-\._]+(\\\\|\\|\/))+[\w-\._]+/g;

// Default cleanup patterns to preserve common system/library paths
const DEFAULT_CLEANUP_PATTERNS: readonly RegExp[] = [
  /java\.\S*/, // Java standard library and related packages (any non-whitespace chars)
];

// Upper bound on matches collected per cleanup pattern and per line.
const MAX_CLEANUP_MATCHES = 300;

// Text substituted for an anonymized path (empty: the path is simply removed).
const PATH_REPLACEMENT = '';

// Half-open character range [start, end) inside a line.
type PreservedRange = readonly [start: number, end: number];

/**
 * Cleans a given stack of possible paths.
 *
 * The stack is processed line by line. On each line, text that looks like a file path is
 * removed unless it overlaps a cleanup-pattern match (kept whole) or contains a
 * `node_modules` segment (kept from that segment onward).
 *
 * NOTE: a cleanup pattern that matches any part of a path preserves the whole path,
 * including leading directories, so keep patterns as specific as possible.
 *
 * @param stack The stack to sanitize
 * @param cleanupPatterns Cleanup patterns to preserve (paths matching these patterns won't be anonymized).
 *                        If not provided, uses default patterns for common system/library paths.
 * @returns The cleaned stack
 */
export function anonymizeFilePaths(stack: string, cleanupPatterns: readonly RegExp[] = DEFAULT_CLEANUP_PATTERNS): string {
  // Fast check to see if it is a file path to avoid doing unnecessary heavy regex work
  if (!stack || (!stack.includes('/') && !stack.includes('\\'))) {
    return stack;
  }

  return stack
    .split('\n')
    .map(line => anonymizeFilePathsInLine(line, cleanupPatterns))
    .join('\n');
}

/**
 * Collects the ranges of a line matched by the cleanup patterns.
 *
 * A pattern that throws, matches more than MAX_CLEANUP_MATCHES times, or keeps matching at
 * the same position (empty match) is reported with console.warn and contributes no ranges.
 *
 * @param line The line to scan
 * @param cleanupPatterns Cleanup patterns to look for
 * @returns The [start, end) ranges of all matches of the patterns that completed
 */
function collectPreservedRanges(line: string, cleanupPatterns: readonly RegExp[]): PreservedRange[] {
  const ranges: PreservedRange[] = [];

  for (const regexp of cleanupPatterns) {
    try {
      // Work on a private global copy so the caller's lastIndex is never touched.
      // Only append 'g' when missing: duplicated flags make the RegExp constructor throw,
      // which would silently disable a pattern the caller already declared global.
      const flags = regexp.flags.includes('g') ? regexp.flags : `${regexp.flags}g`;
      const pattern = new RegExp(regexp.source, flags);

      const patternRanges: PreservedRange[] = [];
      let previousStart: number | undefined;
      let patternFailed = false;
      let match: RegExpExecArray | null;

      while ((match = pattern.exec(line)) !== null) {
        // Guard against infinite loops from bad regex patterns
        if (patternRanges.length >= MAX_CLEANUP_MATCHES) {
          console.warn(`Warning: Cleanup pattern ${regexp.source} exceeded ${MAX_CLEANUP_MATCHES} iterations, breaking to prevent infinite loop`);
          patternFailed = true;
          break;
        }

        // An empty match does not advance lastIndex, so the same position would repeat forever
        if (match.index === previousStart) {
          console.warn(`Warning: Cleanup pattern ${regexp.source} is stuck at position ${match.index}, breaking to prevent infinite loop`);
          patternFailed = true;
          break;
        }

        patternRanges.push([match.index, pattern.lastIndex]);
        previousStart = match.index;
      }

      // Only keep matches of patterns that ran to completion
      if (!patternFailed) {
        ranges.push(...patternRanges);
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      console.warn(`Warning: Invalid cleanup pattern ${regexp.source}: ${errorMessage}`);
      // Continue with other patterns
    }
  }

  return ranges;
}

/**
 * Anonymizes file paths in a single line
 * @param line The line to sanitize
 * @param cleanupPatterns Cleanup patterns to preserve
 * @returns The cleaned line
 */
function anonymizeFilePathsInLine(line: string, cleanupPatterns: readonly RegExp[]): string {
  // Fast check to see if it is a file path to avoid doing unnecessary heavy regex work
  if (!line || (!line.includes('/') && !line.includes('\\'))) {
    return line;
  }

  const preservedRanges = collectPreservedRanges(line, cleanupPatterns);

  // Fresh copy per call so the shared pattern's lastIndex is never mutated
  const fileRegex = new RegExp(FILE_REGEX_PATTERN);
  let updatedLine = '';
  let lastIndex = 0;
  let result: RegExpExecArray | null;

  // No iteration cap here: every match of FILE_REGEX_PATTERN is non-empty, so exec always
  // advances. Stopping early would copy the remaining paths into the output unredacted.
  while ((result = fileRegex.exec(line)) !== null) {
    const path = result[0];
    const pathStart = result.index;
    const pathEnd = fileRegex.lastIndex;
    const textBefore = line.substring(lastIndex, pathStart);
    const nodeModulesMatch = NODE_MODULES_REGEX.exec(path);

    if (nodeModulesMatch) {
      // Keep everything from the node_modules segment on; drop the user part before it (if any)
      updatedLine += nodeModulesMatch.index === 0
        ? textBefore + path
        : textBefore + PATH_REPLACEMENT + path.substring(nodeModulesMatch.index);
    } else if (preservedRanges.some(([start, end]) => pathStart < end && start < pathEnd)) {
      // The path overlaps a cleanup pattern match: preserve the entire path
      updatedLine += textBefore + path;
    } else {
      // Anonymize user file paths that don't match cleanup patterns or node_modules
      updatedLine += textBefore + PATH_REPLACEMENT;
    }

    lastIndex = pathEnd;
  }

  return updatedLine + line.substring(lastIndex);
}