From ba7e19d75b797d00d62669e449739f0c9b9f1a24 Mon Sep 17 00:00:00 2001 From: Vishakh Date: Mon, 22 Jun 2026 15:04:13 -0400 Subject: [PATCH 1/3] Enhanced genotype file parsing support --- lib/genotype-parser.ts | 372 ++++++++++++++++++++--------------------- next-env.d.ts | 2 +- 2 files changed, 181 insertions(+), 193 deletions(-) diff --git a/lib/genotype-parser.ts b/lib/genotype-parser.ts index 9e2847e..1df0dc2 100644 --- a/lib/genotype-parser.ts +++ b/lib/genotype-parser.ts @@ -14,64 +14,68 @@ export type ParseResult = { detectedFormat?: 'monadic' | '23andme' | 'ancestrydna'; }; +// Chromosome 26 = mitochondrial in AncestryDNA exports. +const VALID_CHROMOSOMES = new Set([ + '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', + 'X', 'Y', 'MT', 'M', '26', +]); + +const VALID_BASES = new Set(['A', 'T', 'G', 'C', 'I', 'D', '0', '-']); + +function splitLines(content: string): string[] { + return content.split(/\r?\n/); +} + +function stripQuotes(value: string): string { + // Remove all quote characters — DNA field values (rsid, chr, position, allele) never contain quotes. 
+ return value.trim().replace(/"/g, ''); +} + export function parse23andMeFile(content: string): ParseResult { try { - const lines = content.split('\n'); + const lines = splitLines(content); const genotypeData: GenotypeData[] = []; let totalVariants = 0; let validVariants = 0; for (const line of lines) { const trimmedLine = line.trim(); - - // Skip empty lines and comments - if (!trimmedLine || trimmedLine.startsWith('#')) { - continue; - } + if (!trimmedLine || trimmedLine.startsWith('#')) continue; totalVariants++; const parts = trimmedLine.split(/\s+/); - // Expected format: rsid chromosome position genotype - if (parts.length !== 4) { - continue; - } + if (parts.length < 4) continue; - const [rsid, chromosome, positionStr, genotype] = parts; + const [rsid, chromosome, positionStr] = parts; + let genotype = parts[3]; - // Validate rsid format (should start with rs) - if (!rsid.startsWith('rs')) { - continue; + // Some providers (e.g. LivingDNA alt format) output allele1 allele2 as separate columns. + // Combine single-char alleles into a 2-char genotype. 
+ if (parts.length >= 5 && genotype.length === 1 && parts[4].length === 1) { + genotype = genotype + parts[4]; } - // Validate chromosome (1-22, X, Y, MT) - const validChromosomes = new Set(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']); - if (!validChromosomes.has(chromosome)) { - continue; - } + if (!rsid.startsWith('rs')) continue; + + if (!VALID_CHROMOSOMES.has(chromosome)) continue; - // Validate position (should be a positive integer) const position = parseInt(positionStr, 10); - if (!Number.isInteger(position) || position <= 0) { - continue; - } + if (!Number.isInteger(position) || position <= 0) continue; - // Validate genotype (should be 2 characters, A, T, G, C, I, D, or --) - const validBases = new Set(['A', 'T', 'G', 'C', 'I', 'D', '-']); - if (genotype.length !== 2 || - !validBases.has(genotype[0]) || - !validBases.has(genotype[1])) { + // Normalize no-calls + if (genotype === '--' || genotype === '-') { + genotypeData.push({ rsid, chromosome, position, genotype: '--' }); + validVariants++; continue; } - genotypeData.push({ - rsid, - chromosome, - position, - genotype, - }); + if (genotype.length !== 2) continue; + const validSnpBases = new Set(['A', 'T', 'G', 'C', 'I', 'D', '-']); + if (!validSnpBases.has(genotype[0]) || !validSnpBases.has(genotype[1])) continue; + genotypeData.push({ rsid, chromosome, position, genotype }); validVariants++; } @@ -82,81 +86,60 @@ export function parse23andMeFile(content: string): ParseResult { }; } - return { - success: true, - data: genotypeData, - totalVariants, - validVariants, - detectedFormat: '23andme', - }; + return { success: true, data: genotypeData, totalVariants, validVariants, detectedFormat: '23andme' }; } catch (error) { - return { - success: false, - error: `Failed to parse file: ${error instanceof Error ? 
error.message : 'Unknown error'}`, - }; + return { success: false, error: `Failed to parse file: ${error instanceof Error ? error.message : 'Unknown error'}` }; } } export function parseMonadicDNAFile(content: string): ParseResult { try { - const lines = content.split('\n'); + const lines = splitLines(content); const genotypeData: GenotypeData[] = []; let totalVariants = 0; let validVariants = 0; let headerFound = false; + let delimiter = ','; for (const line of lines) { const trimmedLine = line.trim(); + if (!trimmedLine || trimmedLine.startsWith('#')) continue; - // Skip empty lines - if (!trimmedLine) { - continue; - } - - // Check for header line - if (trimmedLine.toUpperCase().startsWith('RSID,CHROMOSOME,POSITION,RESULT')) { - headerFound = true; - continue; - } - - // Skip if we haven't found the header yet if (!headerFound) { + const upper = trimmedLine.toUpperCase().replace(/"/g, ''); + const normalized = upper.replace(/\s+/g, '\t'); + // Accept RSID,CHROMOSOME,POSITION,RESULT or RSID,CHROMOSOME,POSITION,GENOTYPE + // Also accept tab/space-separated generic format. + if ( + upper.startsWith('RSID,CHROMOSOME,POSITION,RESULT') || + upper.startsWith('RSID,CHROMOSOME,POSITION,GENOTYPE') || + normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE') || + normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT') + ) { + headerFound = true; + delimiter = trimmedLine.includes('\t') ? 
'\t' : ','; + continue; + } continue; } totalVariants++; - const parts = trimmedLine.split(','); + const parts = trimmedLine.split(delimiter).map(stripQuotes); - // Expected format: RSID,CHROMOSOME,POSITION,RESULT - if (parts.length !== 4) { - continue; - } + if (parts.length < 4) continue; const [rsid, chromosome, positionStr, genotype] = parts; - // Skip entries without valid rsid (must start with rs) - // GSA- and -Y- entries are internal IDs, not standard rsids - if (!rsid.startsWith('rs')) { - continue; - } + if (!rsid.startsWith('rs')) continue; - // Parse position (can be 0 for Monadic DNA files) const position = parseInt(positionStr, 10); - if (!Number.isInteger(position) || position < 0) { - continue; - } + if (!Number.isInteger(position) || position < 0) continue; - // Validate genotype (should be 2 characters: AA, TT, GG, CC, or --) - if (genotype.length !== 2) { - continue; - } + if (genotype.length !== 2) continue; const validBases = new Set(['A', 'T', 'G', 'C', '-']); - if (!validBases.has(genotype[0]) || !validBases.has(genotype[1])) { - continue; - } + if (!validBases.has(genotype[0]) || !validBases.has(genotype[1])) continue; - // Store the entry (chromosome can be '0' for Monadic DNA files) genotypeData.push({ rsid, chromosome: chromosome === '0' ? '0' : chromosome, @@ -168,109 +151,114 @@ export function parseMonadicDNAFile(content: string): ParseResult { } if (!headerFound) { - return { - success: false, - error: 'No valid Monadic DNA header found. Expected: RSID,CHROMOSOME,POSITION,RESULT', - }; + return { success: false, error: 'No valid Monadic DNA header found. Expected: RSID,CHROMOSOME,POSITION,RESULT' }; } if (validVariants === 0) { - return { - success: false, - error: 'No valid genotype data found in file. Please ensure the file is in Monadic DNA format.', - }; + return { success: false, error: 'No valid genotype data found in file. Please ensure the file is in Monadic DNA format.' 
}; } - return { - success: true, - data: genotypeData, - totalVariants, - validVariants, - detectedFormat: 'monadic', - }; + return { success: true, data: genotypeData, totalVariants, validVariants, detectedFormat: 'monadic' }; } catch (error) { - return { - success: false, - error: `Failed to parse file: ${error instanceof Error ? error.message : 'Unknown error'}`, - }; + return { success: false, error: `Failed to parse file: ${error instanceof Error ? error.message : 'Unknown error'}` }; } } export function parseAncestryDNAFile(content: string): ParseResult { try { - const lines = content.split('\n'); + const lines = splitLines(content); const genotypeData: GenotypeData[] = []; let totalVariants = 0; let validVariants = 0; let headerFound = false; + let allele1Idx = 3; + let allele2Idx = 4; + let delimiter = '\t'; + + const configureFromHeader = (headerLine: string) => { + delimiter = headerLine.includes('\t') ? '\t' : ','; + const cols = headerLine.split(delimiter).map(c => stripQuotes(c).toLowerCase()); + allele1Idx = cols.findIndex(c => /allele.?1|allele$/.test(c)); + allele2Idx = cols.findIndex(c => /allele.?2/.test(c)); + if (allele1Idx === -1) allele1Idx = 3; + if (allele2Idx === -1) allele2Idx = 4; + }; for (const line of lines) { const trimmedLine = line.trim(); + if (!trimmedLine) continue; - // Skip empty lines and comments - if (!trimmedLine || trimmedLine.startsWith('#')) { - continue; - } - - // Check for header line (AncestryDNA uses rsid, chromosome, position, allele1, allele2) - if (trimmedLine.toLowerCase().includes('rsid') && - trimmedLine.toLowerCase().includes('chromosome') && - trimmedLine.toLowerCase().includes('position')) { - headerFound = true; - continue; - } - - // Skip if we haven't found the header yet if (!headerFound) { + if (trimmedLine.startsWith('#')) { + // FTDNA famfinder puts the column header inside a comment line. + // Only treat it as a header if it contains "allele" (to avoid matching 23andMe comment headers). 
+ const commentContent = trimmedLine.slice(1).trim(); + const lower = commentContent.toLowerCase(); + if ( + (lower.includes('rsid') || lower.includes('name')) && + lower.includes('chromosome') && + lower.includes('position') && + lower.includes('allele') + ) { + headerFound = true; + configureFromHeader(commentContent); + } + continue; + } + + const lower = trimmedLine.toLowerCase(); + if (lower.includes('rsid') && lower.includes('chromosome') && lower.includes('position')) { + headerFound = true; + configureFromHeader(trimmedLine); + continue; + } continue; } totalVariants++; - const parts = trimmedLine.split(/\t/); // AncestryDNA uses tabs - - // Expected format: rsid chromosome position allele1 allele2 - if (parts.length < 5) { - continue; - } - - const [rsid, chromosome, positionStr, allele1, allele2] = parts; - - // Validate rsid format (should start with rs or be a numeric ID) - if (!rsid.startsWith('rs') && !/^\d+$/.test(rsid)) { + // Split and filter out empty fields caused by inconsistent multi-separator usage. + const rawParts = trimmedLine.split(delimiter).map(p => stripQuotes(p)); + const parts = rawParts.filter(p => p !== ''); + + if (parts.length <= Math.max(allele1Idx, 2)) continue; + + const rsid = parts[0]; + const chromosome = parts[1]; + const positionStr = parts[2]; + const allele1Raw = parts[allele1Idx] ?? ''; + const allele2Raw = allele2Idx < parts.length ? (parts[allele2Idx] ?? '') : ''; + + // If no allele2 but allele1 is 2 chars, treat it as a combined genotype (generic 4-col format). 
+ if (!allele2Raw && allele1Raw.length === 2) { + const a1 = allele1Raw[0]; + const a2 = allele1Raw[1]; + if (!rsid.startsWith('rs') && !/^\d+$/.test(rsid)) continue; + if (!VALID_CHROMOSOMES.has(chromosome)) continue; + const position = parseInt(positionStr, 10); + if (!Number.isInteger(position) || position <= 0) continue; + const validBases = new Set(['A', 'T', 'G', 'C', 'I', 'D', '-']); + if (!validBases.has(a1) || !validBases.has(a2)) continue; + genotypeData.push({ rsid, chromosome: chromosome === 'M' ? 'MT' : chromosome, position, genotype: allele1Raw }); + validVariants++; continue; } - // Validate chromosome (1-22, X, Y, MT) - const validChromosomes = new Set(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT', 'M']); - if (!validChromosomes.has(chromosome)) { - continue; - } + if (!rsid.startsWith('rs') && !/^\d+$/.test(rsid)) continue; + if (!VALID_CHROMOSOMES.has(chromosome)) continue; - // Validate position (should be a positive integer) const position = parseInt(positionStr, 10); - if (!Number.isInteger(position) || position <= 0) { - continue; - } + if (!Number.isInteger(position) || position <= 0) continue; - // Validate alleles (should be single characters: A, T, G, C, I, D, or 0/-) - const validBases = new Set(['A', 'T', 'G', 'C', 'I', 'D', '0', '-']); - if (!validBases.has(allele1) || !validBases.has(allele2)) { - continue; - } + const a1 = allele1Raw || '0'; + const a2 = allele2Raw || '0'; - // Combine alleles into genotype format - // AncestryDNA uses '0' for no-call, convert to '--' - let genotype: string; - if (allele1 === '0' || allele2 === '0') { - genotype = '--'; - } else { - genotype = allele1 + allele2; - } + if (!VALID_BASES.has(a1) || !VALID_BASES.has(a2)) continue; + + const genotype = (a1 === '0' || a2 === '0') ? '--' : a1 + a2; genotypeData.push({ rsid, - chromosome: chromosome === 'M' ? 
'MT' : chromosome, // Normalize MT chromosome + chromosome: chromosome === 'M' ? 'MT' : chromosome === '26' ? 'MT' : chromosome, position, genotype, }); @@ -286,78 +274,78 @@ export function parseAncestryDNAFile(content: string): ParseResult { } if (validVariants === 0) { - return { - success: false, - error: 'No valid genotype data found in file. Please ensure the file is in AncestryDNA format.', - }; + return { success: false, error: 'No valid genotype data found in file. Please ensure the file is in AncestryDNA format.' }; } - return { - success: true, - data: genotypeData, - totalVariants, - validVariants, - detectedFormat: 'ancestrydna', - }; + return { success: true, data: genotypeData, totalVariants, validVariants, detectedFormat: 'ancestrydna' }; } catch (error) { - return { - success: false, - error: `Failed to parse file: ${error instanceof Error ? error.message : 'Unknown error'}`, - }; + return { success: false, error: `Failed to parse file: ${error instanceof Error ? error.message : 'Unknown error'}` }; } } export function detectAndParseGenotypeFile(content: string): ParseResult { - // Try to detect format by looking at first few non-comment lines - const lines = content.split('\n').slice(0, 20); - - // Check for Monadic DNA format (CSV with header) - const hasMonadicHeader = lines.some(line => - line.trim().toUpperCase().startsWith('RSID,CHROMOSOME,POSITION,RESULT') - ); - + // Scan first 50 lines — some files have long comment/metadata sections before the header. + const lines = splitLines(content).slice(0, 50); + + // Monadic DNA: CSV/TSV with specific header (also matches MyHeritage, FTDNA, generic 4-col formats). + // Normalize whitespace so space-delimited headers match alongside tab/comma. 
+ const hasMonadicHeader = lines.some(line => { + const upper = line.trim().toUpperCase().replace(/"/g, ''); + const normalized = upper.replace(/\s+/g, '\t'); + return ( + upper.startsWith('RSID,CHROMOSOME,POSITION,RESULT') || + upper.startsWith('RSID,CHROMOSOME,POSITION,GENOTYPE') || + normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE') || + normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT') + ); + }); if (hasMonadicHeader) { return parseMonadicDNAFile(content); } - // Check for AncestryDNA format (tab-separated with specific header) + // AncestryDNA: non-comment header line with rsid + chromosome + position + allele columns. + // Skip lines starting with # to avoid matching 23andMe's comment-based column header. const hasAncestryHeader = lines.some(line => { - const lower = line.trim().toLowerCase(); + const trimmed = line.trim(); + if (trimmed.startsWith('#')) return false; + const lower = trimmed.toLowerCase(); return lower.includes('rsid') && lower.includes('chromosome') && - lower.includes('position') && - lower.includes('allele1') && - lower.includes('allele2'); + lower.includes('position'); }); - if (hasAncestryHeader) { return parseAncestryDNAFile(content); } - // Check for 23andMe format (comment lines starting with #) + // 23andMe and compatible formats: comment lines starting with #. + // But check if the data rows are AncestryDNA-style (5 columns with separate alleles) — + // FTDNA famfinder puts its column header in a comment line. const has23andMeComments = lines.some(line => line.trim().startsWith('#')); - if (has23andMeComments) { + // If any comment line looks like an AncestryDNA header (has "allele"), try AncestryDNA first. 
+ const commentHasAlleleHeader = lines.some(line => { + const trimmed = line.trim(); + if (!trimmed.startsWith('#')) return false; + const lower = trimmed.slice(1).toLowerCase(); + return (lower.includes('rsid') || lower.includes('name')) && lower.includes('chromosome') && lower.includes('allele'); + }); + if (commentHasAlleleHeader) { + const ancestryResult = parseAncestryDNAFile(content); + if (ancestryResult.success) return ancestryResult; + } return parse23andMeFile(content); } - // Try parsers in order of popularity + // Blind fallback. const result23andMe = parse23andMeFile(content); - if (result23andMe.success) { - return result23andMe; - } + if (result23andMe.success) return result23andMe; const resultAncestry = parseAncestryDNAFile(content); - if (resultAncestry.success) { - return resultAncestry; - } + if (resultAncestry.success) return resultAncestry; const resultMonadic = parseMonadicDNAFile(content); - if (resultMonadic.success) { - return resultMonadic; - } + if (resultMonadic.success) return resultMonadic; - // If all fail, return generic error return { success: false, error: 'Unable to detect file format. Supported formats: 23andMe (.txt), AncestryDNA (.txt), or Monadic DNA (.csv)', diff --git a/next-env.d.ts b/next-env.d.ts index c4b7818..9edff1c 100644 --- a/next-env.d.ts +++ b/next-env.d.ts @@ -1,6 +1,6 @@ /// /// -import "./.next/dev/types/routes.d.ts"; +import "./.next/types/routes.d.ts"; // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. From c8529e27476008b03d2a6d874286e65e539ffc51 Mon Sep 17 00:00:00 2001 From: Vishakh Date: Mon, 22 Jun 2026 15:15:51 -0400 Subject: [PATCH 2/3] Better file upload messaging and tracking. 
--- app/components/UserDataUpload.tsx | 42 ++++++++----------------------- app/globals.css | 7 ++++++ lib/analytics.ts | 7 ++++-- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/app/components/UserDataUpload.tsx b/app/components/UserDataUpload.tsx index 8828078..eed2c1c 100644 --- a/app/components/UserDataUpload.tsx +++ b/app/components/UserDataUpload.tsx @@ -5,7 +5,6 @@ import { GenotypeData, detectAndParseGenotypeFile, validateFileSize, validateFil import { calculateFileHash } from "@/lib/file-hash"; import { trackFileCleared, - trackFileUploadError, trackGenotypeFileLoaded, trackGenotypeFileUploadFailed, trackGenotypeFileUploadStarted, @@ -38,59 +37,52 @@ export function GenotypeProvider({ children }: { children: React.ReactNode }) { const [originalFileName, setOriginalFileName] = useState(null); const uploadGenotype = async (file: File, source: string = 'unknown') => { - const startTime = performance.now(); - const fileExtension = file.name.split('.').pop() || ''; + const fileExtension = file.name.split('.').pop()?.toLowerCase() || ''; setIsLoading(true); setError(null); trackGenotypeFileUploadStarted(source); try { - // Validate file size (50MB limit) if (!validateFileSize(file, 50)) { throw new Error('File too large. Maximum size is 50MB.'); } - // Validate file format if (!validateFileFormat(file)) { - throw new Error('Invalid file format. Please upload a .txt, .tsv, or .csv file from 23andMe, AncestryDNA, or Monadic DNA.'); + throw new Error('Unsupported file type. 
Please upload a .txt, .tsv, or .csv file exported from 23andMe, AncestryDNA, MyHeritage, FTDNA, LivingDNA, or a compatible provider.'); } - // Read and parse file entirely client-side const fileContent = await file.text(); const hash = calculateFileHash(fileContent); - // Parse the genotype file client-side const parseResult = detectAndParseGenotypeFile(fileContent); if (!parseResult.success) { - throw new Error(parseResult.error || 'Failed to parse genotype data'); + const reason = parseResult.error || 'Failed to parse genotype data'; + console.error('[Upload] Parse failed', { file: file.name, ext: fileExtension, reason }); + throw new Error(reason); } - // Create a map for quick SNP lookup const genotypeMap = new Map(); parseResult.data!.forEach((variant: GenotypeData) => { genotypeMap.set(variant.rsid, variant.genotype); }); - const parseDuration = performance.now() - startTime; - - // Track successful genotype file load - trackGenotypeFileLoaded(file.size, genotypeMap.size, source); + trackGenotypeFileLoaded(file.size, genotypeMap.size, source, parseResult.detectedFormat, fileExtension); setGenotypeData(genotypeMap); setFileHash(hash); setOriginalFileName(file.name); - // Call the callback if it exists if (onDataLoadedRef.current) { onDataLoadedRef.current(); } return true; } catch (err) { const errorMessage = err instanceof Error ? 
err.message : 'Upload failed'; + console.error('[Upload] Failed', { file: file.name, ext: fileExtension, source, reason: errorMessage }); setError(errorMessage); - trackGenotypeFileUploadFailed(source, errorMessage); + trackGenotypeFileUploadFailed(source, errorMessage, fileExtension); return false; } finally { setIsLoading(false); @@ -159,19 +151,6 @@ export default function UserDataUpload() { const file = event.target.files?.[0]; if (!file) return; - // Validate file type - const fileName = file.name.toLowerCase(); - if (!fileName.endsWith('.txt') && !fileName.endsWith('.tsv') && !fileName.endsWith('.csv')) { - trackFileUploadError('unsupported_file_type'); - return; - } - - // Validate file size (50MB limit) - if (file.size > 50 * 1024 * 1024) { - trackFileUploadError('file_too_large'); - return; - } - // Dev mode: Try to use File System Access API to save handle for future auto-load if (isDevModeEnabled()) { try { @@ -228,9 +207,10 @@ export default function UserDataUpload() { +

23andMe, AncestryDNA, MyHeritage, FTDNA, LivingDNA, and more

{error && ( -
- Upload failed +
+ {error}
)}
diff --git a/app/globals.css b/app/globals.css index 02ee244..eb608af 100644 --- a/app/globals.css +++ b/app/globals.css @@ -1857,6 +1857,13 @@ tbody tr:hover { cursor: wait; } +.upload-format-hint { + margin: 0.3rem 0 0; + font-size: 0.75rem; + color: var(--text-secondary); + opacity: 0.8; +} + .sample-data-section { display: flex; flex-direction: column; diff --git a/lib/analytics.ts b/lib/analytics.ts index c499ac7..fe1af0f 100644 --- a/lib/analytics.ts +++ b/lib/analytics.ts @@ -327,18 +327,21 @@ export function trackGenotypeFileUploadStarted(source: string = 'unknown') { }); } -export function trackGenotypeFileUploadFailed(source: string = 'unknown', reason?: string) { +export function trackGenotypeFileUploadFailed(source: string = 'unknown', reason?: string, fileExtension?: string) { trackEvent('genotype_file_upload_failed', { source, reason: sanitizeErrorReason(reason), + ...(fileExtension && { file_extension: fileExtension }), }); } -export function trackGenotypeFileLoaded(fileSize: number, variantCount: number, source: string = 'unknown') { +export function trackGenotypeFileLoaded(fileSize: number, variantCount: number, source: string = 'unknown', detectedFormat?: string, fileExtension?: string) { const metadata = { file_size_kb: Math.round(fileSize / 1024), variant_count: variantCount, source, + ...(detectedFormat && { detected_format: detectedFormat }), + ...(fileExtension && { file_extension: fileExtension }), }; trackEvent('genotype_file_loaded', metadata); From 4e8a708a8440352952e430f019e06f1605f73281 Mon Sep 17 00:00:00 2001 From: Vishakh Date: Mon, 22 Jun 2026 17:00:13 -0400 Subject: [PATCH 3/3] Bug fixes --- app/components/UserDataUpload.tsx | 3 ++- lib/genotype-parser.ts | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/app/components/UserDataUpload.tsx b/app/components/UserDataUpload.tsx index eed2c1c..2bbdb8c 100644 --- a/app/components/UserDataUpload.tsx +++ b/app/components/UserDataUpload.tsx @@ -37,7 +37,8 
@@ export function GenotypeProvider({ children }: { children: React.ReactNode }) { const [originalFileName, setOriginalFileName] = useState(null); const uploadGenotype = async (file: File, source: string = 'unknown') => { - const fileExtension = file.name.split('.').pop()?.toLowerCase() || ''; + const dotIdx = file.name.lastIndexOf('.'); + const fileExtension = dotIdx !== -1 ? file.name.slice(dotIdx + 1).toLowerCase() : ''; setIsLoading(true); setError(null); diff --git a/lib/genotype-parser.ts b/lib/genotype-parser.ts index 1df0dc2..f4007b0 100644 --- a/lib/genotype-parser.ts +++ b/lib/genotype-parser.ts @@ -108,13 +108,15 @@ export function parseMonadicDNAFile(content: string): ParseResult { if (!headerFound) { const upper = trimmedLine.toUpperCase().replace(/"/g, ''); const normalized = upper.replace(/\s+/g, '\t'); + const hasExplicitDelimiter = upper.includes(',') || upper.includes('\t'); // Accept RSID,CHROMOSOME,POSITION,RESULT or RSID,CHROMOSOME,POSITION,GENOTYPE - // Also accept tab/space-separated generic format. + // For tab/space headers, require at least one real delimiter so purely + // space-separated files fall through to the 23andMe whitespace parser instead. if ( upper.startsWith('RSID,CHROMOSOME,POSITION,RESULT') || upper.startsWith('RSID,CHROMOSOME,POSITION,GENOTYPE') || - normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE') || - normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT') + (hasExplicitDelimiter && normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE')) || + (hasExplicitDelimiter && normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT')) ) { headerFound = true; delimiter = trimmedLine.includes('\t') ? '\t' : ','; @@ -288,15 +290,17 @@ export function detectAndParseGenotypeFile(content: string): ParseResult { const lines = splitLines(content).slice(0, 50); // Monadic DNA: CSV/TSV with specific header (also matches MyHeritage, FTDNA, generic 4-col formats). 
- // Normalize whitespace so space-delimited headers match alongside tab/comma. + // Require an explicit tab or comma so purely space-delimited files fall through to the + // 23andMe whitespace parser rather than being misrouted here with a comma delimiter. const hasMonadicHeader = lines.some(line => { const upper = line.trim().toUpperCase().replace(/"/g, ''); const normalized = upper.replace(/\s+/g, '\t'); + const hasExplicitDelimiter = upper.includes(',') || upper.includes('\t'); return ( upper.startsWith('RSID,CHROMOSOME,POSITION,RESULT') || upper.startsWith('RSID,CHROMOSOME,POSITION,GENOTYPE') || - normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE') || - normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT') + (hasExplicitDelimiter && normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tGENOTYPE')) || + (hasExplicitDelimiter && normalized.startsWith('RSID\tCHROMOSOME\tPOSITION\tRESULT')) ); }); if (hasMonadicHeader) {