diff --git a/frontend/package.json b/frontend/package.json index 08b298260e3..4e117cd05cc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -47,6 +47,7 @@ "d3-shape": "2.1.0", "dagre": "0.8.5", "file-saver": "2.0.5", + "file-type": "^22.0.1", "fuse.js": "6.5.3", "html2canvas": "1.4.1", "jointjs": "3.5.4", diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html index fd0ba3af152..7092d3294e4 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html @@ -44,6 +44,36 @@ nzType="warning" nzMessage="Preview of the file type is currently not supported"> +
+ +
+ +
+ +
+ +
+ + + + + +
- {{ column }} + +
{{ column }}
+
+ {{ fileMetadata?.columnTypes?.[i] }} + + {{ fileMetadata?.nullCounts?.[i] }} null + +
+ @@ -79,6 +119,14 @@ alt="{{filePath}}" class="full-size-image" /> + +
+ +
+
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss index e6424f529d8..c692589f767 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss @@ -40,3 +40,65 @@ max-width: 90%; max-height: 90%; } + +.file-metadata-strip { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-bottom: 10px; + padding: 6px 0; + border-bottom: 1px solid #f0f0f0; +} + +.metadata-pill { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 8px 2px 6px; + background: #fafafa; + border: 1px solid #e8e8e8; + border-radius: 4px; + font-size: 12px; + white-space: nowrap; +} + +.metadata-label { + color: #8c8c8c; + font-weight: 500; +} + +.metadata-value { + color: #262626; +} + +.column-name { + font-weight: 600; +} + +.column-meta { + display: flex; + align-items: center; + gap: 6px; + margin-top: 2px; + font-weight: 400; +} + +.column-type-tag { + display: inline-block; + padding: 0 6px; + font-size: 11px; + color: #1890ff; + background: #e6f4ff; + border: 1px solid #91caff; + border-radius: 3px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.column-null-hint { + font-size: 11px; + color: #d4380d; +} + +.open-in-workflow-btn { + margin-left: auto; +} diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts 
b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts index 9e70a444df8..568dac0ecd5 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts @@ -19,11 +19,13 @@ import { TestBed } from "@angular/core/testing"; import { HttpClientTestingModule } from "@angular/common/http/testing"; -import { UserDatasetFileRendererComponent } from "./user-dataset-file-renderer.component"; +import { UserDatasetFileRendererComponent, MIME_TYPES, getMimeType, inferColumnSchema } from "./user-dataset-file-renderer.component"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; import { DomSanitizer } from "@angular/platform-browser"; import { commonTestProviders } from "../../../../../../common/testing/test-utils"; +import { Router } from "@angular/router"; +import { WorkflowPersistService } from "../../../../../../common/service/workflow-persist/workflow-persist.service"; describe("UserDatasetFileRendererComponent", () => { let component: UserDatasetFileRendererComponent; @@ -34,7 +36,15 @@ describe("UserDatasetFileRendererComponent", () => { providers: [ DatasetService, NotificationService, - { provide: DomSanitizer, useValue: { bypassSecurityTrustUrl: vi.fn() } }, + WorkflowPersistService, + { provide: Router, useValue: { navigate: vi.fn() } }, + { + provide: DomSanitizer, + useValue: { + bypassSecurityTrustUrl: vi.fn((url: string) => url), + bypassSecurityTrustResourceUrl: vi.fn((url: string) => url), + }, + }, ...commonTestProviders, ], }); @@ -42,15 +52,250 @@ 
describe("UserDatasetFileRendererComponent", () => { component = fixture.componentInstance; }); - it("should return true for supported MIME type", () => { - const supportedMimeType = "image/jpeg"; // Example of a supported MIME type - const result = component.isPreviewSupported(supportedMimeType); - expect(result).toBe(true); + describe("isPreviewSupported", () => { + it("should return true for known MIME types", () => { + expect(component.isPreviewSupported("image/jpeg")).toBe(true); + expect(component.isPreviewSupported("application/pdf")).toBe(true); + expect(component.isPreviewSupported("application/x-parquet")).toBe(true); + }); + + it("should return false only for unidentified binary (octet-stream)", () => { + expect(component.isPreviewSupported(MIME_TYPES.OCTET_STREAM)).toBe(false); + }); }); - it("should return false for unsupported MIME type", () => { - const unsupportedMimeType = "application/unknown"; // Example of an unsupported MIME type - const result = component.isPreviewSupported(unsupportedMimeType); - expect(result).toBe(false); + describe("getMimeType (extension-based fallback)", () => { + it("should resolve common image extensions", () => { + expect(getMimeType("photo.jpg")).toBe(MIME_TYPES.JPEG); + expect(getMimeType("photo.PNG")).toBe(MIME_TYPES.PNG); + expect(getMimeType("anim.gif")).toBe(MIME_TYPES.GIF); + }); + + it("should resolve xlsx separately from xls", () => { + expect(getMimeType("data.xlsx")).toBe(MIME_TYPES.XLSX); + expect(getMimeType("data.xls")).toBe(MIME_TYPES.MSEXCEL); + }); + + it("should resolve data format extensions", () => { + expect(getMimeType("data.parquet")).toBe(MIME_TYPES.PARQUET); + expect(getMimeType("data.arrow")).toBe(MIME_TYPES.ARROW); + expect(getMimeType("data.feather")).toBe(MIME_TYPES.ARROW); + }); + + it("should return octet-stream for unknown extensions", () => { + expect(getMimeType("file.xyz")).toBe(MIME_TYPES.OCTET_STREAM); + expect(getMimeType("noextension")).toBe(MIME_TYPES.OCTET_STREAM); + }); + }); 
+ + describe("detectMimeType (magic byte detection)", () => { + it("should detect Parquet files from PAR1 magic bytes", async () => { + const magic = new Uint8Array([0x50, 0x41, 0x52, 0x31, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.PARQUET); + }); + + it("should detect Arrow IPC files from ARROW1 magic bytes", async () => { + const magic = new Uint8Array([0x41, 0x52, 0x52, 0x4f, 0x57, 0x31, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.ARROW); + }); + + it("should detect JSON via text sniffing (object)", async () => { + const blob = new Blob(['{"key": "value"}'], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.JSON); + }); + + it("should detect JSON via text sniffing (array)", async () => { + const blob = new Blob(['[1, 2, 3]'], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.JSON); + }); + + it("should detect CSV via text sniffing", async () => { + const blob = new Blob(["name,age,city\nAlice,30,LA\nBob,25,NY"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.CSV); + }); + + it("should detect Markdown via text sniffing", async () => { + const blob = new Blob(["# My Title\n\nSome content here"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.MD); + }); + + it("should detect plain text when content is printable ASCII", async () => { + const blob = new Blob(["Hello, world! 
This is plain text."], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.TXT); + }); + + it("should return octet-stream for unidentifiable binary", async () => { + const binary = new Uint8Array([0x00, 0x01, 0x02, 0x80, 0xff, 0xfe, 0x7f, 0x03]); + const blob = new Blob([binary]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.OCTET_STREAM); + }); + + it("should detect HDF5 from magic bytes (generic .h5)", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "model.h5"); + expect(result).toBe(MIME_TYPES.HDF5); + }); + + it("should refine HDF5 to H5AD by extension", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "scrna.h5ad"); + expect(result).toBe(MIME_TYPES.H5AD); + }); + + it("should refine HDF5 to H5SEURAT by extension", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "pbmc.h5seurat"); + expect(result).toBe(MIME_TYPES.H5SEURAT); + }); + + it("should detect Python pickle from \\x80 + protocol byte", async () => { + const magic = new Uint8Array([0x80, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.PICKLE); + }); + + it("should detect NumPy .npy from magic bytes", async () => { + const magic = new Uint8Array([0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59, 0x01, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.NPY); + }); + + it("should 
detect GGUF from magic bytes", async () => { + const magic = new Uint8Array([0x47, 0x47, 0x55, 0x46, 0x03, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.GGUF); + }); + + it("should detect Safetensors via extension fallback", async () => { + const opaque = new Uint8Array([0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([opaque]); + const result = await component.detectMimeType(blob, "model.safetensors"); + expect(result).toBe(MIME_TYPES.SAFETENSORS); + }); + + it("should detect ONNX via extension fallback", async () => { + const opaque = new Uint8Array([0x08, 0x07, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([opaque]); + const result = await component.detectMimeType(blob, "resnet.onnx"); + expect(result).toBe(MIME_TYPES.ONNX); + }); + + it("should detect VCF from header line", async () => { + const blob = new Blob(["##fileformat=VCFv4.2\n##source=test\n"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.VCF); + }); + + it("should detect FASTA from > prefix", async () => { + const blob = new Blob([">seq1\nACGTACGT\n>seq2\nTGCATGCA\n"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.FASTA); + }); + + it("should detect FASTQ from 4-line @/+ pattern", async () => { + const blob = new Blob(["@read1\nACGT\n+\n!!!!\n@read2\nTGCA\n+\n!!!!\n"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.FASTQ); + }); + }); + + describe("parser helpers", () => { + it("should parse a NumPy v1.0 header", async () => { + // Construct a minimal valid .npy v1 file: magic + version + uint16 header_len + ASCII header + const headerText = "{'descr': '> 8) & 0xff; + buf.set(headerBytes, 10); + const blob = new Blob([buf]); + const result = await (component as 
any).parseNpyHeader(blob); + expect(result?.dtype).toBe(" { + const header = JSON.stringify({ + "layer.weight": { dtype: "F32", shape: [128, 64], data_offsets: [0, 32768] }, + "layer.bias": { dtype: "F32", shape: [128], data_offsets: [32768, 33280] }, + __metadata__: { format: "pt" }, + }); + const headerBytes = new TextEncoder().encode(header); + const lenBytes = new Uint8Array(8); + let len = headerBytes.length; + for (let i = 0; i < 8; i++) { + lenBytes[i] = len & 0xff; + len = Math.floor(len / 256); + } + const blob = new Blob([lenBytes, headerBytes]); + const result = await (component as any).parseSafetensorsHeader(blob); + expect(result?.tensorCount).toBe(2); + expect(result?.parameterCount).toBe(128 * 64 + 128); + expect(result?.sampleNames).toEqual(["layer.weight", "layer.bias"]); + }); + + it("should infer column types from tabular sample data", () => { + const rows = [ + ["Alice", "30", "75000.50", "true", "2024-01-15"], + ["Bob", "25", "60000.00", "false", "2024-03-22"], + ["Carol", "", "82000.75", "true", "2024-05-10"], + ]; + const schema = inferColumnSchema(rows, 5); + expect(schema.types).toEqual(["string", "integer", "double", "boolean", "date"]); + expect(schema.nullCounts).toEqual([0, 1, 0, 0, 0]); + expect(schema.samples).toEqual(["Alice", "30", "75000.50", "true", "2024-01-15"]); + }); + + it("should fall back to string for all-null columns", () => { + const rows = [["a", ""], ["b", ""]]; + const schema = inferColumnSchema(rows, 2); + expect(schema.types).toEqual(["string", "string"]); + expect(schema.nullCounts).toEqual([0, 2]); + }); + + it("should expose canOpenInWorkflow whenever a filePath is set", () => { + component.filePath = "/x/y/v1/data.csv"; + expect(component.canOpenInWorkflow).toBe(true); + component.filePath = "/x/y/v1/model.safetensors"; + expect(component.canOpenInWorkflow).toBe(true); + }); + + it("should not expose canOpenInWorkflow when no file is selected", () => { + component.filePath = ""; + 
expect(component.canOpenInWorkflow).toBe(false); + }); + + + it("should parse a GGUF header", async () => { + const buf = new Uint8Array(24); + buf.set([0x47, 0x47, 0x55, 0x46], 0); // "GGUF" + buf.set([0x03, 0x00, 0x00, 0x00], 4); // version 3 + buf.set([0xd2, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 8); // 722 tensors + buf.set([0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 16); // 16 metadata kv + const blob = new Blob([buf]); + const result = await (component as any).parseGgufHeader(blob); + expect(result?.version).toBe(3); + expect(result?.tensorCount).toBe(722); + expect(result?.metadataKvCount).toBe(16); + }); }); }); diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts index 861479ca5a5..564e7867ad4 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts @@ -17,14 +17,23 @@ * under the License. 
*/ -import { Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core"; +import { ChangeDetectorRef, Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import * as Papa from "papaparse"; import { ParseResult } from "papaparse"; -import { DomSanitizer, SafeUrl } from "@angular/platform-browser"; -import readXlsxFile from "read-excel-file"; +import { DomSanitizer, SafeResourceUrl, SafeUrl } from "@angular/platform-browser"; +import readXlsxFile, { readSheetNames } from "read-excel-file"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; +import { formatSize } from "../../../../../../common/util/size-formatter.util"; +import { Router } from "@angular/router"; +import { + DEFAULT_WORKFLOW_NAME, + WorkflowPersistService, +} from "../../../../../../common/service/workflow-persist/workflow-persist.service"; +import { GuiConfigService } from "../../../../../../common/service/gui-config.service"; +import { ExecutionMode, WorkflowContent } from "../../../../../../common/type/workflow"; +import { DASHBOARD_USER_WORKSPACE } from "../../../../../../app-routing.constant"; import { NgStyle, NgIf, NgFor } from "@angular/common"; import { NzSpinComponent } from "ng-zorro-antd/spin"; import { NzAlertComponent } from "ng-zorro-antd/alert"; @@ -38,6 +47,9 @@ import { } from "ng-zorro-antd/table"; import { MarkdownComponent } from "ngx-markdown"; import { NgxJsonViewerModule } from "ngx-json-viewer"; +import { fileTypeFromBlob } from "file-type"; +import { NzButtonComponent } from "ng-zorro-antd/button"; +import { NzIconDirective } from "ng-zorro-antd/icon"; export const MIME_TYPES = { JPEG: "image/jpeg", @@ -45,6 +57,9 @@ export const MIME_TYPES = { PNG: "image/png", WEBP: 
"image/webp", GIF: "image/gif", + AVIF: "image/avif", + BMP: "image/bmp", + TIFF: "image/tiff", CSV: "text/csv", TXT: "text/plain", MD: "text/markdown", @@ -53,35 +68,474 @@ export const MIME_TYPES = { PDF: "application/pdf", MSWORD: "application/msword", MSEXCEL: "application/vnd.ms-excel", + XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation", MSPOWERPOINT: "application/vnd.ms-powerpoint", MP4: "video/mp4", MP3: "audio/mpeg", - OCTET_STREAM: "application/octet-stream", // Default binary format + WAV: "audio/wav", + FLAC: "audio/flac", + WEBM: "video/webm", + MOV: "video/quicktime", + ARROW: "application/x-arrow", + PARQUET: "application/x-parquet", + // ML / scientific data formats + HDF5: "application/x-hdf5", + H5AD: "application/x-h5ad", + H5SEURAT: "application/x-h5seurat", + LOOM: "application/x-loom", + PICKLE: "application/x-python-pickle", + NPY: "application/x-numpy-array", + NPZ: "application/x-numpy-archive", + SAFETENSORS: "application/x-safetensors", + GGUF: "application/x-gguf", + PYTORCH: "application/x-pytorch", + KERAS: "application/x-keras", + ONNX: "application/x-onnx", + RDS: "application/x-rds", + // Bioinformatics text + FASTA: "application/x-fasta", + FASTQ: "application/x-fastq", + VCF: "application/x-vcf", + OCTET_STREAM: "application/octet-stream", }; export function getMimeType(filename: string): string { - const extension = filename.split(".").pop()?.toUpperCase(); - return extension && MIME_TYPES[extension as keyof typeof MIME_TYPES] - ? 
MIME_TYPES[extension as keyof typeof MIME_TYPES] - : MIME_TYPES.OCTET_STREAM; + const extensionMap: Record = { + JPG: MIME_TYPES.JPEG, + JPEG: MIME_TYPES.JPEG, + PNG: MIME_TYPES.PNG, + WEBP: MIME_TYPES.WEBP, + GIF: MIME_TYPES.GIF, + AVIF: MIME_TYPES.AVIF, + BMP: MIME_TYPES.BMP, + TIFF: MIME_TYPES.TIFF, + TIF: MIME_TYPES.TIFF, + CSV: MIME_TYPES.CSV, + TSV: MIME_TYPES.CSV, + TXT: MIME_TYPES.TXT, + MD: MIME_TYPES.MD, + HTML: MIME_TYPES.HTML, + HTM: MIME_TYPES.HTML, + JSON: MIME_TYPES.JSON, + JSONL: MIME_TYPES.TXT, + PDF: MIME_TYPES.PDF, + DOC: MIME_TYPES.MSWORD, + XLS: MIME_TYPES.MSEXCEL, + XLSX: MIME_TYPES.XLSX, + DOCX: MIME_TYPES.DOCX, + PPTX: MIME_TYPES.PPTX, + PPT: MIME_TYPES.MSPOWERPOINT, + MP4: MIME_TYPES.MP4, + MP3: MIME_TYPES.MP3, + WAV: MIME_TYPES.WAV, + FLAC: MIME_TYPES.FLAC, + WEBM: MIME_TYPES.WEBM, + MOV: MIME_TYPES.MOV, + ARROW: MIME_TYPES.ARROW, + FEATHER: MIME_TYPES.ARROW, + PARQUET: MIME_TYPES.PARQUET, + // ML / scientific + H5: MIME_TYPES.HDF5, + HDF5: MIME_TYPES.HDF5, + H5AD: MIME_TYPES.H5AD, + H5SEURAT: MIME_TYPES.H5SEURAT, + LOOM: MIME_TYPES.LOOM, + PKL: MIME_TYPES.PICKLE, + PICKLE: MIME_TYPES.PICKLE, + JOBLIB: MIME_TYPES.PICKLE, + NPY: MIME_TYPES.NPY, + NPZ: MIME_TYPES.NPZ, + SAFETENSORS: MIME_TYPES.SAFETENSORS, + GGUF: MIME_TYPES.GGUF, + PT: MIME_TYPES.PYTORCH, + PTH: MIME_TYPES.PYTORCH, + KERAS: MIME_TYPES.KERAS, + ONNX: MIME_TYPES.ONNX, + RDS: MIME_TYPES.RDS, + // Bioinformatics text + FASTA: MIME_TYPES.FASTA, + FA: MIME_TYPES.FASTA, + FNA: MIME_TYPES.FASTA, + FFN: MIME_TYPES.FASTA, + FAA: MIME_TYPES.FASTA, + FASTQ: MIME_TYPES.FASTQ, + FQ: MIME_TYPES.FASTQ, + VCF: MIME_TYPES.VCF, + }; + const ext = filename.split(".").pop()?.toUpperCase() ?? ""; + return extensionMap[ext] ?? 
MIME_TYPES.OCTET_STREAM; +} + +export function formatDuration(seconds: number): string { + if (!isFinite(seconds) || seconds < 0) return "—"; + const totalSec = Math.floor(seconds); + const h = Math.floor(totalSec / 3600); + const m = Math.floor((totalSec % 3600) / 60); + const s = totalSec % 60; + if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}`; + return `${m}:${String(s).padStart(2, "0")}`; } -// the size limits for all preview-supported types +/** + * Maximum size at which we'll attempt to preview a file. + * + * Note on memory: for "identify-only" types (HDF5, Parquet, Arrow, pickle, model containers, etc.) + * we only read the first ~16 bytes for magic-byte detection, so 1 GB is safe. For header-parse types + * (Safetensors, GGUF, NumPy .npy) we only read the first few KB. The cost of bumping all limits to + * 1 GB is the full-blob download time, since the dataset service streams the entire file. + * + * For full-content render types (CSV via Papa.parse, XLSX, JSON, large text) memory cost scales + * with file size — browsers may slow down or OOM well before 1 GB. The user can choose: the guard + * no longer blocks; if their browser tab struggles, they can close it. 
+ */ +const MAX_PREVIEW_SIZE = 1024 * 1024 * 1024; + +// size limits per MIME type — also used as pre-fetch guard export const MIME_TYPE_SIZE_LIMITS_MB = { - [MIME_TYPES.JPEG]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.PNG]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.WEBP]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.GIF]: 10 * 1024 * 1024, // 10 MB - [MIME_TYPES.CSV]: 2 * 1024 * 1024, // 2 MB for text-based data files - [MIME_TYPES.TXT]: 1 * 1024 * 1024, // 1 MB for plain text files - [MIME_TYPES.MD]: 1 * 1024 * 1024, // 1 MB for MD files - [MIME_TYPES.JSON]: 1 * 1024 * 1024, // 1 MB for JSON files - [MIME_TYPES.MSEXCEL]: 10 * 1024 * 1024, // 10 MB for Excel spreadsheets - [MIME_TYPES.MP4]: 50 * 1024 * 1024, // 50 MB for MP4 videos - [MIME_TYPES.MP3]: 10 * 1024 * 1024, // 10 MB for MP3 audio files - [MIME_TYPES.OCTET_STREAM]: 5 * 1024 * 1024, // Default size for other binary formats + [MIME_TYPES.JPEG]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PNG]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WEBP]: MAX_PREVIEW_SIZE, + [MIME_TYPES.GIF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.AVIF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.BMP]: MAX_PREVIEW_SIZE, + [MIME_TYPES.TIFF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.CSV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.TXT]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MD]: MAX_PREVIEW_SIZE, + [MIME_TYPES.JSON]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PDF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MSEXCEL]: MAX_PREVIEW_SIZE, + [MIME_TYPES.XLSX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.DOCX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PPTX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MP4]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WEBM]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MOV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MP3]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WAV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FLAC]: MAX_PREVIEW_SIZE, + [MIME_TYPES.ARROW]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PARQUET]: MAX_PREVIEW_SIZE, + [MIME_TYPES.HDF5]: MAX_PREVIEW_SIZE, + [MIME_TYPES.H5AD]: MAX_PREVIEW_SIZE, + [MIME_TYPES.H5SEURAT]: MAX_PREVIEW_SIZE, + [MIME_TYPES.LOOM]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PICKLE]: 
MAX_PREVIEW_SIZE, + [MIME_TYPES.NPY]: MAX_PREVIEW_SIZE, + [MIME_TYPES.NPZ]: MAX_PREVIEW_SIZE, + [MIME_TYPES.SAFETENSORS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.GGUF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PYTORCH]: MAX_PREVIEW_SIZE, + [MIME_TYPES.KERAS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.ONNX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.RDS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FASTA]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FASTQ]: MAX_PREVIEW_SIZE, + [MIME_TYPES.VCF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.OCTET_STREAM]: MAX_PREVIEW_SIZE, }; +export interface FileMetadata { + fileSize?: number; + // image + imageWidth?: number; + imageHeight?: number; + // video + videoDuration?: number; + videoWidth?: number; + videoHeight?: number; + // audio + audioDuration?: number; + // tabular + rowCount?: number; + columnCount?: number; + columnNames?: string[]; + sheetCount?: number; + // json + jsonTopLevelType?: "object" | "array"; + jsonItemCount?: number; + jsonPreviewKeys?: string[]; + // text / markdown + lineCount?: number; + wordCount?: number; + charCount?: number; + headingCount?: number; + // pdf + pageCount?: number; + // ML model / tensor data + modelFormat?: string; // "PyTorch", "Keras", "ONNX", "Safetensors", "GGUF", "TensorFlow" + containerFormat?: string; // "HDF5", "ZIP archive", "gzip" + tensorCount?: number; + parameterCount?: number; + sampleTensorNames?: string[]; + // NumPy + dtype?: string; + shape?: number[]; + // GGUF + ggufVersion?: number; + metadataKvCount?: number; + // Bioinformatics + sequenceCount?: number; + sequenceCountIsExact?: boolean; + variantCount?: number; + variantCountIsExact?: boolean; + + // Rich tabular schema (CSV / XLSX) + columnTypes?: string[]; // inferred type per column: "integer", "double", "boolean", "date", "string" + nullCounts?: number[]; // count of empty cells per column (in sample) + sampleValues?: string[]; // first non-null value per column + + // JSON schema + jsonMaxDepth?: number; + jsonKeyTypes?: { key: string; type: string }[]; // for object roots 
+ jsonArrayElementType?: string; // for array roots: uniform type or "mixed" + + // PDF /Info dictionary + pdfTitle?: string; + pdfAuthor?: string; + pdfCreator?: string; + pdfProducer?: string; + pdfVersion?: string; + pdfEncrypted?: boolean; + + // Markdown structure + codeBlockCount?: number; + linkCount?: number; + imageCount?: number; + listItemCount?: number; + + // Plain text / encoding + encoding?: string; // "UTF-8 BOM", "UTF-8", "ASCII" + emptyLineCount?: number; + avgLineLength?: number; + maxLineLength?: number; + + // NumPy enhanced + totalElements?: number; + byteOrder?: string; // "little-endian", "big-endian" + fortranOrder?: boolean; + + // Safetensors enhanced + dtypeBreakdown?: { dtype: string; params: number }[]; + largestTensor?: { name: string; shape: number[]; params: number }; + safetensorsMetadata?: { key: string; value: string }[]; + + // GGUF enhanced + ggufArchitecture?: string; + ggufQuantization?: string; + + // FASTA enhanced + totalBases?: number; + gcContent?: number; // 0..1 + minSequenceLength?: number; + maxSequenceLength?: number; + avgSequenceLength?: number; + isProtein?: boolean; + + // VCF enhanced + vcfSampleCount?: number; + vcfChromosomes?: string[]; +} + +/** + * Above this size, skip the download entirely and show only extension-based + * identification + a "how to load" hint. The dominant source of preview lag + * is the full-blob download from the dataset service. + */ +export const FULL_PREVIEW_MAX_BYTES = 50 * 1024 * 1024; // 50 MB + +/** + * One-line "how to load" or "what is this" message per format. + * Used both when content was downloaded (in renderByMimeType) and when the + * download was skipped (in showOversizedFileInfo). + */ +export const TYPE_LOADING_HINTS: Record = { + [MIME_TYPES.PARQUET]: "Parquet file. Use the Parquet File Scan operator in Texera to analyze this data.", + [MIME_TYPES.ARROW]: "Arrow / Feather file. 
Use the Arrow File Scan operator in Texera.", + [MIME_TYPES.HDF5]: "HDF5 binary container (Keras .h5 or scientific dataset). Load with h5py / rhdf5.", + [MIME_TYPES.H5AD]: "AnnData (.h5ad) — single-cell expression matrix. Load with scanpy.read_h5ad().", + [MIME_TYPES.H5SEURAT]: "Seurat HDF5 object (.h5seurat). Load with SeuratDisk::LoadH5Seurat() in R.", + [MIME_TYPES.LOOM]: "Loom (.loom) single-cell expression. Load with loompy / scanpy in Python.", + [MIME_TYPES.RDS]: "R serialized object (.rds) — Seurat / SCE / fitted model. Load with readRDS() in R.", + [MIME_TYPES.PICKLE]: "Python pickle — serialized model or dataset. Load with pickle.load() in Python.", + [MIME_TYPES.PYTORCH]: "PyTorch checkpoint (.pt/.pth). Load with torch.load() in Python.", + [MIME_TYPES.KERAS]: "Keras v3 model (.keras). Load with tf.keras.models.load_model() in Python.", + [MIME_TYPES.ONNX]: "ONNX model (.onnx). Load with onnxruntime; inspect at netron.app.", + [MIME_TYPES.SAFETENSORS]: "Safetensors file. Load with safetensors.torch.load_file() in Python.", + [MIME_TYPES.GGUF]: "GGUF model (llama.cpp / quantized LLM).", + [MIME_TYPES.NPY]: "NumPy array (.npy). Load with numpy.load() in Python.", + [MIME_TYPES.NPZ]: "NumPy archive (.npz) — ZIP of .npy arrays. Load with numpy.load().", + [MIME_TYPES.CSV]: "CSV file. Use the CSV File Scan operator in Texera.", + [MIME_TYPES.JSON]: "JSON file. Use the JSONL File Scan operator (or Python UDF for nested objects).", + [MIME_TYPES.XLSX]: "Excel spreadsheet (.xlsx). Convert to CSV or use a Python UDF with openpyxl.", + [MIME_TYPES.MSEXCEL]: "Excel spreadsheet (.xls). Convert to CSV or use a Python UDF.", + [MIME_TYPES.FASTA]: "FASTA sequence file. Parse with Biopython SeqIO.", + [MIME_TYPES.FASTQ]: "FASTQ reads file. Parse with Biopython SeqIO.", + [MIME_TYPES.VCF]: "VCF variant file. Parse with pyvcf / cyvcf2.", +}; + +/** Classify a single cell value into a coarse type label. 
*/ +function inferCellType(value: string): string { + if (value === "" || value == null) return "null"; + if (/^-?\d+$/.test(value)) return "integer"; + if (/^-?\d+\.\d+$/.test(value) || /^-?\d+\.?\d*[eE][-+]?\d+$/.test(value)) return "double"; + if (/^(true|false|True|False|TRUE|FALSE)$/.test(value)) return "boolean"; + if (/^\d{4}-\d{2}-\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2})?)?$/.test(value)) return "date"; + return "string"; +} + +/** Infer per-column type, null count, and a sample value from tabular data rows. */ +export function inferColumnSchema( + dataRows: string[][], + columnCount: number, + sampleLimit: number = 50 +): { types: string[]; nullCounts: number[]; samples: string[] } { + const types: string[] = []; + const nullCounts: number[] = []; + const samples: string[] = []; + const rowsToScan = Math.min(dataRows.length, sampleLimit); + + for (let c = 0; c < columnCount; c++) { + const typeCounts: Record = {}; + let nullCount = 0; + let firstNonNull = ""; + + for (let r = 0; r < rowsToScan; r++) { + const raw = dataRows[r][c]; + const val = raw == null ? "" : String(raw).trim(); + const t = inferCellType(val); + if (t === "null") { + nullCount++; + } else { + if (firstNonNull === "") firstNonNull = val; + typeCounts[t] = (typeCounts[t] ?? 0) + 1; + } + } + + const ranked = Object.entries(typeCounts).sort((a, b) => b[1] - a[1]); + types.push(ranked[0]?.[0] ?? "string"); + nullCounts.push(nullCount); + samples.push(firstNonNull); + } + return { types, nullCounts, samples }; +} + +/** Walk an arbitrary JSON value and compute max nesting depth. 
/**
 * Maximum nesting depth of a JSON-like value.
 *
 * A primitive (or null) sits at `depth`; each array/object level adds one for its children,
 * so `5` → 1, `[]` → 1, `[1]` → 2, `{ a: [1] }` → 3.
 * Implemented with an explicit stack so that pathologically deep documents cannot
 * overflow the call stack.
 *
 * @param value Any value, typically the result of `JSON.parse`.
 * @param depth Depth assigned to `value` itself (default 1).
 */
function jsonMaxDepth(value: unknown, depth = 1): number {
  let max = depth;
  const pending: Array<[unknown, number]> = [[value, depth]];
  for (let entry = pending.pop(); entry !== undefined; entry = pending.pop()) {
    const [node, level] = entry;
    if (level > max) max = level;
    if (node !== null && typeof node === "object") {
      const children: unknown[] = Array.isArray(node) ? node : Object.values(node as Record<string, unknown>);
      for (const child of children) pending.push([child, level + 1]);
    }
  }
  return max;
}

/** Describe a JS value's type for human display: "null", "array(<length>)", or the `typeof` result. */
function jsTypeLabel(value: unknown): string {
  if (value === null) return "null";
  if (Array.isArray(value)) return `array(${value.length})`;
  return typeof value;
}

/**
 * Decode a PDF literal string whose opening "(" sits immediately before `start`.
 *
 * Handles balanced unescaped parentheses, the named escapes (\n \r \t \b \f), "\(" "\)" "\\",
 * octal escapes of one to three digits, and backslash-newline line continuations. A backslash
 * before any other character is dropped.
 *
 * @returns The decoded text, or undefined when the string is never closed.
 */
function readPdfLiteralString(text: string, start: number): string | undefined {
  const namedEscapes: Record<string, string> = { n: "\n", r: "\r", t: "\t", b: "\b", f: "\f" };
  let decoded = "";
  let depth = 1;
  let i = start;
  while (i < text.length) {
    const ch = text[i];
    if (ch === "\\") {
      const rest = text.slice(i + 1, i + 4);
      const octal = /^[0-7]{1,3}/.exec(rest);
      if (octal) {
        decoded += String.fromCharCode(parseInt(octal[0], 8) & 0xff);
        i += 1 + octal[0].length;
      } else if (rest.startsWith("\r\n")) {
        i += 3; // line continuation (CRLF)
      } else if (rest[0] === "\n" || rest[0] === "\r") {
        i += 2; // line continuation
      } else if (rest.length === 0) {
        return undefined; // dangling backslash at end of input
      } else {
        decoded += namedEscapes[rest[0]] ?? rest[0];
        i += 2;
      }
      continue;
    }
    if (ch === "(") {
      depth++;
    } else if (ch === ")") {
      depth--;
      if (depth === 0) return decoded;
    }
    decoded += ch;
    i++;
  }
  return undefined;
}

/**
 * Extract /Info dictionary fields from a PDF's raw text. Heuristic: it scans the text for
 * `/Title (…)`-style entries rather than parsing the object structure.
 *
 * @param rawText PDF bytes decoded as text (callers may pass only a head/tail sample).
 * @returns `version` from a leading `%PDF-x.y` marker, `encrypted` = whether an `/Encrypt`
 *          key appears anywhere, and the trimmed Title/Author/Creator/Producer literal
 *          strings (undefined when absent, empty, or not in parenthesized form).
 */
function extractPdfInfo(rawText: string): {
  title?: string;
  author?: string;
  creator?: string;
  producer?: string;
  version?: string;
  encrypted?: boolean;
} {
  const result: ReturnType<typeof extractPdfInfo> = {};
  const versionMatch = rawText.match(/^%PDF-(\d+\.\d+)/);
  if (versionMatch) result.version = versionMatch[1];
  result.encrypted = /\/Encrypt\b/.test(rawText);

  // Only the parenthesized form `/Name (value)` is reliably plain text; hex strings are ignored.
  const grab = (name: string): string | undefined => {
    const opener = new RegExp(`/${name}\\s*\\(`, "g");
    // Try each occurrence in turn; an unterminated one is skipped rather than trusted.
    while (opener.exec(rawText) !== null) {
      const decoded = readPdfLiteralString(rawText, opener.lastIndex);
      if (decoded !== undefined) return decoded.trim() || undefined;
    }
    return undefined;
  };
  result.title = grab("Title");
  result.author = grab("Author");
  result.creator = grab("Creator");
  result.producer = grab("Producer");
  return result;
}

/** Compute GC content and sequence-length stats from a FASTA blob's text.
*/ +function summarizeFasta(text: string): { + sequenceCount: number; + totalBases: number; + gcContent: number; + minLen: number; + maxLen: number; + avgLen: number; + isProtein: boolean; +} { + // Walk character by character — avoids splitting a multi-MB string into a huge array. + let inHeader = false; + let sequenceCount = 0; + let currentLen = 0; + let totalBases = 0; + let gcCount = 0; + let nonNucleotideCount = 0; + let minLen = Infinity; + let maxLen = 0; + const nucleotideSet = new Set(["A", "C", "G", "T", "U", "N", "a", "c", "g", "t", "u", "n"]); + + const finishSequence = () => { + if (sequenceCount > 0 && currentLen > 0) { + if (currentLen < minLen) minLen = currentLen; + if (currentLen > maxLen) maxLen = currentLen; + } + currentLen = 0; + }; + + for (let i = 0; i < text.length; i++) { + const ch = text[i]; + if (ch === "\n") { + if (inHeader) inHeader = false; + continue; + } + if (inHeader) continue; + if (ch === ">") { + finishSequence(); + sequenceCount++; + inHeader = true; + continue; + } + if (ch === "\r" || ch === " " || ch === "\t") continue; + currentLen++; + totalBases++; + if (ch === "G" || ch === "C" || ch === "g" || ch === "c") gcCount++; + if (!nucleotideSet.has(ch)) nonNucleotideCount++; + } + finishSequence(); + + return { + sequenceCount, + totalBases, + gcContent: totalBases > 0 ? gcCount / totalBases : 0, + minLen: minLen === Infinity ? 0 : minLen, + maxLen, + avgLen: sequenceCount > 0 ? 
totalBases / sequenceCount : 0, + isProtein: totalBases > 0 && nonNucleotideCount / totalBases > 0.1, + }; +} + @UntilDestroy() @Component({ selector: "texera-user-dataset-file-renderer", @@ -101,14 +555,30 @@ export const MIME_TYPE_SIZE_LIMITS_MB = { NzTbodyComponent, MarkdownComponent, NgxJsonViewerModule, + NzButtonComponent, + NzIconDirective, ], }) export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDestroy { - private DEFAULT_MAX_SIZE = 5 * 1024 * 1024; // 5 MB + private DEFAULT_MAX_SIZE = 1024 * 1024 * 1024; // 1 GB + + // For text-based formats we slice to this size before parsing/rendering. + // Reading 1 GB as a UTF-16 string in JS would balloon to ~2 GB and likely crash the tab. + private static readonly PREVIEW_TEXT_BYTES = 10 * 1024 * 1024; // 10 MB + + /** Slice the blob if it exceeds the preview limit, returning the slice + whether truncation occurred. */ + private getPreviewSlice(blob: Blob): { slice: Blob; truncated: boolean } { + const limit = UserDatasetFileRendererComponent.PREVIEW_TEXT_BYTES; + if (blob.size <= limit) return { slice: blob, truncated: false }; + return { slice: blob.slice(0, limit), truncated: true }; + } + + /** True when text content shown is from a slice rather than the whole file. 
*/ + public previewTruncated: boolean = false; public fileURL: string | undefined; - // safe url is used to display some formats including image public safeFileURL: SafeUrl | undefined; + public safeResourceFileURL: SafeResourceUrl | undefined; // table related control public displayCSV: boolean = false; @@ -131,10 +601,18 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe // audio public displayMP3: boolean = false; - // plain text & octet stream related control + // PDF + public displayPDF: boolean = false; + + // plain text public displayPlainText: boolean = false; public textContent: string = ""; + // shown for detectable-but-unpreviewable types (Parquet, Arrow, DOCX, PPTX) + public detectedTypeMessage: string = ""; + + public fileMetadata: FileMetadata | undefined = undefined; + // control flags public isLoading: boolean = false; public isFileSizeUnloadable = false; @@ -142,33 +620,107 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe public isFileTypePreviewUnsupported: boolean = false; public currentFile: File | undefined = undefined; - @Input() - isMaximized: boolean = false; - - @Input() - did: number | undefined; - @Input() - dvid: number | undefined; + @Input() isMaximized: boolean = false; + @Input() did: number | undefined; + @Input() dvid: number | undefined; + @Input() filePath: string = ""; + @Input() fileSize?: number; + @Input() isLogin: boolean = false; - @Input() - filePath: string = ""; - - @Input() - fileSize?: number; - - @Input() - isLogin: boolean = false; - - @Output() - loadFile = new EventEmitter<{ file: string; prefix: string }>(); + @Output() loadFile = new EventEmitter<{ file: string; prefix: string }>(); constructor( private datasetService: DatasetService, private sanitizer: DomSanitizer, - private notificationService: NotificationService + private notificationService: NotificationService, + private cdr: ChangeDetectorRef, + private router: Router, + private 
workflowPersistService: WorkflowPersistService, + private config: GuiConfigService ) {} + /** Always available — every file gives the user something useful when opened in a workflow. */ + get canOpenInWorkflow(): boolean { + return !!this.filePath; + } + + /** + * Map a file's MIME to the scan operator type that handles it. The workspace component + * uses this string to build a properly schema-validated operator via WorkflowUtilService. + */ + private static getOperatorTypeForFile(filePath: string): string | null { + const mime = getMimeType(filePath); + switch (mime) { + case MIME_TYPES.CSV: return "CSVFileScan"; + case MIME_TYPES.JSON: return "JSONLFileScan"; + case MIME_TYPES.ARROW: return "ArrowFileScan"; + case MIME_TYPES.PARQUET: return "ParquetFileScan"; + case MIME_TYPES.PNG: + case MIME_TYPES.JPEG: + case MIME_TYPES.WEBP: + case MIME_TYPES.GIF: + case MIME_TYPES.AVIF: + case MIME_TYPES.BMP: + case MIME_TYPES.TIFF: + case MIME_TYPES.PDF: + case MIME_TYPES.MP3: + case MIME_TYPES.MP4: + case MIME_TYPES.WAV: + case MIME_TYPES.FLAC: + case MIME_TYPES.WEBM: + case MIME_TYPES.MOV: + case MIME_TYPES.TXT: + case MIME_TYPES.MD: + return "FileScan"; + default: + return null; + } + } + + /** + * Creates a new empty workflow and navigates to the editor. If the file type maps to a + * known scan operator, the workspace component picks up the `addOp` + `fileName` query + * params after init and adds the operator via the schema-validated path — see + * `workspace.component.ts:handlePendingOperatorAddition()`. + */ + onOpenInWorkflow(): void { + const fileName = this.filePath.split("/").pop() ?? "file"; + const addOp = UserDatasetFileRendererComponent.getOperatorTypeForFile(this.filePath); + const workflowContent: WorkflowContent = { + operators: [], + commentBoxes: [], + links: [], + operatorPositions: {}, + settings: { + dataTransferBatchSize: this.config.env.defaultDataTransferBatchSize, + executionMode: this.config.env.defaultExecutionMode ?? 
ExecutionMode.PIPELINED, + }, + }; + const workflowName = `Analysis of ${fileName}`; + this.workflowPersistService + .createWorkflow(workflowContent, workflowName || DEFAULT_WORKFLOW_NAME) + .pipe(untilDestroyed(this)) + .subscribe({ + next: created => { + const wid = created?.workflow?.wid; + if (wid == null) { + this.notificationService.error("Workflow created but no ID was returned."); + return; + } + // Query params tell the workspace component which operator to auto-add and which + // file path to wire into its fileName property. The workspace strips them on use. + const queryParams = addOp ? { addOp, fileName: this.filePath } : undefined; + this.router.navigate([DASHBOARD_USER_WORKSPACE, wid], { queryParams }).then(navigated => { + if (!navigated) { + this.notificationService.error("Navigation to the workflow editor was blocked."); + } + }); + }, + error: () => this.notificationService.error("Failed to create workflow"), + }); + } + ngOnInit(): void { this.reloadFileContent(); } @@ -194,105 +746,894 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe reloadFileContent() { this.turnOffAllDisplay(); - // Pre-check - file size - const mimeType = getMimeType(this.filePath); - if (!this.isPreviewSupported(mimeType)) { - this.onFileTypePreviewUnsupported(); + const extensionMime = getMimeType(this.filePath); + + // Skip the full download for large files. The dataset service streams the entire blob; + // for a 500 MB file we'd wait 30+ seconds just to read its first 16 magic bytes. Above + // the threshold, fall back to extension-based identification + a "how to load" hint. + if (this.fileSize != null && this.fileSize > FULL_PREVIEW_MAX_BYTES) { + this.showOversizedFileInfo(extensionMime); return; } - const limit = MIME_TYPE_SIZE_LIMITS_MB[mimeType] ?? this.DEFAULT_MAX_SIZE; - if (this.fileSize != null && this.fileSize > limit) { + + // Hard upper bound (defensive): even small types shouldn't load anything past this. 
+ const preCheckLimit = MIME_TYPE_SIZE_LIMITS_MB[extensionMime] ?? this.DEFAULT_MAX_SIZE; + if (this.fileSize != null && this.fileSize > preCheckLimit) { this.onFileSizeNotLoadable(); return; } - // Load file + if (!this.did || !this.dvid || !this.filePath) return; + this.isLoading = true; - if (this.did && this.dvid && this.filePath != "") { - this.datasetService - .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin) - .pipe(untilDestroyed(this)) - .subscribe({ - next: blob => { - this.isLoading = false; - const blobMimeType = getMimeType(this.filePath); - if (!this.isPreviewSupported(blobMimeType)) { - this.onFileTypePreviewUnsupported(); - return; - } - const MaxSize = MIME_TYPE_SIZE_LIMITS_MB[blobMimeType] || this.DEFAULT_MAX_SIZE; - const fileSize = blob.size; - if (fileSize > MaxSize) { - this.onFileSizeNotLoadable(); - this.notificationService.warning(`File ${this.filePath} is too large to be previewed`); - return; - } - this.currentFile = new File([blob], this.filePath, { type: blob.type }); - // Handle different file types - switch (blobMimeType) { - case MIME_TYPES.PNG: - case MIME_TYPES.JPEG: - case MIME_TYPES.WEBP: - case MIME_TYPES.GIF: - this.displayImage = true; - this.loadSafeURL(blob); - break; - case MIME_TYPES.MP4: - this.displayMP4 = true; - this.loadSafeURL(blob); - break; - - case MIME_TYPES.MP3: - this.displayMP3 = true; - this.loadSafeURL(blob); - break; - - case MIME_TYPES.MSEXCEL: - readXlsxFile(blob).then(rows => { - let parsedData: string[][] = []; - rows.forEach(row => { - // Convert each cell in the row to a string - let stringRow = row.map(cell => (cell ? 
cell.toString() : "")); - // Add the string array to the main array - parsedData.push(stringRow); - }); - if (parsedData.length > 0) { - this.loadTabularFile(parsedData); - this.displayXlsx = true; - } - }); - break; - case MIME_TYPES.CSV: - this.displayCSV = true; - // Handle CSV display - Papa.parse(this.currentFile, { - complete: (results: ParseResult) => { - if (results.data.length > 0) { - this.loadTabularFile(results.data); - } - }, - error: error => { - console.error("Error parsing file:", error); - this.onFileLoadingError(); - }, - }); - break; - case MIME_TYPES.MD: - this.displayMarkdown = true; - this.readFileAsText(blob); - break; - case MIME_TYPES.JSON: - this.displayJson = true; - this.readFileAsText(blob); - break; - case MIME_TYPES.TXT: - default: - this.displayPlainText = true; - this.readFileAsText(blob); - break; + this.datasetService + .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin) + .pipe(untilDestroyed(this)) + .subscribe({ + next: async (blob: Blob) => { + this.isLoading = false; + + const detectedMime = await this.detectMimeType(blob, this.filePath); + + // Post-detection size check against the now-known type limit + const sizeLimit = MIME_TYPE_SIZE_LIMITS_MB[detectedMime] ?? this.DEFAULT_MAX_SIZE; + if (blob.size > sizeLimit) { + this.onFileSizeNotLoadable(); + this.notificationService.warning(`File ${this.filePath} is too large to preview`); + return; + } + + // currentFile is built lazily inside the CSV case (the only consumer); avoids an + // extra in-memory copy of the blob for every other type. + this.renderByMimeType(blob, detectedMime); + }, + error: () => this.onFileLoadingError(), + }); + } + + /** + * Detects the actual MIME type of a blob using four strategies in order: + * 1. file-type library (magic bytes, ~100 formats) — refined with extension hints for + * ZIP/gzip container formats (PyTorch, Keras, NPZ, RDS). + * 2. 
Manual magic bytes for data formats not covered by file-type + * (Parquet, Arrow, HDF5, NumPy .npy, GGUF, Python pickle). + * 3. Extension-based fallback for opaque binary formats with no reliable magic bytes + * (Safetensors, ONNX). + * 4. Text sniffing for JSON, CSV, FASTA, FASTQ, VCF, Markdown, and plain text. + * + * Uses FileReader throughout for broad environment compatibility (tests, browsers). + */ + async detectMimeType(blob: Blob, fileName?: string): Promise { + const ext = (fileName ?? "").split(".").pop()?.toLowerCase() ?? ""; + + // 1. file-type library covers images, video, audio, PDF, Office (ZIP-based), and more. + if (typeof fileTypeFromBlob === "function") { + try { + const result = await fileTypeFromBlob(blob); + if (result) { + // Refine generic container types (ZIP, gzip) using extension hints + if (result.mime === "application/zip") { + if (ext === "pt" || ext === "pth") return MIME_TYPES.PYTORCH; + if (ext === "keras") return MIME_TYPES.KERAS; + if (ext === "npz") return MIME_TYPES.NPZ; + } + if (result.mime === "application/gzip" && ext === "rds") return MIME_TYPES.RDS; + return result.mime; + } + } catch (_) {} + } + + // 2. Manual magic bytes for formats not in file-type's signature list. 
+ try { + const header = await this.readBlobBytes(blob.slice(0, 16)); + + // Parquet: PAR1 at bytes 0–3 + if (header[0] === 0x50 && header[1] === 0x41 && header[2] === 0x52 && header[3] === 0x31) { + return MIME_TYPES.PARQUET; + } + // Arrow IPC: ARROW1 at bytes 0–5 + if ( + header[0] === 0x41 && header[1] === 0x52 && header[2] === 0x52 && + header[3] === 0x4f && header[4] === 0x57 && header[5] === 0x31 + ) { + return MIME_TYPES.ARROW; + } + // HDF5: \x89HDF\r\n\x1a\n at bytes 0–7 + if ( + header[0] === 0x89 && header[1] === 0x48 && header[2] === 0x44 && header[3] === 0x46 && + header[4] === 0x0d && header[5] === 0x0a && header[6] === 0x1a && header[7] === 0x0a + ) { + // Refine HDF5 sub-types by extension (all use identical magic bytes) + if (ext === "h5ad") return MIME_TYPES.H5AD; + if (ext === "h5seurat") return MIME_TYPES.H5SEURAT; + if (ext === "loom") return MIME_TYPES.LOOM; + return MIME_TYPES.HDF5; + } + // NumPy .npy: \x93NUMPY at bytes 0–5 + if ( + header[0] === 0x93 && header[1] === 0x4e && header[2] === 0x55 && + header[3] === 0x4d && header[4] === 0x50 && header[5] === 0x59 + ) { + return MIME_TYPES.NPY; + } + // GGUF: ASCII "GGUF" at bytes 0–3 + if (header[0] === 0x47 && header[1] === 0x47 && header[2] === 0x55 && header[3] === 0x46) { + return MIME_TYPES.GGUF; + } + // Python pickle: \x80 + protocol byte (2..5) + \x95 (FRAME opcode in proto 4+) + if (header[0] === 0x80 && header[1] >= 0x02 && header[1] <= 0x05) { + return MIME_TYPES.PICKLE; + } + } catch (_) {} + + // 3. Extension-based fallback for opaque binaries lacking reliable magic bytes + if (ext === "safetensors") return MIME_TYPES.SAFETENSORS; + if (ext === "onnx") return MIME_TYPES.ONNX; + + // 4. Text sniffing for formats with no fixed magic bytes + try { + const sample = await this.readBlobText(blob.slice(0, 4096)); + const trimmed = sample.trimStart(); + const firstLine = trimmed.split("\n")[0] ?? 
""; + + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + return MIME_TYPES.JSON; + } + if (trimmed.startsWith("# ") || trimmed.startsWith("## ")) { + return MIME_TYPES.MD; + } + // VCF: header line starts with ##fileformat=VCF + if (firstLine.startsWith("##fileformat=VCF")) { + return MIME_TYPES.VCF; + } + // FASTA: first non-empty/comment line starts with '>' + if (firstLine.startsWith(">")) { + return MIME_TYPES.FASTA; + } + // FASTQ: 4-line record pattern — line 1 starts '@', line 3 starts '+' + const lines = trimmed.split("\n"); + if (lines.length >= 4 && lines[0].startsWith("@") && lines[2].startsWith("+")) { + return MIME_TYPES.FASTQ; + } + // CSV heuristic: first line has at least 3 comma-separated fields + if (firstLine.split(",").length >= 3) { + return MIME_TYPES.CSV; + } + // Printable ASCII/UTF-8 → plain text + const bytes = await this.readBlobBytes(blob.slice(0, 512)); + const isPrintable = bytes.every(b => b === 9 || b === 10 || b === 13 || (b >= 32 && b <= 126)); + if (isPrintable) return MIME_TYPES.TXT; + } catch (_) {} + + return MIME_TYPES.OCTET_STREAM; + } + + /** Parse a NumPy .npy header. Returns dtype, shape, byte order, and Fortran flag or null on failure. */ + private async parseNpyHeader( + blob: Blob + ): Promise<{ dtype?: string; shape?: number[]; byteOrder?: string; fortranOrder?: boolean } | null> { + try { + const head = await this.readBlobBytes(blob.slice(0, 4096)); + // bytes 0-5: magic, byte 6: major, byte 7: minor + const major = head[6]; + // v1.0: uint16 LE header length at bytes 8-9; v2.0+: uint32 LE at bytes 8-11 + const headerLen = major >= 2 ? head[8] | (head[9] << 8) | (head[10] << 16) | (head[11] << 24) + : head[8] | (head[9] << 8); + const headerStart = major >= 2 ? 
12 : 10; + const headerText = new TextDecoder().decode(head.slice(headerStart, headerStart + headerLen)); + const dtypeMatch = headerText.match(/['"]descr['"]\s*:\s*['"]([^'"]+)['"]/); + const shapeMatch = headerText.match(/['"]shape['"]\s*:\s*\(([^)]*)\)/); + const fortranMatch = headerText.match(/['"]fortran_order['"]\s*:\s*(True|False)/); + const shape = shapeMatch + ? shapeMatch[1].split(",").map(s => s.trim()).filter(s => s.length > 0).map(Number) + : undefined; + const dtype = dtypeMatch?.[1]; + // dtype prefix: '<' = little-endian, '>' = big-endian, '|' = byte order N/A, '=' = native + let byteOrder: string | undefined; + if (dtype) { + if (dtype.startsWith("<")) byteOrder = "little-endian"; + else if (dtype.startsWith(">")) byteOrder = "big-endian"; + else if (dtype.startsWith("|")) byteOrder = "n/a"; + } + const fortranOrder = fortranMatch ? fortranMatch[1] === "True" : undefined; + return { dtype, shape, byteOrder, fortranOrder }; + } catch { + return null; + } + } + + /** Parse a Safetensors file header. Returns rich tensor metadata or null. 
/**
 * Reads the length-prefixed JSON header at the start of the blob: 8 bytes holding a
 * little-endian unsigned length N, then N bytes of JSON keyed by tensor name. Each entry's
 * `shape` and `dtype` are used; the reserved `__metadata__` key is reported separately
 * (first 6 pairs, values stringified).
 *
 * @param blob The file contents; only the first 8 + N bytes are read.
 * @returns Tensor/parameter totals, the first 5 tensor names, per-dtype parameter counts
 *          (largest first), the largest tensor, and optional metadata — or null when the
 *          header is missing, empty, larger than 100 MiB, or not a JSON object.
 */
private async parseSafetensorsHeader(blob: Blob): Promise<{
  tensorCount: number;
  parameterCount: number;
  sampleNames: string[];
  dtypeBreakdown: { dtype: string; params: number }[];
  largestTensor?: { name: string; shape: number[]; params: number };
  metadata?: { key: string; value: string }[];
} | null> {
  try {
    const lenBytes = await this.readBlobBytes(blob.slice(0, 8));
    if (lenBytes.length < 8) return null; // too short to contain the length prefix
    // uint64 LE — JS can read up to 53 bits safely; the size cap below keeps us well inside that.
    let headerLen = 0;
    for (let i = 0; i < 8; i++) headerLen += lenBytes[i] * Math.pow(256, i);
    if (headerLen <= 0 || headerLen > 100 * 1024 * 1024) return null;

    const headerText = await this.readBlobText(blob.slice(8, 8 + headerLen));
    const parsed: unknown = JSON.parse(headerText);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
    const header = parsed as Record<string, unknown>;

    const names = Object.keys(header).filter(k => k !== "__metadata__");
    let paramCount = 0;
    const dtypeMap: Record<string, number> = {};
    let largest: { name: string; shape: number[]; params: number } | undefined;
    for (const name of names) {
      const entry = header[name] as { shape?: unknown; dtype?: unknown } | null | undefined;
      const hasShape = Array.isArray(entry?.shape);
      const shape: number[] = hasShape ? (entry?.shape as number[]) : [];
      const dtype = String(entry?.dtype ?? "?");
      // A declared empty shape is a scalar holding one value; only a missing shape counts as 0.
      const params = hasShape ? shape.reduce((a, b) => a * b, 1) : 0;
      paramCount += params;
      dtypeMap[dtype] = (dtypeMap[dtype] ?? 0) + params;
      if (!largest || params > largest.params) largest = { name, shape, params };
    }

    const dtypeBreakdown = Object.entries(dtypeMap)
      .sort((a, b) => b[1] - a[1])
      .map(([dtype, params]) => ({ dtype, params }));
    const meta = (header.__metadata__ ?? {}) as Record<string, unknown>;
    const metadata = Object.entries(meta)
      .slice(0, 6)
      .map(([key, value]) => ({ key, value: String(value) }));
    return {
      tensorCount: names.length,
      parameterCount: paramCount,
      sampleNames: names.slice(0, 5),
      dtypeBreakdown,
      largestTensor: largest,
      metadata: metadata.length > 0 ? metadata : undefined,
    };
  } catch {
    // Best effort: unreadable blob or malformed JSON simply means "no metadata available".
    return null;
  }
}

/** Parse a GGUF (llama.cpp model) header. Returns version/tensor count or null.
*/ + private async parseGgufHeader( + blob: Blob + ): Promise<{ version: number; tensorCount: number; metadataKvCount: number } | null> { + try { + const head = await this.readBlobBytes(blob.slice(0, 24)); + // bytes 0-3: "GGUF" magic + // bytes 4-7: version (uint32 LE) + const version = head[4] | (head[5] << 8) | (head[6] << 16) | (head[7] << 24); + // bytes 8-15: tensor count (uint64 LE) + let tensorCount = 0; + for (let i = 0; i < 8; i++) tensorCount += head[8 + i] * Math.pow(256, i); + // bytes 16-23: metadata kv count (uint64 LE) + let metadataKvCount = 0; + for (let i = 0; i < 8; i++) metadataKvCount += head[16 + i] * Math.pow(256, i); + return { version, tensorCount, metadataKvCount }; + } catch { + return null; + } + } + + private readBlobBytes(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(new Uint8Array(reader.result as ArrayBuffer)); + reader.onerror = () => reject(reader.error); + reader.readAsArrayBuffer(blob); + }); + } + + private readBlobText(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(reader.result as string); + reader.onerror = () => reject(reader.error); + reader.readAsText(blob); + }); + } + + /** + * Returns true for any MIME type we know how to render or describe. + * Only truly unidentified binary (OCTET_STREAM) is considered unsupported. + */ + isPreviewSupported(mimeType: string): boolean { + return mimeType !== MIME_TYPES.OCTET_STREAM; + } + + get metadataItems(): { label: string; value: string }[] { + const m = this.fileMetadata; + if (!m) return []; + const items: { label: string; value: string }[] = []; + + if (m.imageWidth != null && m.imageHeight != null) { + items.push({ label: "Dimensions", value: `${m.imageWidth} × ${m.imageHeight} px` }); + const gcd = (a: number, b: number): number => (b === 0 ? 
a : gcd(b, a % b)); + const g = gcd(m.imageWidth, m.imageHeight); + items.push({ label: "Aspect ratio", value: `${m.imageWidth / g}:${m.imageHeight / g}` }); + } + + if (m.videoDuration != null) items.push({ label: "Duration", value: formatDuration(m.videoDuration) }); + if (m.videoWidth != null && m.videoHeight != null) + items.push({ label: "Resolution", value: `${m.videoWidth} × ${m.videoHeight}` }); + + if (m.audioDuration != null) items.push({ label: "Duration", value: formatDuration(m.audioDuration) }); + + if (m.rowCount != null) items.push({ label: "Rows", value: m.rowCount.toLocaleString() }); + if (m.columnCount != null) items.push({ label: "Columns", value: m.columnCount.toLocaleString() }); + if (m.sheetCount != null) items.push({ label: "Sheets", value: m.sheetCount.toLocaleString() }); + if (m.columnNames?.length) { + const preview = m.columnNames.slice(0, 8).join(", "); + const more = m.columnNames.length > 8 ? ` +${m.columnNames.length - 8} more` : ""; + items.push({ label: "Fields", value: preview + more }); + } + + if (m.jsonTopLevelType != null) { + const label = m.jsonTopLevelType === "array" ? 
"Items" : "Keys"; + items.push({ label: "JSON", value: m.jsonTopLevelType }); + if (m.jsonItemCount != null) items.push({ label, value: m.jsonItemCount.toLocaleString() }); + if (m.jsonPreviewKeys?.length) items.push({ label: "Preview", value: m.jsonPreviewKeys.join(", ") }); + } + + if (m.lineCount != null) items.push({ label: "Lines", value: m.lineCount.toLocaleString() }); + if (m.wordCount != null) items.push({ label: "Words", value: m.wordCount.toLocaleString() }); + if (m.charCount != null) items.push({ label: "Characters", value: m.charCount.toLocaleString() }); + if (m.headingCount != null) items.push({ label: "Headings", value: m.headingCount.toLocaleString() }); + + if (m.pageCount != null) items.push({ label: "Pages", value: `~${m.pageCount}` }); + + // ML / scientific + if (m.modelFormat) items.push({ label: "Format", value: m.modelFormat }); + if (m.containerFormat) items.push({ label: "Container", value: m.containerFormat }); + if (m.dtype) items.push({ label: "dtype", value: m.dtype }); + if (m.shape?.length) items.push({ label: "Shape", value: `(${m.shape.join(", ")})` }); + if (m.tensorCount != null) items.push({ label: "Tensors", value: m.tensorCount.toLocaleString() }); + if (m.parameterCount != null) items.push({ label: "Parameters", value: `~${m.parameterCount.toLocaleString()}` }); + if (m.sampleTensorNames?.length) + items.push({ label: "Tensors (first)", value: m.sampleTensorNames.join(", ") }); + if (m.ggufVersion != null) items.push({ label: "GGUF version", value: `v${m.ggufVersion}` }); + if (m.metadataKvCount != null) items.push({ label: "Metadata KV", value: m.metadataKvCount.toLocaleString() }); + + // JSON schema details + if (m.jsonMaxDepth != null) items.push({ label: "Max depth", value: m.jsonMaxDepth.toLocaleString() }); + if (m.jsonArrayElementType) items.push({ label: "Element type", value: m.jsonArrayElementType }); + if (m.jsonKeyTypes?.length) { + items.push({ + label: "Schema", + value: m.jsonKeyTypes.map(kt => `${kt.key}: 
${kt.type}`).join(", "), + }); + } + + // PDF /Info + if (m.pdfVersion) items.push({ label: "PDF version", value: m.pdfVersion }); + if (m.pdfTitle) items.push({ label: "Title", value: m.pdfTitle }); + if (m.pdfAuthor) items.push({ label: "Author", value: m.pdfAuthor }); + if (m.pdfCreator) items.push({ label: "Creator", value: m.pdfCreator }); + if (m.pdfProducer) items.push({ label: "Producer", value: m.pdfProducer }); + if (m.pdfEncrypted) items.push({ label: "Encrypted", value: "Yes" }); + + // Markdown structure + if (m.codeBlockCount) items.push({ label: "Code blocks", value: m.codeBlockCount.toLocaleString() }); + if (m.linkCount) items.push({ label: "Links", value: m.linkCount.toLocaleString() }); + if (m.imageCount) items.push({ label: "Images", value: m.imageCount.toLocaleString() }); + if (m.listItemCount) items.push({ label: "List items", value: m.listItemCount.toLocaleString() }); + + // Plain text encoding/structure + if (m.encoding) items.push({ label: "Encoding", value: m.encoding }); + if (m.emptyLineCount != null && m.emptyLineCount > 0) + items.push({ label: "Blank lines", value: m.emptyLineCount.toLocaleString() }); + if (m.avgLineLength != null && m.avgLineLength > 0) + items.push({ label: "Avg line", value: `${Math.round(m.avgLineLength)} chars` }); + if (m.maxLineLength != null && m.maxLineLength > 0) + items.push({ label: "Max line", value: `${m.maxLineLength.toLocaleString()} chars` }); + + // NumPy details + if (m.totalElements != null) items.push({ label: "Elements", value: m.totalElements.toLocaleString() }); + if (m.byteOrder) items.push({ label: "Byte order", value: m.byteOrder }); + if (m.fortranOrder != null) items.push({ label: "Order", value: m.fortranOrder ? 
"Fortran (column)" : "C (row)" }); + + // Safetensors details + if (m.dtypeBreakdown?.length) { + items.push({ + label: "Dtypes", + value: m.dtypeBreakdown.map(d => `${d.dtype}: ${d.params.toLocaleString()}`).join(", "), + }); + } + if (m.largestTensor) { + items.push({ + label: "Largest tensor", + value: `${m.largestTensor.name} (${m.largestTensor.shape.join("×")}, ${m.largestTensor.params.toLocaleString()} params)`, + }); + } + if (m.safetensorsMetadata?.length) { + for (const kv of m.safetensorsMetadata) { + items.push({ label: kv.key, value: kv.value }); + } + } + + // GGUF details + if (m.ggufArchitecture) items.push({ label: "Architecture", value: m.ggufArchitecture }); + if (m.ggufQuantization) items.push({ label: "Quantization", value: m.ggufQuantization }); + + // Bioinformatics + if (m.sequenceCount != null) { + const label = m.sequenceCountIsExact ? "Sequences" : "Sequences (sampled)"; + items.push({ label, value: m.sequenceCount.toLocaleString() }); + } + if (m.variantCount != null) { + const label = m.variantCountIsExact ? "Variants" : "Variants (sampled)"; + items.push({ label, value: m.variantCount.toLocaleString() }); + } + if (m.totalBases != null) items.push({ label: "Total bases", value: m.totalBases.toLocaleString() }); + if (m.gcContent != null) items.push({ label: "GC content", value: `${(m.gcContent * 100).toFixed(1)}%` }); + if (m.isProtein) items.push({ label: "Sequence type", value: "Protein" }); + if (m.minSequenceLength != null && m.maxSequenceLength != null) { + items.push({ + label: "Length range", + value: `${m.minSequenceLength.toLocaleString()}–${m.maxSequenceLength.toLocaleString()} (avg ${Math.round( + m.avgSequenceLength ?? 
0 + ).toLocaleString()})`, + }); + } + if (m.vcfSampleCount != null && m.vcfSampleCount > 0) + items.push({ label: "Samples", value: m.vcfSampleCount.toLocaleString() }); + if (m.vcfChromosomes?.length) + items.push({ label: "Chromosomes", value: m.vcfChromosomes.slice(0, 8).join(", ") }); + + return items; + } + + private renderByMimeType(blob: Blob, mimeType: string): void { + if (mimeType.startsWith("image/")) { + this.displayImage = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const img = new Image(); + img.onload = () => { + this.fileMetadata = { ...this.fileMetadata, imageWidth: img.naturalWidth, imageHeight: img.naturalHeight }; + this.cdr.markForCheck(); + }; + img.src = this.fileURL!; + return; + } + + if (mimeType.startsWith("video/")) { + this.displayMP4 = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const video = document.createElement("video"); + video.preload = "metadata"; + video.onloadedmetadata = () => { + this.fileMetadata = { + ...this.fileMetadata, + videoDuration: video.duration, + videoWidth: video.videoWidth, + videoHeight: video.videoHeight, + }; + this.cdr.markForCheck(); + URL.revokeObjectURL(video.src); + }; + video.src = URL.createObjectURL(blob); + return; + } + + if (mimeType.startsWith("audio/")) { + this.displayMP3 = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const audio = document.createElement("audio"); + audio.preload = "metadata"; + audio.onloadedmetadata = () => { + this.fileMetadata = { ...this.fileMetadata, audioDuration: audio.duration }; + this.cdr.markForCheck(); + URL.revokeObjectURL(audio.src); + }; + audio.src = URL.createObjectURL(blob); + return; + } + + switch (mimeType) { + case MIME_TYPES.PDF: + this.displayPDF = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + // Read first 200KB for /Info + version + page count; tail 50KB for trailer (where /Info often lives) + Promise.all([ 
+ this.readBlobText(blob.slice(0, 200 * 1024)), + this.readBlobText(blob.slice(Math.max(0, blob.size - 50 * 1024))), + ]).then(([head, tail]) => { + const combined = head + "\n" + tail; + const exact = (combined.match(/\/Type\s*\/Page\b/g) ?? []).length; + const fallback = Math.ceil((combined.match(/\/Page\b/g) ?? []).length / 2); + const pageCount = exact > 0 ? exact : fallback || undefined; + const info = extractPdfInfo(combined); + this.fileMetadata = { + ...this.fileMetadata, + pageCount, + pdfTitle: info.title, + pdfAuthor: info.author, + pdfCreator: info.creator, + pdfProducer: info.producer, + pdfVersion: info.version, + pdfEncrypted: info.encrypted, + }; + this.cdr.markForCheck(); + }); + break; + + case MIME_TYPES.MSEXCEL: + case MIME_TYPES.XLSX: + Promise.all([readXlsxFile(blob), readSheetNames(blob)]).then(([rows, sheetNames]) => { + const parsedData = rows.map(row => row.map(cell => (cell != null ? cell.toString() : ""))); + if (parsedData.length > 0) { + this.loadTabularFile(parsedData); + this.displayXlsx = true; + const header = parsedData[0]; + const dataRows = parsedData.slice(1).filter(r => r.some(c => c !== "")); + const schema = inferColumnSchema(dataRows, header.length); + this.fileMetadata = { + fileSize: blob.size, + rowCount: dataRows.length, + columnCount: header.length, + columnNames: header, + sheetCount: sheetNames.length, + columnTypes: schema.types, + nullCounts: schema.nullCounts, + sampleValues: schema.samples, + }; + this.cdr.markForCheck(); + } + }); + break; + + case MIME_TYPES.CSV: { + this.displayCSV = true; + const { slice: csvSlice, truncated: csvTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = csvTruncated; + // Papa.parse needs a File-like; build it from the slice only — no need to keep the full blob. 
+ const fileToParse = new File([csvSlice], this.filePath, { type: MIME_TYPES.CSV }); + Papa.parse(fileToParse, { + complete: (results: ParseResult) => { + if (results.data.length > 0) { + this.loadTabularFile(results.data); + const header: string[] = results.data[0].map(String); + const dataRows = (results.data.slice(1) as any[][]) + .filter(r => r.some((c: any) => c !== "")) + .map(r => r.map((c: any) => (c == null ? "" : String(c)))); + const schema = inferColumnSchema(dataRows, header.length); + this.fileMetadata = { + fileSize: blob.size, + rowCount: dataRows.length, + columnCount: header.length, + columnNames: header, + columnTypes: schema.types, + nullCounts: schema.nullCounts, + sampleValues: schema.samples, + }; + this.cdr.markForCheck(); } }, + error: () => this.onFileLoadingError(), + }); + break; + } + + case MIME_TYPES.MD: { + this.displayMarkdown = true; + const { slice: mdSlice, truncated: mdTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = mdTruncated; + this.readBlobText(mdSlice).then(text => { + this.textContent = text; + const lines = text.split("\n"); + // Strip fenced code blocks to count them; also count inline elements + const codeBlockCount = (text.match(/^```/gm) ?? []).length / 2; + const linkCount = (text.match(/\[[^\]]+\]\([^)]+\)/g) ?? []).length; + const imageCount = (text.match(/!\[[^\]]*\]\([^)]+\)/g) ?? []).length; + const listItemCount = lines.filter(l => /^\s*[-*+]\s/.test(l) || /^\s*\d+\.\s/.test(l)).length; + this.fileMetadata = { + fileSize: blob.size, + lineCount: lines.length, + wordCount: text.trim() ? text.trim().split(/\s+/).length : 0, + headingCount: lines.filter(l => /^#{1,6}\s/.test(l)).length, + codeBlockCount: Math.floor(codeBlockCount), + linkCount: linkCount - imageCount, // image syntax is link syntax + leading '!' 
+ imageCount, + listItemCount, + }; + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.JSON: { + this.displayJson = true; + const { slice: jsonSlice, truncated: jsonTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = jsonTruncated; + this.readBlobText(jsonSlice).then(text => { + this.textContent = text; + try { + const parsed = JSON.parse(text); + const isArray = Array.isArray(parsed); + const keys = isArray ? null : Object.keys(parsed); + const maxDepth = jsonMaxDepth(parsed); + let jsonKeyTypes: { key: string; type: string }[] | undefined; + let jsonArrayElementType: string | undefined; + if (isArray && parsed.length > 0) { + const elementTypes = new Set(parsed.slice(0, 20).map(jsTypeLabel)); + jsonArrayElementType = elementTypes.size === 1 ? [...elementTypes][0] : "mixed"; + } else if (!isArray && keys) { + jsonKeyTypes = keys.slice(0, 8).map(k => ({ + key: k, + type: jsTypeLabel((parsed as Record)[k]), + })); + } + this.fileMetadata = { + fileSize: blob.size, + jsonTopLevelType: isArray ? "array" : "object", + jsonItemCount: isArray ? parsed.length : keys!.length, + jsonPreviewKeys: isArray + ? parsed.slice(0, 5).map((_: unknown, i: number) => `[${i}]`) + : keys!.slice(0, 8), + jsonMaxDepth: maxDepth, + jsonKeyTypes, + jsonArrayElementType, + }; + } catch { + // Truncated JSON or invalid — fall back to raw text view + this.fileMetadata = { fileSize: blob.size }; + } + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.PARQUET: + this.detectedTypeMessage = + "Parquet file detected. Use the Parquet File Scan operator in Texera to analyze this data."; + this.fileMetadata = { fileSize: blob.size }; + break; + + case MIME_TYPES.ARROW: + this.detectedTypeMessage = + "Arrow/Feather file detected. Use the Arrow File Scan operator in Texera to analyze this data."; + this.fileMetadata = { fileSize: blob.size }; + break; + + case MIME_TYPES.DOCX: + this.detectedTypeMessage = "Word document (.docx) detected. 
Rich document preview is not yet supported."; + this.fileMetadata = { fileSize: blob.size }; + break; + + case MIME_TYPES.PPTX: + this.detectedTypeMessage = "PowerPoint (.pptx) detected. Presentation preview is not yet supported."; + this.fileMetadata = { fileSize: blob.size }; + break; + + // --- ML / scientific data formats --- + + case MIME_TYPES.HDF5: + this.detectedTypeMessage = + "HDF5 binary container detected. Likely a model (Keras .h5) or scientific dataset. Load with h5py / rhdf5."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" }; + break; + + case MIME_TYPES.H5AD: + this.detectedTypeMessage = + "AnnData (.h5ad) detected — single-cell expression matrix in HDF5. Load with scanpy.read_h5ad() in Python."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" }; + break; + + case MIME_TYPES.H5SEURAT: + this.detectedTypeMessage = + "Seurat HDF5 object (.h5seurat) detected. Load with SeuratDisk::LoadH5Seurat() in R."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" }; + break; + + case MIME_TYPES.LOOM: + this.detectedTypeMessage = + "Loom (.loom) detected — single-cell expression in HDF5. Load with loompy / scanpy in Python."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" }; + break; + + case MIME_TYPES.RDS: + this.detectedTypeMessage = + "R serialized object (.rds) detected — commonly a Seurat / SingleCellExperiment / fitted model. Load with readRDS() in R."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "gzip" }; + break; + + case MIME_TYPES.PICKLE: + this.detectedTypeMessage = + "Python pickle detected — typically a serialized model (sklearn / joblib) or dataset. Load with pickle.load() in Python."; + this.fileMetadata = { fileSize: blob.size }; + break; + + case MIME_TYPES.PYTORCH: + this.detectedTypeMessage = + "PyTorch checkpoint (.pt/.pth) detected. 
Load with torch.load() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "PyTorch", containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.KERAS: + this.detectedTypeMessage = + "Keras v3 model (.keras) detected. Load with tf.keras.models.load_model() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "Keras", containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.ONNX: + this.detectedTypeMessage = + "ONNX model (.onnx) detected — portable neural network. Load with onnxruntime or netron.app for inspection."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "ONNX" }; + break; + + case MIME_TYPES.NPY: + this.parseNpyHeader(blob).then(info => { + const shapeStr = info?.shape ? info.shape.join(" × ") : "?"; + const totalElements = info?.shape?.reduce((a, b) => a * b, 1); + this.detectedTypeMessage = `NumPy array (.npy) detected — ${info?.dtype ?? "?"} array of shape (${shapeStr}).`; + this.fileMetadata = { + fileSize: blob.size, + dtype: info?.dtype, + shape: info?.shape, + totalElements, + byteOrder: info?.byteOrder, + fortranOrder: info?.fortranOrder, + }; + this.cdr.markForCheck(); + }); + break; + + case MIME_TYPES.NPZ: + this.detectedTypeMessage = + "NumPy archive (.npz) detected — ZIP of .npy arrays. 
Load with numpy.load() and access via dict-like API."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.SAFETENSORS: + this.parseSafetensorsHeader(blob).then(info => { + if (info) { + const paramStr = info.parameterCount.toLocaleString(); + this.detectedTypeMessage = `Safetensors model detected — ${info.tensorCount} tensors, ~${paramStr} parameters.`; + this.fileMetadata = { + fileSize: blob.size, + modelFormat: "Safetensors", + tensorCount: info.tensorCount, + parameterCount: info.parameterCount, + sampleTensorNames: info.sampleNames, + dtypeBreakdown: info.dtypeBreakdown, + largestTensor: info.largestTensor, + safetensorsMetadata: info.metadata, + }; + } else { + this.detectedTypeMessage = "Safetensors file detected. Load with safetensors.torch.load_file() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "Safetensors" }; + } + this.cdr.markForCheck(); + }); + break; + + case MIME_TYPES.GGUF: + this.parseGgufHeader(blob).then(info => { + if (info) { + this.detectedTypeMessage = `GGUF model detected — v${info.version}, ${info.tensorCount} tensors, ${info.metadataKvCount} metadata entries.`; + this.fileMetadata = { + fileSize: blob.size, + modelFormat: "GGUF", + ggufVersion: info.version, + tensorCount: info.tensorCount, + metadataKvCount: info.metadataKvCount, + }; + } else { + this.detectedTypeMessage = "GGUF model detected (llama.cpp / quantized LLM format)."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "GGUF" }; + } + this.cdr.markForCheck(); + }); + break; + + // --- Bioinformatics text formats — render as plain text plus record-count metadata --- + + case MIME_TYPES.FASTA: { + this.displayPlainText = true; + const { slice: faSlice, truncated: faTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = faTruncated; + this.readBlobText(faSlice).then(text => { + this.textContent = text; + const stats = summarizeFasta(text); + this.fileMetadata = { + fileSize: 
blob.size, + lineCount: text.split("\n").length, + sequenceCount: stats.sequenceCount, + sequenceCountIsExact: !faTruncated, + totalBases: stats.totalBases, + gcContent: stats.isProtein ? undefined : stats.gcContent, + minSequenceLength: stats.minLen, + maxSequenceLength: stats.maxLen, + avgSequenceLength: stats.avgLen, + isProtein: stats.isProtein, + }; + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.FASTQ: { + this.displayPlainText = true; + const { slice: fqSlice, truncated: fqTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = fqTruncated; + this.readBlobText(fqSlice).then(text => { + this.textContent = text; + const lineCount = text.split("\n").filter(l => l.length > 0).length; + this.fileMetadata = { + fileSize: blob.size, + lineCount: text.split("\n").length, + sequenceCount: Math.floor(lineCount / 4), + sequenceCountIsExact: !fqTruncated, + }; + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.VCF: { + this.displayPlainText = true; + const { slice: vcfSlice, truncated: vcfTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = vcfTruncated; + this.readBlobText(vcfSlice).then(text => { + this.textContent = text; + const lines = text.split("\n"); + const variantLines = lines.filter(l => l.length > 0 && !l.startsWith("#")); + // Sample names are tab-separated columns after the 9 fixed VCF fields on the #CHROM header line + const chromHeader = lines.find(l => l.startsWith("#CHROM")); + const headerFields = chromHeader ? chromHeader.split("\t") : []; + const vcfSampleCount = headerFields.length > 9 ? 
headerFields.length - 9 : 0; + const chromSet = new Set(); + for (const line of variantLines.slice(0, 5000)) { + const chr = line.split("\t", 1)[0]; + if (chr) chromSet.add(chr); + if (chromSet.size >= 30) break; + } + this.fileMetadata = { + fileSize: blob.size, + lineCount: lines.length, + variantCount: variantLines.length, + variantCountIsExact: !vcfTruncated, + vcfSampleCount, + vcfChromosomes: [...chromSet].slice(0, 12), + }; + this.cdr.markForCheck(); }); + break; + } + + case MIME_TYPES.OCTET_STREAM: + this.onFileTypePreviewUnsupported(); + break; + + default: { + this.displayPlainText = true; + const { slice: txtSlice, truncated: txtTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = txtTruncated; + Promise.all([this.readBlobBytes(blob.slice(0, 3)), this.readBlobText(txtSlice)]).then(([head, text]) => { + this.textContent = text; + const lines = text.split("\n"); + const lineLens = lines.map(l => l.length); + const totalLen = lineLens.reduce((a, b) => a + b, 0); + const emptyLineCount = lineLens.filter(n => n === 0).length; + const maxLineLength = lineLens.length > 0 ? Math.max(...lineLens) : 0; + // BOM detection: UTF-8 BOM is EF BB BF; otherwise assume ASCII/UTF-8 + let encoding = "UTF-8"; + if (head[0] === 0xef && head[1] === 0xbb && head[2] === 0xbf) encoding = "UTF-8 BOM"; + else if (lines.every(l => /^[\x00-\x7F]*$/.test(l))) encoding = "ASCII"; + this.fileMetadata = { + fileSize: blob.size, + lineCount: lines.length, + wordCount: text.trim() ? text.trim().split(/\s+/).length : 0, + charCount: text.length, + emptyLineCount, + avgLineLength: lines.length > 0 ? 
totalLen / lines.length : 0, + maxLineLength, + encoding, + }; + this.cdr.markForCheck(); + }); + } } } @@ -305,17 +1646,26 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe this.displayJson = false; this.displayMP4 = false; this.displayMP3 = false; + this.displayPDF = false; + this.detectedTypeMessage = ""; + this.fileMetadata = undefined; this.isLoading = false; this.isFileLoadingError = false; this.isFileSizeUnloadable = false; this.isFileTypePreviewUnsupported = false; - // garbage collection if (this.fileURL) { URL.revokeObjectURL(this.fileURL); } - if (this.safeFileURL) { - URL.revokeObjectURL(this.safeFileURL.toString()); - } + this.fileURL = undefined; + this.safeFileURL = undefined; + this.safeResourceFileURL = undefined; + // Clear cached content so memory is reclaimed when switching files; without these, + // a previously-loaded 10 MB text or 100K-row table would persist on the component. + this.textContent = ""; + this.tableContent = []; + this.tableDataHeader = []; + this.currentFile = undefined; + this.previewTruncated = false; } onFileLoadingError() { @@ -333,49 +1683,37 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe this.isFileTypePreviewUnsupported = true; } - isPreviewSupported(mimeType: string) { - return mimeType !== MIME_TYPES.OCTET_STREAM && Object.hasOwnProperty.call(MIME_TYPE_SIZE_LIMITS_MB, mimeType); - } - - private readFileAsText(blob: Blob) { - const txtReader = new FileReader(); - txtReader.onload = (event: any) => { - this.textContent = event.target.result; - }; - txtReader.readAsText(blob); + /** + * Skip the download for very large files and show only the extension-based type hint. + * Avoids the multi-second download + memory cost of fetching a multi-hundred-MB blob + * just to render its first frame / table / iframe. 
+ */ + private showOversizedFileInfo(extensionMime: string): void { + const hint = TYPE_LOADING_HINTS[extensionMime]; + const sizeStr = this.fileSize != null ? formatSize(this.fileSize) : "very large"; + this.detectedTypeMessage = hint + ? `${hint} (Preview skipped — file is ${sizeStr}.)` + : `File is ${sizeStr} — full preview skipped to avoid browser lag. Open in a workflow operator to analyze.`; + this.cdr.markForCheck(); } - private loadSafeURL(blob: Blob) { + private loadSafeURL(blob: Blob): void { this.fileURL = URL.createObjectURL(blob); this.safeFileURL = this.sanitizer.bypassSecurityTrustUrl(this.fileURL); + this.safeResourceFileURL = this.sanitizer.bypassSecurityTrustResourceUrl(this.fileURL); } - private loadTabularFile(data: any[][]) { + + private loadTabularFile(data: any[][]): void { if (data.length > 0) { - // Extract the header (first row) this.tableDataHeader = data[0]; - - // Process the rest of the rows this.tableContent = data .slice(1) .map(row => { - // Normalize the row length to match the header length - while (row.length < this.tableDataHeader.length) { - row.push(""); - } + while (row.length < this.tableDataHeader.length) row.push(""); return row; }) - .filter(row => { - // filter out all empty row - let areCellAllEmpty = true; - for (const cell in row) { - if (cell != "") { - areCellAllEmpty = false; - break; - } - } - return !areCellAllEmpty; - }); + .filter(row => row.some(cell => cell !== "")); } } } diff --git a/frontend/src/app/workspace/component/workspace.component.ts b/frontend/src/app/workspace/component/workspace.component.ts index 9968c26f647..e96a53959ea 100644 --- a/frontend/src/app/workspace/component/workspace.component.ts +++ b/frontend/src/app/workspace/component/workspace.component.ts @@ -36,6 +36,7 @@ import { Workflow } from "../../common/type/workflow"; import { OperatorMetadataService } from "../service/operator-metadata/operator-metadata.service"; import { UndoRedoService } from 
"../service/undo-redo/undo-redo.service"; import { WorkflowActionService } from "../service/workflow-graph/model/workflow-action.service"; +import { WorkflowUtilService } from "../service/workflow-graph/util/workflow-util.service"; import { NzMessageService } from "ng-zorro-antd/message"; import { debounceTime, distinctUntilChanged, filter, switchMap, throttleTime } from "rxjs/operators"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; @@ -117,6 +118,7 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy { private undoRedoService: UndoRedoService, private workflowPersistService: WorkflowPersistService, private workflowActionService: WorkflowActionService, + private workflowUtilService: WorkflowUtilService, private location: Location, private route: ActivatedRoute, private operatorMetadataService: OperatorMetadataService, @@ -186,6 +188,38 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy { this.workflowActionService.clearWorkflow(); } + /** + * Reads `addOp` + `fileName` query params (set by the dataset file renderer's "Open in + * workflow" button) and adds the corresponding scan operator with its `fileName` property + * prefilled. Strips the params from the URL on success so a refresh doesn't double-add. + * + * Runs after `loadWorkflowWithId` completes, so the operator metadata is loaded and the + * workflow graph is ready for modification. + */ + handlePendingOperatorAddition(): void { + const params = this.route.snapshot.queryParams; + const addOp: string | undefined = params.addOp; + const fileName: string | undefined = params.fileName; + if (!addOp || !fileName) return; + try { + const operator = this.workflowUtilService.getNewOperatorPredicate(addOp); + // Place near the upper-left of the visible viewport. + const origin = this.workflowActionService.getJointGraphWrapper().getMainJointPaper()?.translate(); + const point = { x: 400 - (origin?.tx ?? 0), y: 200 - (origin?.ty ?? 
0) }; + this.workflowActionService.addOperator(operator, point); + // Set the file path through the schema-validated mutation API (operatorProperties is readonly). + this.workflowActionService.setOperatorProperty(operator.operatorID, { + ...operator.operatorProperties, + fileName, + }); + } catch (err) { + this.notificationService.error(`Could not pre-add operator "${addOp}" — drag it in manually.`); + } finally { + // Strip query params so a manual refresh doesn't re-add the operator. + this.router.navigate([], { relativeTo: this.route, queryParams: {}, preserveFragment: true }); + } + } + registerAutoPersistWorkflow(): void { // make sure it is only registered once if (this.autoPersistRegistered) { @@ -260,6 +294,9 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy { this.setLoadingState(false); this.registerAutoPersistWorkflow(); this.triggerCenter(); + // If the user arrived via "Open in workflow" from the dataset file renderer, + // honor the addOp + fileName query params now that the workflow is fully loaded. 
+ this.handlePendingOperatorAddition(); }, () => { this.workflowActionService.resetAsNewWorkflow(); diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 6a4ae4330c4..2d2851d7885 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -2059,6 +2059,13 @@ __metadata: languageName: node linkType: hard +"@borewit/text-codec@npm:^0.2.1": + version: 0.2.2 + resolution: "@borewit/text-codec@npm:0.2.2" + checksum: 10c0/2d3fb132bc6a132914a8fbf8e9ff2fa1ead210ecc395b28bb7355bd7719548a5e351ffe39f21c3bee8048f6cabd99eabd404bb5cc809cad9cba25abed19d271f + languageName: node + linkType: hard + "@bufbuild/protobuf@npm:^2.0.0, @bufbuild/protobuf@npm:^2.5.0": version: 2.12.0 resolution: "@bufbuild/protobuf@npm:2.12.0" @@ -5869,6 +5876,23 @@ __metadata: languageName: node linkType: hard +"@tokenizer/inflate@npm:^0.4.1": + version: 0.4.1 + resolution: "@tokenizer/inflate@npm:0.4.1" + dependencies: + debug: "npm:^4.4.3" + token-types: "npm:^6.1.1" + checksum: 10c0/9817516efe21d1ce3bdfb80a1f94efc8981064ce3873448ba79f4d81d96c0694c484c289bd042d346ae5536cf77f5aa9a367d39c3df700eb610761b7c306b4de + languageName: node + linkType: hard + +"@tokenizer/token@npm:^0.3.0": + version: 0.3.0 + resolution: "@tokenizer/token@npm:0.3.0" + checksum: 10c0/7ab9a822d4b5ff3f5bca7f7d14d46bdd8432528e028db4a52be7fbf90c7f495cc1af1324691dda2813c6af8dc4b8eb29de3107d4508165f9aa5b53e7d501f155 + languageName: node + linkType: hard + "@tsconfig/node10@npm:^1.0.7": version: 1.0.12 resolution: "@tsconfig/node10@npm:1.0.12" @@ -10419,6 +10443,18 @@ __metadata: languageName: node linkType: hard +"file-type@npm:^22.0.1": + version: 22.0.1 + resolution: "file-type@npm:22.0.1" + dependencies: + "@tokenizer/inflate": "npm:^0.4.1" + strtok3: "npm:^10.3.5" + token-types: "npm:^6.1.2" + uint8array-extras: "npm:^1.5.0" + checksum: 10c0/45b70a10196d46965eadd7835ec408c1c07b4fd2ed395e9bbcc0ad63d93f7bf6d076d0e970673b754577002019c8858825bc71ccc07ca7c0e49ac0c2b7e1839f + languageName: node + linkType: hard + 
"fill-range@npm:^7.1.1": version: 7.1.1 resolution: "fill-range@npm:7.1.1" @@ -11065,6 +11101,7 @@ __metadata: eslint-plugin-rxjs: "npm:5.0.3" eslint-plugin-rxjs-angular: "npm:2.0.1" file-saver: "npm:2.0.5" + file-type: "npm:^22.0.1" fs-extra: "npm:10.0.1" fuse.js: "npm:6.5.3" git-describe: "npm:4.1.0" @@ -11460,7 +11497,7 @@ __metadata: languageName: node linkType: hard -"ieee754@npm:1.2.1, ieee754@npm:^1.1.13": +"ieee754@npm:1.2.1, ieee754@npm:^1.1.13, ieee754@npm:^1.2.1": version: 1.2.1 resolution: "ieee754@npm:1.2.1" checksum: 10c0/b0782ef5e0935b9f12883a2e2aa37baa75da6e66ce6515c168697b42160807d9330de9a32ec1ed73149aea02e0d822e572bca6f1e22bdcbd2149e13b050b17bb @@ -17140,6 +17177,15 @@ __metadata: languageName: node linkType: hard +"strtok3@npm:^10.3.5": + version: 10.3.5 + resolution: "strtok3@npm:10.3.5" + dependencies: + "@tokenizer/token": "npm:^0.3.0" + checksum: 10c0/8d2477b239054c9f1f5b14a65d531147ca158ab9887fdc2d0938e77b7ec8891fb683b58254c7643afd5d98a421a59207534d491762b111f58c795071ecbe9fd1 + languageName: node + linkType: hard + "style-loader@npm:^3.3.0": version: 3.3.4 resolution: "style-loader@npm:3.3.4" @@ -17450,6 +17496,17 @@ __metadata: languageName: node linkType: hard +"token-types@npm:^6.1.1, token-types@npm:^6.1.2": + version: 6.1.2 + resolution: "token-types@npm:6.1.2" + dependencies: + "@borewit/text-codec": "npm:^0.2.1" + "@tokenizer/token": "npm:^0.3.0" + ieee754: "npm:^1.2.1" + checksum: 10c0/8786e28e3cb65b9e890bc3c38def98e6dfe4565538237f8c0e47dbe549ed8f5f00de8dc464717868308abb4729f1958f78f69e1c4c3deebbb685729113a6fee8 + languageName: node + linkType: hard + "totalist@npm:^1.0.0": version: 1.1.0 resolution: "totalist@npm:1.1.0" @@ -17798,6 +17855,13 @@ __metadata: languageName: node linkType: hard +"uint8array-extras@npm:^1.5.0": + version: 1.5.0 + resolution: "uint8array-extras@npm:1.5.0" + checksum: 10c0/0e74641ac7dadb02eadefc1ccdadba6010e007757bda824960de3c72bbe2b04e6d3af75648441f412148c4103261d54fcb60be45a2863beb76643a55fddba3bd + 
languageName: node + linkType: hard + "underscore@npm:>=1.8.3": version: 1.13.8 resolution: "underscore@npm:1.13.8"