From b17f7121aee40f1a6abbb1ffbf8c7ee22d68807b Mon Sep 17 00:00:00 2001 From: Kunwoo Park Date: Sat, 16 May 2026 12:06:17 -0700 Subject: [PATCH 1/4] feat(frontend): rich dataset file preview with type detection Replace extension-based file type guessing in the dataset previewer with magic-byte detection (file-type library + manual signatures for Parquet, Arrow, HDF5, NumPy .npy, GGUF, Python pickle), then extract rich per-format metadata (CSV/XLSX column types and null counts, JSON schema, PDF /Info, NumPy shape/dtype/byte-order, Safetensors tensor breakdown and __metadata__, GGUF version, FASTA GC content and sequence stats, VCF samples and chromosomes). PDF, AnnData, Seurat, Loom, ML model containers, and bioinformatics text formats now render meaningfully instead of "preview not supported." Memory-safe rendering for large files: text/CSV/JSON content is sliced to the first 10 MB before parsing to avoid browser OOM, with a warning banner when truncation occurs; cached content is cleared on file switch. Preview size cap raised to 1 GB. --- frontend/package.json | 1 + .../user-dataset-file-renderer.component.html | 40 +- .../user-dataset-file-renderer.component.scss | 58 + ...er-dataset-file-renderer.component.spec.ts | 248 ++- .../user-dataset-file-renderer.component.ts | 1526 +++++++++++++++-- frontend/yarn.lock | 66 +- 6 files changed, 1757 insertions(+), 182 deletions(-) diff --git a/frontend/package.json b/frontend/package.json index 08b298260e3..4e117cd05cc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -47,6 +47,7 @@ "d3-shape": "2.1.0", "dagre": "0.8.5", "file-saver": "2.0.5", + "file-type": "^22.0.1", "fuse.js": "6.5.3", "html2canvas": "1.4.1", "jointjs": "3.5.4", diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html index fd0ba3af152..b1c5a6ac114 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html @@ -44,6 +44,26 @@ nzType="warning" nzMessage="Preview of the file type is currently not supported"> +
+ +
+ +
+ +
+ +
+ +
- {{ column }} + +
{{ column }}
+
+ {{ fileMetadata?.columnTypes?.[i] }} + + {{ fileMetadata?.nullCounts?.[i] }} null + +
+ @@ -79,6 +109,14 @@ alt="{{filePath}}" class="full-size-image" /> + +
+ +
+
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss index e6424f529d8..0b21c57f47a 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss @@ -40,3 +40,61 @@ max-width: 90%; max-height: 90%; } + +.file-metadata-strip { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-bottom: 10px; + padding: 6px 0; + border-bottom: 1px solid #f0f0f0; +} + +.metadata-pill { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 8px 2px 6px; + background: #fafafa; + border: 1px solid #e8e8e8; + border-radius: 4px; + font-size: 12px; + white-space: nowrap; +} + +.metadata-label { + color: #8c8c8c; + font-weight: 500; +} + +.metadata-value { + color: #262626; +} + +.column-name { + font-weight: 600; +} + +.column-meta { + display: flex; + align-items: center; + gap: 6px; + margin-top: 2px; + font-weight: 400; +} + +.column-type-tag { + display: inline-block; + padding: 0 6px; + font-size: 11px; + color: #1890ff; + background: #e6f4ff; + border: 1px solid #91caff; + border-radius: 3px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.column-null-hint { + font-size: 11px; + color: #d4380d; +} diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts index 9e70a444df8..74238a37803 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts @@ -19,7 +19,7 @@ import { TestBed } from "@angular/core/testing"; import { HttpClientTestingModule } from "@angular/common/http/testing"; -import { UserDatasetFileRendererComponent } from "./user-dataset-file-renderer.component"; +import { UserDatasetFileRendererComponent, MIME_TYPES, getMimeType, inferColumnSchema } from "./user-dataset-file-renderer.component"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; import { DomSanitizer } from "@angular/platform-browser"; @@ -34,7 +34,13 @@ describe("UserDatasetFileRendererComponent", () => { providers: [ DatasetService, NotificationService, - { provide: DomSanitizer, useValue: { bypassSecurityTrustUrl: vi.fn() } }, + { + provide: DomSanitizer, + useValue: { + bypassSecurityTrustUrl: vi.fn((url: string) => url), + bypassSecurityTrustResourceUrl: vi.fn((url: string) => url), + }, + }, ...commonTestProviders, ], }); @@ -42,15 +48,237 @@ describe("UserDatasetFileRendererComponent", () => { component = fixture.componentInstance; }); - it("should return true for supported MIME type", () => { - const supportedMimeType = "image/jpeg"; // Example of 
a supported MIME type - const result = component.isPreviewSupported(supportedMimeType); - expect(result).toBe(true); + describe("isPreviewSupported", () => { + it("should return true for known MIME types", () => { + expect(component.isPreviewSupported("image/jpeg")).toBe(true); + expect(component.isPreviewSupported("application/pdf")).toBe(true); + expect(component.isPreviewSupported("application/x-parquet")).toBe(true); + }); + + it("should return false only for unidentified binary (octet-stream)", () => { + expect(component.isPreviewSupported(MIME_TYPES.OCTET_STREAM)).toBe(false); + }); + }); + + describe("getMimeType (extension-based fallback)", () => { + it("should resolve common image extensions", () => { + expect(getMimeType("photo.jpg")).toBe(MIME_TYPES.JPEG); + expect(getMimeType("photo.PNG")).toBe(MIME_TYPES.PNG); + expect(getMimeType("anim.gif")).toBe(MIME_TYPES.GIF); + }); + + it("should resolve xlsx separately from xls", () => { + expect(getMimeType("data.xlsx")).toBe(MIME_TYPES.XLSX); + expect(getMimeType("data.xls")).toBe(MIME_TYPES.MSEXCEL); + }); + + it("should resolve data format extensions", () => { + expect(getMimeType("data.parquet")).toBe(MIME_TYPES.PARQUET); + expect(getMimeType("data.arrow")).toBe(MIME_TYPES.ARROW); + expect(getMimeType("data.feather")).toBe(MIME_TYPES.ARROW); + }); + + it("should return octet-stream for unknown extensions", () => { + expect(getMimeType("file.xyz")).toBe(MIME_TYPES.OCTET_STREAM); + expect(getMimeType("noextension")).toBe(MIME_TYPES.OCTET_STREAM); + }); + }); + + describe("detectMimeType (magic byte detection)", () => { + it("should detect Parquet files from PAR1 magic bytes", async () => { + const magic = new Uint8Array([0x50, 0x41, 0x52, 0x31, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.PARQUET); + }); + + it("should detect Arrow IPC files from ARROW1 magic bytes", async () => { + const magic = new Uint8Array([0x41, 0x52, 0x52, 0x4f, 0x57, 0x31, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.ARROW); + }); + + it("should detect JSON via text sniffing (object)", async () => { + const blob = new Blob(['{"key": "value"}'], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.JSON); + }); + + it("should detect JSON via text sniffing (array)", async () => { + const blob = new Blob(['[1, 2, 3]'], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.JSON); + }); + + it("should detect CSV via text sniffing", async () => { + const blob = new Blob(["name,age,city\nAlice,30,LA\nBob,25,NY"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.CSV); + }); + + it("should detect Markdown via text sniffing", async () => { + const blob = new Blob(["# My Title\n\nSome content here"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.MD); + }); + + it("should detect plain text when content is printable ASCII", async () => { + const blob = new Blob(["Hello, world! 
This is plain text."], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.TXT); + }); + + it("should return octet-stream for unidentifiable binary", async () => { + const binary = new Uint8Array([0x00, 0x01, 0x02, 0x80, 0xff, 0xfe, 0x7f, 0x03]); + const blob = new Blob([binary]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.OCTET_STREAM); + }); + + it("should detect HDF5 from magic bytes (generic .h5)", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "model.h5"); + expect(result).toBe(MIME_TYPES.HDF5); + }); + + it("should refine HDF5 to H5AD by extension", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "scrna.h5ad"); + expect(result).toBe(MIME_TYPES.H5AD); + }); + + it("should refine HDF5 to H5SEURAT by extension", async () => { + const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob, "pbmc.h5seurat"); + expect(result).toBe(MIME_TYPES.H5SEURAT); + }); + + it("should detect Python pickle from \\x80 + protocol byte", async () => { + const magic = new Uint8Array([0x80, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.PICKLE); + }); + + it("should detect NumPy .npy from magic bytes", async () => { + const magic = new Uint8Array([0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59, 0x01, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.NPY); + }); + + it("should detect GGUF from magic bytes", async () => { + const magic = new Uint8Array([0x47, 0x47, 0x55, 0x46, 0x03, 0x00, 0x00, 0x00]); + const blob = new Blob([magic]); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.GGUF); + }); + + it("should detect Safetensors via extension fallback", async () => { + const opaque = new Uint8Array([0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([opaque]); + const result = await component.detectMimeType(blob, "model.safetensors"); + expect(result).toBe(MIME_TYPES.SAFETENSORS); + }); + + it("should detect ONNX via extension fallback", async () => { + const opaque = new Uint8Array([0x08, 0x07, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00]); + const blob = new Blob([opaque]); + const result = await component.detectMimeType(blob, "resnet.onnx"); + expect(result).toBe(MIME_TYPES.ONNX); + }); + + it("should detect VCF from header line", async () => { + const blob = new Blob(["##fileformat=VCFv4.2\n##source=test\n"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.VCF); + }); + + it("should detect FASTA from > prefix", async () => { + const blob = new Blob([">seq1\nACGTACGT\n>seq2\nTGCATGCA\n"], { type: "text/plain" }); + const result = await component.detectMimeType(blob); + expect(result).toBe(MIME_TYPES.FASTA); + }); + + it("should detect FASTQ from 4-line @/+ pattern", async () => { + const blob = new Blob(["@read1\nACGT\n+\n!!!!\n@read2\nTGCA\n+\n!!!!\n"], { type: "text/plain" }); + 
const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.FASTQ);
+    });
+  });

-  it("should return false for unsupported MIME type", () => {
-    const unsupportedMimeType = "application/unknown"; // Example of an unsupported MIME type
-    const result = component.isPreviewSupported(unsupportedMimeType);
-    expect(result).toBe(false);
+  describe("parser helpers", () => {
+    it("should parse a NumPy v1.0 header", async () => {
+      // Construct a minimal valid .npy v1 file: magic + version + uint16 header_len + ASCII header
+      const headerText = "{'descr': '<f8', 'fortran_order': False, 'shape': (3, 4), }";
+      const headerBytes = new TextEncoder().encode(headerText);
+      const buf = new Uint8Array(10 + headerBytes.length);
+      buf.set([0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59, 0x01, 0x00], 0);
+      buf[8] = headerBytes.length & 0xff;
+      buf[9] = (headerBytes.length >> 8) & 0xff;
+      buf.set(headerBytes, 10);
+      const blob = new Blob([buf]);
+      const result = await (component as any).parseNpyHeader(blob);
+      expect(result?.dtype).toBe("<f8");
+      expect(result?.shape).toEqual([3, 4]);
+      expect(result?.byteOrder).toBe("little-endian");
+    });
+
+    it("should parse a Safetensors header", async () => {
+      const header = JSON.stringify({
+        "layer.weight": { dtype: "F32", shape: [128, 64], data_offsets: [0, 32768] },
+        "layer.bias": { dtype: "F32", shape: [128], data_offsets: [32768, 33280] },
+        __metadata__: { format: "pt" },
+      });
+      const headerBytes = new TextEncoder().encode(header);
+      const lenBytes = new Uint8Array(8);
+      let len = headerBytes.length;
+      for (let i = 0; i < 8; i++) {
+        lenBytes[i] = len & 0xff;
+        len = Math.floor(len / 256);
+      }
+      const blob = new Blob([lenBytes, headerBytes]);
+      const result = await (component as any).parseSafetensorsHeader(blob);
+      expect(result?.tensorCount).toBe(2);
+      expect(result?.parameterCount).toBe(128 * 64 + 128);
+      expect(result?.sampleNames).toEqual(["layer.weight", "layer.bias"]);
+    });
+
+    it("should infer column types from tabular sample data", () => {
+      const rows = [
+        ["Alice", "30", "75000.50", "true", "2024-01-15"],
+        ["Bob", "25", "60000.00", "false", "2024-03-22"],
+        ["Carol", "", "82000.75", "true", "2024-05-10"],
+      ];
+      const schema = inferColumnSchema(rows, 5);
+      expect(schema.types).toEqual(["string", "integer", "double", "boolean", "date"]);
+      expect(schema.nullCounts).toEqual([0, 1, 0, 0, 0]);
+      expect(schema.samples).toEqual(["Alice", "30", "75000.50", "true", "2024-01-15"]);
+    });
+
+    it("should fall back to string for all-null columns", () => {
+      const rows = [["a", ""], ["b", ""]];
+      const schema = inferColumnSchema(rows, 2);
+      expect(schema.types).toEqual(["string", "string"]);
+      expect(schema.nullCounts).toEqual([0, 2]);
+    });
+
+    it("should parse a GGUF header", async () => {
+      const buf = new Uint8Array(24);
+      buf.set([0x47, 0x47, 0x55, 0x46], 0); // "GGUF"
+      buf.set([0x03, 0x00, 0x00, 0x00], 4); // version 3
+      buf.set([0xd2, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 8); // 722 tensors
+      buf.set([0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 16); // 16 metadata kv
+      const blob = new Blob([buf]);
+      const result = await (component as any).parseGgufHeader(blob);
+      expect(result?.version).toBe(3);
+      expect(result?.tensorCount).toBe(722);
+      expect(result?.metadataKvCount).toBe(16);
+    });
   });
 });
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
index 861479ca5a5..dfb27a80b48 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
@@ -17,14 +17,15 @@
 * under the License.
*/ -import { Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core"; +import { ChangeDetectorRef, Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core"; import { DatasetService } from "../../../../../service/user/dataset/dataset.service"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import * as Papa from "papaparse"; import { ParseResult } from "papaparse"; -import { DomSanitizer, SafeUrl } from "@angular/platform-browser"; -import readXlsxFile from "read-excel-file"; +import { DomSanitizer, SafeResourceUrl, SafeUrl } from "@angular/platform-browser"; +import readXlsxFile, { readSheetNames } from "read-excel-file"; import { NotificationService } from "../../../../../../common/service/notification/notification.service"; +import { formatSize } from "../../../../../../common/util/size-formatter.util"; import { NgStyle, NgIf, NgFor } from "@angular/common"; import { NzSpinComponent } from "ng-zorro-antd/spin"; import { NzAlertComponent } from "ng-zorro-antd/alert"; @@ -38,6 +39,7 @@ import { } from "ng-zorro-antd/table"; import { MarkdownComponent } from "ngx-markdown"; import { NgxJsonViewerModule } from "ngx-json-viewer"; +import { fileTypeFromBlob } from "file-type"; export const MIME_TYPES = { JPEG: "image/jpeg", @@ -45,6 +47,9 @@ export const MIME_TYPES = { PNG: "image/png", WEBP: "image/webp", GIF: "image/gif", + AVIF: "image/avif", + BMP: "image/bmp", + TIFF: "image/tiff", CSV: "text/csv", TXT: "text/plain", MD: "text/markdown", @@ -53,35 +58,438 @@ export const MIME_TYPES = { PDF: "application/pdf", MSWORD: "application/msword", MSEXCEL: "application/vnd.ms-excel", + XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation", MSPOWERPOINT: "application/vnd.ms-powerpoint", MP4: "video/mp4", MP3: "audio/mpeg", - OCTET_STREAM: "application/octet-stream", // Default binary format + WAV: "audio/wav", + FLAC: "audio/flac", + WEBM: "video/webm", + MOV: "video/quicktime", + ARROW: "application/x-arrow", + PARQUET: "application/x-parquet", + // ML / scientific data formats + HDF5: "application/x-hdf5", + H5AD: "application/x-h5ad", + H5SEURAT: "application/x-h5seurat", + LOOM: "application/x-loom", + PICKLE: "application/x-python-pickle", + NPY: "application/x-numpy-array", + NPZ: "application/x-numpy-archive", + SAFETENSORS: "application/x-safetensors", + GGUF: "application/x-gguf", + PYTORCH: "application/x-pytorch", + KERAS: "application/x-keras", + ONNX: "application/x-onnx", + RDS: "application/x-rds", + // Bioinformatics text + FASTA: "application/x-fasta", + FASTQ: "application/x-fastq", + VCF: "application/x-vcf", + OCTET_STREAM: "application/octet-stream", }; export function getMimeType(filename: string): string { - const extension = filename.split(".").pop()?.toUpperCase(); - return extension && MIME_TYPES[extension as keyof typeof MIME_TYPES] - ? 
MIME_TYPES[extension as keyof typeof MIME_TYPES] - : MIME_TYPES.OCTET_STREAM; + const extensionMap: Record = { + JPG: MIME_TYPES.JPEG, + JPEG: MIME_TYPES.JPEG, + PNG: MIME_TYPES.PNG, + WEBP: MIME_TYPES.WEBP, + GIF: MIME_TYPES.GIF, + AVIF: MIME_TYPES.AVIF, + BMP: MIME_TYPES.BMP, + TIFF: MIME_TYPES.TIFF, + TIF: MIME_TYPES.TIFF, + CSV: MIME_TYPES.CSV, + TSV: MIME_TYPES.CSV, + TXT: MIME_TYPES.TXT, + MD: MIME_TYPES.MD, + HTML: MIME_TYPES.HTML, + HTM: MIME_TYPES.HTML, + JSON: MIME_TYPES.JSON, + JSONL: MIME_TYPES.TXT, + PDF: MIME_TYPES.PDF, + DOC: MIME_TYPES.MSWORD, + XLS: MIME_TYPES.MSEXCEL, + XLSX: MIME_TYPES.XLSX, + DOCX: MIME_TYPES.DOCX, + PPTX: MIME_TYPES.PPTX, + PPT: MIME_TYPES.MSPOWERPOINT, + MP4: MIME_TYPES.MP4, + MP3: MIME_TYPES.MP3, + WAV: MIME_TYPES.WAV, + FLAC: MIME_TYPES.FLAC, + WEBM: MIME_TYPES.WEBM, + MOV: MIME_TYPES.MOV, + ARROW: MIME_TYPES.ARROW, + FEATHER: MIME_TYPES.ARROW, + PARQUET: MIME_TYPES.PARQUET, + // ML / scientific + H5: MIME_TYPES.HDF5, + HDF5: MIME_TYPES.HDF5, + H5AD: MIME_TYPES.H5AD, + H5SEURAT: MIME_TYPES.H5SEURAT, + LOOM: MIME_TYPES.LOOM, + PKL: MIME_TYPES.PICKLE, + PICKLE: MIME_TYPES.PICKLE, + JOBLIB: MIME_TYPES.PICKLE, + NPY: MIME_TYPES.NPY, + NPZ: MIME_TYPES.NPZ, + SAFETENSORS: MIME_TYPES.SAFETENSORS, + GGUF: MIME_TYPES.GGUF, + PT: MIME_TYPES.PYTORCH, + PTH: MIME_TYPES.PYTORCH, + KERAS: MIME_TYPES.KERAS, + ONNX: MIME_TYPES.ONNX, + RDS: MIME_TYPES.RDS, + // Bioinformatics text + FASTA: MIME_TYPES.FASTA, + FA: MIME_TYPES.FASTA, + FNA: MIME_TYPES.FASTA, + FFN: MIME_TYPES.FASTA, + FAA: MIME_TYPES.FASTA, + FASTQ: MIME_TYPES.FASTQ, + FQ: MIME_TYPES.FASTQ, + VCF: MIME_TYPES.VCF, + }; + const ext = filename.split(".").pop()?.toUpperCase() ?? ""; + return extensionMap[ext] ?? MIME_TYPES.OCTET_STREAM; } -// the size limits for all preview-supported types +export function formatDuration(seconds: number): string { + if (!isFinite(seconds) || seconds < 0) return "—"; + const totalSec = Math.floor(seconds); + const h = Math.floor(totalSec / 3600); + const m = Math.floor((totalSec % 3600) / 60); + const s = totalSec % 60; + if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}`; + return `${m}:${String(s).padStart(2, "0")}`; +} + +/** + * Maximum size at which we'll attempt to preview a file. + * + * Note on memory: for "identify-only" types (HDF5, Parquet, Arrow, pickle, model containers, etc.) + * we only read the first ~16 bytes for magic-byte detection, so 1 GB is safe. For header-parse types + * (Safetensors, GGUF, NumPy .npy) we only read the first few KB. The cost of bumping all limits to + * 1 GB is the full-blob download time, since the dataset service streams the entire file. + * + * For full-content render types (CSV via Papa.parse, XLSX, JSON, large text) memory cost scales + * with file size — browsers may slow down or OOM well before 1 GB. The user can choose: the guard + * no longer blocks; if their browser tab struggles, they can close it. 
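+ *
+ * Illustrative example (a hypothetical 800 MB model.safetensors): it passes the 1 GB guard,
+ * detectMimeType() classifies it from blob.slice(0, 16) plus the extension hint, and the
+ * Safetensors path parses only the 8-byte length prefix and its KB-scale JSON header, so
+ * the bytes actually parsed stay in the kilobytes regardless of file size.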
+ */ +const MAX_PREVIEW_SIZE = 1024 * 1024 * 1024; + +// size limits per MIME type — also used as pre-fetch guard export const MIME_TYPE_SIZE_LIMITS_MB = { - [MIME_TYPES.JPEG]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.PNG]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.WEBP]: 5 * 1024 * 1024, // 5 MB - [MIME_TYPES.GIF]: 10 * 1024 * 1024, // 10 MB - [MIME_TYPES.CSV]: 2 * 1024 * 1024, // 2 MB for text-based data files - [MIME_TYPES.TXT]: 1 * 1024 * 1024, // 1 MB for plain text files - [MIME_TYPES.MD]: 1 * 1024 * 1024, // 1 MB for MD files - [MIME_TYPES.JSON]: 1 * 1024 * 1024, // 1 MB for JSON files - [MIME_TYPES.MSEXCEL]: 10 * 1024 * 1024, // 10 MB for Excel spreadsheets - [MIME_TYPES.MP4]: 50 * 1024 * 1024, // 50 MB for MP4 videos - [MIME_TYPES.MP3]: 10 * 1024 * 1024, // 10 MB for MP3 audio files - [MIME_TYPES.OCTET_STREAM]: 5 * 1024 * 1024, // Default size for other binary formats + [MIME_TYPES.JPEG]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PNG]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WEBP]: MAX_PREVIEW_SIZE, + [MIME_TYPES.GIF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.AVIF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.BMP]: MAX_PREVIEW_SIZE, + [MIME_TYPES.TIFF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.CSV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.TXT]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MD]: MAX_PREVIEW_SIZE, + [MIME_TYPES.JSON]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PDF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MSEXCEL]: MAX_PREVIEW_SIZE, + [MIME_TYPES.XLSX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.DOCX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PPTX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MP4]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WEBM]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MOV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.MP3]: MAX_PREVIEW_SIZE, + [MIME_TYPES.WAV]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FLAC]: MAX_PREVIEW_SIZE, + [MIME_TYPES.ARROW]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PARQUET]: MAX_PREVIEW_SIZE, + [MIME_TYPES.HDF5]: MAX_PREVIEW_SIZE, + [MIME_TYPES.H5AD]: MAX_PREVIEW_SIZE, + [MIME_TYPES.H5SEURAT]: MAX_PREVIEW_SIZE, + [MIME_TYPES.LOOM]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PICKLE]: MAX_PREVIEW_SIZE, + [MIME_TYPES.NPY]: MAX_PREVIEW_SIZE, + [MIME_TYPES.NPZ]: MAX_PREVIEW_SIZE, + [MIME_TYPES.SAFETENSORS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.GGUF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.PYTORCH]: MAX_PREVIEW_SIZE, + [MIME_TYPES.KERAS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.ONNX]: MAX_PREVIEW_SIZE, + [MIME_TYPES.RDS]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FASTA]: MAX_PREVIEW_SIZE, + [MIME_TYPES.FASTQ]: MAX_PREVIEW_SIZE, + [MIME_TYPES.VCF]: MAX_PREVIEW_SIZE, + [MIME_TYPES.OCTET_STREAM]: MAX_PREVIEW_SIZE, }; +export interface FileMetadata { + fileSize?: number; + // image + imageWidth?: number; + imageHeight?: number; + // video + videoDuration?: number; + videoWidth?: number; + videoHeight?: number; + // audio + audioDuration?: number; + // tabular + rowCount?: number; + columnCount?: number; + columnNames?: string[]; + sheetCount?: number; + // json + jsonTopLevelType?: "object" | "array"; + jsonItemCount?: number; + jsonPreviewKeys?: string[]; + // text / markdown + lineCount?: number; + wordCount?: number; + charCount?: number; + headingCount?: number; + // pdf + pageCount?: number; + // ML model / tensor data + modelFormat?: string; // "PyTorch", "Keras", "ONNX", "Safetensors", "GGUF", "TensorFlow" + containerFormat?: string; // "HDF5", "ZIP archive", "gzip" + tensorCount?: number; + parameterCount?: number; + sampleTensorNames?: string[]; + // NumPy + dtype?: string; + shape?: number[]; + // GGUF + ggufVersion?: number; + metadataKvCount?: number; + // Bioinformatics + sequenceCount?: number; + sequenceCountIsExact?: boolean; + 
variantCount?: number; + variantCountIsExact?: boolean; + + // Rich tabular schema (CSV / XLSX) + columnTypes?: string[]; // inferred type per column: "integer", "double", "boolean", "date", "string" + nullCounts?: number[]; // count of empty cells per column (in sample) + sampleValues?: string[]; // first non-null value per column + + // JSON schema + jsonMaxDepth?: number; + jsonKeyTypes?: { key: string; type: string }[]; // for object roots + jsonArrayElementType?: string; // for array roots: uniform type or "mixed" + + // PDF /Info dictionary + pdfTitle?: string; + pdfAuthor?: string; + pdfCreator?: string; + pdfProducer?: string; + pdfVersion?: string; + pdfEncrypted?: boolean; + + // Markdown structure + codeBlockCount?: number; + linkCount?: number; + imageCount?: number; + listItemCount?: number; + + // Plain text / encoding + encoding?: string; // "UTF-8 BOM", "UTF-8", "ASCII" + emptyLineCount?: number; + avgLineLength?: number; + maxLineLength?: number; + + // NumPy enhanced + totalElements?: number; + byteOrder?: string; // "little-endian", "big-endian" + fortranOrder?: boolean; + + // Safetensors enhanced + dtypeBreakdown?: { dtype: string; params: number }[]; + largestTensor?: { name: string; shape: number[]; params: number }; + safetensorsMetadata?: { key: string; value: string }[]; + + // GGUF enhanced + ggufArchitecture?: string; + ggufQuantization?: string; + + // FASTA enhanced + totalBases?: number; + gcContent?: number; // 0..1 + minSequenceLength?: number; + maxSequenceLength?: number; + avgSequenceLength?: number; + isProtein?: boolean; + + // VCF enhanced + vcfSampleCount?: number; + vcfChromosomes?: string[]; +} + +/** Classify a single cell value into a coarse type label. */ +function inferCellType(value: string): string { + if (value === "" || value == null) return "null"; + if (/^-?\d+$/.test(value)) return "integer"; + if (/^-?\d+\.\d+$/.test(value) || /^-?\d+\.?\d*[eE][-+]?\d+$/.test(value)) return "double"; + if (/^(true|false|True|False|TRUE|FALSE)$/.test(value)) return "boolean"; + if (/^\d{4}-\d{2}-\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2})?)?$/.test(value)) return "date"; + return "string"; +} + +/** Infer per-column type, null count, and a sample value from tabular data rows. */ +export function inferColumnSchema( + dataRows: string[][], + columnCount: number, + sampleLimit: number = 50 +): { types: string[]; nullCounts: number[]; samples: string[] } { + const types: string[] = []; + const nullCounts: number[] = []; + const samples: string[] = []; + const rowsToScan = Math.min(dataRows.length, sampleLimit); + + for (let c = 0; c < columnCount; c++) { + const typeCounts: Record = {}; + let nullCount = 0; + let firstNonNull = ""; + + for (let r = 0; r < rowsToScan; r++) { + const raw = dataRows[r][c]; + const val = raw == null ? "" : String(raw).trim(); + const t = inferCellType(val); + if (t === "null") { + nullCount++; + } else { + if (firstNonNull === "") firstNonNull = val; + typeCounts[t] = (typeCounts[t] ?? 0) + 1; + } + } + + const ranked = Object.entries(typeCounts).sort((a, b) => b[1] - a[1]); + types.push(ranked[0]?.[0] ?? "string"); + nullCounts.push(nullCount); + samples.push(firstNonNull); + } + return { types, nullCounts, samples }; +} + +/** Walk an arbitrary JSON value and compute max nesting depth. 
*/ +function jsonMaxDepth(value: unknown, depth = 1): number { + if (Array.isArray(value)) { + let max = depth; + for (const item of value) max = Math.max(max, jsonMaxDepth(item, depth + 1)); + return max; + } + if (value !== null && typeof value === "object") { + let max = depth; + for (const v of Object.values(value as Record)) { + max = Math.max(max, jsonMaxDepth(v, depth + 1)); + } + return max; + } + return depth; +} + +/** Describe a JS value's type for human display. */ +function jsTypeLabel(value: unknown): string { + if (value === null) return "null"; + if (Array.isArray(value)) return `array(${value.length})`; + return typeof value; +} + +/** Extract /Info dictionary fields from a PDF's raw text. Heuristic but robust for unencrypted PDFs. */ +function extractPdfInfo(rawText: string): { + title?: string; + author?: string; + creator?: string; + producer?: string; + version?: string; + encrypted?: boolean; +} { + const result: ReturnType = {}; + const versionMatch = rawText.match(/^%PDF-(\d+\.\d+)/); + if (versionMatch) result.version = versionMatch[1]; + result.encrypted = /\/Encrypt\b/.test(rawText); + + // Match `/Title (value)` or `/Title ` — only the parenthesized form is reliably plain text + const fieldRe = (name: string) => new RegExp(`/${name}\\s*\\(([^)\\\\]*(?:\\\\.[^)\\\\]*)*)\\)`); + const grab = (name: string): string | undefined => { + const m = rawText.match(fieldRe(name)); + if (!m) return undefined; + // PDF strings can contain \( \) \\ escapes — unescape minimally + return m[1].replace(/\\([()\\])/g, "$1").trim() || undefined; + }; + result.title = grab("Title"); + result.author = grab("Author"); + result.creator = grab("Creator"); + result.producer = grab("Producer"); + return result; +} + +/** Compute GC content and sequence-length stats from a FASTA blob's text. */ +function summarizeFasta(text: string): { + sequenceCount: number; + totalBases: number; + gcContent: number; + minLen: number; + maxLen: number; + avgLen: number; + isProtein: boolean; +} { + // Walk character by character — avoids splitting a multi-MB string into a huge array. + let inHeader = false; + let sequenceCount = 0; + let currentLen = 0; + let totalBases = 0; + let gcCount = 0; + let nonNucleotideCount = 0; + let minLen = Infinity; + let maxLen = 0; + const nucleotideSet = new Set(["A", "C", "G", "T", "U", "N", "a", "c", "g", "t", "u", "n"]); + + const finishSequence = () => { + if (sequenceCount > 0 && currentLen > 0) { + if (currentLen < minLen) minLen = currentLen; + if (currentLen > maxLen) maxLen = currentLen; + } + currentLen = 0; + }; + + for (let i = 0; i < text.length; i++) { + const ch = text[i]; + if (ch === "\n") { + if (inHeader) inHeader = false; + continue; + } + if (inHeader) continue; + if (ch === ">") { + finishSequence(); + sequenceCount++; + inHeader = true; + continue; + } + if (ch === "\r" || ch === " " || ch === "\t") continue; + currentLen++; + totalBases++; + if (ch === "G" || ch === "C" || ch === "g" || ch === "c") gcCount++; + if (!nucleotideSet.has(ch)) nonNucleotideCount++; + } + finishSequence(); + + return { + sequenceCount, + totalBases, + gcContent: totalBases > 0 ? gcCount / totalBases : 0, + minLen: minLen === Infinity ? 0 : minLen, + maxLen, + avgLen: sequenceCount > 0 ? 
totalBases / sequenceCount : 0, + // Heuristic: if more than 10% of characters aren't ACGTUN, treat as protein + isProtein: totalBases > 0 && nonNucleotideCount / totalBases > 0.1, + }; +} + @UntilDestroy() @Component({ selector: "texera-user-dataset-file-renderer", @@ -104,11 +512,25 @@ export const MIME_TYPE_SIZE_LIMITS_MB = { ], }) export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDestroy { - private DEFAULT_MAX_SIZE = 5 * 1024 * 1024; // 5 MB + private DEFAULT_MAX_SIZE = 1024 * 1024 * 1024; // 1 GB + + // For text-based formats we slice to this size before parsing/rendering. + // Reading 1 GB as a UTF-16 string in JS would balloon to ~2 GB and likely crash the tab. + private static readonly PREVIEW_TEXT_BYTES = 10 * 1024 * 1024; // 10 MB + + /** Slice the blob if it exceeds the preview limit, returning the slice + whether truncation occurred. */ + private getPreviewSlice(blob: Blob): { slice: Blob; truncated: boolean } { + const limit = UserDatasetFileRendererComponent.PREVIEW_TEXT_BYTES; + if (blob.size <= limit) return { slice: blob, truncated: false }; + return { slice: blob.slice(0, limit), truncated: true }; + } + + /** True when text content shown is from a slice rather than the whole file. */ + public previewTruncated: boolean = false; public fileURL: string | undefined; - // safe url is used to display some formats including image public safeFileURL: SafeUrl | undefined; + public safeResourceFileURL: SafeResourceUrl | undefined; // table related control public displayCSV: boolean = false; @@ -131,10 +553,18 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe // audio public displayMP3: boolean = false; - // plain text & octet stream related control + // PDF + public displayPDF: boolean = false; + + // plain text public displayPlainText: boolean = false; public textContent: string = ""; + // shown for detectable-but-unpreviewable types (Parquet, Arrow, DOCX, PPTX) + public detectedTypeMessage: string = ""; + + public fileMetadata: FileMetadata | undefined = undefined; + // control flags public isLoading: boolean = false; public isFileSizeUnloadable = false; @@ -142,31 +572,21 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe public isFileTypePreviewUnsupported: boolean = false; public currentFile: File | undefined = undefined; - @Input() - isMaximized: boolean = false; - - @Input() - did: number | undefined; - - @Input() - dvid: number | undefined; - - @Input() - filePath: string = ""; - - @Input() - fileSize?: number; - @Input() - isLogin: boolean = false; + @Input() isMaximized: boolean = false; + @Input() did: number | undefined; + @Input() dvid: number | undefined; + @Input() filePath: string = ""; + @Input() fileSize?: number; + @Input() isLogin: boolean = false; - @Output() - loadFile = new EventEmitter<{ file: string; prefix: string }>(); + @Output() loadFile = new EventEmitter<{ file: string; prefix: string }>(); constructor( private datasetService: DatasetService, private sanitizer: DomSanitizer, - private notificationService: NotificationService + private notificationService: NotificationService, + private cdr: ChangeDetectorRef ) {} ngOnInit(): void { @@ -194,105 +614,888 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe reloadFileContent() { this.turnOffAllDisplay(); - // Pre-check - file size - const mimeType = getMimeType(this.filePath); - if (!this.isPreviewSupported(mimeType)) { - this.onFileTypePreviewUnsupported(); - return; - } - 
const limit = MIME_TYPE_SIZE_LIMITS_MB[mimeType] ?? this.DEFAULT_MAX_SIZE; - if (this.fileSize != null && this.fileSize > limit) { + // Pre-fetch size guard: use extension hint for known types, DEFAULT_MAX_SIZE for unknown. + // We no longer reject on extension alone — magic byte detection runs after the fetch. + const extensionMime = getMimeType(this.filePath); + const preCheckLimit = MIME_TYPE_SIZE_LIMITS_MB[extensionMime] ?? this.DEFAULT_MAX_SIZE; + if (this.fileSize != null && this.fileSize > preCheckLimit) { this.onFileSizeNotLoadable(); return; } - // Load file + if (!this.did || !this.dvid || !this.filePath) return; + this.isLoading = true; - if (this.did && this.dvid && this.filePath != "") { - this.datasetService - .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin) - .pipe(untilDestroyed(this)) - .subscribe({ - next: blob => { - this.isLoading = false; - const blobMimeType = getMimeType(this.filePath); - if (!this.isPreviewSupported(blobMimeType)) { - this.onFileTypePreviewUnsupported(); - return; - } - const MaxSize = MIME_TYPE_SIZE_LIMITS_MB[blobMimeType] || this.DEFAULT_MAX_SIZE; - const fileSize = blob.size; - if (fileSize > MaxSize) { - this.onFileSizeNotLoadable(); - this.notificationService.warning(`File ${this.filePath} is too large to be previewed`); - return; - } - this.currentFile = new File([blob], this.filePath, { type: blob.type }); - // Handle different file types - switch (blobMimeType) { - case MIME_TYPES.PNG: - case MIME_TYPES.JPEG: - case MIME_TYPES.WEBP: - case MIME_TYPES.GIF: - this.displayImage = true; - this.loadSafeURL(blob); - break; - case MIME_TYPES.MP4: - this.displayMP4 = true; - this.loadSafeURL(blob); - break; - - case MIME_TYPES.MP3: - this.displayMP3 = true; - this.loadSafeURL(blob); - break; - - case MIME_TYPES.MSEXCEL: - readXlsxFile(blob).then(rows => { - let parsedData: string[][] = []; - rows.forEach(row => { - // Convert each cell in the row to a string - let stringRow = row.map(cell => (cell ? cell.toString() : "")); - // Add the string array to the main array - parsedData.push(stringRow); - }); - if (parsedData.length > 0) { - this.loadTabularFile(parsedData); - this.displayXlsx = true; - } - }); - break; - case MIME_TYPES.CSV: - this.displayCSV = true; - // Handle CSV display - Papa.parse(this.currentFile, { - complete: (results: ParseResult) => { - if (results.data.length > 0) { - this.loadTabularFile(results.data); - } - }, - error: error => { - console.error("Error parsing file:", error); - this.onFileLoadingError(); - }, - }); - break; - case MIME_TYPES.MD: - this.displayMarkdown = true; - this.readFileAsText(blob); - break; - case MIME_TYPES.JSON: - this.displayJson = true; - this.readFileAsText(blob); - break; - case MIME_TYPES.TXT: - default: - this.displayPlainText = true; - this.readFileAsText(blob); - break; + this.datasetService + .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin) + .pipe(untilDestroyed(this)) + .subscribe({ + next: async (blob: Blob) => { + this.isLoading = false; + + const detectedMime = await this.detectMimeType(blob, this.filePath); + + // Post-detection size check against the now-known type limit + const sizeLimit = MIME_TYPE_SIZE_LIMITS_MB[detectedMime] ?? this.DEFAULT_MAX_SIZE; + if (blob.size > sizeLimit) { + this.onFileSizeNotLoadable(); + this.notificationService.warning(`File ${this.filePath} is too large to preview`); + return; + } + + // currentFile is built lazily inside the CSV case (the only consumer); avoids an + // extra in-memory copy of the blob for every other type. 
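+          // Dispatch on the detected type: image/video/audio are matched by MIME prefix,
+          // everything else by an exact MIME constant in renderByMimeType's switch.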
+ this.renderByMimeType(blob, detectedMime); + }, + error: () => this.onFileLoadingError(), + }); + } + + /** + * Detects the actual MIME type of a blob using four strategies in order: + * 1. file-type library (magic bytes, ~100 formats) — refined with extension hints for + * ZIP/gzip container formats (PyTorch, Keras, NPZ, RDS). + * 2. Manual magic bytes for data formats not covered by file-type + * (Parquet, Arrow, HDF5, NumPy .npy, GGUF, Python pickle). + * 3. Extension-based fallback for opaque binary formats with no reliable magic bytes + * (Safetensors, ONNX). + * 4. Text sniffing for JSON, CSV, FASTA, FASTQ, VCF, Markdown, and plain text. + * + * Uses FileReader throughout for broad environment compatibility (tests, browsers). + */ + async detectMimeType(blob: Blob, fileName?: string): Promise { + const ext = (fileName ?? "").split(".").pop()?.toLowerCase() ?? ""; + + // 1. file-type library covers images, video, audio, PDF, Office (ZIP-based), and more. + if (typeof fileTypeFromBlob === "function") { + try { + const result = await fileTypeFromBlob(blob); + if (result) { + // Refine generic container types (ZIP, gzip) using extension hints + if (result.mime === "application/zip") { + if (ext === "pt" || ext === "pth") return MIME_TYPES.PYTORCH; + if (ext === "keras") return MIME_TYPES.KERAS; + if (ext === "npz") return MIME_TYPES.NPZ; + } + if (result.mime === "application/gzip" && ext === "rds") return MIME_TYPES.RDS; + return result.mime; + } + } catch (_) {} + } + + // 2. Manual magic bytes for formats not in file-type's signature list. + try { + const header = await this.readBlobBytes(blob.slice(0, 16)); + + // Parquet: PAR1 at bytes 0–3 + if (header[0] === 0x50 && header[1] === 0x41 && header[2] === 0x52 && header[3] === 0x31) { + return MIME_TYPES.PARQUET; + } + // Arrow IPC: ARROW1 at bytes 0–5 + if ( + header[0] === 0x41 && header[1] === 0x52 && header[2] === 0x52 && + header[3] === 0x4f && header[4] === 0x57 && header[5] === 0x31 + ) { + return MIME_TYPES.ARROW; + } + // HDF5: \x89HDF\r\n\x1a\n at bytes 0–7 + if ( + header[0] === 0x89 && header[1] === 0x48 && header[2] === 0x44 && header[3] === 0x46 && + header[4] === 0x0d && header[5] === 0x0a && header[6] === 0x1a && header[7] === 0x0a + ) { + // Refine HDF5 sub-types by extension (all use identical magic bytes) + if (ext === "h5ad") return MIME_TYPES.H5AD; + if (ext === "h5seurat") return MIME_TYPES.H5SEURAT; + if (ext === "loom") return MIME_TYPES.LOOM; + return MIME_TYPES.HDF5; + } + // NumPy .npy: \x93NUMPY at bytes 0–5 + if ( + header[0] === 0x93 && header[1] === 0x4e && header[2] === 0x55 && + header[3] === 0x4d && header[4] === 0x50 && header[5] === 0x59 + ) { + return MIME_TYPES.NPY; + } + // GGUF: ASCII "GGUF" at bytes 0–3 + if (header[0] === 0x47 && header[1] === 0x47 && header[2] === 0x55 && header[3] === 0x46) { + return MIME_TYPES.GGUF; + } + // Python pickle: \x80 + protocol byte (2..5) + \x95 (FRAME opcode in proto 4+) + if (header[0] === 0x80 && header[1] >= 0x02 && header[1] <= 0x05) { + return MIME_TYPES.PICKLE; + } + } catch (_) {} + + // 3. Extension-based fallback for opaque binaries lacking reliable magic bytes + if (ext === "safetensors") return MIME_TYPES.SAFETENSORS; + if (ext === "onnx") return MIME_TYPES.ONNX; + + // 4. Text sniffing for formats with no fixed magic bytes + try { + const sample = await this.readBlobText(blob.slice(0, 4096)); + const trimmed = sample.trimStart(); + const firstLine = trimmed.split("\n")[0] ?? 
""; + + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + return MIME_TYPES.JSON; + } + if (trimmed.startsWith("# ") || trimmed.startsWith("## ")) { + return MIME_TYPES.MD; + } + // VCF: header line starts with ##fileformat=VCF + if (firstLine.startsWith("##fileformat=VCF")) { + return MIME_TYPES.VCF; + } + // FASTA: first non-empty/comment line starts with '>' + if (firstLine.startsWith(">")) { + return MIME_TYPES.FASTA; + } + // FASTQ: 4-line record pattern — line 1 starts '@', line 3 starts '+' + const lines = trimmed.split("\n"); + if (lines.length >= 4 && lines[0].startsWith("@") && lines[2].startsWith("+")) { + return MIME_TYPES.FASTQ; + } + // CSV heuristic: first line has at least 3 comma-separated fields + if (firstLine.split(",").length >= 3) { + return MIME_TYPES.CSV; + } + // Printable ASCII/UTF-8 → plain text + const bytes = await this.readBlobBytes(blob.slice(0, 512)); + const isPrintable = bytes.every(b => b === 9 || b === 10 || b === 13 || (b >= 32 && b <= 126)); + if (isPrintable) return MIME_TYPES.TXT; + } catch (_) {} + + return MIME_TYPES.OCTET_STREAM; + } + + /** Parse a NumPy .npy header. Returns dtype, shape, byte order, and Fortran flag or null on failure. */ + private async parseNpyHeader( + blob: Blob + ): Promise<{ dtype?: string; shape?: number[]; byteOrder?: string; fortranOrder?: boolean } | null> { + try { + const head = await this.readBlobBytes(blob.slice(0, 4096)); + // bytes 0-5: magic, byte 6: major, byte 7: minor + const major = head[6]; + // v1.0: uint16 LE header length at bytes 8-9; v2.0+: uint32 LE at bytes 8-11 + const headerLen = major >= 2 ? head[8] | (head[9] << 8) | (head[10] << 16) | (head[11] << 24) + : head[8] | (head[9] << 8); + const headerStart = major >= 2 ? 12 : 10; + const headerText = new TextDecoder().decode(head.slice(headerStart, headerStart + headerLen)); + const dtypeMatch = headerText.match(/['"]descr['"]\s*:\s*['"]([^'"]+)['"]/); + const shapeMatch = headerText.match(/['"]shape['"]\s*:\s*\(([^)]*)\)/); + const fortranMatch = headerText.match(/['"]fortran_order['"]\s*:\s*(True|False)/); + const shape = shapeMatch + ? shapeMatch[1].split(",").map(s => s.trim()).filter(s => s.length > 0).map(Number) + : undefined; + const dtype = dtypeMatch?.[1]; + // dtype prefix: '<' = little-endian, '>' = big-endian, '|' = byte order N/A, '=' = native + let byteOrder: string | undefined; + if (dtype) { + if (dtype.startsWith("<")) byteOrder = "little-endian"; + else if (dtype.startsWith(">")) byteOrder = "big-endian"; + else if (dtype.startsWith("|")) byteOrder = "n/a"; + } + const fortranOrder = fortranMatch ? fortranMatch[1] === "True" : undefined; + return { dtype, shape, byteOrder, fortranOrder }; + } catch { + return null; + } + } + + /** Parse a Safetensors file header. Returns rich tensor metadata or null. 
*/
+  private async parseSafetensorsHeader(blob: Blob): Promise<{
+    tensorCount: number;
+    parameterCount: number;
+    sampleNames: string[];
+    dtypeBreakdown: { dtype: string; params: number }[];
+    largestTensor?: { name: string; shape: number[]; params: number };
+    metadata?: { key: string; value: string }[];
+  } | null> {
+    try {
+      const lenBytes = await this.readBlobBytes(blob.slice(0, 8));
+      // uint64 LE — JS can read up to 53 bits safely; header is always small (KB-MB)
+      let headerLen = 0;
+      for (let i = 0; i < 8; i++) headerLen += lenBytes[i] * Math.pow(256, i);
+      if (headerLen <= 0 || headerLen > 100 * 1024 * 1024) return null;
+      const headerText = await this.readBlobText(blob.slice(8, 8 + headerLen));
+      const json = JSON.parse(headerText);
+      const names = Object.keys(json).filter(k => k !== "__metadata__");
+      let paramCount = 0;
+      const dtypeMap: Record<string, number> = {};
+      let largest: { name: string; shape: number[]; params: number } | undefined;
+      for (const name of names) {
+        const shape: number[] = json[name]?.shape ?? [];
+        const dtype: string = json[name]?.dtype ?? "?";
+        const params = shape.length > 0 ? shape.reduce((a, b) => a * b, 1) : 0;
+        paramCount += params;
+        dtypeMap[dtype] = (dtypeMap[dtype] ?? 0) + params;
+        if (!largest || params > largest.params) largest = { name, shape, params };
+      }
+      const dtypeBreakdown = Object.entries(dtypeMap)
+        .sort((a, b) => b[1] - a[1])
+        .map(([dtype, params]) => ({ dtype, params }));
+      const meta = (json.__metadata__ ?? {}) as Record<string, unknown>;
+      const metadata = Object.entries(meta)
+        .slice(0, 6)
+        .map(([key, value]) => ({ key, value: String(value) }));
+      return {
+        tensorCount: names.length,
+        parameterCount: paramCount,
+        sampleNames: names.slice(0, 5),
+        dtypeBreakdown,
+        largestTensor: largest,
+        metadata: metadata.length > 0 ? metadata : undefined,
+      };
+    } catch {
+      return null;
+    }
+  }
+
+  /** Parse a GGUF (llama.cpp model) header. Returns version/tensor count or null. */
+  private async parseGgufHeader(
+    blob: Blob
+  ): Promise<{ version: number; tensorCount: number; metadataKvCount: number } | null> {
+    try {
+      const head = await this.readBlobBytes(blob.slice(0, 24));
+      // bytes 0-3: "GGUF" magic
+      // bytes 4-7: version (uint32 LE)
+      const version = head[4] | (head[5] << 8) | (head[6] << 16) | (head[7] << 24);
+      // bytes 8-15: tensor count (uint64 LE)
+      let tensorCount = 0;
+      for (let i = 0; i < 8; i++) tensorCount += head[8 + i] * Math.pow(256, i);
+      // bytes 16-23: metadata kv count (uint64 LE)
+      let metadataKvCount = 0;
+      for (let i = 0; i < 8; i++) metadataKvCount += head[16 + i] * Math.pow(256, i);
+      return { version, tensorCount, metadataKvCount };
+    } catch {
+      return null;
+    }
+  }
+
+  private readBlobBytes(blob: Blob): Promise<Uint8Array> {
+    return new Promise((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onload = () => resolve(new Uint8Array(reader.result as ArrayBuffer));
+      reader.onerror = () => reject(reader.error);
+      reader.readAsArrayBuffer(blob);
+    });
+  }
+
+  private readBlobText(blob: Blob): Promise<string> {
+    return new Promise((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onload = () => resolve(reader.result as string);
+      reader.onerror = () => reject(reader.error);
+      reader.readAsText(blob);
+    });
+  }
+
+  /**
+   * Returns true for any MIME type we know how to render or describe.
+   * Only truly unidentified binary (OCTET_STREAM) is considered unsupported.
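+   *
+   * e.g. isPreviewSupported(MIME_TYPES.PARQUET) → true (rendered as a "detected" notice),
+   * while isPreviewSupported(MIME_TYPES.OCTET_STREAM) → false.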
+ */ + isPreviewSupported(mimeType: string): boolean { + return mimeType !== MIME_TYPES.OCTET_STREAM; + } + + get metadataItems(): { label: string; value: string }[] { + const m = this.fileMetadata; + if (!m) return []; + const items: { label: string; value: string }[] = []; + + if (m.fileSize != null) items.push({ label: "Size", value: formatSize(m.fileSize) }); + + if (m.imageWidth != null && m.imageHeight != null) { + items.push({ label: "Dimensions", value: `${m.imageWidth} × ${m.imageHeight} px` }); + const gcd = (a: number, b: number): number => (b === 0 ? a : gcd(b, a % b)); + const g = gcd(m.imageWidth, m.imageHeight); + items.push({ label: "Aspect ratio", value: `${m.imageWidth / g}:${m.imageHeight / g}` }); + } + + if (m.videoDuration != null) items.push({ label: "Duration", value: formatDuration(m.videoDuration) }); + if (m.videoWidth != null && m.videoHeight != null) + items.push({ label: "Resolution", value: `${m.videoWidth} × ${m.videoHeight}` }); + + if (m.audioDuration != null) items.push({ label: "Duration", value: formatDuration(m.audioDuration) }); + + if (m.rowCount != null) items.push({ label: "Rows", value: m.rowCount.toLocaleString() }); + if (m.columnCount != null) items.push({ label: "Columns", value: m.columnCount.toLocaleString() }); + if (m.sheetCount != null) items.push({ label: "Sheets", value: m.sheetCount.toLocaleString() }); + if (m.columnNames?.length) { + const preview = m.columnNames.slice(0, 8).join(", "); + const more = m.columnNames.length > 8 ? ` +${m.columnNames.length - 8} more` : ""; + items.push({ label: "Fields", value: preview + more }); + } + + if (m.jsonTopLevelType != null) { + const label = m.jsonTopLevelType === "array" ? "Items" : "Keys"; + items.push({ label: "JSON", value: m.jsonTopLevelType }); + if (m.jsonItemCount != null) items.push({ label, value: m.jsonItemCount.toLocaleString() }); + if (m.jsonPreviewKeys?.length) items.push({ label: "Preview", value: m.jsonPreviewKeys.join(", ") }); + } + + if (m.lineCount != null) items.push({ label: "Lines", value: m.lineCount.toLocaleString() }); + if (m.wordCount != null) items.push({ label: "Words", value: m.wordCount.toLocaleString() }); + if (m.charCount != null) items.push({ label: "Characters", value: m.charCount.toLocaleString() }); + if (m.headingCount != null) items.push({ label: "Headings", value: m.headingCount.toLocaleString() }); + + if (m.pageCount != null) items.push({ label: "Pages", value: `~${m.pageCount}` }); + + // ML / scientific + if (m.modelFormat) items.push({ label: "Format", value: m.modelFormat }); + if (m.containerFormat) items.push({ label: "Container", value: m.containerFormat }); + if (m.dtype) items.push({ label: "dtype", value: m.dtype }); + if (m.shape?.length) items.push({ label: "Shape", value: `(${m.shape.join(", ")})` }); + if (m.tensorCount != null) items.push({ label: "Tensors", value: m.tensorCount.toLocaleString() }); + if (m.parameterCount != null) items.push({ label: "Parameters", value: `~${m.parameterCount.toLocaleString()}` }); + if (m.sampleTensorNames?.length) + items.push({ label: "Tensors (first)", value: m.sampleTensorNames.join(", ") }); + if (m.ggufVersion != null) items.push({ label: "GGUF version", value: `v${m.ggufVersion}` }); + if (m.metadataKvCount != null) items.push({ label: "Metadata KV", value: m.metadataKvCount.toLocaleString() }); + + // JSON schema details + if (m.jsonMaxDepth != null) items.push({ label: "Max depth", value: m.jsonMaxDepth.toLocaleString() }); + if (m.jsonArrayElementType) items.push({ label: "Element type", 
value: m.jsonArrayElementType }); + if (m.jsonKeyTypes?.length) { + items.push({ + label: "Schema", + value: m.jsonKeyTypes.map(kt => `${kt.key}: ${kt.type}`).join(", "), + }); + } + + // PDF /Info + if (m.pdfVersion) items.push({ label: "PDF version", value: m.pdfVersion }); + if (m.pdfTitle) items.push({ label: "Title", value: m.pdfTitle }); + if (m.pdfAuthor) items.push({ label: "Author", value: m.pdfAuthor }); + if (m.pdfCreator) items.push({ label: "Creator", value: m.pdfCreator }); + if (m.pdfProducer) items.push({ label: "Producer", value: m.pdfProducer }); + if (m.pdfEncrypted) items.push({ label: "Encrypted", value: "Yes" }); + + // Markdown structure + if (m.codeBlockCount) items.push({ label: "Code blocks", value: m.codeBlockCount.toLocaleString() }); + if (m.linkCount) items.push({ label: "Links", value: m.linkCount.toLocaleString() }); + if (m.imageCount) items.push({ label: "Images", value: m.imageCount.toLocaleString() }); + if (m.listItemCount) items.push({ label: "List items", value: m.listItemCount.toLocaleString() }); + + // Plain text encoding/structure + if (m.encoding) items.push({ label: "Encoding", value: m.encoding }); + if (m.emptyLineCount != null && m.emptyLineCount > 0) + items.push({ label: "Blank lines", value: m.emptyLineCount.toLocaleString() }); + if (m.avgLineLength != null && m.avgLineLength > 0) + items.push({ label: "Avg line", value: `${Math.round(m.avgLineLength)} chars` }); + if (m.maxLineLength != null && m.maxLineLength > 0) + items.push({ label: "Max line", value: `${m.maxLineLength.toLocaleString()} chars` }); + + // NumPy details + if (m.totalElements != null) items.push({ label: "Elements", value: m.totalElements.toLocaleString() }); + if (m.byteOrder) items.push({ label: "Byte order", value: m.byteOrder }); + if (m.fortranOrder != null) items.push({ label: "Order", value: m.fortranOrder ? "Fortran (column)" : "C (row)" }); + + // Safetensors details + if (m.dtypeBreakdown?.length) { + items.push({ + label: "Dtypes", + value: m.dtypeBreakdown.map(d => `${d.dtype}: ${d.params.toLocaleString()}`).join(", "), + }); + } + if (m.largestTensor) { + items.push({ + label: "Largest tensor", + value: `${m.largestTensor.name} (${m.largestTensor.shape.join("×")}, ${m.largestTensor.params.toLocaleString()} params)`, + }); + } + if (m.safetensorsMetadata?.length) { + for (const kv of m.safetensorsMetadata) { + items.push({ label: kv.key, value: kv.value }); + } + } + + // GGUF details + if (m.ggufArchitecture) items.push({ label: "Architecture", value: m.ggufArchitecture }); + if (m.ggufQuantization) items.push({ label: "Quantization", value: m.ggufQuantization }); + + // Bioinformatics + if (m.sequenceCount != null) { + const label = m.sequenceCountIsExact ? "Sequences" : "Sequences (sampled)"; + items.push({ label, value: m.sequenceCount.toLocaleString() }); + } + if (m.variantCount != null) { + const label = m.variantCountIsExact ? 
"Variants" : "Variants (sampled)"; + items.push({ label, value: m.variantCount.toLocaleString() }); + } + if (m.totalBases != null) items.push({ label: "Total bases", value: m.totalBases.toLocaleString() }); + if (m.gcContent != null) items.push({ label: "GC content", value: `${(m.gcContent * 100).toFixed(1)}%` }); + if (m.isProtein) items.push({ label: "Sequence type", value: "Protein" }); + if (m.minSequenceLength != null && m.maxSequenceLength != null) { + items.push({ + label: "Length range", + value: `${m.minSequenceLength.toLocaleString()}–${m.maxSequenceLength.toLocaleString()} (avg ${Math.round( + m.avgSequenceLength ?? 0 + ).toLocaleString()})`, + }); + } + if (m.vcfSampleCount != null && m.vcfSampleCount > 0) + items.push({ label: "Samples", value: m.vcfSampleCount.toLocaleString() }); + if (m.vcfChromosomes?.length) + items.push({ label: "Chromosomes", value: m.vcfChromosomes.slice(0, 8).join(", ") }); + + return items; + } + + private renderByMimeType(blob: Blob, mimeType: string): void { + if (mimeType.startsWith("image/")) { + this.displayImage = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const img = new Image(); + img.onload = () => { + this.fileMetadata = { ...this.fileMetadata, imageWidth: img.naturalWidth, imageHeight: img.naturalHeight }; + this.cdr.markForCheck(); + }; + img.src = this.fileURL!; + return; + } + + if (mimeType.startsWith("video/")) { + this.displayMP4 = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const video = document.createElement("video"); + video.preload = "metadata"; + video.onloadedmetadata = () => { + this.fileMetadata = { + ...this.fileMetadata, + videoDuration: video.duration, + videoWidth: video.videoWidth, + videoHeight: video.videoHeight, + }; + this.cdr.markForCheck(); + URL.revokeObjectURL(video.src); + }; + video.src = URL.createObjectURL(blob); + return; + } + + if (mimeType.startsWith("audio/")) { + this.displayMP3 = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + const audio = document.createElement("audio"); + audio.preload = "metadata"; + audio.onloadedmetadata = () => { + this.fileMetadata = { ...this.fileMetadata, audioDuration: audio.duration }; + this.cdr.markForCheck(); + URL.revokeObjectURL(audio.src); + }; + audio.src = URL.createObjectURL(blob); + return; + } + + switch (mimeType) { + case MIME_TYPES.PDF: + this.displayPDF = true; + this.loadSafeURL(blob); + this.fileMetadata = { fileSize: blob.size }; + // Read first 200KB for /Info + version + page count; tail 50KB for trailer (where /Info often lives) + Promise.all([ + this.readBlobText(blob.slice(0, 200 * 1024)), + this.readBlobText(blob.slice(Math.max(0, blob.size - 50 * 1024))), + ]).then(([head, tail]) => { + const combined = head + "\n" + tail; + const exact = (combined.match(/\/Type\s*\/Page\b/g) ?? []).length; + const fallback = Math.ceil((combined.match(/\/Page\b/g) ?? []).length / 2); + const pageCount = exact > 0 ? 
exact : fallback || undefined;
+          const info = extractPdfInfo(combined);
+          this.fileMetadata = {
+            ...this.fileMetadata,
+            pageCount,
+            pdfTitle: info.title,
+            pdfAuthor: info.author,
+            pdfCreator: info.creator,
+            pdfProducer: info.producer,
+            pdfVersion: info.version,
+            pdfEncrypted: info.encrypted,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+
+      case MIME_TYPES.MSEXCEL:
+      case MIME_TYPES.XLSX:
+        Promise.all([readXlsxFile(blob), readSheetNames(blob)]).then(([rows, sheetNames]) => {
+          const parsedData = rows.map(row => row.map(cell => (cell != null ? cell.toString() : "")));
+          if (parsedData.length > 0) {
+            this.loadTabularFile(parsedData);
+            this.displayXlsx = true;
+            const header = parsedData[0];
+            const dataRows = parsedData.slice(1).filter(r => r.some(c => c !== ""));
+            const schema = inferColumnSchema(dataRows, header.length);
+            this.fileMetadata = {
+              fileSize: blob.size,
+              rowCount: dataRows.length,
+              columnCount: header.length,
+              columnNames: header,
+              sheetCount: sheetNames.length,
+              columnTypes: schema.types,
+              nullCounts: schema.nullCounts,
+              sampleValues: schema.samples,
+            };
+            this.cdr.markForCheck();
+          }
+        }).catch(() => this.onFileLoadingError());
+        break;
+
+      case MIME_TYPES.CSV: {
+        this.displayCSV = true;
+        const { slice: csvSlice, truncated: csvTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = csvTruncated;
+        // Papa.parse needs a File-like; build it from the slice only — no need to keep the full blob.
+        const fileToParse = new File([csvSlice], this.filePath, { type: MIME_TYPES.CSV });
+        Papa.parse(fileToParse, {
+          complete: (results: ParseResult<string[]>) => {
+            if (results.data.length > 0) {
+              this.loadTabularFile(results.data);
+              const header: string[] = results.data[0].map(String);
+              const dataRows = (results.data.slice(1) as any[][])
+                .filter(r => r.some((c: any) => c !== ""))
+                .map(r => r.map((c: any) => (c == null ? "" : String(c))));
+              const schema = inferColumnSchema(dataRows, header.length);
+              this.fileMetadata = {
+                fileSize: blob.size,
+                rowCount: dataRows.length,
+                columnCount: header.length,
+                columnNames: header,
+                columnTypes: schema.types,
+                nullCounts: schema.nullCounts,
+                sampleValues: schema.samples,
+              };
+              this.cdr.markForCheck();
             }
           },
+          error: () => this.onFileLoadingError(),
         });
+        break;
+      }
+
+      case MIME_TYPES.MD: {
+        this.displayMarkdown = true;
+        const { slice: mdSlice, truncated: mdTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = mdTruncated;
+        this.readBlobText(mdSlice).then(text => {
+          this.textContent = text;
+          const lines = text.split("\n");
+          // Count fence markers in pairs to estimate fenced code blocks; also count inline elements
+          const codeBlockCount = (text.match(/^```/gm) ?? []).length / 2;
+          const linkCount = (text.match(/\[[^\]]+\]\([^)]+\)/g) ?? []).length;
+          const imageCount = (text.match(/!\[[^\]]*\]\([^)]+\)/g) ?? []).length;
+          const listItemCount = lines.filter(l => /^\s*[-*+]\s/.test(l) || /^\s*\d+\.\s/.test(l)).length;
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            wordCount: text.trim() ? text.trim().split(/\s+/).length : 0,
+            headingCount: lines.filter(l => /^#{1,6}\s/.test(l)).length,
+            codeBlockCount: Math.floor(codeBlockCount),
+            linkCount: Math.max(0, linkCount - imageCount), // image syntax is link syntax + a leading '!'; clamp since empty-alt images match only the image regex
+            imageCount,
+            listItemCount,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.JSON: {
+        this.displayJson = true;
+        const { slice: jsonSlice, truncated: jsonTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = jsonTruncated;
+        this.readBlobText(jsonSlice).then(text => {
+          this.textContent = text;
+          try {
+            const parsed = JSON.parse(text);
+            const isArray = Array.isArray(parsed);
+            const keys = isArray ? null : Object.keys(parsed);
+            const maxDepth = jsonMaxDepth(parsed);
+            let jsonKeyTypes: { key: string; type: string }[] | undefined;
+            let jsonArrayElementType: string | undefined;
+            if (isArray && parsed.length > 0) {
+              const elementTypes = new Set(parsed.slice(0, 20).map(jsTypeLabel));
+              jsonArrayElementType = elementTypes.size === 1 ? [...elementTypes][0] : "mixed";
+            } else if (!isArray && keys) {
+              jsonKeyTypes = keys.slice(0, 8).map(k => ({
+                key: k,
+                type: jsTypeLabel((parsed as Record<string, unknown>)[k]),
+              }));
+            }
+            this.fileMetadata = {
+              fileSize: blob.size,
+              jsonTopLevelType: isArray ? "array" : "object",
+              jsonItemCount: isArray ? parsed.length : keys!.length,
+              jsonPreviewKeys: isArray
+                ? parsed.slice(0, 5).map((_: unknown, i: number) => `[${i}]`)
+                : keys!.slice(0, 8),
+              jsonMaxDepth: maxDepth,
+              jsonKeyTypes,
+              jsonArrayElementType,
+            };
+          } catch {
+            // Truncated JSON or invalid — fall back to raw text view
+            this.fileMetadata = { fileSize: blob.size };
+          }
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.PARQUET:
+        this.detectedTypeMessage =
+          "Parquet file detected. Use the Parquet File Scan operator in Texera to analyze this data.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.ARROW:
+        this.detectedTypeMessage =
+          "Arrow/Feather file detected. Use the Arrow File Scan operator in Texera to analyze this data.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.DOCX:
+        this.detectedTypeMessage = "Word document (.docx) detected. Rich document preview is not yet supported.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.PPTX:
+        this.detectedTypeMessage = "PowerPoint (.pptx) detected. Presentation preview is not yet supported.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      // --- ML / scientific data formats ---
+
+      case MIME_TYPES.HDF5:
+        this.detectedTypeMessage =
+          "HDF5 binary container detected. Likely a model (Keras .h5) or scientific dataset. Load with h5py / rhdf5.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.H5AD:
+        this.detectedTypeMessage =
+          "AnnData (.h5ad) detected — single-cell expression matrix in HDF5. Load with scanpy.read_h5ad() in Python.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.H5SEURAT:
+        this.detectedTypeMessage =
+          "Seurat HDF5 object (.h5seurat) detected. Load with SeuratDisk::LoadH5Seurat() in R.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.LOOM:
+        this.detectedTypeMessage =
+          "Loom (.loom) detected — single-cell expression in HDF5. Load with loompy / scanpy in Python.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.RDS:
+        this.detectedTypeMessage =
+          "R serialized object (.rds) detected — commonly a Seurat / SingleCellExperiment / fitted model.
Load with readRDS() in R."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "gzip" }; + break; + + case MIME_TYPES.PICKLE: + this.detectedTypeMessage = + "Python pickle detected — typically a serialized model (sklearn / joblib) or dataset. Load with pickle.load() in Python."; + this.fileMetadata = { fileSize: blob.size }; + break; + + case MIME_TYPES.PYTORCH: + this.detectedTypeMessage = + "PyTorch checkpoint (.pt/.pth) detected. Load with torch.load() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "PyTorch", containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.KERAS: + this.detectedTypeMessage = + "Keras v3 model (.keras) detected. Load with tf.keras.models.load_model() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "Keras", containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.ONNX: + this.detectedTypeMessage = + "ONNX model (.onnx) detected — portable neural network. Load with onnxruntime or netron.app for inspection."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "ONNX" }; + break; + + case MIME_TYPES.NPY: + this.parseNpyHeader(blob).then(info => { + const shapeStr = info?.shape ? info.shape.join(" × ") : "?"; + const totalElements = info?.shape?.reduce((a, b) => a * b, 1); + this.detectedTypeMessage = `NumPy array (.npy) detected — ${info?.dtype ?? "?"} array of shape (${shapeStr}).`; + this.fileMetadata = { + fileSize: blob.size, + dtype: info?.dtype, + shape: info?.shape, + totalElements, + byteOrder: info?.byteOrder, + fortranOrder: info?.fortranOrder, + }; + this.cdr.markForCheck(); + }); + break; + + case MIME_TYPES.NPZ: + this.detectedTypeMessage = + "NumPy archive (.npz) detected — ZIP of .npy arrays. Load with numpy.load() and access via dict-like API."; + this.fileMetadata = { fileSize: blob.size, containerFormat: "ZIP archive" }; + break; + + case MIME_TYPES.SAFETENSORS: + this.parseSafetensorsHeader(blob).then(info => { + if (info) { + const paramStr = info.parameterCount.toLocaleString(); + this.detectedTypeMessage = `Safetensors model detected — ${info.tensorCount} tensors, ~${paramStr} parameters.`; + this.fileMetadata = { + fileSize: blob.size, + modelFormat: "Safetensors", + tensorCount: info.tensorCount, + parameterCount: info.parameterCount, + sampleTensorNames: info.sampleNames, + dtypeBreakdown: info.dtypeBreakdown, + largestTensor: info.largestTensor, + safetensorsMetadata: info.metadata, + }; + } else { + this.detectedTypeMessage = "Safetensors file detected. 
Load with safetensors.torch.load_file() in Python."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "Safetensors" }; + } + this.cdr.markForCheck(); + }); + break; + + case MIME_TYPES.GGUF: + this.parseGgufHeader(blob).then(info => { + if (info) { + this.detectedTypeMessage = `GGUF model detected — v${info.version}, ${info.tensorCount} tensors, ${info.metadataKvCount} metadata entries.`; + this.fileMetadata = { + fileSize: blob.size, + modelFormat: "GGUF", + ggufVersion: info.version, + tensorCount: info.tensorCount, + metadataKvCount: info.metadataKvCount, + }; + } else { + this.detectedTypeMessage = "GGUF model detected (llama.cpp / quantized LLM format)."; + this.fileMetadata = { fileSize: blob.size, modelFormat: "GGUF" }; + } + this.cdr.markForCheck(); + }); + break; + + // --- Bioinformatics text formats — render as plain text plus record-count metadata --- + + case MIME_TYPES.FASTA: { + this.displayPlainText = true; + const { slice: faSlice, truncated: faTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = faTruncated; + this.readBlobText(faSlice).then(text => { + this.textContent = text; + const stats = summarizeFasta(text); + this.fileMetadata = { + fileSize: blob.size, + lineCount: text.split("\n").length, + sequenceCount: stats.sequenceCount, + sequenceCountIsExact: !faTruncated, + totalBases: stats.totalBases, + gcContent: stats.isProtein ? undefined : stats.gcContent, + minSequenceLength: stats.minLen, + maxSequenceLength: stats.maxLen, + avgSequenceLength: stats.avgLen, + isProtein: stats.isProtein, + }; + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.FASTQ: { + this.displayPlainText = true; + const { slice: fqSlice, truncated: fqTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = fqTruncated; + this.readBlobText(fqSlice).then(text => { + this.textContent = text; + const lineCount = text.split("\n").filter(l => l.length > 0).length; + this.fileMetadata = { + fileSize: blob.size, + lineCount: text.split("\n").length, + sequenceCount: Math.floor(lineCount / 4), + sequenceCountIsExact: !fqTruncated, + }; + this.cdr.markForCheck(); + }); + break; + } + + case MIME_TYPES.VCF: { + this.displayPlainText = true; + const { slice: vcfSlice, truncated: vcfTruncated } = this.getPreviewSlice(blob); + this.previewTruncated = vcfTruncated; + this.readBlobText(vcfSlice).then(text => { + this.textContent = text; + const lines = text.split("\n"); + const variantLines = lines.filter(l => l.length > 0 && !l.startsWith("#")); + // Sample names are tab-separated columns after the 9 fixed VCF fields on the #CHROM header line + const chromHeader = lines.find(l => l.startsWith("#CHROM")); + const headerFields = chromHeader ? chromHeader.split("\t") : []; + const vcfSampleCount = headerFields.length > 9 ? 
headerFields.length - 9 : 0;
+          const chromSet = new Set<string>();
+          for (const line of variantLines.slice(0, 5000)) {
+            const chr = line.split("\t", 1)[0];
+            if (chr) chromSet.add(chr);
+            if (chromSet.size >= 30) break;
+          }
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            variantCount: variantLines.length,
+            variantCountIsExact: !vcfTruncated,
+            vcfSampleCount,
+            vcfChromosomes: [...chromSet].slice(0, 12),
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.OCTET_STREAM:
+        this.onFileTypePreviewUnsupported();
+        break;
+
+      default: {
+        this.displayPlainText = true;
+        const { slice: txtSlice, truncated: txtTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = txtTruncated;
+        Promise.all([this.readBlobBytes(blob.slice(0, 3)), this.readBlobText(txtSlice)]).then(([head, text]) => {
+          this.textContent = text;
+          const lines = text.split("\n");
+          const lineLens = lines.map(l => l.length);
+          const totalLen = lineLens.reduce((a, b) => a + b, 0);
+          const emptyLineCount = lineLens.filter(n => n === 0).length;
+          const maxLineLength = lineLens.length > 0 ? Math.max(...lineLens) : 0;
+          // BOM detection: UTF-8 BOM is EF BB BF; otherwise assume ASCII/UTF-8
+          let encoding = "UTF-8";
+          if (head[0] === 0xef && head[1] === 0xbb && head[2] === 0xbf) encoding = "UTF-8 BOM";
+          else if (lines.every(l => /^[\x00-\x7F]*$/.test(l))) encoding = "ASCII";
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            wordCount: text.trim() ? text.trim().split(/\s+/).length : 0,
+            charCount: text.length,
+            emptyLineCount,
+            avgLineLength: lines.length > 0 ? totalLen / lines.length : 0,
+            maxLineLength,
+            encoding,
+          };
+          this.cdr.markForCheck();
+        });
+      }
     }
   }
 
@@ -305,17 +1508,26 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
     this.displayJson = false;
     this.displayMP4 = false;
     this.displayMP3 = false;
+    this.displayPDF = false;
+    this.detectedTypeMessage = "";
+    this.fileMetadata = undefined;
     this.isLoading = false;
     this.isFileLoadingError = false;
     this.isFileSizeUnloadable = false;
     this.isFileTypePreviewUnsupported = false;
-    // garbage collection
     if (this.fileURL) {
       URL.revokeObjectURL(this.fileURL);
     }
-    if (this.safeFileURL) {
-      URL.revokeObjectURL(this.safeFileURL.toString());
-    }
+    this.fileURL = undefined;
+    this.safeFileURL = undefined;
+    this.safeResourceFileURL = undefined;
+    // Clear cached content so memory is reclaimed when switching files; without these,
+    // a previously-loaded 10 MB text or 100K-row table would persist on the component.
+ this.textContent = ""; + this.tableContent = []; + this.tableDataHeader = []; + this.currentFile = undefined; + this.previewTruncated = false; } onFileLoadingError() { @@ -333,49 +1545,23 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe this.isFileTypePreviewUnsupported = true; } - isPreviewSupported(mimeType: string) { - return mimeType !== MIME_TYPES.OCTET_STREAM && Object.hasOwnProperty.call(MIME_TYPE_SIZE_LIMITS_MB, mimeType); - } - - private readFileAsText(blob: Blob) { - const txtReader = new FileReader(); - txtReader.onload = (event: any) => { - this.textContent = event.target.result; - }; - txtReader.readAsText(blob); - } - - private loadSafeURL(blob: Blob) { + private loadSafeURL(blob: Blob): void { this.fileURL = URL.createObjectURL(blob); this.safeFileURL = this.sanitizer.bypassSecurityTrustUrl(this.fileURL); + this.safeResourceFileURL = this.sanitizer.bypassSecurityTrustResourceUrl(this.fileURL); } - private loadTabularFile(data: any[][]) { + + private loadTabularFile(data: any[][]): void { if (data.length > 0) { - // Extract the header (first row) this.tableDataHeader = data[0]; - - // Process the rest of the rows this.tableContent = data .slice(1) .map(row => { - // Normalize the row length to match the header length - while (row.length < this.tableDataHeader.length) { - row.push(""); - } + while (row.length < this.tableDataHeader.length) row.push(""); return row; }) - .filter(row => { - // filter out all empty row - let areCellAllEmpty = true; - for (const cell in row) { - if (cell != "") { - areCellAllEmpty = false; - break; - } - } - return !areCellAllEmpty; - }); + .filter(row => row.some(cell => cell !== "")); } } } diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 6a4ae4330c4..2d2851d7885 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -2059,6 +2059,13 @@ __metadata: languageName: node linkType: hard +"@borewit/text-codec@npm:^0.2.1": + version: 0.2.2 + resolution: "@borewit/text-codec@npm:0.2.2" + checksum: 10c0/2d3fb132bc6a132914a8fbf8e9ff2fa1ead210ecc395b28bb7355bd7719548a5e351ffe39f21c3bee8048f6cabd99eabd404bb5cc809cad9cba25abed19d271f + languageName: node + linkType: hard + "@bufbuild/protobuf@npm:^2.0.0, @bufbuild/protobuf@npm:^2.5.0": version: 2.12.0 resolution: "@bufbuild/protobuf@npm:2.12.0" @@ -5869,6 +5876,23 @@ __metadata: languageName: node linkType: hard +"@tokenizer/inflate@npm:^0.4.1": + version: 0.4.1 + resolution: "@tokenizer/inflate@npm:0.4.1" + dependencies: + debug: "npm:^4.4.3" + token-types: "npm:^6.1.1" + checksum: 10c0/9817516efe21d1ce3bdfb80a1f94efc8981064ce3873448ba79f4d81d96c0694c484c289bd042d346ae5536cf77f5aa9a367d39c3df700eb610761b7c306b4de + languageName: node + linkType: hard + +"@tokenizer/token@npm:^0.3.0": + version: 0.3.0 + resolution: "@tokenizer/token@npm:0.3.0" + checksum: 10c0/7ab9a822d4b5ff3f5bca7f7d14d46bdd8432528e028db4a52be7fbf90c7f495cc1af1324691dda2813c6af8dc4b8eb29de3107d4508165f9aa5b53e7d501f155 + languageName: node + linkType: hard + "@tsconfig/node10@npm:^1.0.7": version: 1.0.12 resolution: "@tsconfig/node10@npm:1.0.12" @@ -10419,6 +10443,18 @@ __metadata: languageName: node linkType: hard +"file-type@npm:^22.0.1": + version: 22.0.1 + resolution: "file-type@npm:22.0.1" + dependencies: + "@tokenizer/inflate": "npm:^0.4.1" + strtok3: "npm:^10.3.5" + token-types: "npm:^6.1.2" + uint8array-extras: "npm:^1.5.0" + checksum: 10c0/45b70a10196d46965eadd7835ec408c1c07b4fd2ed395e9bbcc0ad63d93f7bf6d076d0e970673b754577002019c8858825bc71ccc07ca7c0e49ac0c2b7e1839f + 
languageName: node + linkType: hard + "fill-range@npm:^7.1.1": version: 7.1.1 resolution: "fill-range@npm:7.1.1" @@ -11065,6 +11101,7 @@ __metadata: eslint-plugin-rxjs: "npm:5.0.3" eslint-plugin-rxjs-angular: "npm:2.0.1" file-saver: "npm:2.0.5" + file-type: "npm:^22.0.1" fs-extra: "npm:10.0.1" fuse.js: "npm:6.5.3" git-describe: "npm:4.1.0" @@ -11460,7 +11497,7 @@ __metadata: languageName: node linkType: hard -"ieee754@npm:1.2.1, ieee754@npm:^1.1.13": +"ieee754@npm:1.2.1, ieee754@npm:^1.1.13, ieee754@npm:^1.2.1": version: 1.2.1 resolution: "ieee754@npm:1.2.1" checksum: 10c0/b0782ef5e0935b9f12883a2e2aa37baa75da6e66ce6515c168697b42160807d9330de9a32ec1ed73149aea02e0d822e572bca6f1e22bdcbd2149e13b050b17bb @@ -17140,6 +17177,15 @@ __metadata: languageName: node linkType: hard +"strtok3@npm:^10.3.5": + version: 10.3.5 + resolution: "strtok3@npm:10.3.5" + dependencies: + "@tokenizer/token": "npm:^0.3.0" + checksum: 10c0/8d2477b239054c9f1f5b14a65d531147ca158ab9887fdc2d0938e77b7ec8891fb683b58254c7643afd5d98a421a59207534d491762b111f58c795071ecbe9fd1 + languageName: node + linkType: hard + "style-loader@npm:^3.3.0": version: 3.3.4 resolution: "style-loader@npm:3.3.4" @@ -17450,6 +17496,17 @@ __metadata: languageName: node linkType: hard +"token-types@npm:^6.1.1, token-types@npm:^6.1.2": + version: 6.1.2 + resolution: "token-types@npm:6.1.2" + dependencies: + "@borewit/text-codec": "npm:^0.2.1" + "@tokenizer/token": "npm:^0.3.0" + ieee754: "npm:^1.2.1" + checksum: 10c0/8786e28e3cb65b9e890bc3c38def98e6dfe4565538237f8c0e47dbe549ed8f5f00de8dc464717868308abb4729f1958f78f69e1c4c3deebbb685729113a6fee8 + languageName: node + linkType: hard + "totalist@npm:^1.0.0": version: 1.1.0 resolution: "totalist@npm:1.1.0" @@ -17798,6 +17855,13 @@ __metadata: languageName: node linkType: hard +"uint8array-extras@npm:^1.5.0": + version: 1.5.0 + resolution: "uint8array-extras@npm:1.5.0" + checksum: 10c0/0e74641ac7dadb02eadefc1ccdadba6010e007757bda824960de3c72bbe2b04e6d3af75648441f412148c4103261d54fcb60be45a2863beb76643a55fddba3bd + languageName: node + linkType: hard + "underscore@npm:>=1.8.3": version: 1.13.8 resolution: "underscore@npm:1.13.8" From 2728afe249ae0f815294505d38da62dac402c1a4 Mon Sep 17 00:00:00 2001 From: Kunwoo Park Date: Sat, 16 May 2026 12:12:36 -0700 Subject: [PATCH 2/4] fix(frontend): skip download for oversized files, drop size pill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Above 50 MB, skip the full-blob download from the dataset service and show only the extension-based type identification + a "how to load" hint. The dominant source of preview lag was the network download, not the parsing — for a 500 MB Parquet file we used to fetch 500 MB just to read its first 4 magic bytes. Also drop the redundant "Size" pill from the metadata strip; size is already visible in the dataset file listing and in the truncation banner context. 
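
In sketch form, the guard this patch adds (names match the diff below;
formatSize, the hint table, and Angular change detection are elided, and
oversizedBanner is a simplified stand-in for showOversizedFileInfo):

    const FULL_PREVIEW_MAX_BYTES = 50 * 1024 * 1024; // 50 MB

    // Returns the banner text when the download should be skipped,
    // or null when the file is small enough for a full preview.
    function oversizedBanner(fileSize: number | undefined, hint?: string): string | null {
      if (fileSize == null || fileSize <= FULL_PREVIEW_MAX_BYTES) return null;
      const sizeStr = `${(fileSize / (1024 * 1024)).toFixed(0)} MB`;
      return hint
        ? `${hint} (Preview skipped - file is ${sizeStr}.)`
        : `File is ${sizeStr} - full preview skipped to avoid browser lag.`;
    }

The check runs before any call to the dataset service, so an oversized file
costs no network traffic beyond the size already known from the file listing.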
---
 .../user-dataset-file-renderer.component.ts   | 65 +++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
index dfb27a80b48..bea0c556b9d 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
@@ -329,6 +329,43 @@ export interface FileMetadata {
   vcfChromosomes?: string[];
 }
 
+/**
+ * Above this size, skip the download entirely and show only extension-based
+ * identification + a "how to load" hint. The dominant source of preview lag
+ * is the full-blob download from the dataset service.
+ */
+export const FULL_PREVIEW_MAX_BYTES = 50 * 1024 * 1024; // 50 MB
+
+/**
+ * One-line "how to load" or "what is this" message per format.
+ * Used both when content was downloaded (in renderByMimeType) and when the
+ * download was skipped (in showOversizedFileInfo).
+ */
+export const TYPE_LOADING_HINTS: Record<string, string> = {
+  [MIME_TYPES.PARQUET]: "Parquet file. Use the Parquet File Scan operator in Texera to analyze this data.",
+  [MIME_TYPES.ARROW]: "Arrow / Feather file. Use the Arrow File Scan operator in Texera.",
+  [MIME_TYPES.HDF5]: "HDF5 binary container (Keras .h5 or scientific dataset). Load with h5py / rhdf5.",
+  [MIME_TYPES.H5AD]: "AnnData (.h5ad) — single-cell expression matrix. Load with scanpy.read_h5ad().",
+  [MIME_TYPES.H5SEURAT]: "Seurat HDF5 object (.h5seurat). Load with SeuratDisk::LoadH5Seurat() in R.",
+  [MIME_TYPES.LOOM]: "Loom (.loom) single-cell expression. Load with loompy / scanpy in Python.",
+  [MIME_TYPES.RDS]: "R serialized object (.rds) — Seurat / SCE / fitted model. Load with readRDS() in R.",
+  [MIME_TYPES.PICKLE]: "Python pickle — serialized model or dataset. Load with pickle.load() in Python.",
+  [MIME_TYPES.PYTORCH]: "PyTorch checkpoint (.pt/.pth). Load with torch.load() in Python.",
+  [MIME_TYPES.KERAS]: "Keras v3 model (.keras). Load with tf.keras.models.load_model() in Python.",
+  [MIME_TYPES.ONNX]: "ONNX model (.onnx). Load with onnxruntime; inspect at netron.app.",
+  [MIME_TYPES.SAFETENSORS]: "Safetensors file. Load with safetensors.torch.load_file() in Python.",
+  [MIME_TYPES.GGUF]: "GGUF model (llama.cpp / quantized LLM).",
+  [MIME_TYPES.NPY]: "NumPy array (.npy). Load with numpy.load() in Python.",
+  [MIME_TYPES.NPZ]: "NumPy archive (.npz) — ZIP of .npy arrays. Load with numpy.load().",
+  [MIME_TYPES.CSV]: "CSV file. Use the CSV File Scan operator in Texera.",
+  [MIME_TYPES.JSON]: "JSON file. Use the JSONL File Scan operator (or Python UDF for nested objects).",
+  [MIME_TYPES.XLSX]: "Excel spreadsheet (.xlsx). Convert to CSV or use a Python UDF with openpyxl.",
+  [MIME_TYPES.MSEXCEL]: "Excel spreadsheet (.xls). Convert to CSV or use a Python UDF.",
+  [MIME_TYPES.FASTA]: "FASTA sequence file. Parse with Biopython SeqIO.",
+  [MIME_TYPES.FASTQ]: "FASTQ reads file. Parse with Biopython SeqIO.",
+  [MIME_TYPES.VCF]: "VCF variant file. Parse with pyvcf / cyvcf2.",
+};
+
 /** Classify a single cell value into a coarse type label.
*/ function inferCellType(value: string): string { if (value === "" || value == null) return "null"; @@ -614,9 +651,17 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe reloadFileContent() { this.turnOffAllDisplay(); - // Pre-fetch size guard: use extension hint for known types, DEFAULT_MAX_SIZE for unknown. - // We no longer reject on extension alone — magic byte detection runs after the fetch. const extensionMime = getMimeType(this.filePath); + + // Skip the full download for large files. The dataset service streams the entire blob; + // for a 500 MB file we'd wait 30+ seconds just to read its first 16 magic bytes. Above + // the threshold, fall back to extension-based identification + a "how to load" hint. + if (this.fileSize != null && this.fileSize > FULL_PREVIEW_MAX_BYTES) { + this.showOversizedFileInfo(extensionMime); + return; + } + + // Hard upper bound (defensive): even small types shouldn't load anything past this. const preCheckLimit = MIME_TYPE_SIZE_LIMITS_MB[extensionMime] ?? this.DEFAULT_MAX_SIZE; if (this.fileSize != null && this.fileSize > preCheckLimit) { this.onFileSizeNotLoadable(); @@ -903,8 +948,6 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe if (!m) return []; const items: { label: string; value: string }[] = []; - if (m.fileSize != null) items.push({ label: "Size", value: formatSize(m.fileSize) }); - if (m.imageWidth != null && m.imageHeight != null) { items.push({ label: "Dimensions", value: `${m.imageWidth} × ${m.imageHeight} px` }); const gcd = (a: number, b: number): number => (b === 0 ? a : gcd(b, a % b)); @@ -1545,6 +1588,20 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe this.isFileTypePreviewUnsupported = true; } + /** + * Skip the download for very large files and show only the extension-based type hint. + * Avoids the multi-second download + memory cost of fetching a multi-hundred-MB blob + * just to render its first frame / table / iframe. + */ + private showOversizedFileInfo(extensionMime: string): void { + const hint = TYPE_LOADING_HINTS[extensionMime]; + const sizeStr = this.fileSize != null ? formatSize(this.fileSize) : "very large"; + this.detectedTypeMessage = hint + ? `${hint} (Preview skipped — file is ${sizeStr}.)` + : `File is ${sizeStr} — full preview skipped to avoid browser lag. Open in a workflow operator to analyze.`; + this.cdr.markForCheck(); + } + private loadSafeURL(blob: Blob): void { this.fileURL = URL.createObjectURL(blob); this.safeFileURL = this.sanitizer.bypassSecurityTrustUrl(this.fileURL); From 845edbaaca95625d49fb99e3a1d007c29e0b5c0a Mon Sep 17 00:00:00 2001 From: Kunwoo Park Date: Sat, 16 May 2026 12:34:19 -0700 Subject: [PATCH 3/4] feat(frontend): add "Open in workflow" CTA to file renderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creates a new empty workflow and navigates to the editor when the user clicks the button on a previewed file. The file path is copied to the clipboard and a notification suggests which scan operator to drag in (CSV → "CSV File Scan", etc.). Empty-workflow + clipboard handoff is used instead of pre-populating the operator JSON because hand-constructed OperatorPredicates skip the operator-metadata schema validation, leading to workflows the editor can't load. The same UX outcome with far higher reliability. 
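
In sketch form (assuming a WorkflowPersistService-style createWorkflow() and
the Angular Router; suggestScanOperator is a hypothetical stand-in for the
extension-to-operator mapping, not a helper defined in this diff):

    // Create an empty workflow, copy the file path for the scan operator's
    // file-name field, then jump to the editor.
    openInWorkflow(filePath: string): void {
      this.workflowPersistService
        .createWorkflow()
        .pipe(untilDestroyed(this))
        .subscribe(workflow => {
          // Best-effort: clipboard access can be denied; the notification still fires.
          navigator.clipboard?.writeText(filePath).catch(() => {});
          this.notificationService.info(`Path copied. Drag in a ${suggestScanOperator(filePath)} operator.`);
          this.router.navigate(["/workflow", workflow.wid]);
        });
    }

Because no OperatorPredicate is built by hand, nothing can drift out of sync
with the operator-metadata schema.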
--- .../user-dataset-file-renderer.component.html | 12 ++- .../user-dataset-file-renderer.component.scss | 4 + ...er-dataset-file-renderer.component.spec.ts | 17 ++++ .../user-dataset-file-renderer.component.ts | 85 ++++++++++++++++++- 4 files changed, 115 insertions(+), 3 deletions(-) diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html index b1c5a6ac114..7092d3294e4 100644 --- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html +++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html @@ -58,11 +58,21 @@ nzShowIcon> -