diff --git a/frontend/package.json b/frontend/package.json
index 08b298260e3..4e117cd05cc 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -47,6 +47,7 @@
     "d3-shape": "2.1.0",
     "dagre": "0.8.5",
     "file-saver": "2.0.5",
+    "file-type": "^22.0.1",
     "fuse.js": "6.5.3",
     "html2canvas": "1.4.1",
     "jointjs": "3.5.4",
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html
index fd0ba3af152..7092d3294e4 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.html
@@ -44,6 +44,36 @@
         nzType="warning"
         nzMessage="Preview of the file type is currently not supported"></nz-alert>
     </div>
+    <div *ngIf="detectedTypeMessage">
+      <nz-alert
+        nzType="info"
+        [nzMessage]="detectedTypeMessage"></nz-alert>
+    </div>
+
+    <div *ngIf="previewTruncated">
+      <nz-alert
+        nzType="warning"
+        nzMessage="Showing first 10 MB only"
+        nzDescription="This file is large; only the first 10 MB is parsed and rendered to avoid running the browser out of memory. Statistics shown reflect the previewed slice, not the whole file."
+        nzShowIcon></nz-alert>
+    </div>
+
+    <div *ngIf="metadataItems.length > 0 || canOpenInWorkflow" class="file-metadata-strip">
+      <span *ngFor="let item of metadataItems" class="metadata-pill">
+        <span class="metadata-label">{{ item.label }}</span>
+        <span class="metadata-value">{{ item.value }}</span>
+      </span>
+      <button
+        *ngIf="canOpenInWorkflow"
+        nz-button
+        nzType="primary"
+        nzSize="small"
+        class="open-in-workflow-btn"
+        (click)="onOpenInWorkflow()">
+        <span nz-icon nzType="play-circle" nzTheme="outline"></span>
+        Open in workflow
+      </button>
+    </div>
 
     <nz-table
       *ngIf="displayCSV || displayXlsx"
@@ -51,7 +81,17 @@
       [nzData]="tableContent">
       <thead>
         <tr>
-          <th *ngFor="let column of tableDataHeader">{{ column }}</th>
+          <th *ngFor="let column of tableDataHeader; let i = index">
+            <div class="column-name">{{ column }}</div>
+            <!-- Per-column schema hint: inferred type tag, plus a null count when it is non-zero.
+                 `as` binds each lookup once instead of repeating it with non-null assertions. -->
+            <div class="column-meta" *ngIf="fileMetadata?.columnTypes?.[i] as columnType">
+              <span class="column-type-tag">{{ columnType }}</span>
+              <span class="column-null-hint" *ngIf="fileMetadata?.nullCounts?.[i] as nullCount">
+                {{ nullCount }} null
+              </span>
+            </div>
+          </th>
         </tr>
       </thead>
       <tbody>
@@ -79,6 +119,14 @@
         alt="{{filePath}}"
         class="full-size-image" />
     </div>
+
+    <div *ngIf="displayPDF && safeResourceFileURL">
+      <iframe
+        title="PDF preview"
+        [src]="safeResourceFileURL"
+        style="width: 100%; height: 600px; border: none;"></iframe>
+    </div>
+
     <div *ngIf="displayMarkdown">
       <markdown [data]="textContent"></markdown>
     </div>
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss
index e6424f529d8..c692589f767 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.scss
@@ -40,3 +40,65 @@
   max-width: 90%;
   max-height: 90%;
 }
+
+.file-metadata-strip {
+  display: flex; // one row holding the metadata pills and the "Open in workflow" button
+  flex-wrap: wrap; // items flow onto further rows instead of overflowing the strip
+  gap: 6px;
+  margin-bottom: 10px;
+  padding: 6px 0;
+  border-bottom: 1px solid #f0f0f0;
+}
+
+.metadata-pill {
+  display: inline-flex;
+  align-items: center;
+  gap: 4px;
+  padding: 2px 8px 2px 6px;
+  background: #fafafa;
+  border: 1px solid #e8e8e8;
+  border-radius: 4px;
+  font-size: 12px;
+  white-space: nowrap; // a label/value pair never breaks across lines
+}
+
+.metadata-label {
+  color: #8c8c8c;
+  font-weight: 500;
+}
+
+.metadata-value {
+  color: #262626;
+}
+
+.column-name {
+  font-weight: 600;
+}
+
+.column-meta {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  margin-top: 2px;
+  font-weight: 400; // presumably undoes the bold weight inherited from the table header cell — TODO confirm
+}
+
+.column-type-tag {
+  display: inline-block;
+  padding: 0 6px;
+  font-size: 11px;
+  color: #1890ff;
+  background: #e6f4ff;
+  border: 1px solid #91caff;
+  border-radius: 3px;
+  font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+}
+
+.column-null-hint {
+  font-size: 11px;
+  color: #d4380d;
+}
+
+.open-in-workflow-btn {
+  margin-left: auto; // inside the flex strip, the auto margin pushes the button to the far end of its row
+}
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts
index 9e70a444df8..568dac0ecd5 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.spec.ts
@@ -19,11 +19,13 @@
 
 import { TestBed } from "@angular/core/testing";
 import { HttpClientTestingModule } from "@angular/common/http/testing";
-import { UserDatasetFileRendererComponent } from "./user-dataset-file-renderer.component";
+import { UserDatasetFileRendererComponent, MIME_TYPES, getMimeType, inferColumnSchema } from "./user-dataset-file-renderer.component";
 import { DatasetService } from "../../../../../service/user/dataset/dataset.service";
 import { NotificationService } from "../../../../../../common/service/notification/notification.service";
 import { DomSanitizer } from "@angular/platform-browser";
 import { commonTestProviders } from "../../../../../../common/testing/test-utils";
+import { Router } from "@angular/router";
+import { WorkflowPersistService } from "../../../../../../common/service/workflow-persist/workflow-persist.service";
 
 describe("UserDatasetFileRendererComponent", () => {
   let component: UserDatasetFileRendererComponent;
@@ -34,7 +36,15 @@ describe("UserDatasetFileRendererComponent", () => {
       providers: [
         DatasetService,
         NotificationService,
-        { provide: DomSanitizer, useValue: { bypassSecurityTrustUrl: vi.fn() } },
+        WorkflowPersistService,
+        { provide: Router, useValue: { navigate: vi.fn() } },
+        {
+          provide: DomSanitizer,
+          useValue: {
+            bypassSecurityTrustUrl: vi.fn((url: string) => url),
+            bypassSecurityTrustResourceUrl: vi.fn((url: string) => url),
+          },
+        },
         ...commonTestProviders,
       ],
     });
@@ -42,15 +52,250 @@ describe("UserDatasetFileRendererComponent", () => {
     component = fixture.componentInstance;
   });
 
-  it("should return true for supported MIME type", () => {
-    const supportedMimeType = "image/jpeg"; // Example of a supported MIME type
-    const result = component.isPreviewSupported(supportedMimeType);
-    expect(result).toBe(true);
+  describe("isPreviewSupported", () => {
+    it("should return true for known MIME types", () => {
+      expect(component.isPreviewSupported("image/jpeg")).toBe(true);
+      expect(component.isPreviewSupported("application/pdf")).toBe(true);
+      expect(component.isPreviewSupported("application/x-parquet")).toBe(true);
+    });
+
+    it("should return false only for unidentified binary (octet-stream)", () => {
+      expect(component.isPreviewSupported(MIME_TYPES.OCTET_STREAM)).toBe(false);
+    });
   });
 
-  it("should return false for unsupported MIME type", () => {
-    const unsupportedMimeType = "application/unknown"; // Example of an unsupported MIME type
-    const result = component.isPreviewSupported(unsupportedMimeType);
-    expect(result).toBe(false);
+  describe("getMimeType (extension-based fallback)", () => {
+    it("should resolve common image extensions", () => {
+      expect(getMimeType("photo.jpg")).toBe(MIME_TYPES.JPEG);
+      expect(getMimeType("photo.PNG")).toBe(MIME_TYPES.PNG);
+      expect(getMimeType("anim.gif")).toBe(MIME_TYPES.GIF);
+    });
+
+    it("should resolve xlsx separately from xls", () => {
+      expect(getMimeType("data.xlsx")).toBe(MIME_TYPES.XLSX);
+      expect(getMimeType("data.xls")).toBe(MIME_TYPES.MSEXCEL);
+    });
+
+    it("should resolve data format extensions", () => {
+      expect(getMimeType("data.parquet")).toBe(MIME_TYPES.PARQUET);
+      expect(getMimeType("data.arrow")).toBe(MIME_TYPES.ARROW);
+      expect(getMimeType("data.feather")).toBe(MIME_TYPES.ARROW);
+    });
+
+    it("should return octet-stream for unknown extensions", () => {
+      expect(getMimeType("file.xyz")).toBe(MIME_TYPES.OCTET_STREAM);
+      expect(getMimeType("noextension")).toBe(MIME_TYPES.OCTET_STREAM);
+    });
+  });
+
+  describe("detectMimeType (magic byte detection)", () => {
+    it("should detect Parquet files from PAR1 magic bytes", async () => {
+      const magic = new Uint8Array([0x50, 0x41, 0x52, 0x31, 0x00, 0x00, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.PARQUET);
+    });
+
+    it("should detect Arrow IPC files from ARROW1 magic bytes", async () => {
+      const magic = new Uint8Array([0x41, 0x52, 0x52, 0x4f, 0x57, 0x31, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.ARROW);
+    });
+
+    it("should detect JSON via text sniffing (object)", async () => {
+      const blob = new Blob(['{"key": "value"}'], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.JSON);
+    });
+
+    it("should detect JSON via text sniffing (array)", async () => {
+      const blob = new Blob(["[1, 2, 3]"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.JSON);
+    });
+
+    it("should detect CSV via text sniffing", async () => {
+      const blob = new Blob(["name,age,city\nAlice,30,LA\nBob,25,NY"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.CSV);
+    });
+
+    it("should detect Markdown via text sniffing", async () => {
+      const blob = new Blob(["# My Title\n\nSome content here"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.MD);
+    });
+
+    it("should detect plain text when content is printable ASCII", async () => {
+      const blob = new Blob(["Hello, world! This is plain text."], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.TXT);
+    });
+
+    it("should return octet-stream for unidentifiable binary", async () => {
+      const binary = new Uint8Array([0x00, 0x01, 0x02, 0x80, 0xff, 0xfe, 0x7f, 0x03]);
+      const blob = new Blob([binary]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.OCTET_STREAM);
+    });
+
+    it("should detect HDF5 from magic bytes (generic .h5)", async () => {
+      const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob, "model.h5");
+      expect(result).toBe(MIME_TYPES.HDF5);
+    });
+
+    it("should refine HDF5 to H5AD by extension", async () => {
+      const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob, "scrna.h5ad");
+      expect(result).toBe(MIME_TYPES.H5AD);
+    });
+
+    it("should refine HDF5 to H5SEURAT by extension", async () => {
+      const magic = new Uint8Array([0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob, "pbmc.h5seurat");
+      expect(result).toBe(MIME_TYPES.H5SEURAT);
+    });
+
+    it("should detect Python pickle from \\x80 + protocol byte", async () => {
+      const magic = new Uint8Array([0x80, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.PICKLE);
+    });
+
+    it("should detect NumPy .npy from magic bytes", async () => {
+      const magic = new Uint8Array([0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59, 0x01, 0x00, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.NPY);
+    });
+
+    it("should detect GGUF from magic bytes", async () => {
+      const magic = new Uint8Array([0x47, 0x47, 0x55, 0x46, 0x03, 0x00, 0x00, 0x00]);
+      const blob = new Blob([magic]);
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.GGUF);
+    });
+
+    it("should detect Safetensors via extension fallback", async () => {
+      const opaque = new Uint8Array([0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
+      const blob = new Blob([opaque]);
+      const result = await component.detectMimeType(blob, "model.safetensors");
+      expect(result).toBe(MIME_TYPES.SAFETENSORS);
+    });
+
+    it("should detect ONNX via extension fallback", async () => {
+      const opaque = new Uint8Array([0x08, 0x07, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00]);
+      const blob = new Blob([opaque]);
+      const result = await component.detectMimeType(blob, "resnet.onnx");
+      expect(result).toBe(MIME_TYPES.ONNX);
+    });
+
+    it("should detect VCF from header line", async () => {
+      const blob = new Blob(["##fileformat=VCFv4.2\n##source=test\n"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.VCF);
+    });
+
+    it("should detect FASTA from > prefix", async () => {
+      const blob = new Blob([">seq1\nACGTACGT\n>seq2\nTGCATGCA\n"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.FASTA);
+    });
+
+    it("should detect FASTQ from 4-line @/+ pattern", async () => {
+      const blob = new Blob(["@read1\nACGT\n+\n!!!!\n@read2\nTGCA\n+\n!!!!\n"], { type: "text/plain" });
+      const result = await component.detectMimeType(blob);
+      expect(result).toBe(MIME_TYPES.FASTQ);
+    });
+  });
+
+  describe("parser helpers", () => {
+    it("should parse a NumPy v1.0 header", async () => {
+      // Construct a minimal valid .npy v1 file: magic + version + uint16 header_len + ASCII header
+      const headerText = "{'descr': '<f8', 'fortran_order': False, 'shape': (10, 256), }";
+      const padded = headerText + " ".repeat(64 - (headerText.length % 64)) + "\n";
+      const headerBytes = new TextEncoder().encode(padded);
+      const headerLen = headerBytes.length;
+      const buf = new Uint8Array(10 + headerLen);
+      buf.set([0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59, 0x01, 0x00], 0);
+      buf[8] = headerLen & 0xff;
+      buf[9] = (headerLen >> 8) & 0xff;
+      buf.set(headerBytes, 10);
+      const blob = new Blob([buf]);
+      const result = await (component as any).parseNpyHeader(blob);
+      expect(result?.dtype).toBe("<f8");
+      expect(result?.shape).toEqual([10, 256]);
+    });
+
+    it("should parse a Safetensors header", async () => {
+      const header = JSON.stringify({
+        "layer.weight": { dtype: "F32", shape: [128, 64], data_offsets: [0, 32768] },
+        "layer.bias": { dtype: "F32", shape: [128], data_offsets: [32768, 33280] },
+        __metadata__: { format: "pt" },
+      });
+      const headerBytes = new TextEncoder().encode(header);
+      const lenBytes = new Uint8Array(8);
+      let len = headerBytes.length;
+      for (let i = 0; i < 8; i++) {
+        lenBytes[i] = len & 0xff;
+        len = Math.floor(len / 256);
+      }
+      const blob = new Blob([lenBytes, headerBytes]);
+      const result = await (component as any).parseSafetensorsHeader(blob);
+      expect(result?.tensorCount).toBe(2);
+      expect(result?.parameterCount).toBe(128 * 64 + 128);
+      expect(result?.sampleNames).toEqual(["layer.weight", "layer.bias"]);
+    });
+
+    it("should infer column types from tabular sample data", () => {
+      const rows = [
+        ["Alice", "30", "75000.50", "true", "2024-01-15"],
+        ["Bob", "25", "60000.00", "false", "2024-03-22"],
+        ["Carol", "", "82000.75", "true", "2024-05-10"],
+      ];
+      const schema = inferColumnSchema(rows, 5);
+      expect(schema.types).toEqual(["string", "integer", "double", "boolean", "date"]);
+      expect(schema.nullCounts).toEqual([0, 1, 0, 0, 0]);
+      expect(schema.samples).toEqual(["Alice", "30", "75000.50", "true", "2024-01-15"]);
+    });
+
+    it("should fall back to string for all-null columns", () => {
+      const rows = [["a", ""], ["b", ""]];
+      const schema = inferColumnSchema(rows, 2);
+      expect(schema.types).toEqual(["string", "string"]);
+      expect(schema.nullCounts).toEqual([0, 2]);
+    });
+
+    it("should expose canOpenInWorkflow whenever a filePath is set", () => {
+      component.filePath = "/x/y/v1/data.csv";
+      expect(component.canOpenInWorkflow).toBe(true);
+      component.filePath = "/x/y/v1/model.safetensors";
+      expect(component.canOpenInWorkflow).toBe(true);
+    });
+
+    it("should not expose canOpenInWorkflow when no file is selected", () => {
+      component.filePath = "";
+      expect(component.canOpenInWorkflow).toBe(false);
+    });
+
+    it("should parse a GGUF header", async () => {
+      // Minimal 24-byte little-endian header: magic (4), version (4), tensor count (8), metadata KV count (8).
+      const buf = new Uint8Array(24);
+      buf.set([0x47, 0x47, 0x55, 0x46], 0); // "GGUF"
+      buf.set([0x03, 0x00, 0x00, 0x00], 4); // version 3
+      buf.set([0xd2, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 8); // 722 tensors
+      buf.set([0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], 16); // 16 metadata kv
+      const blob = new Blob([buf]);
+      const result = await (component as any).parseGgufHeader(blob);
+      expect(result?.version).toBe(3);
+      expect(result?.tensorCount).toBe(722);
+      expect(result?.metadataKvCount).toBe(16);
+    });
   });
 });
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
index 861479ca5a5..564e7867ad4 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-file-renderer/user-dataset-file-renderer.component.ts
@@ -17,14 +17,23 @@
  * under the License.
  */
 
-import { Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core";
+import { ChangeDetectorRef, Component, EventEmitter, Input, OnChanges, OnDestroy, OnInit, Output, SimpleChanges } from "@angular/core";
 import { DatasetService } from "../../../../../service/user/dataset/dataset.service";
 import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy";
 import * as Papa from "papaparse";
 import { ParseResult } from "papaparse";
-import { DomSanitizer, SafeUrl } from "@angular/platform-browser";
-import readXlsxFile from "read-excel-file";
+import { DomSanitizer, SafeResourceUrl, SafeUrl } from "@angular/platform-browser";
+import readXlsxFile, { readSheetNames } from "read-excel-file";
 import { NotificationService } from "../../../../../../common/service/notification/notification.service";
+import { formatSize } from "../../../../../../common/util/size-formatter.util";
+import { Router } from "@angular/router";
+import {
+  DEFAULT_WORKFLOW_NAME,
+  WorkflowPersistService,
+} from "../../../../../../common/service/workflow-persist/workflow-persist.service";
+import { GuiConfigService } from "../../../../../../common/service/gui-config.service";
+import { ExecutionMode, WorkflowContent } from "../../../../../../common/type/workflow";
+import { DASHBOARD_USER_WORKSPACE } from "../../../../../../app-routing.constant";
 import { NgStyle, NgIf, NgFor } from "@angular/common";
 import { NzSpinComponent } from "ng-zorro-antd/spin";
 import { NzAlertComponent } from "ng-zorro-antd/alert";
@@ -38,6 +47,9 @@ import {
 } from "ng-zorro-antd/table";
 import { MarkdownComponent } from "ngx-markdown";
 import { NgxJsonViewerModule } from "ngx-json-viewer";
+import { fileTypeFromBlob } from "file-type";
+import { NzButtonComponent } from "ng-zorro-antd/button";
+import { NzIconDirective } from "ng-zorro-antd/icon";
 
 export const MIME_TYPES = {
   JPEG: "image/jpeg",
@@ -45,6 +57,9 @@ export const MIME_TYPES = {
   PNG: "image/png",
   WEBP: "image/webp",
   GIF: "image/gif",
+  AVIF: "image/avif",
+  BMP: "image/bmp",
+  TIFF: "image/tiff",
   CSV: "text/csv",
   TXT: "text/plain",
   MD: "text/markdown",
@@ -53,35 +68,474 @@ export const MIME_TYPES = {
   PDF: "application/pdf",
   MSWORD: "application/msword",
   MSEXCEL: "application/vnd.ms-excel",
+  XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+  DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+  PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
   MSPOWERPOINT: "application/vnd.ms-powerpoint",
   MP4: "video/mp4",
   MP3: "audio/mpeg",
-  OCTET_STREAM: "application/octet-stream", // Default binary format
+  WAV: "audio/wav",
+  FLAC: "audio/flac",
+  WEBM: "video/webm",
+  MOV: "video/quicktime",
+  ARROW: "application/x-arrow",
+  PARQUET: "application/x-parquet",
+  // ML / scientific data formats
+  HDF5: "application/x-hdf5",
+  H5AD: "application/x-h5ad",
+  H5SEURAT: "application/x-h5seurat",
+  LOOM: "application/x-loom",
+  PICKLE: "application/x-python-pickle",
+  NPY: "application/x-numpy-array",
+  NPZ: "application/x-numpy-archive",
+  SAFETENSORS: "application/x-safetensors",
+  GGUF: "application/x-gguf",
+  PYTORCH: "application/x-pytorch",
+  KERAS: "application/x-keras",
+  ONNX: "application/x-onnx",
+  RDS: "application/x-rds",
+  // Bioinformatics text
+  FASTA: "application/x-fasta",
+  FASTQ: "application/x-fastq",
+  VCF: "application/x-vcf",
+  OCTET_STREAM: "application/octet-stream",
 };
 
 export function getMimeType(filename: string): string {
-  const extension = filename.split(".").pop()?.toUpperCase();
-  return extension && MIME_TYPES[extension as keyof typeof MIME_TYPES]
-    ? MIME_TYPES[extension as keyof typeof MIME_TYPES]
-    : MIME_TYPES.OCTET_STREAM;
+  const extensionMap: Record<string, string> = {
+    JPG: MIME_TYPES.JPEG,
+    JPEG: MIME_TYPES.JPEG,
+    PNG: MIME_TYPES.PNG,
+    WEBP: MIME_TYPES.WEBP,
+    GIF: MIME_TYPES.GIF,
+    AVIF: MIME_TYPES.AVIF,
+    BMP: MIME_TYPES.BMP,
+    TIFF: MIME_TYPES.TIFF,
+    TIF: MIME_TYPES.TIFF,
+    CSV: MIME_TYPES.CSV,
+    TSV: MIME_TYPES.CSV,
+    TXT: MIME_TYPES.TXT,
+    MD: MIME_TYPES.MD,
+    HTML: MIME_TYPES.HTML,
+    HTM: MIME_TYPES.HTML,
+    JSON: MIME_TYPES.JSON,
+    JSONL: MIME_TYPES.TXT,
+    PDF: MIME_TYPES.PDF,
+    DOC: MIME_TYPES.MSWORD,
+    XLS: MIME_TYPES.MSEXCEL,
+    XLSX: MIME_TYPES.XLSX,
+    DOCX: MIME_TYPES.DOCX,
+    PPTX: MIME_TYPES.PPTX,
+    PPT: MIME_TYPES.MSPOWERPOINT,
+    MP4: MIME_TYPES.MP4,
+    MP3: MIME_TYPES.MP3,
+    WAV: MIME_TYPES.WAV,
+    FLAC: MIME_TYPES.FLAC,
+    WEBM: MIME_TYPES.WEBM,
+    MOV: MIME_TYPES.MOV,
+    ARROW: MIME_TYPES.ARROW,
+    FEATHER: MIME_TYPES.ARROW,
+    PARQUET: MIME_TYPES.PARQUET,
+    // ML / scientific
+    H5: MIME_TYPES.HDF5,
+    HDF5: MIME_TYPES.HDF5,
+    H5AD: MIME_TYPES.H5AD,
+    H5SEURAT: MIME_TYPES.H5SEURAT,
+    LOOM: MIME_TYPES.LOOM,
+    PKL: MIME_TYPES.PICKLE,
+    PICKLE: MIME_TYPES.PICKLE,
+    JOBLIB: MIME_TYPES.PICKLE,
+    NPY: MIME_TYPES.NPY,
+    NPZ: MIME_TYPES.NPZ,
+    SAFETENSORS: MIME_TYPES.SAFETENSORS,
+    GGUF: MIME_TYPES.GGUF,
+    PT: MIME_TYPES.PYTORCH,
+    PTH: MIME_TYPES.PYTORCH,
+    KERAS: MIME_TYPES.KERAS,
+    ONNX: MIME_TYPES.ONNX,
+    RDS: MIME_TYPES.RDS,
+    // Bioinformatics text
+    FASTA: MIME_TYPES.FASTA,
+    FA: MIME_TYPES.FASTA,
+    FNA: MIME_TYPES.FASTA,
+    FFN: MIME_TYPES.FASTA,
+    FAA: MIME_TYPES.FASTA,
+    FASTQ: MIME_TYPES.FASTQ,
+    FQ: MIME_TYPES.FASTQ,
+    VCF: MIME_TYPES.VCF,
+  };
+  const dot = filename.lastIndexOf("."); // -1 means no extension, so a file merely named "csv" is not treated as CSV
+  return (dot < 0 ? undefined : extensionMap[filename.slice(dot + 1).toUpperCase()]) ?? MIME_TYPES.OCTET_STREAM;
+}
+
+/** Formats seconds as "m:ss" or "h:mm:ss" (fraction truncated); negative or non-finite input yields "—". */
+export function formatDuration(seconds: number): string {
+  if (!Number.isFinite(seconds) || seconds < 0) return "—"; // Number.isFinite does not coerce its argument
+  const pad = (part: number): string => String(part).padStart(2, "0");
+  const totalSec = Math.floor(seconds);
+  const h = Math.floor(totalSec / 3600);
+  const m = Math.floor((totalSec % 3600) / 60);
+  return h > 0 ? `${h}:${pad(m)}:${pad(totalSec % 60)}` : `${m}:${pad(totalSec % 60)}`;
 }
 
-// the size limits for all preview-supported types
+/**
+ * Maximum size at which we'll attempt to preview a file.
+ *
+ * Note on memory: for "identify-only" types (HDF5, Parquet, Arrow, pickle, model containers, etc.)
+ * we only read the first ~16 bytes for magic-byte detection, so 1 GB is safe. For header-parse types
+ * (Safetensors, GGUF, NumPy .npy) we only read the first few KB. The cost of bumping all limits to
+ * 1 GB is the full-blob download time, since the dataset service streams the entire file.
+ *
+ * For full-content render types (CSV via Papa.parse, XLSX, JSON, large text) memory cost scales
+ * with file size — browsers may slow down or run out of memory well before 1 GB. That risk is left
+ * to the user: this guard no longer blocks such files, and a struggling browser tab can be closed.
+ */
+const MAX_PREVIEW_SIZE = 1024 * 1024 * 1024;
+
+// Per-MIME-type size limits, in bytes (despite the "_MB" suffix in the name) — also used as the pre-fetch guard.
 export const MIME_TYPE_SIZE_LIMITS_MB = {
-  [MIME_TYPES.JPEG]: 5 * 1024 * 1024, // 5 MB
-  [MIME_TYPES.PNG]: 5 * 1024 * 1024, // 5 MB
-  [MIME_TYPES.WEBP]: 5 * 1024 * 1024, // 5 MB
-  [MIME_TYPES.GIF]: 10 * 1024 * 1024, // 10 MB
-  [MIME_TYPES.CSV]: 2 * 1024 * 1024, // 2 MB for text-based data files
-  [MIME_TYPES.TXT]: 1 * 1024 * 1024, // 1 MB for plain text files
-  [MIME_TYPES.MD]: 1 * 1024 * 1024, // 1 MB for MD files
-  [MIME_TYPES.JSON]: 1 * 1024 * 1024, // 1 MB for JSON files
-  [MIME_TYPES.MSEXCEL]: 10 * 1024 * 1024, // 10 MB for Excel spreadsheets
-  [MIME_TYPES.MP4]: 50 * 1024 * 1024, // 50 MB for MP4 videos
-  [MIME_TYPES.MP3]: 10 * 1024 * 1024, // 10 MB for MP3 audio files
-  [MIME_TYPES.OCTET_STREAM]: 5 * 1024 * 1024, // Default size for other binary formats
+  [MIME_TYPES.JPEG]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PNG]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.WEBP]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.GIF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.AVIF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.BMP]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.TIFF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.CSV]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.TXT]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.MD]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.JSON]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PDF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.MSEXCEL]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.XLSX]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.DOCX]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PPTX]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.MP4]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.WEBM]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.MOV]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.MP3]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.WAV]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.FLAC]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.ARROW]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PARQUET]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.HDF5]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.H5AD]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.H5SEURAT]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.LOOM]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PICKLE]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.NPY]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.NPZ]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.SAFETENSORS]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.GGUF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.PYTORCH]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.KERAS]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.ONNX]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.RDS]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.FASTA]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.FASTQ]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.VCF]: MAX_PREVIEW_SIZE,
+  [MIME_TYPES.OCTET_STREAM]: MAX_PREVIEW_SIZE,
 };
 
+export interface FileMetadata {
+  fileSize?: number; // presumably bytes — TODO confirm against the code that fills it in
+  // image
+  imageWidth?: number;
+  imageHeight?: number;
+  // video
+  videoDuration?: number; // presumably seconds (formatDuration takes seconds) — TODO confirm
+  videoWidth?: number;
+  videoHeight?: number;
+  // audio
+  audioDuration?: number; // presumably seconds (formatDuration takes seconds) — TODO confirm
+  // tabular
+  rowCount?: number;
+  columnCount?: number;
+  columnNames?: string[];
+  sheetCount?: number;
+  // json
+  jsonTopLevelType?: "object" | "array";
+  jsonItemCount?: number;
+  jsonPreviewKeys?: string[];
+  // text / markdown
+  lineCount?: number;
+  wordCount?: number;
+  charCount?: number;
+  headingCount?: number;
+  // pdf
+  pageCount?: number;
+  // ML model / tensor data
+  modelFormat?: string; // "PyTorch", "Keras", "ONNX", "Safetensors", "GGUF", "TensorFlow"
+  containerFormat?: string; // "HDF5", "ZIP archive", "gzip"
+  tensorCount?: number;
+  parameterCount?: number;
+  sampleTensorNames?: string[];
+  // NumPy
+  dtype?: string;
+  shape?: number[];
+  // GGUF
+  ggufVersion?: number;
+  metadataKvCount?: number;
+  // Bioinformatics
+  sequenceCount?: number;
+  sequenceCountIsExact?: boolean; // NOTE(review): presumably false when counted from a truncated preview — confirm
+  variantCount?: number;
+  variantCountIsExact?: boolean; // NOTE(review): presumably false when counted from a truncated preview — confirm
+
+  // Rich tabular schema (CSV / XLSX)
+  columnTypes?: string[]; // inferred type per column: "integer", "double", "boolean", "date", "string"
+  nullCounts?: number[]; // count of empty cells per column (in sample)
+  sampleValues?: string[]; // first non-null value per column
+
+  // JSON schema
+  jsonMaxDepth?: number;
+  jsonKeyTypes?: { key: string; type: string }[]; // for object roots
+  jsonArrayElementType?: string; // for array roots: uniform type or "mixed"
+
+  // PDF /Info dictionary
+  pdfTitle?: string;
+  pdfAuthor?: string;
+  pdfCreator?: string;
+  pdfProducer?: string;
+  pdfVersion?: string;
+  pdfEncrypted?: boolean;
+
+  // Markdown structure
+  codeBlockCount?: number;
+  linkCount?: number;
+  imageCount?: number;
+  listItemCount?: number;
+
+  // Plain text / encoding
+  encoding?: string; // "UTF-8 BOM", "UTF-8", "ASCII"
+  emptyLineCount?: number;
+  avgLineLength?: number;
+  maxLineLength?: number;
+
+  // NumPy enhanced
+  totalElements?: number;
+  byteOrder?: string; // "little-endian", "big-endian"
+  fortranOrder?: boolean;
+
+  // Safetensors enhanced
+  dtypeBreakdown?: { dtype: string; params: number }[];
+  largestTensor?: { name: string; shape: number[]; params: number };
+  safetensorsMetadata?: { key: string; value: string }[];
+
+  // GGUF enhanced
+  ggufArchitecture?: string;
+  ggufQuantization?: string;
+
+  // FASTA enhanced
+  totalBases?: number;
+  gcContent?: number; // 0..1
+  minSequenceLength?: number;
+  maxSequenceLength?: number;
+  avgSequenceLength?: number;
+  isProtein?: boolean;
+
+  // VCF enhanced
+  vcfSampleCount?: number;
+  vcfChromosomes?: string[];
+}
+
+/**
+ * Above this size, skip the download entirely and show only extension-based
+ * identification + a "how to load" hint. The dominant source of preview lag
+ * is the full-blob download from the dataset service.
+ *
+ * The value is a byte count; `reloadFileContent` compares it against the
+ * component's `fileSize` input before any download is started.
+ */
+export const FULL_PREVIEW_MAX_BYTES = 50 * 1024 * 1024; // 50 MB
+
+/**
+ * One-line "how to load" or "what is this" message per format.
+ *
+ * Keys are the MIME-type strings from `MIME_TYPES`; values are user-facing text.
+ * A MIME type without an entry has no hint (the lookup yields `undefined`).
+ *
+ * Used both when content was downloaded (in renderByMimeType) and when the
+ * download was skipped (in showOversizedFileInfo).
+ */
+export const TYPE_LOADING_HINTS: Record<string, string> = {
+  [MIME_TYPES.PARQUET]: "Parquet file. Use the Parquet File Scan operator in Texera to analyze this data.",
+  [MIME_TYPES.ARROW]: "Arrow / Feather file. Use the Arrow File Scan operator in Texera.",
+  [MIME_TYPES.HDF5]: "HDF5 binary container (Keras .h5 or scientific dataset). Load with h5py / rhdf5.",
+  [MIME_TYPES.H5AD]: "AnnData (.h5ad) — single-cell expression matrix. Load with scanpy.read_h5ad().",
+  [MIME_TYPES.H5SEURAT]: "Seurat HDF5 object (.h5seurat). Load with SeuratDisk::LoadH5Seurat() in R.",
+  [MIME_TYPES.LOOM]: "Loom (.loom) single-cell expression. Load with loompy / scanpy in Python.",
+  [MIME_TYPES.RDS]: "R serialized object (.rds) — Seurat / SCE / fitted model. Load with readRDS() in R.",
+  [MIME_TYPES.PICKLE]: "Python pickle — serialized model or dataset. Load with pickle.load() in Python.",
+  [MIME_TYPES.PYTORCH]: "PyTorch checkpoint (.pt/.pth). Load with torch.load() in Python.",
+  [MIME_TYPES.KERAS]: "Keras v3 model (.keras). Load with tf.keras.models.load_model() in Python.",
+  [MIME_TYPES.ONNX]: "ONNX model (.onnx). Load with onnxruntime; inspect at netron.app.",
+  [MIME_TYPES.SAFETENSORS]: "Safetensors file. Load with safetensors.torch.load_file() in Python.",
+  [MIME_TYPES.GGUF]: "GGUF model (llama.cpp / quantized LLM).",
+  [MIME_TYPES.NPY]: "NumPy array (.npy). Load with numpy.load() in Python.",
+  [MIME_TYPES.NPZ]: "NumPy archive (.npz) — ZIP of .npy arrays. Load with numpy.load().",
+  [MIME_TYPES.CSV]: "CSV file. Use the CSV File Scan operator in Texera.",
+  [MIME_TYPES.JSON]: "JSON file. Use the JSONL File Scan operator (or Python UDF for nested objects).",
+  [MIME_TYPES.XLSX]: "Excel spreadsheet (.xlsx). Convert to CSV or use a Python UDF with openpyxl.",
+  [MIME_TYPES.MSEXCEL]: "Excel spreadsheet (.xls). Convert to CSV or use a Python UDF.",
+  [MIME_TYPES.FASTA]: "FASTA sequence file. Parse with Biopython SeqIO.",
+  [MIME_TYPES.FASTQ]: "FASTQ reads file. Parse with Biopython SeqIO.",
+  [MIME_TYPES.VCF]: "VCF variant file. Parse with pyvcf / cyvcf2.",
+};
+
+/**
+ * Coarse type label for one cell of tabular data.
+ *
+ * Returns "null" for an empty (or nullish) cell, otherwise the label of the first
+ * pattern that matches the whole value: "integer", "double", "boolean" or "date".
+ * Anything that matches no pattern is a "string".
+ */
+function inferCellType(value: string): string {
+  if (value == null || value === "") {
+    return "null";
+  }
+  // Ordered: the first matching pattern decides the label.
+  const patterns: [string, RegExp][] = [
+    ["integer", /^-?\d+$/],
+    ["double", /^-?\d+\.\d+$/],
+    ["double", /^-?\d+\.?\d*[eE][-+]?\d+$/],
+    ["boolean", /^(true|false|True|False|TRUE|FALSE)$/],
+    ["date", /^\d{4}-\d{2}-\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2})?)?$/],
+  ];
+  const hit = patterns.find(([, pattern]) => pattern.test(value));
+  return hit ? hit[0] : "string";
+}
+
+/**
+ * Infer per-column type, null count, and a sample value from tabular data rows.
+ *
+ * Only the first `sampleLimit` rows are inspected. For each column the most frequent
+ * non-null cell type (as labelled by `inferCellType`) wins; ties go to the type seen
+ * first. A column whose winner is "integer" but that also contains at least one
+ * "double" cell is reported as "double", since fractional values cannot be integers.
+ * Columns with no non-null cell in the sample default to "string".
+ *
+ * @param dataRows    Data rows (header excluded by the caller); rows may be ragged.
+ * @param columnCount Number of columns to describe.
+ * @param sampleLimit Maximum number of leading rows to scan (default 50).
+ * @returns Parallel arrays, one entry per column: inferred `types`, `nullCounts`
+ *          (empty cells within the sample) and `samples` (first non-null value, or "").
+ */
+export function inferColumnSchema(
+  dataRows: string[][],
+  columnCount: number,
+  sampleLimit: number = 50
+): { types: string[]; nullCounts: number[]; samples: string[] } {
+  const types: string[] = [];
+  const nullCounts: number[] = [];
+  const samples: string[] = [];
+  const rowsToScan = Math.min(dataRows.length, sampleLimit);
+
+  for (let c = 0; c < columnCount; c++) {
+    const typeCounts: Record<string, number> = {};
+    let nullCount = 0;
+    let firstNonNull = "";
+
+    for (let r = 0; r < rowsToScan; r++) {
+      // Optional chaining tolerates a missing row as well as a short (ragged) one;
+      // both are counted as empty cells instead of throwing.
+      const raw = dataRows[r]?.[c];
+      const val = raw == null ? "" : String(raw).trim();
+      const t = inferCellType(val);
+      if (t === "null") {
+        nullCount++;
+      } else {
+        if (firstNonNull === "") firstNonNull = val;
+        typeCounts[t] = (typeCounts[t] ?? 0) + 1;
+      }
+    }
+
+    const ranked = Object.entries(typeCounts).sort((a, b) => b[1] - a[1]);
+    let winner = ranked[0]?.[0] ?? "string";
+    // Numeric promotion: whole numbers are valid doubles, but not the other way round.
+    if (winner === "integer" && (typeCounts["double"] ?? 0) > 0) winner = "double";
+    types.push(winner);
+    nullCounts.push(nullCount);
+    samples.push(firstNonNull);
+  }
+  return { types, nullCounts, samples };
+}
+
+/**
+ * Maximum nesting depth of a parsed JSON value.
+ *
+ * A scalar (or `null`) root has depth 1; every array or object level adds one for
+ * the values it contains. Empty containers therefore also have depth 1.
+ *
+ * @param value Any parsed JSON value.
+ * @param depth Depth assigned to `value` itself (callers use the default of 1).
+ */
+function jsonMaxDepth(value: unknown, depth = 1): number {
+  const isContainer = value !== null && typeof value === "object";
+  if (!isContainer) {
+    return depth;
+  }
+  const children: unknown[] = Array.isArray(value) ? value : Object.values(value as Record<string, unknown>);
+  let deepest = depth;
+  for (const child of children) {
+    const childDepth = jsonMaxDepth(child, depth + 1);
+    if (childDepth > deepest) deepest = childDepth;
+  }
+  return deepest;
+}
+
+/**
+ * Human-readable type label for a JS value: "null" for null, "array(N)" for an
+ * array of length N, and the `typeof` result for everything else.
+ */
+function jsTypeLabel(value: unknown): string {
+  if (Array.isArray(value)) {
+    return "array(" + value.length + ")";
+  }
+  return value === null ? "null" : typeof value;
+}
+
+/**
+ * Heuristically pull document-information fields out of a PDF's raw text.
+ *
+ * - `version` comes from a `%PDF-x.y` marker at the very start of the text.
+ * - `encrypted` is true when an `/Encrypt` name appears anywhere in the text.
+ * - `title`, `author`, `creator`, `producer` are read from the first `/Name (value)`
+ *   occurrence; only the parenthesised literal-string form is handled (the `<hex>`
+ *   form is ignored), and only the `\(`, `\)` and `\\` escapes are unescaped.
+ *   A field that is absent or blank after trimming is `undefined`.
+ */
+function extractPdfInfo(rawText: string): {
+  title?: string;
+  author?: string;
+  creator?: string;
+  producer?: string;
+  version?: string;
+  encrypted?: boolean;
+} {
+  // Reads one parenthesised literal string that follows `/<name>`.
+  const readField = (name: string): string | undefined => {
+    const pattern = new RegExp(`/${name}\\s*\\(([^)\\\\]*(?:\\\\.[^)\\\\]*)*)\\)`);
+    const found = pattern.exec(rawText);
+    if (found === null) {
+      return undefined;
+    }
+    const unescaped = found[1].replace(/\\([()\\])/g, "$1").trim();
+    return unescaped === "" ? undefined : unescaped;
+  };
+
+  const info: ReturnType<typeof extractPdfInfo> = {};
+  const headerMarker = /^%PDF-(\d+\.\d+)/.exec(rawText);
+  if (headerMarker) {
+    info.version = headerMarker[1];
+  }
+  info.encrypted = /\/Encrypt\b/.test(rawText);
+  info.title = readField("Title");
+  info.author = readField("Author");
+  info.creator = readField("Creator");
+  info.producer = readField("Producer");
+  return info;
+}
+
+/**
+ * Single-pass summary of FASTA text.
+ *
+ * A `>` outside a header line starts a new record and the rest of that line is
+ * skipped as its header. Every other character except newline, carriage return,
+ * space and tab counts as one residue.
+ *
+ * - `gcContent` is the fraction (0..1) of residues that are G or C, in either case.
+ * - `minLen` / `maxLen` consider only records that contain at least one residue.
+ * - `avgLen` is total residues divided by the number of `>` headers.
+ * - `isProtein` is true when more than 10% of residues are outside A/C/G/T/U/N.
+ */
+function summarizeFasta(text: string): {
+  sequenceCount: number;
+  totalBases: number;
+  gcContent: number;
+  minLen: number;
+  maxLen: number;
+  avgLen: number;
+  isProtein: boolean;
+} {
+  // Scanned by index on purpose: no intermediate array of lines for multi-MB input.
+  const NUCLEOTIDE_CODES = "ACGTUNacgtun";
+  const GC_CODES = "GCgc";
+
+  let records = 0;
+  let residues = 0;
+  let gcResidues = 0;
+  let foreignResidues = 0;
+  let shortest = Infinity;
+  let longest = 0;
+  let runLength = 0;
+  let skippingHeader = false;
+
+  // Folds the record that just ended into the min/max trackers.
+  const closeRecord = () => {
+    if (records > 0 && runLength > 0) {
+      shortest = Math.min(shortest, runLength);
+      longest = Math.max(longest, runLength);
+    }
+    runLength = 0;
+  };
+
+  for (let pos = 0; pos < text.length; pos++) {
+    const ch = text.charAt(pos);
+    if (ch === "\n") {
+      skippingHeader = false;
+      continue;
+    }
+    if (skippingHeader) {
+      continue;
+    }
+    switch (ch) {
+      case ">":
+        closeRecord();
+        records += 1;
+        skippingHeader = true;
+        break;
+      case "\r":
+      case " ":
+      case "\t":
+        break;
+      default:
+        runLength += 1;
+        residues += 1;
+        if (GC_CODES.includes(ch)) gcResidues += 1;
+        if (!NUCLEOTIDE_CODES.includes(ch)) foreignResidues += 1;
+    }
+  }
+  closeRecord();
+
+  const hasResidues = residues > 0;
+  return {
+    sequenceCount: records,
+    totalBases: residues,
+    gcContent: hasResidues ? gcResidues / residues : 0,
+    minLen: Number.isFinite(shortest) ? shortest : 0,
+    maxLen: longest,
+    avgLen: records > 0 ? residues / records : 0,
+    isProtein: hasResidues && foreignResidues / residues > 0.1,
+  };
+}
+
 @UntilDestroy()
 @Component({
   selector: "texera-user-dataset-file-renderer",
@@ -101,14 +555,30 @@ export const MIME_TYPE_SIZE_LIMITS_MB = {
     NzTbodyComponent,
     MarkdownComponent,
     NgxJsonViewerModule,
+    NzButtonComponent,
+    NzIconDirective,
   ],
 })
 export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDestroy {
-  private DEFAULT_MAX_SIZE = 5 * 1024 * 1024; // 5 MB
+  private DEFAULT_MAX_SIZE = 1024 * 1024 * 1024; // 1 GB
+
+  // For text-based formats we slice to this size before parsing/rendering.
+  // Reading 1 GB as a UTF-16 string in JS would balloon to ~2 GB and likely crash the tab.
+  private static readonly PREVIEW_TEXT_BYTES = 10 * 1024 * 1024; // 10 MB
+
+  /**
+   * Caps a blob at the text-preview byte limit (`PREVIEW_TEXT_BYTES`).
+   *
+   * @returns The blob itself with `truncated: false` when it fits within the limit,
+   *          otherwise its leading `PREVIEW_TEXT_BYTES` bytes with `truncated: true`.
+   */
+  private getPreviewSlice(blob: Blob): { slice: Blob; truncated: boolean } {
+    const maxBytes = UserDatasetFileRendererComponent.PREVIEW_TEXT_BYTES;
+    const truncated = blob.size > maxBytes;
+    return { slice: truncated ? blob.slice(0, maxBytes) : blob, truncated };
+  }
+
+  /** True when text content shown is from a slice rather than the whole file. */
+  public previewTruncated: boolean = false;
 
   public fileURL: string | undefined;
-  // safe url is used to display some formats including image
   public safeFileURL: SafeUrl | undefined;
+  public safeResourceFileURL: SafeResourceUrl | undefined;
 
   // table related control
   public displayCSV: boolean = false;
@@ -131,10 +601,18 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
   // audio
   public displayMP3: boolean = false;
 
-  // plain text & octet stream related control
+  // PDF
+  public displayPDF: boolean = false;
+
+  // plain text
   public displayPlainText: boolean = false;
   public textContent: string = "";
 
+  // shown for detectable-but-unpreviewable types (Parquet, Arrow, DOCX, PPTX)
+  public detectedTypeMessage: string = "";
+
+  public fileMetadata: FileMetadata | undefined = undefined;
+
   // control flags
   public isLoading: boolean = false;
   public isFileSizeUnloadable = false;
@@ -142,33 +620,107 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
   public isFileTypePreviewUnsupported: boolean = false;
 
   public currentFile: File | undefined = undefined;
-  @Input()
-  isMaximized: boolean = false;
-
-  @Input()
-  did: number | undefined;
 
-  @Input()
-  dvid: number | undefined;
+  @Input() isMaximized: boolean = false;
+  @Input() did: number | undefined;
+  @Input() dvid: number | undefined;
+  @Input() filePath: string = "";
+  @Input() fileSize?: number;
+  @Input() isLogin: boolean = false;
 
-  @Input()
-  filePath: string = "";
-
-  @Input()
-  fileSize?: number;
-
-  @Input()
-  isLogin: boolean = false;
-
-  @Output()
-  loadFile = new EventEmitter<{ file: string; prefix: string }>();
+  @Output() loadFile = new EventEmitter<{ file: string; prefix: string }>();
 
   constructor(
     private datasetService: DatasetService,
     private sanitizer: DomSanitizer,
-    private notificationService: NotificationService
+    private notificationService: NotificationService,
+    private cdr: ChangeDetectorRef,
+    private router: Router,
+    private workflowPersistService: WorkflowPersistService,
+    private config: GuiConfigService
   ) {}
 
+  /**
+   * Whether the "open in workflow" action can be offered. It depends only on a
+   * non-empty `filePath`, not on the file's type.
+   */
+  get canOpenInWorkflow(): boolean {
+    return Boolean(this.filePath);
+  }
+
+  /**
+   * Picks the scan-operator type for a file, based on the MIME type that
+   * `getMimeType` derives from its path.
+   *
+   * CSV, JSON, Arrow and Parquet each map to a dedicated operator; images, PDF,
+   * audio/video, plain text and Markdown map to the generic "FileScan".
+   * The result is what `onOpenInWorkflow` forwards as the `addOp` query parameter.
+   *
+   * @returns The operator type string, or `null` when no operator is mapped.
+   */
+  private static getOperatorTypeForFile(filePath: string): string | null {
+    const mime = getMimeType(filePath);
+
+    // Formats with a dedicated scan operator.
+    if (mime === MIME_TYPES.CSV) return "CSVFileScan";
+    if (mime === MIME_TYPES.JSON) return "JSONLFileScan";
+    if (mime === MIME_TYPES.ARROW) return "ArrowFileScan";
+    if (mime === MIME_TYPES.PARQUET) return "ParquetFileScan";
+
+    // Everything else that is handled goes through the generic file scan.
+    const genericScanMimes: string[] = [
+      MIME_TYPES.PNG,
+      MIME_TYPES.JPEG,
+      MIME_TYPES.WEBP,
+      MIME_TYPES.GIF,
+      MIME_TYPES.AVIF,
+      MIME_TYPES.BMP,
+      MIME_TYPES.TIFF,
+      MIME_TYPES.PDF,
+      MIME_TYPES.MP3,
+      MIME_TYPES.MP4,
+      MIME_TYPES.WAV,
+      MIME_TYPES.FLAC,
+      MIME_TYPES.WEBM,
+      MIME_TYPES.MOV,
+      MIME_TYPES.TXT,
+      MIME_TYPES.MD,
+    ];
+    return genericScanMimes.includes(mime) ? "FileScan" : null;
+  }
+
+  /**
+   * Creates a new empty workflow named after the current file and navigates to the
+   * workflow editor.
+   *
+   * If the file type maps to a known scan operator (see `getOperatorTypeForFile`), the
+   * navigation carries `addOp` + `fileName` query params. Per the original author's
+   * note, the workspace component picks these up after init and adds the operator via
+   * its schema-validated path (`workspace.component.ts:handlePendingOperatorAddition()`).
+   *
+   * Failures are reported through the notification service: workflow creation errors,
+   * a response without a workflow id, and blocked or failed navigation.
+   */
+  onOpenInWorkflow(): void {
+    // Snapshot the path once. The subscription below resolves asynchronously, and
+    // re-reading the `filePath` input there could pair `addOp` with a different file.
+    const filePath = this.filePath;
+    // `split(...).pop()` yields "" (never undefined) for an empty path or a trailing "/".
+    const fileName = filePath.split("/").pop();
+    const addOp = UserDatasetFileRendererComponent.getOperatorTypeForFile(filePath);
+    const workflowContent: WorkflowContent = {
+      operators: [],
+      commentBoxes: [],
+      links: [],
+      operatorPositions: {},
+      settings: {
+        dataTransferBatchSize: this.config.env.defaultDataTransferBatchSize,
+        executionMode: this.config.env.defaultExecutionMode ?? ExecutionMode.PIPELINED,
+      },
+    };
+    // Fall back to the default name instead of producing a dangling "Analysis of ".
+    const workflowName = fileName ? `Analysis of ${fileName}` : DEFAULT_WORKFLOW_NAME;
+    this.workflowPersistService
+      .createWorkflow(workflowContent, workflowName)
+      .pipe(untilDestroyed(this))
+      .subscribe({
+        next: created => {
+          const wid = created?.workflow?.wid;
+          if (wid == null) {
+            this.notificationService.error("Workflow created but no ID was returned.");
+            return;
+          }
+          // Query params tell the workspace component which operator to auto-add and which
+          // file path to wire into its fileName property.
+          const queryParams = addOp ? { addOp, fileName: filePath } : undefined;
+          this.router
+            .navigate([DASHBOARD_USER_WORKSPACE, wid], { queryParams })
+            .then(navigated => {
+              if (!navigated) {
+                this.notificationService.error("Navigation to the workflow editor was blocked.");
+              }
+            })
+            // The navigation promise rejects on routing errors; do not leave it floating.
+            .catch(() => this.notificationService.error("Navigation to the workflow editor failed."));
+        },
+        error: () => this.notificationService.error("Failed to create workflow"),
+      });
+  }
+
   ngOnInit(): void {
     this.reloadFileContent();
   }
@@ -194,105 +746,894 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
   reloadFileContent() {
     this.turnOffAllDisplay();
 
-    // Pre-check - file size
-    const mimeType = getMimeType(this.filePath);
-    if (!this.isPreviewSupported(mimeType)) {
-      this.onFileTypePreviewUnsupported();
+    // Flow: (1) size pre-checks based on the extension-derived MIME type, without any
+    // download; (2) download the blob; (3) detect the real type from its content;
+    // (4) re-check the size against the detected type's limit; (5) render.
+    const extensionMime = getMimeType(this.filePath);
+
+    // Skip the full download for large files. The dataset service streams the entire blob;
+    // for a 500 MB file we'd wait 30+ seconds just to read its first 16 magic bytes. Above
+    // the threshold, fall back to extension-based identification + a "how to load" hint.
+    if (this.fileSize != null && this.fileSize > FULL_PREVIEW_MAX_BYTES) {
+      this.showOversizedFileInfo(extensionMime);
       return;
     }
-    const limit = MIME_TYPE_SIZE_LIMITS_MB[mimeType] ?? this.DEFAULT_MAX_SIZE;
-    if (this.fileSize != null && this.fileSize > limit) {
+
+    // Hard upper bound (defensive): even small types shouldn't load anything past this.
+    const preCheckLimit = MIME_TYPE_SIZE_LIMITS_MB[extensionMime] ?? this.DEFAULT_MAX_SIZE;
+    if (this.fileSize != null && this.fileSize > preCheckLimit) {
       this.onFileSizeNotLoadable();
       return;
     }
 
-    // Load file
+    // Nothing to fetch without a dataset, a version and a path; bail out before
+    // `isLoading` is raised so the loading state is never left on.
+    if (!this.did || !this.dvid || !this.filePath) return;
+
     this.isLoading = true;
-    if (this.did && this.dvid && this.filePath != "") {
-      this.datasetService
-        .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin)
-        .pipe(untilDestroyed(this))
-        .subscribe({
-          next: blob => {
-            this.isLoading = false;
-            const blobMimeType = getMimeType(this.filePath);
-            if (!this.isPreviewSupported(blobMimeType)) {
-              this.onFileTypePreviewUnsupported();
-              return;
-            }
-            const MaxSize = MIME_TYPE_SIZE_LIMITS_MB[blobMimeType] || this.DEFAULT_MAX_SIZE;
-            const fileSize = blob.size;
-            if (fileSize > MaxSize) {
-              this.onFileSizeNotLoadable();
-              this.notificationService.warning(`File ${this.filePath} is too large to be previewed`);
-              return;
-            }
-            this.currentFile = new File([blob], this.filePath, { type: blob.type });
-            // Handle different file types
-            switch (blobMimeType) {
-              case MIME_TYPES.PNG:
-              case MIME_TYPES.JPEG:
-              case MIME_TYPES.WEBP:
-              case MIME_TYPES.GIF:
-                this.displayImage = true;
-                this.loadSafeURL(blob);
-                break;
-              case MIME_TYPES.MP4:
-                this.displayMP4 = true;
-                this.loadSafeURL(blob);
-                break;
-
-              case MIME_TYPES.MP3:
-                this.displayMP3 = true;
-                this.loadSafeURL(blob);
-                break;
-
-              case MIME_TYPES.MSEXCEL:
-                readXlsxFile(blob).then(rows => {
-                  let parsedData: string[][] = [];
-                  rows.forEach(row => {
-                    // Convert each cell in the row to a string
-                    let stringRow = row.map(cell => (cell ? cell.toString() : ""));
-                    // Add the string array to the main array
-                    parsedData.push(stringRow);
-                  });
-                  if (parsedData.length > 0) {
-                    this.loadTabularFile(parsedData);
-                    this.displayXlsx = true;
-                  }
-                });
-                break;
-              case MIME_TYPES.CSV:
-                this.displayCSV = true;
-                // Handle CSV display
-                Papa.parse(this.currentFile, {
-                  complete: (results: ParseResult<any>) => {
-                    if (results.data.length > 0) {
-                      this.loadTabularFile(results.data);
-                    }
-                  },
-                  error: error => {
-                    console.error("Error parsing file:", error);
-                    this.onFileLoadingError();
-                  },
-                });
-                break;
-              case MIME_TYPES.MD:
-                this.displayMarkdown = true;
-                this.readFileAsText(blob);
-                break;
-              case MIME_TYPES.JSON:
-                this.displayJson = true;
-                this.readFileAsText(blob);
-                break;
-              case MIME_TYPES.TXT:
-              default:
-                this.displayPlainText = true;
-                this.readFileAsText(blob);
-                break;
+    this.datasetService
+      .retrieveDatasetVersionSingleFile(this.filePath, this.isLogin)
+      .pipe(untilDestroyed(this))
+      .subscribe({
+        next: async (blob: Blob) => {
+          this.isLoading = false;
+
+          // RxJS neither awaits nor observes the promise returned by an async `next`
+          // handler. Without this try/catch a failure in here would surface only as an
+          // unhandled rejection and never reach the `error` callback below.
+          try {
+            // NOTE(review): `this.filePath` may have changed while the download was in
+            // flight, and nothing here cancels an earlier request — confirm whether
+            // overlapping reloads can happen and need guarding.
+            const detectedMime = await this.detectMimeType(blob, this.filePath);
+
+            // Post-detection size check against the now-known type limit
+            const sizeLimit = MIME_TYPE_SIZE_LIMITS_MB[detectedMime] ?? this.DEFAULT_MAX_SIZE;
+            if (blob.size > sizeLimit) {
+              this.onFileSizeNotLoadable();
+              this.notificationService.warning(`File ${this.filePath} is too large to preview`);
+              return;
+            }
+
+            // currentFile is built lazily inside the CSV case (the only consumer); avoids an
+            // extra in-memory copy of the blob for every other type.
+            this.renderByMimeType(blob, detectedMime);
+          } catch {
+            this.onFileLoadingError();
+          }
+        },
+        error: () => this.onFileLoadingError(),
+      });
+  }
+
+  /**
+   * Detects the actual MIME type of a blob using five strategies in order:
+   * 1. file-type library (magic bytes, ~100 formats) — refined with extension hints for
+   *    ZIP/gzip container formats (PyTorch, Keras, NPZ, RDS).
+   * 2. Manual, unambiguous magic bytes for data formats not covered by file-type
+   *    (Parquet, Arrow, HDF5, NumPy .npy, GGUF).
+   * 3. Extension-based fallback for opaque binary formats with no reliable magic bytes
+   *    (Safetensors, ONNX).
+   * 4. Weak two-byte Python pickle heuristic. It runs after the extension fallback
+   *    because a Safetensors file starts with its header length, whose first two bytes
+   *    can legitimately be 0x80 0x02..0x05 (e.g. a 640-byte header).
+   * 5. Text sniffing for JSON, Markdown, VCF, FASTA, FASTQ, CSV, and plain text
+   *    (ASCII or valid UTF-8 without control characters).
+   *
+   * Every step is best-effort: a read or parse failure falls through to the next step.
+   * Uses FileReader throughout for broad environment compatibility (tests, browsers).
+   *
+   * @param blob     File content to inspect; only small leading slices are read here.
+   * @param fileName Optional name or path, used only for its extension.
+   * @returns A MIME type string, or `MIME_TYPES.OCTET_STREAM` when nothing matched.
+   */
+  async detectMimeType(blob: Blob, fileName?: string): Promise<string> {
+    const ext = (fileName ?? "").split(".").pop()?.toLowerCase() ?? "";
+
+    // 1. file-type library covers images, video, audio, PDF, Office (ZIP-based), and more.
+    if (typeof fileTypeFromBlob === "function") {
+      try {
+        const result = await fileTypeFromBlob(blob);
+        if (result) {
+          // Refine generic container types (ZIP, gzip) using extension hints
+          if (result.mime === "application/zip") {
+            if (ext === "pt" || ext === "pth") return MIME_TYPES.PYTORCH;
+            if (ext === "keras") return MIME_TYPES.KERAS;
+            if (ext === "npz") return MIME_TYPES.NPZ;
+          }
+          if (result.mime === "application/gzip" && ext === "rds") return MIME_TYPES.RDS;
+          return result.mime;
+        }
+      } catch (_) {
+        // Best-effort: fall through to the manual checks.
+      }
+    }
+
+    // 2. Manual magic bytes for formats not in file-type's signature list.
+    let header: Uint8Array = new Uint8Array(0);
+    try {
+      header = await this.readBlobBytes(blob.slice(0, 16));
+    } catch (_) {
+      // Best-effort: an unreadable header simply matches none of the signatures.
+    }
+    const headerStartsWith = (...signature: number[]): boolean =>
+      signature.every((byte, i) => header[i] === byte);
+
+    // Parquet: PAR1 at bytes 0–3
+    if (headerStartsWith(0x50, 0x41, 0x52, 0x31)) return MIME_TYPES.PARQUET;
+    // Arrow IPC: ARROW1 at bytes 0–5
+    if (headerStartsWith(0x41, 0x52, 0x52, 0x4f, 0x57, 0x31)) return MIME_TYPES.ARROW;
+    // HDF5: \x89HDF\r\n\x1a\n at bytes 0–7
+    if (headerStartsWith(0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a)) {
+      // Refine HDF5 sub-types by extension (all use identical magic bytes)
+      if (ext === "h5ad") return MIME_TYPES.H5AD;
+      if (ext === "h5seurat") return MIME_TYPES.H5SEURAT;
+      if (ext === "loom") return MIME_TYPES.LOOM;
+      return MIME_TYPES.HDF5;
+    }
+    // NumPy .npy: \x93NUMPY at bytes 0–5
+    if (headerStartsWith(0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59)) return MIME_TYPES.NPY;
+    // GGUF: ASCII "GGUF" at bytes 0–3
+    if (headerStartsWith(0x47, 0x47, 0x55, 0x46)) return MIME_TYPES.GGUF;
+
+    // 3. Extension-based fallback for opaque binaries lacking reliable magic bytes
+    if (ext === "safetensors") return MIME_TYPES.SAFETENSORS;
+    if (ext === "onnx") return MIME_TYPES.ONNX;
+
+    // 4. Python pickle: \x80 (PROTO opcode) + protocol byte (2..5). Only two bytes are
+    //    checked, so this is deliberately tried after the extension fallback above.
+    if (header[0] === 0x80 && header[1] >= 0x02 && header[1] <= 0x05) {
+      return MIME_TYPES.PICKLE;
+    }
+
+    // 5. Text sniffing for formats with no fixed magic bytes
+    try {
+      const sample = await this.readBlobText(blob.slice(0, 4096));
+      const trimmed = sample.trimStart();
+      const firstLine = trimmed.split("\n")[0] ?? "";
+
+      if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
+        return MIME_TYPES.JSON;
+      }
+      if (trimmed.startsWith("# ") || trimmed.startsWith("## ")) {
+        return MIME_TYPES.MD;
+      }
+      // VCF: header line starts with ##fileformat=VCF
+      if (firstLine.startsWith("##fileformat=VCF")) {
+        return MIME_TYPES.VCF;
+      }
+      // FASTA: first non-blank line starts with '>'
+      if (firstLine.startsWith(">")) {
+        return MIME_TYPES.FASTA;
+      }
+      // FASTQ: 4-line record pattern — line 1 starts '@', line 3 starts '+'
+      const lines = trimmed.split("\n");
+      if (lines.length >= 4 && lines[0].startsWith("@") && lines[2].startsWith("+")) {
+        return MIME_TYPES.FASTQ;
+      }
+      // CSV heuristic: first line has at least 3 comma-separated fields
+      if (firstLine.split(",").length >= 3) {
+        return MIME_TYPES.CSV;
+      }
+      // Plain text: no control bytes other than tab/LF/CR, and the bytes decode as UTF-8.
+      const bytes = await this.readBlobBytes(blob.slice(0, 512));
+      const hasControlByte = bytes.some(b => b === 127 || (b < 32 && b !== 9 && b !== 10 && b !== 13));
+      if (!hasControlByte) {
+        // `fatal` makes malformed UTF-8 throw (handled by the catch below); `stream`
+        // tolerates a multi-byte character cut in half by the 512-byte slice boundary.
+        new TextDecoder("utf-8", { fatal: true }).decode(bytes, { stream: true });
+        return MIME_TYPES.TXT;
+      }
+    } catch (_) {
+      // Unreadable, or not valid UTF-8: treat as unidentified binary.
+    }
+
+    return MIME_TYPES.OCTET_STREAM;
+  }
+
+  /**
+   * Parse a NumPy .npy header from the first 4096 bytes of a blob.
+   *
+   * @returns The fields that could be extracted from the header dictionary (`dtype`,
+   *          `shape`, `byteOrder` derived from the dtype prefix, `fortranOrder`), each
+   *          `undefined` when not found; or `null` when the blob cannot be read, is too
+   *          short, or does not start with the `\x93NUMPY` magic.
+   */
+  private async parseNpyHeader(
+    blob: Blob
+  ): Promise<{ dtype?: string; shape?: number[]; byteOrder?: string; fortranOrder?: boolean } | null> {
+    try {
+      const head = await this.readBlobBytes(blob.slice(0, 4096));
+      // bytes 0-5: magic, byte 6: major, byte 7: minor
+      const magic = [0x93, 0x4e, 0x55, 0x4d, 0x50, 0x59];
+      // Need at least magic (6) + version (2) + the v1 length field (2).
+      if (head.length < 10 || !magic.every((byte, i) => head[i] === byte)) return null;
+      const major = head[6];
+      if (major >= 2 && head.length < 12) return null;
+      // v1.0: uint16 LE header length at bytes 8-9; v2.0+: uint32 LE at bytes 8-11.
+      // `>>> 0` keeps the 32-bit value unsigned (a set top bit would otherwise make it negative).
+      const headerLen =
+        major >= 2 ? (head[8] | (head[9] << 8) | (head[10] << 16) | (head[11] << 24)) >>> 0 : head[8] | (head[9] << 8);
+      const headerStart = major >= 2 ? 12 : 10;
+      // `slice` clamps to the bytes actually read, so a header longer than the 4096-byte
+      // window is parsed from its visible prefix only.
+      const headerText = new TextDecoder().decode(head.slice(headerStart, headerStart + headerLen));
+      const dtypeMatch = headerText.match(/['"]descr['"]\s*:\s*['"]([^'"]+)['"]/);
+      const shapeMatch = headerText.match(/['"]shape['"]\s*:\s*\(([^)]*)\)/);
+      const fortranMatch = headerText.match(/['"]fortran_order['"]\s*:\s*(True|False)/);
+      const shape = shapeMatch
+        ? shapeMatch[1].split(",").map(s => s.trim()).filter(s => s.length > 0).map(Number)
+        : undefined;
+      const dtype = dtypeMatch?.[1];
+      // dtype prefix: '<' = little-endian, '>' = big-endian, '|' = byte order N/A, '=' = native
+      let byteOrder: string | undefined;
+      if (dtype) {
+        if (dtype.startsWith("<")) byteOrder = "little-endian";
+        else if (dtype.startsWith(">")) byteOrder = "big-endian";
+        else if (dtype.startsWith("|")) byteOrder = "n/a";
+      }
+      const fortranOrder = fortranMatch ? fortranMatch[1] === "True" : undefined;
+      return { dtype, shape, byteOrder, fortranOrder };
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Parse a Safetensors file header: an 8-byte little-endian length followed by a JSON
+   * object that maps tensor names to `{ dtype, shape, ... }` plus an optional
+   * `__metadata__` entry.
+   *
+   * @returns Tensor count, total parameter count, up to 5 tensor names, parameters per
+   *          dtype (largest first), the largest tensor, and up to 6 `__metadata__`
+   *          entries; or `null` when the header is missing, implausibly large
+   *          (> 100 MB), or not valid JSON.
+   */
+  private async parseSafetensorsHeader(blob: Blob): Promise<{
+    tensorCount: number;
+    parameterCount: number;
+    sampleNames: string[];
+    dtypeBreakdown: { dtype: string; params: number }[];
+    largestTensor?: { name: string; shape: number[]; params: number };
+    metadata?: { key: string; value: string }[];
+  } | null> {
+    try {
+      const lenBytes = await this.readBlobBytes(blob.slice(0, 8));
+      // uint64 LE — JS can read up to 53 bits safely; header is always small (KB-MB)
+      let headerLen = 0;
+      for (let i = 0; i < 8; i++) headerLen += lenBytes[i] * Math.pow(256, i);
+      if (headerLen <= 0 || headerLen > 100 * 1024 * 1024) return null;
+      const headerText = await this.readBlobText(blob.slice(8, 8 + headerLen));
+      const json = JSON.parse(headerText);
+      const names = Object.keys(json).filter(k => k !== "__metadata__");
+      let paramCount = 0;
+      const dtypeMap: Record<string, number> = {};
+      let largest: { name: string; shape: number[]; params: number } | undefined;
+      for (const name of names) {
+        const rawShape: unknown = json[name]?.shape;
+        const hasShape = Array.isArray(rawShape);
+        // A malformed (non-array) shape is treated like a missing one instead of
+        // aborting the whole parse.
+        const shape: number[] = Array.isArray(rawShape) ? rawShape : [];
+        const dtype: string = json[name]?.dtype ?? "?";
+        // The empty product is 1, so a rank-0 tensor (shape []) counts as one element.
+        // An entry without a usable shape contributes nothing.
+        const params = hasShape ? shape.reduce((a, b) => a * b, 1) : 0;
+        paramCount += params;
+        dtypeMap[dtype] = (dtypeMap[dtype] ?? 0) + params;
+        if (!largest || params > largest.params) largest = { name, shape, params };
+      }
+      const dtypeBreakdown = Object.entries(dtypeMap)
+        .sort((a, b) => b[1] - a[1])
+        .map(([dtype, params]) => ({ dtype, params }));
+      const meta = (json.__metadata__ ?? {}) as Record<string, string>;
+      const metadata = Object.entries(meta)
+        .slice(0, 6)
+        .map(([key, value]) => ({ key, value: String(value) }));
+      return {
+        tensorCount: names.length,
+        parameterCount: paramCount,
+        sampleNames: names.slice(0, 5),
+        dtypeBreakdown,
+        largestTensor: largest,
+        metadata: metadata.length > 0 ? metadata : undefined,
+      };
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Parse the fixed 24-byte prefix of a GGUF (llama.cpp model) file.
+   *
+   * Layout as read here: bytes 0-3 "GGUF" magic, bytes 4-7 version (uint32 LE),
+   * bytes 8-15 tensor count (uint64 LE), bytes 16-23 metadata key/value count (uint64 LE).
+   *
+   * @returns Version and counts, or `null` when the blob cannot be read, is shorter
+   *          than 24 bytes, or does not start with the GGUF magic.
+   */
+  private async parseGgufHeader(
+    blob: Blob
+  ): Promise<{ version: number; tensorCount: number; metadataKvCount: number } | null> {
+    try {
+      const head = await this.readBlobBytes(blob.slice(0, 24));
+      // A short read would otherwise turn the counts below into NaN.
+      if (head.length < 24) return null;
+      const magic = [0x47, 0x47, 0x55, 0x46]; // "GGUF"
+      if (!magic.every((byte, i) => head[i] === byte)) return null;
+
+      // uint64 LE accumulated as a double: exact up to 2^53, far beyond any real count.
+      const readUint64LE = (offset: number): number => {
+        let value = 0;
+        for (let i = 0; i < 8; i++) value += head[offset + i] * Math.pow(256, i);
+        return value;
+      };
+
+      // `>>> 0` keeps the 32-bit version unsigned.
+      const version = (head[4] | (head[5] << 8) | (head[6] << 16) | (head[7] << 24)) >>> 0;
+      const tensorCount = readUint64LE(8);
+      const metadataKvCount = readUint64LE(16);
+      return { version, tensorCount, metadataKvCount };
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Reads a blob fully into memory via FileReader.
+   * Resolves with the bytes as a Uint8Array; rejects with the reader's error.
+   */
+  private readBlobBytes(blob: Blob): Promise<Uint8Array> {
+    return new Promise<Uint8Array>((fulfil, fail) => {
+      const fileReader = new FileReader();
+      fileReader.onerror = () => fail(fileReader.error);
+      fileReader.onload = () => {
+        const buffer = fileReader.result as ArrayBuffer;
+        fulfil(new Uint8Array(buffer));
+      };
+      fileReader.readAsArrayBuffer(blob);
+    });
+  }
+
+  /**
+   * Reads a blob as text via FileReader (`readAsText` with its default encoding).
+   * Resolves with the decoded string; rejects with the reader's error.
+   */
+  private readBlobText(blob: Blob): Promise<string> {
+    return new Promise<string>((fulfil, fail) => {
+      const fileReader = new FileReader();
+      fileReader.onerror = () => fail(fileReader.error);
+      fileReader.onload = () => {
+        const text = fileReader.result as string;
+        fulfil(text);
+      };
+      fileReader.readAsText(blob);
+    });
+  }
+
+  /**
+   * Whether a MIME type is one the previewer can render or at least describe.
+   * Everything qualifies except `MIME_TYPES.OCTET_STREAM`, i.e. unidentified binary.
+   */
+  isPreviewSupported(mimeType: string): boolean {
+    const isUnidentifiedBinary = mimeType === MIME_TYPES.OCTET_STREAM;
+    return !isUnidentifiedBinary;
+  }
+
+  /**
+   * Flattens `fileMetadata` into ordered label/value pairs for display.
+   * Unset fields (and, for some counters, zero values) are skipped; yields [] when no metadata is set.
+   */
+  get metadataItems(): { label: string; value: string }[] {
+    const m = this.fileMetadata;
+    if (!m) return [];
+    const items: { label: string; value: string }[] = [];
+
+    if (m.imageWidth != null && m.imageHeight != null) {
+      items.push({ label: "Dimensions", value: `${m.imageWidth} × ${m.imageHeight} px` });
+      const gcd = (a: number, b: number): number => (b === 0 ? a : gcd(b, a % b));
+      const g = gcd(m.imageWidth, m.imageHeight); // 0 only when both dimensions are 0
+      if (g > 0) items.push({ label: "Aspect ratio", value: `${m.imageWidth / g}:${m.imageHeight / g}` });
+    }
+
+    if (m.videoDuration != null) items.push({ label: "Duration", value: formatDuration(m.videoDuration) });
+    if (m.videoWidth != null && m.videoHeight != null)
+      items.push({ label: "Resolution", value: `${m.videoWidth} × ${m.videoHeight}` });
+
+    if (m.audioDuration != null) items.push({ label: "Duration", value: formatDuration(m.audioDuration) });
+
+    if (m.rowCount != null) items.push({ label: "Rows", value: m.rowCount.toLocaleString() });
+    if (m.columnCount != null) items.push({ label: "Columns", value: m.columnCount.toLocaleString() });
+    if (m.sheetCount != null) items.push({ label: "Sheets", value: m.sheetCount.toLocaleString() });
+    if (m.columnNames?.length) {
+      const preview = m.columnNames.slice(0, 8).join(", ");
+      const more = m.columnNames.length > 8 ? ` +${m.columnNames.length - 8} more` : "";
+      items.push({ label: "Fields", value: preview + more });
+    }
+
+    if (m.jsonTopLevelType != null) {
+      const label = m.jsonTopLevelType === "array" ? "Items" : "Keys";
+      items.push({ label: "JSON", value: m.jsonTopLevelType });
+      if (m.jsonItemCount != null) items.push({ label, value: m.jsonItemCount.toLocaleString() });
+      if (m.jsonPreviewKeys?.length) items.push({ label: "Preview", value: m.jsonPreviewKeys.join(", ") });
+    }
+
+    if (m.lineCount != null) items.push({ label: "Lines", value: m.lineCount.toLocaleString() });
+    if (m.wordCount != null) items.push({ label: "Words", value: m.wordCount.toLocaleString() });
+    if (m.charCount != null) items.push({ label: "Characters", value: m.charCount.toLocaleString() });
+    if (m.headingCount != null) items.push({ label: "Headings", value: m.headingCount.toLocaleString() });
+
+    if (m.pageCount != null) items.push({ label: "Pages", value: `~${m.pageCount}` });
+
+    // ML / scientific
+    if (m.modelFormat) items.push({ label: "Format", value: m.modelFormat });
+    if (m.containerFormat) items.push({ label: "Container", value: m.containerFormat });
+    if (m.dtype) items.push({ label: "dtype", value: m.dtype });
+    if (m.shape?.length) items.push({ label: "Shape", value: `(${m.shape.join(", ")})` });
+    if (m.tensorCount != null) items.push({ label: "Tensors", value: m.tensorCount.toLocaleString() });
+    if (m.parameterCount != null) items.push({ label: "Parameters", value: `~${m.parameterCount.toLocaleString()}` });
+    if (m.sampleTensorNames?.length)
+      items.push({ label: "Tensors (first)", value: m.sampleTensorNames.join(", ") });
+    if (m.ggufVersion != null) items.push({ label: "GGUF version", value: `v${m.ggufVersion}` });
+    if (m.metadataKvCount != null) items.push({ label: "Metadata KV", value: m.metadataKvCount.toLocaleString() });
+
+    // JSON schema details
+    if (m.jsonMaxDepth != null) items.push({ label: "Max depth", value: m.jsonMaxDepth.toLocaleString() });
+    if (m.jsonArrayElementType) items.push({ label: "Element type", value: m.jsonArrayElementType });
+    if (m.jsonKeyTypes?.length) {
+      items.push({
+        label: "Schema",
+        value: m.jsonKeyTypes.map(kt => `${kt.key}: ${kt.type}`).join(", "),
+      });
+    }
+
+    // PDF /Info
+    if (m.pdfVersion) items.push({ label: "PDF version", value: m.pdfVersion });
+    if (m.pdfTitle) items.push({ label: "Title", value: m.pdfTitle });
+    if (m.pdfAuthor) items.push({ label: "Author", value: m.pdfAuthor });
+    if (m.pdfCreator) items.push({ label: "Creator", value: m.pdfCreator });
+    if (m.pdfProducer) items.push({ label: "Producer", value: m.pdfProducer });
+    if (m.pdfEncrypted) items.push({ label: "Encrypted", value: "Yes" });
+
+    // Markdown structure
+    if (m.codeBlockCount) items.push({ label: "Code blocks", value: m.codeBlockCount.toLocaleString() });
+    if (m.linkCount) items.push({ label: "Links", value: m.linkCount.toLocaleString() });
+    if (m.imageCount) items.push({ label: "Images", value: m.imageCount.toLocaleString() });
+    if (m.listItemCount) items.push({ label: "List items", value: m.listItemCount.toLocaleString() });
+
+    // Plain text encoding/structure
+    if (m.encoding) items.push({ label: "Encoding", value: m.encoding });
+    if (m.emptyLineCount != null && m.emptyLineCount > 0)
+      items.push({ label: "Blank lines", value: m.emptyLineCount.toLocaleString() });
+    if (m.avgLineLength != null && m.avgLineLength > 0)
+      items.push({ label: "Avg line", value: `${Math.round(m.avgLineLength)} chars` });
+    if (m.maxLineLength != null && m.maxLineLength > 0)
+      items.push({ label: "Max line", value: `${m.maxLineLength.toLocaleString()} chars` });
+
+    // NumPy details
+    if (m.totalElements != null) items.push({ label: "Elements", value: m.totalElements.toLocaleString() });
+    if (m.byteOrder) items.push({ label: "Byte order", value: m.byteOrder });
+    if (m.fortranOrder != null) items.push({ label: "Order", value: m.fortranOrder ? "Fortran (column)" : "C (row)" });
+
+    // Safetensors details
+    if (m.dtypeBreakdown?.length) {
+      items.push({
+        label: "Dtypes",
+        value: m.dtypeBreakdown.map(d => `${d.dtype}: ${d.params.toLocaleString()}`).join(", "),
+      });
+    }
+    if (m.largestTensor) {
+      items.push({
+        label: "Largest tensor",
+        value: `${m.largestTensor.name} (${m.largestTensor.shape.join("×")}, ${m.largestTensor.params.toLocaleString()} params)`,
+      });
+    }
+    for (const kv of m.safetensorsMetadata ?? []) items.push({ label: kv.key, value: kv.value });
+
+    // GGUF details
+    if (m.ggufArchitecture) items.push({ label: "Architecture", value: m.ggufArchitecture });
+    if (m.ggufQuantization) items.push({ label: "Quantization", value: m.ggufQuantization });
+
+    // Bioinformatics
+    if (m.sequenceCount != null) {
+      const label = m.sequenceCountIsExact ? "Sequences" : "Sequences (sampled)";
+      items.push({ label, value: m.sequenceCount.toLocaleString() });
+    }
+    if (m.variantCount != null) {
+      const label = m.variantCountIsExact ? "Variants" : "Variants (sampled)";
+      items.push({ label, value: m.variantCount.toLocaleString() });
+    }
+    if (m.totalBases != null) items.push({ label: "Total bases", value: m.totalBases.toLocaleString() });
+    if (m.gcContent != null) items.push({ label: "GC content", value: `${(m.gcContent * 100).toFixed(1)}%` });
+    if (m.isProtein) items.push({ label: "Sequence type", value: "Protein" });
+    if (m.minSequenceLength != null && m.maxSequenceLength != null) {
+      items.push({
+        label: "Length range",
+        value: `${m.minSequenceLength.toLocaleString()}–${m.maxSequenceLength.toLocaleString()} (avg ${Math.round(
+          m.avgSequenceLength ?? 0
+        ).toLocaleString()})`,
+      });
+    }
+    if (m.vcfSampleCount != null && m.vcfSampleCount > 0)
+      items.push({ label: "Samples", value: m.vcfSampleCount.toLocaleString() });
+    if (m.vcfChromosomes?.length)
+      items.push({ label: "Chromosomes", value: m.vcfChromosomes.slice(0, 8).join(", ") });
+
+    return items;
+  }
+
+  /**
+   * Selects the preview mode for `blob` from `mimeType` and records `fileMetadata`.
+   * Most branches finish asynchronously and call `cdr.markForCheck()` once their metadata is ready.
+   */
+  private renderByMimeType(blob: Blob, mimeType: string): void {
+    if (mimeType.startsWith("image/")) {
+      this.displayImage = true;
+      this.loadSafeURL(blob);
+      this.fileMetadata = { fileSize: blob.size };
+      const img = new Image();
+      img.onload = () => {
+        this.fileMetadata = { ...this.fileMetadata, imageWidth: img.naturalWidth, imageHeight: img.naturalHeight };
+        this.cdr.markForCheck();
+      };
+      img.src = this.fileURL!;
+      return;
+    }
+
+    if (mimeType.startsWith("video/")) {
+      this.displayMP4 = true;
+      this.loadSafeURL(blob);
+      this.fileMetadata = { fileSize: blob.size };
+      const video = document.createElement("video");
+      video.preload = "metadata";
+      video.onloadedmetadata = () => {
+        this.fileMetadata = {
+          ...this.fileMetadata,
+          videoDuration: video.duration,
+          videoWidth: video.videoWidth,
+          videoHeight: video.videoHeight,
+        };
+        this.cdr.markForCheck();
+      };
+      video.src = this.fileURL!; // probe through the preview's object URL rather than minting a second one
+      return;
+    }
+
+    if (mimeType.startsWith("audio/")) {
+      this.displayMP3 = true;
+      this.loadSafeURL(blob);
+      this.fileMetadata = { fileSize: blob.size };
+      const audio = document.createElement("audio");
+      audio.preload = "metadata";
+      audio.onloadedmetadata = () => {
+        this.fileMetadata = { ...this.fileMetadata, audioDuration: audio.duration };
+        this.cdr.markForCheck();
+      };
+      audio.src = this.fileURL!; // probe through the preview's object URL rather than minting a second one
+      return;
+    }
+
+    switch (mimeType) {
+      case MIME_TYPES.PDF:
+        this.displayPDF = true;
+        this.loadSafeURL(blob);
+        this.fileMetadata = { fileSize: blob.size };
+        // First 200KB for /Info + version + page count; last 50KB for the trailer, clipped to never overlap the head
+        Promise.all([
+          this.readBlobText(blob.slice(0, 200 * 1024)),
+          this.readBlobText(blob.slice(Math.max(200 * 1024, blob.size - 50 * 1024))),
+        ]).then(([head, tail]) => {
+          const combined = head + "\n" + tail;
+          const exact = (combined.match(/\/Type\s*\/Page\b/g) ?? []).length;
+          const fallback = Math.ceil((combined.match(/\/Page\b/g) ?? []).length / 2);
+          const pageCount = exact > 0 ? exact : fallback || undefined;
+          const info = extractPdfInfo(combined);
+          this.fileMetadata = {
+            ...this.fileMetadata,
+            pageCount,
+            pdfTitle: info.title,
+            pdfAuthor: info.author,
+            pdfCreator: info.creator,
+            pdfProducer: info.producer,
+            pdfVersion: info.version,
+            pdfEncrypted: info.encrypted,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+
+      case MIME_TYPES.MSEXCEL:
+      case MIME_TYPES.XLSX:
+        Promise.all([readXlsxFile(blob), readSheetNames(blob)]).then(([rows, sheetNames]) => {
+          const parsedData = rows.map(row => row.map(cell => (cell != null ? cell.toString() : "")));
+          if (parsedData.length > 0) {
+            this.loadTabularFile(parsedData);
+            this.displayXlsx = true;
+            const header = parsedData[0];
+            const dataRows = parsedData.slice(1).filter(r => r.some(c => c !== ""));
+            const schema = inferColumnSchema(dataRows, header.length);
+            this.fileMetadata = {
+              fileSize: blob.size,
+              rowCount: dataRows.length,
+              columnCount: header.length,
+              columnNames: header,
+              sheetCount: sheetNames.length,
+              columnTypes: schema.types,
+              nullCounts: schema.nullCounts,
+              sampleValues: schema.samples,
+            };
+            this.cdr.markForCheck();
+          }
+        }).catch(() => this.onFileLoadingError()); // unreadable workbook: report it instead of leaving a blank pane
+        break;
+
+      case MIME_TYPES.CSV: {
+        this.displayCSV = true;
+        const { slice: csvSlice, truncated: csvTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = csvTruncated;
+        // Papa.parse needs a File-like; build it from the slice only — no need to keep the full blob.
+        const fileToParse = new File([csvSlice], this.filePath, { type: MIME_TYPES.CSV });
+        Papa.parse(fileToParse, {
+          complete: (results: ParseResult<any>) => {
+            if (results.data.length > 0) {
+              this.loadTabularFile(results.data);
+              const header: string[] = results.data[0].map(String);
+              const dataRows = (results.data.slice(1) as any[][])
+                .filter(r => r.some((c: any) => c !== ""))
+                .map(r => r.map((c: any) => (c == null ? "" : String(c))));
+              const schema = inferColumnSchema(dataRows, header.length);
+              this.fileMetadata = {
+                fileSize: blob.size,
+                rowCount: dataRows.length,
+                columnCount: header.length,
+                columnNames: header,
+                columnTypes: schema.types,
+                nullCounts: schema.nullCounts,
+                sampleValues: schema.samples,
+              };
+              this.cdr.markForCheck();
             }
           },
+          error: () => this.onFileLoadingError(),
+        });
+        break;
+      }
+
+      case MIME_TYPES.MD: {
+        this.displayMarkdown = true;
+        const { slice: mdSlice, truncated: mdTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = mdTruncated;
+        this.readBlobText(mdSlice).then(text => {
+          this.textContent = text;
+          const lines = text.split("\n");
+          // Fence markers come in pairs, so blocks = markers / 2; links and images are counted by regex
+          const codeBlockCount = (text.match(/^```/gm) ?? []).length / 2;
+          const linkCount = (text.match(/\[[^\]]*\]\([^)]+\)/g) ?? []).length;
+          const imageCount = (text.match(/!\[[^\]]*\]\([^)]+\)/g) ?? []).length;
+          const listItemCount = lines.filter(l => /^\s*[-*+]\s/.test(l) || /^\s*\d+\.\s/.test(l)).length;
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            wordCount: text.trim() ? text.trim().split(/\s+/).length : 0,
+            headingCount: lines.filter(l => /^#{1,6}\s/.test(l)).length,
+            codeBlockCount: Math.floor(codeBlockCount),
+            linkCount: linkCount - imageCount, // image syntax is link syntax + leading '!'
+            imageCount,
+            listItemCount,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.JSON: {
+        this.displayJson = true;
+        const { slice: jsonSlice, truncated: jsonTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = jsonTruncated;
+        this.readBlobText(jsonSlice).then(text => {
+          this.textContent = text;
+          try {
+            const parsed = JSON.parse(text);
+            if (parsed === null || typeof parsed !== "object") throw new TypeError("scalar top-level JSON");
+            const isArray = Array.isArray(parsed);
+            const keys = isArray ? null : Object.keys(parsed);
+            const maxDepth = jsonMaxDepth(parsed);
+            let jsonKeyTypes: { key: string; type: string }[] | undefined;
+            let jsonArrayElementType: string | undefined;
+            if (isArray && parsed.length > 0) {
+              const elementTypes = new Set(parsed.slice(0, 20).map(jsTypeLabel));
+              jsonArrayElementType = elementTypes.size === 1 ? [...elementTypes][0] : "mixed";
+            } else if (!isArray && keys) {
+              jsonKeyTypes = keys.slice(0, 8).map(k => ({ key: k, type: jsTypeLabel(parsed[k]) }));
+            }
+            this.fileMetadata = {
+              fileSize: blob.size,
+              jsonTopLevelType: isArray ? "array" : "object",
+              jsonItemCount: isArray ? parsed.length : keys!.length,
+              jsonPreviewKeys: isArray
+                ? parsed.slice(0, 5).map((_: unknown, i: number) => `[${i}]`)
+                : keys!.slice(0, 8),
+              jsonMaxDepth: maxDepth,
+              jsonKeyTypes,
+              jsonArrayElementType,
+            };
+          } catch {
+            // Truncated, invalid, or scalar top-level JSON — fall back to raw text view
+            this.fileMetadata = { fileSize: blob.size };
+          }
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.PARQUET:
+        this.detectedTypeMessage =
+          "Parquet file detected. Use the Parquet File Scan operator in Texera to analyze this data.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.ARROW:
+        this.detectedTypeMessage =
+          "Arrow/Feather file detected. Use the Arrow File Scan operator in Texera to analyze this data.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.DOCX:
+        this.detectedTypeMessage = "Word document (.docx) detected. Rich document preview is not yet supported.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.PPTX:
+        this.detectedTypeMessage = "PowerPoint (.pptx) detected. Presentation preview is not yet supported.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      // --- ML / scientific data formats ---
+
+      case MIME_TYPES.HDF5:
+        this.detectedTypeMessage =
+          "HDF5 binary container detected. Likely a model (Keras .h5) or scientific dataset. Load with h5py / rhdf5.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.H5AD:
+        this.detectedTypeMessage =
+          "AnnData (.h5ad) detected — single-cell expression matrix in HDF5. Load with scanpy.read_h5ad() in Python.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.H5SEURAT:
+        this.detectedTypeMessage =
+          "Seurat HDF5 object (.h5seurat) detected. Load with SeuratDisk::LoadH5Seurat() in R.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.LOOM:
+        this.detectedTypeMessage =
+          "Loom (.loom) detected — single-cell expression in HDF5. Load with loompy / scanpy in Python.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "HDF5" };
+        break;
+
+      case MIME_TYPES.RDS:
+        this.detectedTypeMessage =
+          "R serialized object (.rds) detected — commonly a Seurat / SingleCellExperiment / fitted model. Load with readRDS() in R.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "gzip" };
+        break;
+
+      case MIME_TYPES.PICKLE:
+        this.detectedTypeMessage =
+          "Python pickle detected — typically a serialized model (sklearn / joblib) or dataset. Load with pickle.load() in Python.";
+        this.fileMetadata = { fileSize: blob.size };
+        break;
+
+      case MIME_TYPES.PYTORCH:
+        this.detectedTypeMessage =
+          "PyTorch checkpoint (.pt/.pth) detected. Load with torch.load() in Python.";
+        this.fileMetadata = { fileSize: blob.size, modelFormat: "PyTorch", containerFormat: "ZIP archive" };
+        break;
+
+      case MIME_TYPES.KERAS:
+        this.detectedTypeMessage =
+          "Keras v3 model (.keras) detected. Load with tf.keras.models.load_model() in Python.";
+        this.fileMetadata = { fileSize: blob.size, modelFormat: "Keras", containerFormat: "ZIP archive" };
+        break;
+
+      case MIME_TYPES.ONNX:
+        this.detectedTypeMessage =
+          "ONNX model (.onnx) detected — portable neural network. Load with onnxruntime or netron.app for inspection.";
+        this.fileMetadata = { fileSize: blob.size, modelFormat: "ONNX" };
+        break;
+
+      case MIME_TYPES.NPY:
+        this.parseNpyHeader(blob).then(info => {
+          const shapeStr = info?.shape ? info.shape.join(" × ") : "?";
+          const totalElements = info?.shape?.reduce((a, b) => a * b, 1);
+          this.detectedTypeMessage = `NumPy array (.npy) detected — ${info?.dtype ?? "?"} array of shape (${shapeStr}).`;
+          this.fileMetadata = {
+            fileSize: blob.size,
+            dtype: info?.dtype,
+            shape: info?.shape,
+            totalElements,
+            byteOrder: info?.byteOrder,
+            fortranOrder: info?.fortranOrder,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+
+      case MIME_TYPES.NPZ:
+        this.detectedTypeMessage =
+          "NumPy archive (.npz) detected — ZIP of .npy arrays. Load with numpy.load() and access via dict-like API.";
+        this.fileMetadata = { fileSize: blob.size, containerFormat: "ZIP archive" };
+        break;
+
+      case MIME_TYPES.SAFETENSORS:
+        this.parseSafetensorsHeader(blob).then(info => {
+          if (info) {
+            const paramStr = info.parameterCount.toLocaleString();
+            this.detectedTypeMessage = `Safetensors model detected — ${info.tensorCount} tensors, ~${paramStr} parameters.`;
+            this.fileMetadata = {
+              fileSize: blob.size,
+              modelFormat: "Safetensors",
+              tensorCount: info.tensorCount,
+              parameterCount: info.parameterCount,
+              sampleTensorNames: info.sampleNames,
+              dtypeBreakdown: info.dtypeBreakdown,
+              largestTensor: info.largestTensor,
+              safetensorsMetadata: info.metadata,
+            };
+          } else {
+            this.detectedTypeMessage = "Safetensors file detected. Load with safetensors.torch.load_file() in Python.";
+            this.fileMetadata = { fileSize: blob.size, modelFormat: "Safetensors" };
+          }
+          this.cdr.markForCheck();
+        });
+        break;
+
+      case MIME_TYPES.GGUF:
+        this.parseGgufHeader(blob).then(info => {
+          if (info) {
+            this.detectedTypeMessage = `GGUF model detected — v${info.version}, ${info.tensorCount} tensors, ${info.metadataKvCount} metadata entries.`;
+            this.fileMetadata = {
+              fileSize: blob.size,
+              modelFormat: "GGUF",
+              ggufVersion: info.version,
+              tensorCount: info.tensorCount,
+              metadataKvCount: info.metadataKvCount,
+            };
+          } else {
+            this.detectedTypeMessage = "GGUF model detected (llama.cpp / quantized LLM format).";
+            this.fileMetadata = { fileSize: blob.size, modelFormat: "GGUF" };
+          }
+          this.cdr.markForCheck();
+        });
+        break;
+
+      // --- Bioinformatics text formats — render as plain text plus record-count metadata ---
+
+      case MIME_TYPES.FASTA: {
+        this.displayPlainText = true;
+        const { slice: faSlice, truncated: faTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = faTruncated;
+        this.readBlobText(faSlice).then(text => {
+          this.textContent = text;
+          const stats = summarizeFasta(text);
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: text.split("\n").length,
+            sequenceCount: stats.sequenceCount,
+            sequenceCountIsExact: !faTruncated,
+            totalBases: stats.totalBases,
+            gcContent: stats.isProtein ? undefined : stats.gcContent,
+            minSequenceLength: stats.minLen,
+            maxSequenceLength: stats.maxLen,
+            avgSequenceLength: stats.avgLen,
+            isProtein: stats.isProtein,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.FASTQ: {
+        this.displayPlainText = true;
+        const { slice: fqSlice, truncated: fqTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = fqTruncated;
+        this.readBlobText(fqSlice).then(text => {
+          this.textContent = text;
+          const lineCount = text.split("\n").filter(l => l.length > 0).length;
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: text.split("\n").length,
+            sequenceCount: Math.floor(lineCount / 4),
+            sequenceCountIsExact: !fqTruncated,
+          };
+          this.cdr.markForCheck();
+        });
+        break;
+      }
+
+      case MIME_TYPES.VCF: {
+        this.displayPlainText = true;
+        const { slice: vcfSlice, truncated: vcfTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = vcfTruncated;
+        this.readBlobText(vcfSlice).then(text => {
+          this.textContent = text;
+          const lines = text.split("\n");
+          const variantLines = lines.filter(l => l.length > 0 && !l.startsWith("#"));
+          // Sample names are tab-separated columns after the 9 fixed VCF fields on the #CHROM header line
+          const chromHeader = lines.find(l => l.startsWith("#CHROM"));
+          const headerFields = chromHeader ? chromHeader.split("\t") : [];
+          const vcfSampleCount = headerFields.length > 9 ? headerFields.length - 9 : 0;
+          const chromSet = new Set<string>();
+          for (const line of variantLines.slice(0, 5000)) {
+            const chr = line.split("\t", 1)[0];
+            if (chr) chromSet.add(chr);
+            if (chromSet.size >= 30) break;
+          }
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            variantCount: variantLines.length,
+            variantCountIsExact: !vcfTruncated,
+            vcfSampleCount,
+            vcfChromosomes: [...chromSet].slice(0, 12),
+          };
+          this.cdr.markForCheck();
         });
+        break;
+      }
+
+      case MIME_TYPES.OCTET_STREAM:
+        this.onFileTypePreviewUnsupported();
+        break;
+
+      default: {
+        this.displayPlainText = true;
+        const { slice: txtSlice, truncated: txtTruncated } = this.getPreviewSlice(blob);
+        this.previewTruncated = txtTruncated;
+        Promise.all([this.readBlobBytes(blob.slice(0, 3)), this.readBlobText(txtSlice)]).then(([head, text]) => {
+          this.textContent = text;
+          const lines = text.split("\n");
+          const lineLens = lines.map(l => l.length);
+          const totalLen = lineLens.reduce((a, b) => a + b, 0);
+          const emptyLineCount = lineLens.filter(n => n === 0).length;
+          const maxLineLength = lineLens.reduce((a, b) => Math.max(a, b), 0); // no spread: many lines overflow the stack
+          // BOM detection: UTF-8 BOM is EF BB BF; otherwise assume ASCII/UTF-8
+          let encoding = "UTF-8";
+          if (head[0] === 0xef && head[1] === 0xbb && head[2] === 0xbf) encoding = "UTF-8 BOM";
+          else if (lines.every(l => /^[\x00-\x7F]*$/.test(l))) encoding = "ASCII";
+          this.fileMetadata = {
+            fileSize: blob.size,
+            lineCount: lines.length,
+            wordCount: text.trim() ? text.trim().split(/\s+/).length : 0,
+            charCount: text.length,
+            emptyLineCount,
+            avgLineLength: lines.length > 0 ? totalLen / lines.length : 0,
+            maxLineLength,
+            encoding,
+          };
+          this.cdr.markForCheck();
+        });
+      }
     }
   }
 
@@ -305,17 +1646,26 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
     this.displayJson = false;
     this.displayMP4 = false;
     this.displayMP3 = false;
+    this.displayPDF = false;
+    this.detectedTypeMessage = "";
+    this.fileMetadata = undefined;
     this.isLoading = false;
     this.isFileLoadingError = false;
     this.isFileSizeUnloadable = false;
     this.isFileTypePreviewUnsupported = false;
-    // garbage collection
     if (this.fileURL) {
       URL.revokeObjectURL(this.fileURL);
     }
-    if (this.safeFileURL) {
-      URL.revokeObjectURL(this.safeFileURL.toString());
-    }
+    this.fileURL = undefined;
+    this.safeFileURL = undefined;
+    this.safeResourceFileURL = undefined;
+    // Clear cached content so memory is reclaimed when switching files; without these,
+    // a previously-loaded 10 MB text or 100K-row table would persist on the component.
+    this.textContent = "";
+    this.tableContent = [];
+    this.tableDataHeader = [];
+    this.currentFile = undefined;
+    this.previewTruncated = false;
   }
 
   onFileLoadingError() {
@@ -333,49 +1683,37 @@ export class UserDatasetFileRendererComponent implements OnInit, OnChanges, OnDe
     this.isFileTypePreviewUnsupported = true;
   }
 
-  isPreviewSupported(mimeType: string) {
-    return mimeType !== MIME_TYPES.OCTET_STREAM && Object.hasOwnProperty.call(MIME_TYPE_SIZE_LIMITS_MB, mimeType);
-  }
-
-  private readFileAsText(blob: Blob) {
-    const txtReader = new FileReader();
-    txtReader.onload = (event: any) => {
-      this.textContent = event.target.result;
-    };
-    txtReader.readAsText(blob);
+  /**
+   * Builds the notice shown in place of a preview for very large files: the extension-based
+   * hint from TYPE_LOADING_HINTS when one exists, otherwise a generic size message.
+   * This method only sets the message and requests a view refresh; it downloads nothing.
+   */
+  private showOversizedFileInfo(extensionMime: string): void {
+    const typeHint = TYPE_LOADING_HINTS[extensionMime];
+    const readableSize = this.fileSize == null ? "very large" : formatSize(this.fileSize);
+    this.detectedTypeMessage = !typeHint
+      ? `File is ${readableSize} — full preview skipped to avoid browser lag. Open in a workflow operator to analyze.`
+      : `${typeHint}  (Preview skipped — file is ${readableSize}.)`;
+    this.cdr.markForCheck();
   }
 
-  private loadSafeURL(blob: Blob) {
+  private loadSafeURL(blob: Blob): void {
     this.fileURL = URL.createObjectURL(blob);
     this.safeFileURL = this.sanitizer.bypassSecurityTrustUrl(this.fileURL);
+    this.safeResourceFileURL = this.sanitizer.bypassSecurityTrustResourceUrl(this.fileURL);
   }
 
-  private loadTabularFile(data: any[][]) {
+
+  private loadTabularFile(data: any[][]): void {
     if (data.length > 0) {
-      // Extract the header (first row)
       this.tableDataHeader = data[0];
-
-      // Process the rest of the rows
       this.tableContent = data
         .slice(1)
         .map(row => {
-          // Normalize the row length to match the header length
-          while (row.length < this.tableDataHeader.length) {
-            row.push("");
-          }
+          while (row.length < this.tableDataHeader.length) row.push("");
           return row;
         })
-        .filter(row => {
-          // filter out all empty row
-          let areCellAllEmpty = true;
-          for (const cell in row) {
-            if (cell != "") {
-              areCellAllEmpty = false;
-              break;
-            }
-          }
-          return !areCellAllEmpty;
-        });
+        .filter(row => row.some(cell => cell !== ""));
     }
   }
 }
diff --git a/frontend/src/app/workspace/component/workspace.component.ts b/frontend/src/app/workspace/component/workspace.component.ts
index 9968c26f647..e96a53959ea 100644
--- a/frontend/src/app/workspace/component/workspace.component.ts
+++ b/frontend/src/app/workspace/component/workspace.component.ts
@@ -36,6 +36,7 @@ import { Workflow } from "../../common/type/workflow";
 import { OperatorMetadataService } from "../service/operator-metadata/operator-metadata.service";
 import { UndoRedoService } from "../service/undo-redo/undo-redo.service";
 import { WorkflowActionService } from "../service/workflow-graph/model/workflow-action.service";
+import { WorkflowUtilService } from "../service/workflow-graph/util/workflow-util.service";
 import { NzMessageService } from "ng-zorro-antd/message";
 import { debounceTime, distinctUntilChanged, filter, switchMap, throttleTime } from "rxjs/operators";
 import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy";
@@ -117,6 +118,7 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy {
     private undoRedoService: UndoRedoService,
     private workflowPersistService: WorkflowPersistService,
     private workflowActionService: WorkflowActionService,
+    private workflowUtilService: WorkflowUtilService,
     private location: Location,
     private route: ActivatedRoute,
     private operatorMetadataService: OperatorMetadataService,
@@ -186,6 +188,38 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy {
     this.workflowActionService.clearWorkflow();
   }
 
+  /**
+   * Reads `addOp` + `fileName` query params (set by the dataset file renderer's "Open in
+   * workflow" button) and adds the corresponding scan operator with its `fileName` property
+   * prefilled. Does nothing, and leaves the URL untouched, when either param is missing.
+   * Otherwise, whether or not the addition succeeds, only these two params are stripped from the
+   * URL so a refresh doesn't double-add; unrelated query params and the fragment are preserved.
+   *
+   * Runs after `loadWorkflowWithId` completes, so the operator metadata is loaded and the
+   * workflow graph is ready for modification.
+   */
+  handlePendingOperatorAddition(): void {
+    const { addOp, fileName, ...otherParams } = this.route.snapshot.queryParams;
+    if (!addOp || !fileName) return;
+    try {
+      const operator = this.workflowUtilService.getNewOperatorPredicate(addOp);
+      // Place near the upper-left of the visible viewport.
+      const origin = this.workflowActionService.getJointGraphWrapper().getMainJointPaper()?.translate();
+      const point = { x: 400 - (origin?.tx ?? 0), y: 200 - (origin?.ty ?? 0) };
+      this.workflowActionService.addOperator(operator, point);
+      // Set the file path through the schema-validated mutation API (operatorProperties is readonly).
+      this.workflowActionService.setOperatorProperty(operator.operatorID, {
+        ...operator.operatorProperties,
+        fileName,
+      });
+    } catch (err) {
+      this.notificationService.error(`Could not pre-add operator "${addOp}" — drag it in manually.`);
+    } finally {
+      // Re-navigate with only the unrelated params so a manual refresh doesn't re-add the operator.
+      this.router.navigate([], { relativeTo: this.route, queryParams: otherParams, preserveFragment: true });
+    }
+  }
+
   registerAutoPersistWorkflow(): void {
     // make sure it is only registered once
     if (this.autoPersistRegistered) {
@@ -260,6 +294,9 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy {
           this.setLoadingState(false);
           this.registerAutoPersistWorkflow();
           this.triggerCenter();
+          // If the user arrived via "Open in workflow" from the dataset file renderer,
+          // honor the addOp + fileName query params now that the workflow is fully loaded.
+          this.handlePendingOperatorAddition();
         },
         () => {
           this.workflowActionService.resetAsNewWorkflow();
diff --git a/frontend/yarn.lock b/frontend/yarn.lock
index 6a4ae4330c4..2d2851d7885 100644
--- a/frontend/yarn.lock
+++ b/frontend/yarn.lock
@@ -2059,6 +2059,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@borewit/text-codec@npm:^0.2.1":
+  version: 0.2.2
+  resolution: "@borewit/text-codec@npm:0.2.2"
+  checksum: 10c0/2d3fb132bc6a132914a8fbf8e9ff2fa1ead210ecc395b28bb7355bd7719548a5e351ffe39f21c3bee8048f6cabd99eabd404bb5cc809cad9cba25abed19d271f
+  languageName: node
+  linkType: hard
+
 "@bufbuild/protobuf@npm:^2.0.0, @bufbuild/protobuf@npm:^2.5.0":
   version: 2.12.0
   resolution: "@bufbuild/protobuf@npm:2.12.0"
@@ -5869,6 +5876,23 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@tokenizer/inflate@npm:^0.4.1":
+  version: 0.4.1
+  resolution: "@tokenizer/inflate@npm:0.4.1"
+  dependencies:
+    debug: "npm:^4.4.3"
+    token-types: "npm:^6.1.1"
+  checksum: 10c0/9817516efe21d1ce3bdfb80a1f94efc8981064ce3873448ba79f4d81d96c0694c484c289bd042d346ae5536cf77f5aa9a367d39c3df700eb610761b7c306b4de
+  languageName: node
+  linkType: hard
+
+"@tokenizer/token@npm:^0.3.0":
+  version: 0.3.0
+  resolution: "@tokenizer/token@npm:0.3.0"
+  checksum: 10c0/7ab9a822d4b5ff3f5bca7f7d14d46bdd8432528e028db4a52be7fbf90c7f495cc1af1324691dda2813c6af8dc4b8eb29de3107d4508165f9aa5b53e7d501f155
+  languageName: node
+  linkType: hard
+
 "@tsconfig/node10@npm:^1.0.7":
   version: 1.0.12
   resolution: "@tsconfig/node10@npm:1.0.12"
@@ -10419,6 +10443,18 @@ __metadata:
   languageName: node
   linkType: hard
 
+"file-type@npm:^22.0.1":
+  version: 22.0.1
+  resolution: "file-type@npm:22.0.1"
+  dependencies:
+    "@tokenizer/inflate": "npm:^0.4.1"
+    strtok3: "npm:^10.3.5"
+    token-types: "npm:^6.1.2"
+    uint8array-extras: "npm:^1.5.0"
+  checksum: 10c0/45b70a10196d46965eadd7835ec408c1c07b4fd2ed395e9bbcc0ad63d93f7bf6d076d0e970673b754577002019c8858825bc71ccc07ca7c0e49ac0c2b7e1839f
+  languageName: node
+  linkType: hard
+
 "fill-range@npm:^7.1.1":
   version: 7.1.1
   resolution: "fill-range@npm:7.1.1"
@@ -11065,6 +11101,7 @@ __metadata:
     eslint-plugin-rxjs: "npm:5.0.3"
     eslint-plugin-rxjs-angular: "npm:2.0.1"
     file-saver: "npm:2.0.5"
+    file-type: "npm:^22.0.1"
     fs-extra: "npm:10.0.1"
     fuse.js: "npm:6.5.3"
     git-describe: "npm:4.1.0"
@@ -11460,7 +11497,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"ieee754@npm:1.2.1, ieee754@npm:^1.1.13":
+"ieee754@npm:1.2.1, ieee754@npm:^1.1.13, ieee754@npm:^1.2.1":
   version: 1.2.1
   resolution: "ieee754@npm:1.2.1"
   checksum: 10c0/b0782ef5e0935b9f12883a2e2aa37baa75da6e66ce6515c168697b42160807d9330de9a32ec1ed73149aea02e0d822e572bca6f1e22bdcbd2149e13b050b17bb
@@ -17140,6 +17177,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"strtok3@npm:^10.3.5":
+  version: 10.3.5
+  resolution: "strtok3@npm:10.3.5"
+  dependencies:
+    "@tokenizer/token": "npm:^0.3.0"
+  checksum: 10c0/8d2477b239054c9f1f5b14a65d531147ca158ab9887fdc2d0938e77b7ec8891fb683b58254c7643afd5d98a421a59207534d491762b111f58c795071ecbe9fd1
+  languageName: node
+  linkType: hard
+
 "style-loader@npm:^3.3.0":
   version: 3.3.4
   resolution: "style-loader@npm:3.3.4"
@@ -17450,6 +17496,17 @@ __metadata:
   languageName: node
   linkType: hard
 
+"token-types@npm:^6.1.1, token-types@npm:^6.1.2":
+  version: 6.1.2
+  resolution: "token-types@npm:6.1.2"
+  dependencies:
+    "@borewit/text-codec": "npm:^0.2.1"
+    "@tokenizer/token": "npm:^0.3.0"
+    ieee754: "npm:^1.2.1"
+  checksum: 10c0/8786e28e3cb65b9e890bc3c38def98e6dfe4565538237f8c0e47dbe549ed8f5f00de8dc464717868308abb4729f1958f78f69e1c4c3deebbb685729113a6fee8
+  languageName: node
+  linkType: hard
+
 "totalist@npm:^1.0.0":
   version: 1.1.0
   resolution: "totalist@npm:1.1.0"
@@ -17798,6 +17855,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"uint8array-extras@npm:^1.5.0":
+  version: 1.5.0
+  resolution: "uint8array-extras@npm:1.5.0"
+  checksum: 10c0/0e74641ac7dadb02eadefc1ccdadba6010e007757bda824960de3c72bbe2b04e6d3af75648441f412148c4103261d54fcb60be45a2863beb76643a55fddba3bd
+  languageName: node
+  linkType: hard
+
 "underscore@npm:>=1.8.3":
   version: 1.13.8
   resolution: "underscore@npm:1.13.8"