From 761add475ca98d7eb8d4d622b3afcc7ae1f0a83a Mon Sep 17 00:00:00 2001
From: Tanishq Gandhi <tangandhi19@gmail.com>
Date: Fri, 15 May 2026 21:57:55 -0700
Subject: [PATCH] feat: add smart source and visual trace

---
 agent-service/src/agent/prompts.test.ts       |  51 ++
 agent-service/src/agent/prompts.ts            |   2 +
 agent-service/src/types/agent.test.ts         |  27 +
 agent-service/src/types/agent.ts              |   1 +
 .../texera/web/TexeraWebApplication.scala     |   1 +
 .../resource/SmartFileInferenceResource.scala | 114 +++++
 .../web/service/ExecutionResultService.scala  |  47 +-
 .../service/ExecutionResultServiceSpec.scala  |  29 ++
 build.sbt                                     |  13 +
 .../texera/amber/util/ImageFormatUtils.scala  |  74 +++
 common/workflow-operator/build.sbt            |  46 ++
 .../texera/amber/operator/LogicalOp.scala     |   4 +
 .../operator/fileSplit/FileSplitOpDesc.scala  | 106 ++++
 .../operator/fileSplit/FileSplitOpExec.scala  |  58 +++
 .../source/scan/FolderInputResolver.scala     | 127 +++++
 .../scan/file/FileScanSourceOpDesc.scala      |  18 +-
 .../scan/file/FileScanSourceOpExec.scala      |  27 +-
 .../source/scan/file/FileScanUtils.scala      |  14 +-
 .../source/scan/smart/CSVDialectSniffer.scala | 144 ++++++
 .../source/scan/smart/FormatDetector.scala    | 125 +++++
 .../source/scan/smart/ParquetUtils.scala      | 208 ++++++++
 .../source/scan/smart/SmartFileFormat.java    |  66 +++
 .../scan/smart/SmartFileInferencer.scala      | 476 ++++++++++++++++++
 .../scan/smart/SmartFileSourceOpDesc.scala    | 137 +++++
 .../scan/smart/SmartFileSourceOpExec.scala    | 345 +++++++++++++
 .../fileSplit/FileSplitOpDescSpec.scala       |  53 ++
 .../fileSplit/FileSplitOpExecSpec.scala       |  69 +++
 .../scan/file/FileScanSourceOpDescSpec.scala  |  59 +++
 .../scan/smart/CSVDialectSnifferSpec.scala    |  60 +++
 .../scan/smart/FormatDetectorSpec.scala       |  86 ++++
 .../smart/SmartFileSourceOpDescSpec.scala     | 307 +++++++++++
 .../smart/SmartFileSourceOpExecSpec.scala     | 115 +++++
 ...user-dataset-version-filetree.component.ts |   9 +-
 .../dataset-file-selector.component.ts        |   1 +
 .../dataset-selection-modal.component.html    |   1 +
 .../dataset-selection-modal.component.ts      |   3 +-
 ...perator-property-edit-frame.component.html |  24 +
 ...perator-property-edit-frame.component.scss |  14 +
 .../operator-property-edit-frame.component.ts | 136 ++++-
 .../result-table-cell.utils.spec.ts           |  34 ++
 .../result-table-cell.utils.ts                |  22 +
 .../result-table-frame.component.html         |   9 +-
 .../result-table-frame.component.scss         |   8 +
 .../result-table-frame.component.ts           |   9 +
 .../visual-trace-panel.component.html         | 136 +++++
 .../visual-trace-panel.component.scss         | 346 +++++++++++++
 .../visual-trace-panel.component.spec.ts      |  98 ++++
 .../visual-trace-panel.component.ts           |  85 ++++
 ...visualization-frame-content.component.html |   2 +
 .../visualization-frame-content.component.ts  |  87 +++-
 .../component/workspace.component.html        |   1 +
 .../component/workspace.component.ts          |   2 +
 .../smart-file-inference.service.ts           |  73 +++
 .../visual-trace/visual-trace.service.ts      |  39 ++
 .../visual-trace/visual-trace.utils.spec.ts   | 224 +++++++++
 .../visual-trace/visual-trace.utils.ts        | 293 +++++++++++
 .../workspace/types/visual-trace.interface.ts |  52 ++
 .../src/assets/operator_images/FileSplit.png  | Bin 0 -> 1657 bytes
 .../assets/operator_images/SmartFileScan.png  | Bin 0 -> 6977 bytes
 59 files changed, 4664 insertions(+), 53 deletions(-)
 create mode 100644 agent-service/src/agent/prompts.test.ts
 create mode 100644 agent-service/src/types/agent.test.ts
 create mode 100644 amber/src/main/scala/org/apache/texera/web/resource/SmartFileInferenceResource.scala
 create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/ImageFormatUtils.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDesc.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExec.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/FolderInputResolver.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSniffer.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetector.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/ParquetUtils.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileFormat.java
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileInferencer.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDesc.scala
 create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDescSpec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExecSpec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSnifferSpec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetectorSpec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDescSpec.scala
 create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExecSpec.scala
 create mode 100644 frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.spec.ts
 create mode 100644 frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.ts
 create mode 100644 frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.html
 create mode 100644 frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.scss
 create mode 100644 frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.spec.ts
 create mode 100644 frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.ts
 create mode 100644 frontend/src/app/workspace/service/smart-file-inference/smart-file-inference.service.ts
 create mode 100644 frontend/src/app/workspace/service/visual-trace/visual-trace.service.ts
 create mode 100644 frontend/src/app/workspace/service/visual-trace/visual-trace.utils.spec.ts
 create mode 100644 frontend/src/app/workspace/service/visual-trace/visual-trace.utils.ts
 create mode 100644 frontend/src/app/workspace/types/visual-trace.interface.ts
 create mode 100644 frontend/src/assets/operator_images/FileSplit.png
 create mode 100644 frontend/src/assets/operator_images/SmartFileScan.png

diff --git a/agent-service/src/agent/prompts.test.ts b/agent-service/src/agent/prompts.test.ts
new file mode 100644
index 00000000000..b036c76eaf0
--- /dev/null
+++ b/agent-service/src/agent/prompts.test.ts
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { describe, expect, test } from "bun:test";
+import { buildSystemPrompt } from "./prompts";
+import { WorkflowSystemMetadata } from "./util/workflow-system-metadata";
+
+describe("buildSystemPrompt", () => {
+  test("includes both operator type and display name", () => {
+    const metadata = new WorkflowSystemMetadata();
+    metadata.loadFromMetadata({
+      operators: [
+        {
+          operatorType: "SmartFileScan",
+          operatorVersion: "1",
+          jsonSchema: { properties: { fileName: { type: "string" } }, required: ["fileName"] },
+          additionalMetadata: {
+            userFriendlyName: "Smart Source",
+            operatorGroupName: "Data Input",
+            operatorDescription: "Auto-detects files and folders.",
+            inputPorts: [],
+            outputPorts: [{}],
+          },
+        },
+      ],
+      groups: [],
+    });
+    // Build the prompt for the single operator type registered above.
+    const prompt = buildSystemPrompt(metadata, ["SmartFileScan"]);
+    // The heading is the operator type; display name and description are emitted as labelled lines.
+    expect(prompt).toContain("## SmartFileScan");
+    expect(prompt).toContain("Display name: Smart Source");
+    expect(prompt).toContain("Description: Auto-detects files and folders.");
+  });
+});
diff --git a/agent-service/src/agent/prompts.ts b/agent-service/src/agent/prompts.ts
index 064eed2e3e5..ca3b542c463 100644
--- a/agent-service/src/agent/prompts.ts
+++ b/agent-service/src/agent/prompts.ts
@@ -268,10 +268,12 @@ function buildAllowedOperatorSchemas(
   for (const operatorType of operatorTypes) {
     const compactSchema = metadataStore.getCompactSchema(operatorType);
     const description = metadataStore.getDescription(operatorType);
+    const displayName = metadataStore.getAdditionalMetadata(operatorType)?.userFriendlyName;
 
     if (compactSchema) {
       schemas.push(
         `## ${operatorType}\n` +
+          (displayName ? `Display name: ${displayName}\n` : "") +
           (description ? `Description: ${description}\n` : "") +
           `Schema:\n\`\`\`json\n${JSON.stringify(compactSchema, null, 2)}\n\`\`\``
       );
diff --git a/agent-service/src/types/agent.test.ts b/agent-service/src/types/agent.test.ts
new file mode 100644
index 00000000000..abc4e73acf2
--- /dev/null
+++ b/agent-service/src/types/agent.test.ts
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { describe, expect, test } from "bun:test";
+import { DEFAULT_AGENT_SETTINGS } from "./agent";
+
+describe("DEFAULT_AGENT_SETTINGS", () => {
+  test("allows the smart source operator by default", () => {
+    expect(DEFAULT_AGENT_SETTINGS.allowedOperatorTypes).toContain("SmartFileScan");
+  });
+});
diff --git a/agent-service/src/types/agent.ts b/agent-service/src/types/agent.ts
index 765f5a7cb46..74cb6230c16 100644
--- a/agent-service/src/types/agent.ts
+++ b/agent-service/src/types/agent.ts
@@ -87,6 +87,7 @@ export const DEFAULT_AGENT_SETTINGS: Omit<AgentSettings, "systemPrompt"> = {
   executionTimeoutMs: 240000,
   maxSteps: 100,
   allowedOperatorTypes: [
+    "SmartFileScan",
     "CSVFileScan",
     "Filter",
     "Projection",
diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala
index 98b7c68c974..2390e38ea22 100644
--- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala
+++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala
@@ -130,6 +130,7 @@ class TexeraWebApplication
     environment.servlets.setSessionHandler(new SessionHandler)
 
     environment.jersey.register(classOf[SystemMetadataResource])
+    environment.jersey.register(classOf[SmartFileInferenceResource])
     // environment.jersey().register(classOf[MockKillWorkerResource])
 
     environment.jersey.register(classOf[HealthCheckResource])
diff --git a/amber/src/main/scala/org/apache/texera/web/resource/SmartFileInferenceResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/SmartFileInferenceResource.scala
new file mode 100644
index 00000000000..27d9706462c
--- /dev/null
+++ b/amber/src/main/scala/org/apache/texera/web/resource/SmartFileInferenceResource.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.web.resource
+
+import com.fasterxml.jackson.annotation.{JsonIgnoreProperties, JsonProperty}
+import org.apache.texera.amber.core.storage.FileResolver
+import org.apache.texera.amber.operator.source.scan.FileDecodingMethod
+import org.apache.texera.amber.operator.source.scan.smart.{
+  InferenceOverrides,
+  SmartFileFormat,
+  SmartFileInferencer
+}
+
+import javax.annotation.security.RolesAllowed
+import javax.ws.rs.core.MediaType
+import javax.ws.rs.{Consumes, POST, Path, Produces}
+import scala.jdk.CollectionConverters._
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+case class SmartFileInferenceRequest(
+    @JsonProperty("fileName") fileName: String,
+    @JsonProperty("fileEncoding") fileEncoding: Option[String] = None,
+    @JsonProperty("formatOverride") formatOverride: Option[String] = None,
+    @JsonProperty("customDelimiter") customDelimiter: Option[String] = None,
+    @JsonProperty("hasHeader") hasHeader: Option[Boolean] = None,
+    @JsonProperty("sheetName") sheetName: Option[String] = None,
+    @JsonProperty("flatten") flatten: Option[Boolean] = None
+)
+
+case class SmartFileInferenceColumn(name: String, `type`: String)
+
+case class SmartFileInferenceResponse(
+    detectedFormat: String,
+    schema: java.util.List[SmartFileInferenceColumn],
+    customDelimiter: String,
+    hasHeader: java.lang.Boolean,
+    sheetName: String,
+    availableSheetNames: java.util.List[String],
+    flatten: java.lang.Boolean,
+    isFolder: Boolean,
+    fileCount: Int
+)
+
+@Path("/file-inference")
+@RolesAllowed(Array("REGULAR", "ADMIN"))
+@Consumes(Array(MediaType.APPLICATION_JSON))
+@Produces(Array(MediaType.APPLICATION_JSON))
+class SmartFileInferenceResource {
+
+  // Previews inference for `request.fileName`. Unparseable format/encoding overrides are
+  // dropped (encoding then defaults to UTF-8); a missing body or blank file name yields a 400.
+  @POST
+  @Path("/preview")
+  def preview(request: SmartFileInferenceRequest): SmartFileInferenceResponse = {
+    if (request == null || request.fileName == null || request.fileName.trim.isEmpty)
+      throw new javax.ws.rs.BadRequestException("fileName is required")
+    val uri = FileResolver.resolve(request.fileName)
+    val charset =
+      request.fileEncoding.flatMap(tryParseEncoding).getOrElse(FileDecodingMethod.UTF_8.getCharset)
+    val overrides = InferenceOverrides(
+      format = request.formatOverride.flatMap(tryParseFormat),
+      delimiter = request.customDelimiter.flatMap(_.headOption),
+      hasHeader = request.hasHeader,
+      sheetName = request.sheetName,
+      flatten = request.flatten
+    )
+
+    val result = SmartFileInferencer.infer(uri, charset, overrides)
+    val columns = result.schema.getAttributes
+      .map(a => SmartFileInferenceColumn(a.getName, a.getType.toString))
+      .asJava
+
+    SmartFileInferenceResponse(
+      detectedFormat = result.format.getLabel,
+      schema = columns,
+      customDelimiter = result.csvDelimiter.orNull,
+      hasHeader = result.csvHasHeader.map(java.lang.Boolean.valueOf).orNull,
+      sheetName = result.sheetName.orNull,
+      availableSheetNames = result.availableSheetNames.asJava,
+      flatten = result.flatten.map(java.lang.Boolean.valueOf).orNull,
+      isFolder = result.isFolder,
+      fileCount = result.fileCount
+    )
+  }
+
+  // Accepts the enum name (CSV, ...) or the label ("Plain text", ...); locale-independent.
+  private def tryParseFormat(value: String): Option[SmartFileFormat] = {
+    val formats = SmartFileFormat.values()
+    val byName = formats.find(_.name.equalsIgnoreCase(value))
+    byName.orElse(formats.find(_.getLabel.equalsIgnoreCase(value)))
+  }
+
+  // Resolves a FileDecodingMethod by name, ignoring case; None when the name is unknown.
+  private def tryParseEncoding(value: String): Option[java.nio.charset.Charset] =
+    try Some(FileDecodingMethod.valueOf(value.toUpperCase(java.util.Locale.ROOT)).getCharset)
+    catch { case _: IllegalArgumentException => None }
+}
diff --git a/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala b/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala
index b335ed0c3c7..5ff51f7f7e2 100644
--- a/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala
+++ b/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala
@@ -46,6 +46,7 @@ import org.apache.texera.amber.engine.architecture.rpc.controlreturns.WorkflowAg
 import org.apache.texera.amber.engine.common.AmberRuntime
 import org.apache.texera.amber.engine.common.client.AmberClient
 import org.apache.texera.amber.engine.common.executionruntimestate.ExecutionMetadataStore
+import org.apache.texera.amber.util.ImageFormatUtils
 import org.apache.texera.web.SubscriptionManager
 import org.apache.texera.web.model.websocket.event.{
   PaginatedResultEvent,
@@ -59,6 +60,7 @@ import org.apache.texera.web.service.WorkflowExecutionService.getLatestExecution
 import org.apache.texera.web.storage.{ExecutionStateStore, WorkflowStateStore}
 
 import java.lang.Byte.{SIZE => BitsPerByte}
+import java.util.Base64
 import java.util.UUID
 import scala.collection.mutable
 import scala.concurrent.duration.DurationInt
@@ -76,6 +78,11 @@ object ExecutionResultService {
       )
       .mkString("")
 
+  // Renders recognized image bytes as a base64 `data:` URL; None for any other payload.
+  private def bytesToImageDataUrl(bytes: Array[Byte]): Option[String] =
+    for (mimeType <- ImageFormatUtils.detectMimeType(bytes))
+      yield s"data:$mimeType;base64,${Base64.getEncoder.encodeToString(bytes)}"
+
   /**
     * Converts a collection of Tuples to a list of JSON ObjectNodes.
     *
@@ -107,25 +114,27 @@ object ExecutionResultService {
                   case AttributeType.BINARY =>
                     value match {
                       case byteArray: Array[Byte] =>
-                        val totalSize = byteArray.length
-                        val sizeFormatted = f"$totalSize%,d"
-                        val totalBits = totalSize * BitsPerByte
-                        val preview =
-                          if (totalBits <= binaryPreviewLeadingBits + binaryPreviewTrailingBits)
-                            bytesToBinaryString(byteArray)
-                          else {
-                            val leadingBytesNeeded =
-                              math.ceil(binaryPreviewLeadingBits.toDouble / BitsPerByte).toInt
-                            val trailingBytesNeeded =
-                              math.ceil(binaryPreviewTrailingBits.toDouble / BitsPerByte).toInt
-                            val leading = bytesToBinaryString(byteArray.take(leadingBytesNeeded))
-                              .take(binaryPreviewLeadingBits)
-                            val trailing = bytesToBinaryString(
-                              byteArray.takeRight(trailingBytesNeeded)
-                            ).takeRight(binaryPreviewTrailingBits)
-                            s"$leading...$trailing"
-                          }
-                        s"<binary $preview, size = $sizeFormatted bytes>"
+                        bytesToImageDataUrl(byteArray).getOrElse {
+                          val totalSize = byteArray.length
+                          val sizeFormatted = f"$totalSize%,d"
+                          val totalBits = totalSize * BitsPerByte
+                          val preview =
+                            if (totalBits <= binaryPreviewLeadingBits + binaryPreviewTrailingBits)
+                              bytesToBinaryString(byteArray)
+                            else {
+                              val leadingBytesNeeded =
+                                math.ceil(binaryPreviewLeadingBits.toDouble / BitsPerByte).toInt
+                              val trailingBytesNeeded =
+                                math.ceil(binaryPreviewTrailingBits.toDouble / BitsPerByte).toInt
+                              val leading = bytesToBinaryString(byteArray.take(leadingBytesNeeded))
+                                .take(binaryPreviewLeadingBits)
+                              val trailing = bytesToBinaryString(
+                                byteArray.takeRight(trailingBytesNeeded)
+                              ).takeRight(binaryPreviewTrailingBits)
+                              s"$leading...$trailing"
+                            }
+                          s"<binary $preview, size = $sizeFormatted bytes>"
+                        }
 
                       case _ =>
                         throw new RuntimeException(
diff --git a/amber/src/test/scala/org/apache/texera/web/service/ExecutionResultServiceSpec.scala b/amber/src/test/scala/org/apache/texera/web/service/ExecutionResultServiceSpec.scala
index 0afe31fc099..2d86c47a158 100644
--- a/amber/src/test/scala/org/apache/texera/web/service/ExecutionResultServiceSpec.scala
+++ b/amber/src/test/scala/org/apache/texera/web/service/ExecutionResultServiceSpec.scala
@@ -23,6 +23,10 @@ import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, Tup
 import org.scalatest.flatspec.AnyFlatSpec
 import org.scalatest.matchers.should.Matchers
 
+import java.awt.image.BufferedImage
+import java.io.ByteArrayOutputStream
+import javax.imageio.ImageIO
+
 class ExecutionResultServiceSpec extends AnyFlatSpec with Matchers {
 
   "convertTuplesToJson" should "convert tuples with various field types correctly" in {
@@ -181,6 +185,24 @@ class ExecutionResultServiceSpec extends AnyFlatSpec with Matchers {
     emptyBinaryString should include("size = 0 bytes")
   }
 
+  it should "serialize recognized image binaries as data URLs" in {
+    // A single BINARY column whose value is a 2x2 PNG encoded by ImageIO.
+    val imageBytes = pngBytes(width = 2, height = 2)
+    val schema = new Schema(List(new Attribute("image", AttributeType.BINARY)))
+
+    val pngTuple = Tuple
+      .builder(schema)
+      .add("image", AttributeType.BINARY, imageBytes)
+      .build()
+
+    val rows = ExecutionResultService.convertTuplesToJson(List(pngTuple))
+
+    // The cell should carry a PNG data URL prefix; the base64 payload itself is not checked.
+    rows should have size 1
+    val imageCell = rows.head.get("image").asText()
+    imageCell should startWith("data:image/png;base64,")
+  }
+
   it should "handle binary data with single ByteBuffer" in {
     val attributes = List(
       new Attribute("singleBufferBinary", AttributeType.BINARY)
@@ -475,4 +497,11 @@ class ExecutionResultServiceSpec extends AnyFlatSpec with Matchers {
     resultsDefault(2).get("value").asText() shouldBe "medium length"
     resultsDefault(3).get("value").asText() should endWith("...")
   }
+
+  /** Encodes a freshly created RGB image of the given size as PNG bytes. */
+  private def pngBytes(width: Int, height: Int): Array[Byte] = {
+    val buffer = new ByteArrayOutputStream()
+    ImageIO.write(new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB), "png", buffer)
+    buffer.toByteArray
+  }
 }
diff --git a/build.sbt b/build.sbt
index b7b6b3cfb20..22dcd24e085 100644
--- a/build.sbt
+++ b/build.sbt
@@ -50,6 +50,19 @@ lazy val asfLicensingSettingsWithVendored = AddMetaInfLicenseFiles.workflowOpera
 
 val jacksonVersion = "2.18.6"
 
+// Globally exclude transitive Hadoop landmines that conflict with Texera's
+// Dropwizard + Jersey stack. These ride in via Parquet's `parquet-hadoop`,
+// added in common/workflow-operator/build.sbt for SmartFileScan. Defining the
+// excludes at ThisBuild level ensures they apply to every project that
+// transitively pulls Hadoop — most importantly amber.
+ThisBuild / excludeDependencies ++= Seq(
+  ExclusionRule("javax.servlet.jsp", "jsp-api"),
+  ExclusionRule("javax.servlet", "servlet-api"),
+  ExclusionRule(organization = "com.sun.jersey"),
+  ExclusionRule(organization = "com.sun.jersey.contribs"),
+  ExclusionRule("com.github.pjfanning", "jersey-json")
+)
+
 lazy val DAO = (project in file("common/dao")).settings(asfLicensingSettings)
 lazy val Config = (project in file("common/config")).settings(asfLicensingSettings)
 lazy val Auth = (project in file("common/auth"))
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ImageFormatUtils.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ImageFormatUtils.scala
new file mode 100644
index 00000000000..27c1d66ef9f
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ImageFormatUtils.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.util
+
+/**
+  * Detects PNG, JPEG, GIF and WebP content, either from the leading magic bytes of a payload
+  * or from a file extension. Formats are reported by short name: "png", "jpeg", "gif", "webp".
+  */
+object ImageFormatUtils {
+
+  private val PngMagic = Array[Byte](0x89.toByte, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a)
+  private val JpegMagic = Array[Byte](0xff.toByte, 0xd8.toByte, 0xff.toByte)
+  private val Gif87Magic = "GIF87a".getBytes("US-ASCII")
+  private val Gif89Magic = "GIF89a".getBytes("US-ASCII")
+  private val RiffMagic = "RIFF".getBytes("US-ASCII")
+  private val WebpMagic = "WEBP".getBytes("US-ASCII")
+
+  /**
+    * Returns the short format name whose signature `bytes` starts with, or None when the
+    * payload is too short or matches no known signature.
+    */
+  def detectFormat(bytes: Array[Byte]): Option[String] =
+    if (startsWith(bytes, PngMagic)) Some("png")
+    else if (startsWith(bytes, JpegMagic)) Some("jpeg")
+    else if (startsWith(bytes, Gif87Magic) || startsWith(bytes, Gif89Magic)) Some("gif")
+    else if (isWebp(bytes)) Some("webp")
+    else None
+
+  /** Returns the `image/...` MIME type for `bytes`, or None when no signature matches. */
+  def detectMimeType(bytes: Array[Byte]): Option[String] =
+    // Every name detectFormat returns is also the subtype of its image MIME type.
+    detectFormat(bytes).map(format => s"image/$format")
+
+  /**
+    * Maps the extension after the last '.' in `path` to a short format name, ignoring case;
+    * "jpg" and "jpeg" both map to "jpeg". Returns None for a missing or unrecognized extension.
+    */
+  def extensionFormat(path: String): Option[String] = {
+    val dot = path.lastIndexOf('.')
+    // Locale.ROOT: the default-locale toLowerCase breaks on 'I' under e.g. a Turkish locale.
+    val ext = if (dot < 0) "" else path.substring(dot + 1).toLowerCase(java.util.Locale.ROOT)
+    ext match {
+      case "png" | "gif" | "webp" => Some(ext)
+      case "jpg" | "jpeg"         => Some("jpeg")
+      case _                      => None
+    }
+  }
+
+  /** WebP is a RIFF container: "RIFF", a 4-byte chunk size, then the "WEBP" form type. */
+  private def isWebp(bytes: Array[Byte]): Boolean =
+    startsWith(bytes, RiffMagic) && startsWith(bytes, WebpMagic, offset = 8)
+
+  /** True when `prefix` occurs in `bytes` at `offset`; compares in place, without copying. */
+  private def startsWith(bytes: Array[Byte], prefix: Array[Byte], offset: Int = 0): Boolean =
+    bytes.length - offset >= prefix.length &&
+      prefix.indices.forall(i => bytes(offset + i) == prefix(i))
+}
diff --git a/common/workflow-operator/build.sbt b/common/workflow-operator/build.sbt
index 1c082cae96e..a79165d0b64 100644
--- a/common/workflow-operator/build.sbt
+++ b/common/workflow-operator/build.sbt
@@ -113,4 +113,50 @@ libraryDependencies ++= Seq(
   "org.apache.lucene" % "lucene-analyzers-common" % "8.11.4"
 )
 
+// SmartFileSource: Parquet + Excel support.
+//
+// Hadoop drags in a LOT of stuff Texera doesn't use, and several of those
+// transitive deps conflict head-on with Texera's existing Dropwizard + Jersey-3
+// stack. We exclude all of the known troublemakers here. If you're tempted to
+// remove one of these, run TexeraWebApplication and watch it die at startup.
+//
+// Conflicts being avoided:
+//   - slf4j-reload4j / reload4j: conflicts with the project's logback setup
+//   - jsp-api 2.1: ships an ancient `javax.el.ExpressionFactory` (no
+//     `newInstance()`) that shadows the real `javax.el-3.0.x` Dropwizard's
+//     Hibernate Validator needs (NoSuchMethodError otherwise)
+//   - com.sun.jersey.* (Jersey 1.x): collides with the project's Jersey 3 via
+//     HK2 — JSONRootElementProvider gets instantiated and explodes on init
+//   - tomcat / jasper: only used by Hadoop's embedded web UIs
+//   - servlet-api 2.5: ancient javax servlet that conflicts with Jakarta
+libraryDependencies ++= Seq(
+  "org.apache.parquet" % "parquet-hadoop" % "1.13.1",
+  "org.apache.hadoop" % "hadoop-common" % "3.3.6"
+    exclude("org.slf4j", "slf4j-reload4j")
+    exclude("ch.qos.reload4j", "reload4j")
+    exclude("javax.servlet.jsp", "jsp-api")
+    exclude("javax.servlet", "servlet-api")
+    exclude("org.mortbay.jetty", "jetty")
+    exclude("org.mortbay.jetty", "jetty-util")
+    exclude("org.mortbay.jetty", "jsp-api-2.1")
+    exclude("tomcat", "jasper-compiler")
+    exclude("tomcat", "jasper-runtime")
+    exclude("com.sun.jersey", "jersey-core")
+    exclude("com.sun.jersey", "jersey-server")
+    exclude("com.sun.jersey", "jersey-json")
+    exclude("com.sun.jersey", "jersey-servlet")
+    exclude("com.sun.jersey", "jersey-client")
+    excludeAll(ExclusionRule(organization = "com.sun.jersey")),
+  "org.apache.hadoop" % "hadoop-mapreduce-client-core" % "3.3.6"
+    exclude("org.slf4j", "slf4j-reload4j")
+    exclude("ch.qos.reload4j", "reload4j")
+    exclude("javax.servlet.jsp", "jsp-api")
+    exclude("javax.servlet", "servlet-api")
+    excludeAll(ExclusionRule(organization = "com.sun.jersey")),
+  "org.apache.poi" % "poi-ooxml" % "5.2.5"
+)
+// Global Hadoop transitive-dep blackhole is declared at the top-level
+// build.sbt as `ThisBuild / excludeDependencies` so it applies to every
+// downstream project (especially amber) that pulls Hadoop through us.
+
 libraryDependencies += "io.github.classgraph" % "classgraph" % "4.8.184" % Test
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
index 4e9d6c6e2cd..26643a4804f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
@@ -41,6 +41,7 @@ import org.apache.texera.amber.operator.difference.DifferenceOpDesc
 import org.apache.texera.amber.operator.distinct.DistinctOpDesc
 import org.apache.texera.amber.operator.dummy.DummyOpDesc
 import org.apache.texera.amber.operator.filter.SpecializedFilterOpDesc
+import org.apache.texera.amber.operator.fileSplit.FileSplitOpDesc
 import org.apache.texera.amber.operator.hashJoin.HashJoinOpDesc
 import org.apache.texera.amber.operator.huggingFace.{
   HuggingFaceIrisLogisticRegressionOpDesc,
@@ -81,6 +82,7 @@ import org.apache.texera.amber.operator.source.scan.arrow.ArrowSourceOpDesc
 import org.apache.texera.amber.operator.source.scan.csv.CSVScanSourceOpDesc
 import org.apache.texera.amber.operator.source.scan.csvOld.CSVOldScanSourceOpDesc
 import org.apache.texera.amber.operator.source.scan.json.JSONLScanSourceOpDesc
+import org.apache.texera.amber.operator.source.scan.smart.SmartFileSourceOpDesc
 import org.apache.texera.amber.operator.source.scan.text.TextInputSourceOpDesc
 import org.apache.texera.amber.operator.source.sql.asterixdb.AsterixDBSourceOpDesc
 import org.apache.texera.amber.operator.source.sql.mysql.MySQLSourceOpDesc
@@ -164,9 +166,11 @@ trait StateTransferFunc
 @JsonSubTypes(
   Array(
     new Type(value = classOf[IfOpDesc], name = "If"),
+    new Type(value = classOf[FileSplitOpDesc], name = "FileSplit"),
     new Type(value = classOf[SankeyDiagramOpDesc], name = "SankeyDiagram"),
     new Type(value = classOf[IcicleChartOpDesc], name = "IcicleChart"),
     new Type(value = classOf[FileListerSourceOpDesc], name = "FileLister"),
+    new Type(value = classOf[SmartFileSourceOpDesc], name = "SmartFileScan"),
     new Type(value = classOf[CSVScanSourceOpDesc], name = "CSVFileScan"),
     // disabled the ParallelCSVScanSourceOpDesc so that it does not confuse user. it can be re-enabled when doing experiments.
     // new Type(value = classOf[ParallelCSVScanSourceOpDesc], name = "ParallelCSVFileScan"),
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDesc.scala
new file mode 100644
index 00000000000..4b71e441202
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDesc.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.fileSplit
+
+import com.fasterxml.jackson.annotation.{JsonInclude, JsonProperty, JsonPropertyDescription}
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.core.executor.OpExecWithClassName
+import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity}
+import org.apache.texera.amber.core.workflow._
+import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.operator.{LogicalOp, PortDescription}
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+
+/**
+  * Logical descriptor of the File Split operator.
+  *
+  * The operator has one input and a customizable list of output ports. Every output port carries
+  * the unchanged input schema; `FileSplitOpExec` decides per tuple which port receives it, based
+  * on the value of a file-name column.
+  */
+class FileSplitOpDesc extends LogicalOp {
+
+  // Column holding the source file of each tuple; None or a blank name means auto-detect.
+  @JsonProperty
+  @JsonSchemaTitle("File Column")
+  @JsonPropertyDescription("leave empty to auto-detect source_file or filename")
+  @JsonInclude(JsonInclude.Include.NON_ABSENT)
+  var fileAttribute: Option[String] = None
+
+  /**
+    * Builds the physical operator, executed by `FileSplitOpExec` and declared non-parallelizable.
+    * Schema propagation validates the file column and gives every output port the input schema.
+    */
+  override def getPhysicalOp(
+      workflowId: WorkflowIdentity,
+      executionId: ExecutionIdentity
+  ): PhysicalOp =
+    PhysicalOp
+      .oneToOnePhysicalOp(
+        workflowId,
+        executionId,
+        operatorIdentifier,
+        OpExecWithClassName(
+          "org.apache.texera.amber.operator.fileSplit.FileSplitOpExec",
+          objectMapper.writeValueAsString(this)
+        )
+      )
+      .withInputPorts(operatorInfo.inputPorts)
+      .withOutputPorts(operatorInfo.outputPorts)
+      .withParallelizable(false)
+      .withPropagateSchema(
+        SchemaPropagationFunc(inputSchemas => {
+          require(inputSchemas.size == 1, "File Split requires exactly one input")
+          val inputSchema = inputSchemas.values.head
+          // Called only for its validation: it throws when no usable file column exists.
+          resolveFileAttribute(inputSchema)
+          operatorInfo.outputPorts.map(port => port.id -> inputSchema).toMap
+        })
+      )
+
+  /**
+    * Operator metadata. Output ports mirror the configured `outputPorts`, keeping their display
+    * names; when none are configured, `PortIdentity()` and `PortIdentity(1)` are used.
+    */
+  override def operatorInfo: OperatorInfo = {
+    val outputPortInfo =
+      if (outputPorts != null && outputPorts.nonEmpty) {
+        outputPorts.zipWithIndex.map {
+          case (portDesc: PortDescription, idx) =>
+            OutputPort(PortIdentity(idx), displayName = portDesc.displayName)
+        }
+      } else {
+        List(OutputPort(PortIdentity()), OutputPort(PortIdentity(1)))
+      }
+
+    OperatorInfo(
+      userFriendlyName = "File Split",
+      operatorDescription = "Route rows from the same file to the same output port",
+      operatorGroupName = OperatorGroupConstants.UTILITY_GROUP,
+      inputPorts = List(InputPort()),
+      outputPorts = outputPortInfo,
+      dynamicOutputPorts = true,
+      allowPortCustomization = true
+    )
+  }
+
+  /**
+    * Picks the column whose value identifies the file a tuple came from.
+    *
+    * A non-blank `fileAttribute` is used as given. Otherwise the first of `source_file` and
+    * `filename` that exists in `schema` is chosen.
+    *
+    * @param schema schema of the operator's input
+    * @return name of the column to route on
+    * @throws IllegalArgumentException if no column can be auto-detected, or the chosen column is
+    *                                  absent from `schema` or is not of type STRING
+    */
+  def resolveFileAttribute(schema: Schema): String = {
+    // The property description tells users to leave the field empty for auto-detection. A form
+    // can submit that as an empty string instead of omitting the property, so a blank (or null)
+    // name is treated like None rather than as a column literally named "".
+    val explicitName = fileAttribute.filter(name => name != null && name.trim.nonEmpty)
+    val attributeName = explicitName.getOrElse {
+      List("source_file", "filename")
+        .find(schema.containsAttribute)
+        .getOrElse(
+          throw new IllegalArgumentException(
+            "File Split requires a source_file or filename column, or an explicit File Column"
+          )
+        )
+    }
+    if (!schema.containsAttribute(attributeName)) {
+      throw new IllegalArgumentException(s"File Split column '$attributeName' does not exist")
+    }
+    if (schema.getAttribute(attributeName).getType != AttributeType.STRING) {
+      throw new IllegalArgumentException(s"File Split column '$attributeName' must be a STRING")
+    }
+    attributeName
+  }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExec.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExec.scala
new file mode 100644
index 00000000000..9816cf34c17
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExec.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.fileSplit
+
+import org.apache.texera.amber.core.executor.OperatorExecutor
+import org.apache.texera.amber.core.tuple.{Tuple, TupleLike}
+import org.apache.texera.amber.core.workflow.PortIdentity
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+
+import scala.collection.mutable
+
+/**
+  * Executor of the File Split operator: forwards every tuple unchanged to one output port.
+  *
+  * The first tuple of a file claims the next port in round-robin order; later tuples with the
+  * same file value follow it to that port.
+  *
+  * @param descString JSON-serialized `FileSplitOpDesc`
+  */
+class FileSplitOpExec(descString: String) extends OperatorExecutor {
+  private val desc: FileSplitOpDesc = objectMapper.readValue(descString, classOf[FileSplitOpDesc])
+  // Port assigned to each file value seen so far.
+  private val fileToPort = mutable.LinkedHashMap.empty[String, PortIdentity]
+  // Name of the file column; stays null until the first tuple provides the schema.
+  private var fileAttribute: String = _
+  private var outputPortCount: Int = _
+
+  /** Caches the number of output ports and rejects a configuration that has none. */
+  override def open(): Unit = {
+    outputPortCount = desc.operatorInfo.outputPorts.size
+    require(outputPortCount > 0, "File Split requires at least one output port")
+  }
+
+  /**
+    * Sends `tuple` to the port assigned to its file value, assigning one on first sight.
+    *
+    * @throws IllegalArgumentException if the file column cannot be resolved or its value is null
+    */
+  override def processTupleMultiPort(
+      tuple: Tuple,
+      port: Int
+  ): Iterator[(TupleLike, Option[PortIdentity])] = {
+    if (fileAttribute == null) {
+      fileAttribute = desc.resolveFileAttribute(tuple.getSchema)
+    }
+    val sourceFile: String = tuple.getField[String](fileAttribute)
+    if (sourceFile == null) {
+      throw new IllegalArgumentException(s"File Split column '$fileAttribute' cannot be null")
+    }
+    val target = fileToPort.get(sourceFile) match {
+      case Some(assigned) => assigned
+      case None =>
+        // A new file takes the next port, wrapping around after the last one.
+        val fresh = PortIdentity(fileToPort.size % outputPortCount)
+        fileToPort.put(sourceFile, fresh)
+        fresh
+    }
+    Iterator.single((tuple, Some(target)))
+  }
+
+  // Left as `???`; presumably the engine only calls processTupleMultiPort here — confirm.
+  override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = ???
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/FolderInputResolver.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/FolderInputResolver.scala
new file mode 100644
index 00000000000..e4f022d201d
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/FolderInputResolver.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan
+
+import org.apache.texera.amber.core.storage.FileResolver
+import org.apache.texera.amber.core.storage.util.LakeFSStorageClient
+
+import java.net.{URI, URLDecoder, URLEncoder}
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path, Paths}
+import scala.jdk.CollectionConverters._
+import scala.util.Using
+
+/** One readable file: the URI to open and the name to display for it. */
+case class ResolvedInputFile(uri: URI, displayName: String)
+
+/** Result of resolving an input URI: the files to read and whether the URI denoted a folder. */
+case class ResolvedFolderInput(files: List[ResolvedInputFile], isFolder: Boolean)
+
+/** Expands an input URI into the files it stands for: one file, or the contents of a folder. */
+object FolderInputResolver {
+
+  /**
+    * Resolves `uri` according to its lower-cased scheme.
+    *
+    *  - `file`: a directory is walked recursively; anything else is returned as a single file
+    *  - the dataset scheme: see `resolveDatasetInput`
+    *  - any other or missing scheme: the URI itself is returned as a single file
+    */
+  def resolve(uri: URI): ResolvedFolderInput =
+    Option(uri.getScheme).map(_.toLowerCase) match {
+      case Some("file")                               => resolveLocalInput(uri)
+      case Some(FileResolver.DATASET_FILE_URI_SCHEME) => resolveDatasetInput(uri)
+      case _ =>
+        ResolvedFolderInput(List(ResolvedInputFile(uri, uri.toASCIIString)), isFolder = false)
+    }
+
+  /**
+    * Resolves a `file:` URI. For a directory, every regular file below it is returned, sorted by
+    * its path relative to the directory; files whose own name starts with "." are skipped.
+    */
+  private def resolveLocalInput(uri: URI): ResolvedFolderInput = {
+    val path = Paths.get(uri)
+    if (Files.isDirectory(path)) {
+      // Files.walk returns a stream that must be closed, hence Using.resource.
+      val files = Using.resource(Files.walk(path)) { stream =>
+        stream
+          .iterator()
+          .asScala
+          .filter(Files.isRegularFile(_))
+          .filterNot(isHiddenPath)
+          .map(file => ResolvedInputFile(file.toUri, path.relativize(file).toString))
+          .toList
+          .sortBy(_.displayName)
+      }
+      ResolvedFolderInput(files, isFolder = true)
+    } else {
+      ResolvedFolderInput(List(ResolvedInputFile(uri, uri.toASCIIString)), isFolder = false)
+    }
+  }
+
+  /**
+    * Resolves a dataset URI whose path has the form `/<repository>/<version>/<relative path>`.
+    *
+    * An object stored under exactly that relative path is returned as a single file. Otherwise
+    * the relative path is used as a folder prefix and every object below it is returned in path
+    * order, skipping objects whose file name starts with ".". A path that matches nothing
+    * therefore yields an empty folder result.
+    *
+    * @throws IllegalArgumentException if the URI has no path or fewer than three path segments
+    */
+  private def resolveDatasetInput(uri: URI): ResolvedFolderInput = {
+    // Split on "/" instead of going through java.nio.file.Paths: a dataset path is not a local
+    // file-system path, and Paths rejects characters the host OS forbids (for example ':' or
+    // '?' on Windows). buildDatasetFileUri splits the same way. A URI without a path (getPath
+    // is null for opaque URIs) falls into the "missing a relative path" error below.
+    val segments = Option(uri.getPath)
+      .map(_.split("/").toList.filter(_.nonEmpty))
+      .getOrElse(Nil)
+
+    if (segments.length < 3) {
+      throw new IllegalArgumentException(s"Dataset URI is missing a relative path: $uri")
+    }
+
+    val repositoryName = segments.head
+    val versionHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8)
+    // getPath already removed the URI-level escaping; this decode undoes the URLEncoder pass
+    // that buildDatasetFileUri applies to each relative-path segment.
+    val relativePath = segments
+      .drop(2)
+      .map(part => URLDecoder.decode(part, StandardCharsets.UTF_8))
+      .mkString("/")
+
+    val objects = LakeFSStorageClient.retrieveObjectsOfVersion(repositoryName, versionHash)
+    val exactFile = objects.find(_.getPath == relativePath)
+    exactFile match {
+      case Some(file) =>
+        ResolvedFolderInput(
+          List(
+            ResolvedInputFile(
+              buildDatasetFileUri(repositoryName, versionHash, file.getPath),
+              uri.toASCIIString
+            )
+          ),
+          isFolder = false
+        )
+      case None =>
+        val prefix = if (relativePath.endsWith("/")) relativePath else s"$relativePath/"
+        val files = objects
+          .map(_.getPath)
+          .filter(_.startsWith(prefix))
+          .filterNot(isHiddenDatasetPath)
+          .sorted
+          .map { path =>
+            ResolvedInputFile(
+              buildDatasetFileUri(repositoryName, versionHash, path),
+              path.stripPrefix(prefix)
+            )
+          }
+        ResolvedFolderInput(files, isFolder = true)
+    }
+  }
+
+  /**
+    * Builds the dataset URI of one object. Only the segments of `relativePath` are URL-encoded;
+    * the repository name and version hash are used as given.
+    */
+  private def buildDatasetFileUri(
+      repositoryName: String,
+      versionHash: String,
+      relativePath: String
+  ): URI = {
+    val encodedSegments =
+      List(repositoryName, versionHash) ++ relativePath
+        .split("/")
+        .toList
+        .filter(_.nonEmpty)
+        .map(segment => URLEncoder.encode(segment, StandardCharsets.UTF_8))
+    new URI(FileResolver.DATASET_FILE_URI_SCHEME, "", s"/${encodedSegments.mkString("/")}", null)
+  }
+
+  /** True when the last name element of `path` starts with ".". */
+  private def isHiddenPath(path: Path): Boolean =
+    Option(path.getFileName).exists(_.toString.startsWith("."))
+
+  /** True when the last "/"-separated segment of `path` starts with ".". */
+  private def isHiddenDatasetPath(path: String): Boolean =
+    path.split("/").lastOption.exists(_.startsWith("."))
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDesc.scala
index 82997632d14..b3c3d260723 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDesc.scala
@@ -20,11 +20,7 @@
 package org.apache.texera.amber.operator.source.scan.file
 
 import com.fasterxml.jackson.annotation.{JsonIgnoreProperties, JsonProperty}
-import com.kjetland.jackson.jsonSchema.annotations.{
-  JsonSchemaInject,
-  JsonSchemaString,
-  JsonSchemaTitle
-}
+import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaString, JsonSchemaTitle}
 import org.apache.texera.amber.core.executor.OpExecWithClassName
 import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
 import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity}
@@ -53,14 +49,7 @@ class FileScanSourceOpDesc extends ScanSourceOpDesc with TextSourceOpDesc {
 
   @JsonProperty(defaultValue = "false")
   @JsonSchemaTitle("Include Filename")
-  @JsonSchemaInject(
-    strings = Array(
-      new JsonSchemaString(path = HideAnnotation.hideTarget, value = "extract"),
-      new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.equals),
-      new JsonSchemaString(path = HideAnnotation.hideExpectedValue, value = "false")
-    )
-  )
-  val outputFileName: Boolean = false
+  var outputFileName: Boolean = false
 
   fileTypeName = Option("")
 
@@ -92,4 +81,7 @@ class FileScanSourceOpDesc extends ScanSourceOpDesc with TextSourceOpDesc {
     }
     schema.add(attributeName, attributeType.getType)
   }
+
+  override def operatorInfo = // parent's operator info with only the description replaced
+    super.operatorInfo.copy(operatorDescription = "Scan data from a file or a folder of files")
 }
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpExec.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpExec.scala
index d47cf3681c2..3b71a126437 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpExec.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpExec.scala
@@ -21,9 +21,11 @@ package org.apache.texera.amber.operator.source.scan.file
 
 import org.apache.texera.amber.core.executor.SourceOperatorExecutor
 import org.apache.texera.amber.core.tuple.TupleLike
+import org.apache.texera.amber.operator.source.scan.FolderInputResolver
 import org.apache.texera.amber.util.JSONUtils.objectMapper
 
 import java.io.IOException
+import java.net.URI
 
 class FileScanSourceOpExec private[scan] (
     descString: String
@@ -33,14 +35,21 @@ class FileScanSourceOpExec private[scan] (
 
   @throws[IOException]
   override def produceTuple(): Iterator[TupleLike] = {
-    FileScanUtils.createTuplesFromFile(
-      fileName = desc.fileName.get,
-      attributeType = desc.attributeType,
-      fileEncoding = desc.fileEncoding,
-      extract = desc.extract,
-      outputFileName = desc.outputFileName,
-      fileScanOffset = desc.fileScanOffset,
-      fileScanLimit = desc.fileScanLimit
-    )
+    FolderInputResolver
+      .resolve(new URI(desc.fileName.get)) // one entry for a plain file, one per file for a folder
+      .files
+      .iterator
+      .flatMap(file => // chains the per-file tuple iterators in resolver order
+        FileScanUtils.createTuplesFromFile(
+          fileName = file.uri.toASCIIString,
+          displayFileName = file.displayName, // NOTE(review): confirm createTuplesFromFile declares this parameter
+          attributeType = desc.attributeType,
+          fileEncoding = desc.fileEncoding,
+          extract = desc.extract,
+          outputFileName = desc.outputFileName,
+          fileScanOffset = desc.fileScanOffset, // NOTE(review): offset and limit go to every file's call,
+          fileScanLimit = desc.fileScanLimit // so they appear to apply per file, not to the whole folder — confirm
+        )
+      )
   }
 }
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanUtils.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanUtils.scala
index a7f81b4869c..e022d96e435 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanUtils.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/file/FileScanUtils.scala
@@ -110,7 +110,8 @@ private[file] object FileScanUtils {
             TupleLike(fields.toSeq: _*)
         }
       } else {
-        fileEntries.flatMap(entry =>
+        fileEntries.zipAll(filenameIt, null, null).flatMap {
+          case (entry, entryFileName) =>
           new BufferedReader(new InputStreamReader(entry, fileEncoding.getCharset))
             .lines()
             .iterator()
@@ -119,13 +120,14 @@ private[file] object FileScanUtils {
               fileScanOffset.getOrElse(0),
               fileScanOffset.getOrElse(0) + fileScanLimit.getOrElse(Int.MaxValue)
             )
-            .map(line =>
-              TupleLike(attributeType match {
+            .map { line =>
+              val parsed = attributeType match {
                 case FileAttributeType.SINGLE_STRING => line
                 case _                               => parseField(line, attributeType.getType)
-              })
-            )
-        )
+              }
+              if (outputFileName) TupleLike(entryFileName, parsed) else TupleLike(parsed)
+            }
+        }
       }
 
     new AutoClosingIterator(rawIterator, () => closeables.foreach(_.close()))
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSniffer.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSniffer.scala
new file mode 100644
index 00000000000..fe73bf36f17
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSniffer.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import com.univocity.parsers.csv.{CsvFormat, CsvParser, CsvParserSettings}
+import org.apache.texera.amber.core.tuple.{AttributeType, AttributeTypeUtils}
+
+import java.io.StringReader
+
+/** Reading parameters guessed for a CSV-family file: the field delimiter and header presence. */
+case class CSVDialect(delimiter: Char, hasHeader: Boolean)
+
+/**
+  * Guesses the dialect of a CSV-family file from a decoded text sample (the caller is expected
+  * to pass roughly the first 64 KB). The delimiter yielding the steadiest column count wins,
+  * and the first row is then classified as header or data.
+  *
+  * This is a heuristic: quoted values spanning several lines can mislead it on very short
+  * samples, but it handles the common cases the Smart File Source targets.
+  */
+object CSVDialectSniffer {
+
+  private val Candidates: Seq[Char] = Seq(',', '\t', ';', '|')
+
+  /**
+    * @param sampleText decoded text sample
+    * @param preferred  delimiter suggested by the file extension (`,` for `.csv`, `\t` for
+    *                   `.tsv`). It is kept whenever it scores at least 0.5, even if another
+    *                   candidate scores higher.
+    * @return the chosen delimiter and whether the first row looks like a header
+    */
+  def sniff(sampleText: String, preferred: Option[Char] = None): CSVDialect = {
+    val scoreOf: Map[Char, Double] =
+      Candidates.map(candidate => candidate -> scoreDelimiter(sampleText, candidate)).toMap
+
+    val hintAccepted = preferred.exists(hint => scoreOf.getOrElse(hint, 0.0) >= 0.5)
+    val delimiter =
+      if (hintAccepted) preferred.get
+      else {
+        val ranked = scoreOf.toSeq.filter(_._2 > 0.0).sortBy(entry => -entry._2)
+        // No candidate scored: fall back to comma; downstream parsing will surface a real error.
+        if (ranked.isEmpty) ',' else ranked.head._1
+      }
+
+    CSVDialect(delimiter, detectHeader(sampleText, delimiter))
+  }
+
+  /**
+    * Rates how stable the per-row column count is under `delimiter`. The result is
+    * `(rows_with_modal_count - 1) / total_rows`, and 0.0 whenever fewer than two rows are
+    * available or the modal column count is below two.
+    */
+  private def scoreDelimiter(sample: String, delimiter: Char): Double = {
+    val rows = parseRows(sample, delimiter, headerExtraction = false, maxRows = 30)
+    val widths = rows.map(_.length).filter(_ > 0)
+    if (rows.size < 2 || widths.length < 2) 0.0
+    else {
+      val (modalWidth, rowsAtModalWidth) =
+        widths.groupBy(identity).view.mapValues(_.size).maxBy(_._2)
+      // A single-column "match" says nothing about the delimiter.
+      if (modalWidth < 2) 0.0 else (rowsAtModalWidth - 1).toDouble / rows.size
+    }
+  }
+
+  /**
+    * Decides whether the first row is a header: it is when some column infers as a non-STRING
+    * type from the later rows while the first row's cell in that column infers as STRING.
+    * Samples with fewer than two rows, or an empty first row, are assumed to have a header.
+    */
+  private def detectHeader(sample: String, delimiter: Char): Boolean = {
+    val rows = parseRows(sample, delimiter, headerExtraction = false, maxRows = 30)
+    // Too little data to compare: assume a header, since most CSVs have one.
+    if (rows.size < 2 || rows.head.isEmpty) return true
+
+    val header = rows.head
+    val width = header.length
+    // Pad or cut every later row to the first row's width so columns line up.
+    val bodyTypes: Array[AttributeType] = AttributeTypeUtils.inferSchemaFromRows(
+      rows.tail.iterator.map(r => r.padTo(width, "").take(width).asInstanceOf[Array[Any]])
+    )
+    val headerTypes = header.map { cell =>
+      if (cell == null || cell.trim.isEmpty) AttributeType.STRING
+      else AttributeTypeUtils.inferField(cell)
+    }
+
+    bodyTypes.indices.exists { i =>
+      bodyTypes(i) != AttributeType.STRING && i < headerTypes.length &&
+      headerTypes(i) == AttributeType.STRING
+    }
+  }
+
+  /**
+    * Parses at most `maxRows` rows of `sample` with the given delimiter. The line separator is
+    * "\n", the comment character is set to NUL (presumably to switch comment lines off), the
+    * per-column length limit is lifted, and null values are returned as "".
+    */
+  private def parseRows(
+      sample: String,
+      delimiter: Char,
+      headerExtraction: Boolean,
+      maxRows: Int
+  ): Array[Array[String]] = {
+    val csvFormat = new CsvFormat()
+    csvFormat.setDelimiter(delimiter)
+    csvFormat.setLineSeparator("\n")
+    csvFormat.setComment('\u0000')
+
+    val parserSettings = new CsvParserSettings()
+    parserSettings.setFormat(csvFormat)
+    parserSettings.setMaxCharsPerColumn(-1)
+    parserSettings.setHeaderExtractionEnabled(headerExtraction)
+    parserSettings.setNullValue("")
+
+    val parser = new CsvParser(parserSettings)
+    val input = new StringReader(sample)
+    try {
+      parser.beginParsing(input)
+      val collected = scala.collection.mutable.ArrayBuffer.empty[Array[String]]
+      var next = parser.parseNext()
+      while (next != null && collected.size < maxRows) {
+        collected += next
+        next = parser.parseNext()
+      }
+      parser.stopParsing()
+      collected.toArray
+    } finally input.close()
+  }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetector.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetector.scala
new file mode 100644
index 00000000000..143b1a9290f
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetector.scala
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.apache.texera.amber.util.ImageFormatUtils
+
+import java.nio.charset.Charset
+
+/**
+  * Guesses the `SmartFileFormat` of a file from the first bytes of its content and, when
+  * available, its name.
+  */
+object FormatDetector {
+
+  // Magic bytes used by the formats we support.
+  private val ParquetMagic: Array[Byte] = "PAR1".getBytes("US-ASCII")
+  // PK\x03\x04: a ZIP container, which .xlsx shares with every other ZIP-based format.
+  private val XlsxMagic: Array[Byte] = Array(0x50, 0x4b, 0x03, 0x04).map(_.toByte)
+  private val OleMagic: Array[Byte] = // legacy .xls (OLE2 compound document)
+    Array(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1).map(_.toByte)
+  // The Arrow IPC *file* format starts with "ARROW1". The IPC *streaming* format carries no
+  // such signature, so an Arrow stream is only recognized through its file extension.
+  private val ArrowMagic: Array[Byte] = "ARROW1".getBytes("US-ASCII")
+  // Byte-order mark as it appears after decoding with a Unicode charset.
+  private val ByteOrderMark = "\uFEFF"
+
+  /**
+    * Cheap detection from a byte sample plus an optional file name.
+    *
+    * Order: magic bytes of Parquet, legacy Excel, Arrow and images first; then the file
+    * extension; then content sniffing of the decoded text.
+    *
+    * @param fileNameHint file name whose extension may identify the format
+    * @param sample       leading bytes of the file
+    * @param charset      charset used to decode `sample` when sniffing text
+    */
+  def detect(
+      fileNameHint: Option[String],
+      sample: Array[Byte],
+      charset: Charset
+  ): SmartFileFormat = {
+    if (startsWith(sample, ParquetMagic)) return SmartFileFormat.PARQUET
+    if (startsWith(sample, OleMagic)) return SmartFileFormat.EXCEL
+    if (startsWith(sample, ArrowMagic)) return SmartFileFormat.ARROW
+    if (ImageFormatUtils.detectFormat(sample).nonEmpty) return SmartFileFormat.IMAGE
+
+    val extensionDetected = fileNameHint.flatMap(extensionFormat)
+    // A ZIP signature alone is not proof of a workbook, so it only counts together with an
+    // Excel extension.
+    if (startsWith(sample, XlsxMagic) && extensionDetected.contains(SmartFileFormat.EXCEL)) {
+      return SmartFileFormat.EXCEL
+    }
+
+    // getOrElse instead of `foreach(return _)`: a `return` inside a lambda is a non-local
+    // return, which Scala implements by throwing a control exception.
+    extensionDetected.getOrElse(sniffText(sample, charset))
+  }
+
+  /** Extension-based detection. Returns None if extension is unknown or absent. */
+  def extensionFormat(fileName: String): Option[SmartFileFormat] = {
+    val lower = fileName.toLowerCase
+    val dot = lower.lastIndexOf('.')
+    if (dot < 0) return None
+    lower.substring(dot + 1) match {
+      case "csv"                       => Some(SmartFileFormat.CSV)
+      case "tsv" | "tab"               => Some(SmartFileFormat.TSV)
+      case "json"                      => Some(SmartFileFormat.JSON)
+      case "jsonl" | "ndjson"          => Some(SmartFileFormat.JSONL)
+      case "arrow"                     => Some(SmartFileFormat.ARROW)
+      case "parquet" | "pq"            => Some(SmartFileFormat.PARQUET)
+      case "xlsx" | "xls" | "xlsm"     => Some(SmartFileFormat.EXCEL)
+      case "png" | "jpg" | "jpeg" |
+          "gif" | "webp"               => Some(SmartFileFormat.IMAGE)
+      case "txt" | "log"               => Some(SmartFileFormat.TEXT)
+      case _                           => None
+    }
+  }
+
+  /**
+    * Content-based sniffing for text formats when neither magic bytes nor extension give a
+    * definitive answer. A leading byte-order mark is ignored. Heuristics:
+    *   - first non-blank char `[` → JSON
+    *   - first non-blank char `{` → JSONL if at least two lines start with `{`, else JSON
+    *   - among the first 30 lines: tabs on at least as many lines as commas → TSV
+    *   - otherwise commas on some line → CSV
+    *   - anything else, including an empty sample → TEXT
+    */
+  private def sniffText(sample: Array[Byte], charset: Charset): SmartFileFormat = {
+    // Without this a BOM would be taken for the first character and hide a leading `{` or `[`.
+    val text = new String(sample, charset).stripPrefix(ByteOrderMark)
+    val trimmed = text.dropWhile(_.isWhitespace)
+    if (trimmed.isEmpty) return SmartFileFormat.TEXT
+
+    trimmed.head match {
+      case '[' => return SmartFileFormat.JSON
+      case '{' =>
+        // A single (possibly pretty-printed) JSON object, or JSONL with one object per line.
+        val objectLineStarts = text.linesIterator
+          .filter(_.nonEmpty)
+          .count(line => line.headOption.contains('{'))
+        return if (objectLineStarts >= 2) SmartFileFormat.JSONL else SmartFileFormat.JSON
+      case _ =>
+    }
+
+    // Delimiter heuristic — only the first ~30 lines.
+    val lines = text.linesIterator.take(30).filter(_.nonEmpty).toList
+    if (lines.isEmpty) return SmartFileFormat.TEXT
+    val tabHits = lines.count(_.contains('\t'))
+    val commaHits = lines.count(_.contains(','))
+    if (tabHits > 0 && tabHits >= commaHits) SmartFileFormat.TSV
+    else if (commaHits > 0) SmartFileFormat.CSV
+    else SmartFileFormat.TEXT
+  }
+
+  /** True when `sample` is at least as long as `prefix` and begins with all of its bytes. */
+  private def startsWith(sample: Array[Byte], prefix: Array[Byte]): Boolean = {
+    if (sample.length < prefix.length) return false
+    var i = 0
+    while (i < prefix.length) {
+      if (sample(i) != prefix(i)) return false
+      i += 1
+    }
+    true
+  }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/ParquetUtils.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/ParquetUtils.scala
new file mode 100644
index 00000000000..3954c8cf55b
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/ParquetUtils.scala
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.example.data.Group
+import org.apache.parquet.example.data.simple.convert.GroupRecordConverter
+import org.apache.parquet.hadoop.ParquetFileReader
+import org.apache.parquet.hadoop.util.HadoopInputFile
+import org.apache.parquet.io.ColumnIOFactory
+import org.apache.parquet.schema.LogicalTypeAnnotation
+import org.apache.parquet.schema.LogicalTypeAnnotation.{
+  DateLogicalTypeAnnotation,
+  StringLogicalTypeAnnotation,
+  TimestampLogicalTypeAnnotation
+}
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
+import org.apache.parquet.schema.{MessageType, PrimitiveType, Type}
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+
+import java.io.File
+
+object ParquetUtils {
+
+  /** Map a Parquet `MessageType` onto a Texera Schema, leaving out nested (non-primitive) fields. */
+  def toTexeraSchema(messageType: MessageType): Schema = {
+    // Visit top-level fields by position so attributes keep the Parquet field order.
+    val primitiveAttributes = (0 until messageType.getFieldCount).toList.flatMap { position =>
+      val candidate: Type = messageType.getType(position)
+      if (candidate.isPrimitive) {
+        val attributeType = toAttributeType(candidate.asPrimitiveType())
+        Some(new Attribute(candidate.getName, attributeType))
+      } else {
+        None // a non-primitive field has no counterpart in the flat schema
+      }
+    }
+    Schema(primitiveAttributes)
+  }
+
+  def toAttributeType(primitive: PrimitiveType): AttributeType = {
+    // The physical type picks the base mapping; the logical annotation (possibly null) refines it.
+    val annotation = primitive.getLogicalTypeAnnotation
+    primitive.getPrimitiveTypeName match {
+      case PrimitiveTypeName.BOOLEAN => AttributeType.BOOLEAN
+      // INT32 carrying a DATE annotation is surfaced as a timestamp, otherwise as an integer.
+      case PrimitiveTypeName.INT32 if annotation.isInstanceOf[DateLogicalTypeAnnotation] =>
+        AttributeType.TIMESTAMP
+      case PrimitiveTypeName.INT32 => AttributeType.INTEGER
+      // INT64 carrying a TIMESTAMP annotation is a timestamp, otherwise a long.
+      case PrimitiveTypeName.INT64 if annotation.isInstanceOf[TimestampLogicalTypeAnnotation] =>
+        AttributeType.TIMESTAMP
+      case PrimitiveTypeName.INT64 => AttributeType.LONG
+      // FLOAT and DOUBLE both map to DOUBLE.
+      case PrimitiveTypeName.FLOAT  => AttributeType.DOUBLE
+      case PrimitiveTypeName.DOUBLE => AttributeType.DOUBLE
+      case PrimitiveTypeName.INT96  => AttributeType.TIMESTAMP
+      case PrimitiveTypeName.BINARY =>
+        // String annotations, plus whatever isStringLike accepts, become STRING.
+        val textual =
+          annotation.isInstanceOf[StringLogicalTypeAnnotation] || isStringLike(annotation)
+        if (textual) AttributeType.STRING else AttributeType.BINARY
+      case PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY => AttributeType.BINARY
+    }
+  }
+
+  private def isStringLike(logical: LogicalTypeAnnotation): Boolean = {
+    // Class checks (false for null) replace the per-value `toString.toLowerCase` substring scan.
+    logical.isInstanceOf[StringLogicalTypeAnnotation] ||
+    logical.isInstanceOf[LogicalTypeAnnotation.EnumLogicalTypeAnnotation] || // ENUM is text
+    logical.isInstanceOf[LogicalTypeAnnotation.JsonLogicalTypeAnnotation] // JSON is text
+  }
+
+  /** Open a `ParquetFileReader` on a local file; the caller owns (and closes) the reader. */
+  def openReader(file: File): ParquetFileReader = {
+    val localPath = new Path(file.toURI)
+    val hadoopInput = HadoopInputFile.fromPath(localPath, newConfiguration())
+    ParquetFileReader.open(hadoopInput)
+  }
+
+  /**
+    * Read the file into a lazy iterator of `Group` records (the first row group is loaded eagerly).
+    * On success the caller must close via [[ParquetReadHandle.close]]; on failure the reader is closed here.
+    */
+  def openRecords(file: File): ParquetReadHandle = {
+    val reader = openReader(file)
+    try {
+      val schema = reader.getFooter.getFileMetaData.getSchema
+      val converter = new GroupRecordConverter(schema)
+      val columnIO = new ColumnIOFactory().getColumnIO(schema)
+      val iterator = new Iterator[Group] {
+        private var currentPages = reader.readNextRowGroup()
+        private var recordReader =
+          if (currentPages != null) columnIO.getRecordReader(currentPages, converter) else null
+        private var remaining: Long = if (currentPages != null) currentPages.getRowCount else 0L
+        override def hasNext: Boolean = {
+          // Skip exhausted or empty row groups; a null row group marks the end of the file.
+          while (remaining == 0 && currentPages != null) {
+            currentPages = reader.readNextRowGroup()
+            if (currentPages != null && currentPages.getRowCount > 0) {
+              recordReader = columnIO.getRecordReader(currentPages, converter)
+              remaining = currentPages.getRowCount
+            }
+          }
+          remaining > 0
+        }
+        override def next(): Group = {
+          if (!hasNext) throw new NoSuchElementException
+          remaining -= 1
+          recordReader.read().asInstanceOf[Group]
+        }
+      }
+      ParquetReadHandle(schema, iterator, () => reader.close())
+    } catch {
+      case e: Throwable => // no handle was returned yet, so release the reader here
+        try reader.close() catch { case s: Throwable => e.addSuppressed(s) }
+        throw e
+    }
+  }
+
+  /** Read field `index` of `group` as a value, or null when it is absent or not primitive in `schema`. */
+  def readField(group: Group, index: Int, schema: MessageType): Any = {
+    if (group.getFieldRepetitionCount(index) == 0) return null
+    val field = schema.getType(index)
+    if (!field.isPrimitive) return null
+    val primitive = field.asPrimitiveType()
+    primitive.getPrimitiveTypeName match {
+      case PrimitiveTypeName.BOOLEAN => group.getBoolean(index, 0)
+      case PrimitiveTypeName.INT32 =>
+        primitive.getLogicalTypeAnnotation match {
+          case _: DateLogicalTypeAnnotation =>
+            // Date stored as days since epoch.
+            val days = group.getInteger(index, 0).toLong
+            new java.sql.Timestamp(days * 86400000L)
+          case _ => Int.box(group.getInteger(index, 0))
+        }
+      case PrimitiveTypeName.INT64 =>
+        primitive.getLogicalTypeAnnotation match {
+          case ts: TimestampLogicalTypeAnnotation =>
+            val raw = group.getLong(index, 0)
+            val millis = ts.getUnit match { // floorDiv keeps pre-1970 (negative) instants in the right ms
+              case LogicalTypeAnnotation.TimeUnit.MILLIS => raw
+              case LogicalTypeAnnotation.TimeUnit.MICROS => Math.floorDiv(raw, 1000L)
+              case LogicalTypeAnnotation.TimeUnit.NANOS  => Math.floorDiv(raw, 1000000L)
+            }
+            new java.sql.Timestamp(millis)
+          case _ => Long.box(group.getLong(index, 0))
+        }
+      case PrimitiveTypeName.FLOAT  => Double.box(group.getFloat(index, 0).toDouble)
+      case PrimitiveTypeName.DOUBLE => Double.box(group.getDouble(index, 0))
+      case PrimitiveTypeName.INT96 =>
+        // INT96 → 96-bit timestamp, decoded by int96ToTimestamp from the raw 12 bytes.
+        val binary = group.getInt96(index, 0)
+        int96ToTimestamp(binary.getBytes)
+      case PrimitiveTypeName.BINARY =>
+        val binary = group.getBinary(index, 0)
+        primitive.getLogicalTypeAnnotation match {
+          case _: StringLogicalTypeAnnotation              => binary.toStringUsingUTF8
+          case logical if isStringLike(logical)            => binary.toStringUsingUTF8
+          case _                                           => binary.getBytes
+        }
+      case PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY => group.getBinary(index, 0).getBytes
+    }
+  }
+
+  private def int96ToTimestamp(bytes: Array[Byte]): java.sql.Timestamp = {
+    // Layout: bytes 0-7 hold nanoseconds within the day, bytes 8-11 the Julian day number,
+    // both little-endian, so byte k contributes its unsigned value shifted left by 8 * k.
+    val nanosOfDay =
+      (0 until 8).foldLeft(0L)((acc, k) => acc | ((bytes(k).toLong & 0xff) << (8 * k)))
+    val julianDay =
+      (0 until 4).foldLeft(0)((acc, k) => acc | ((bytes(8 + k).toInt & 0xff) << (8 * k)))
+    val epochDays = (julianDay - 2440588).toLong // Julian day 2440588 = 1970-01-01
+    new java.sql.Timestamp(epochDays * 86400000L + nanosOfDay / 1000000L)
+  }
+
+  private def newConfiguration(): Configuration = {
+    // `false` asks Hadoop not to load its default resources; only local files are read here.
+    val localOnly = new Configuration(false)
+    localOnly.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem")
+    localOnly
+  }
+
+  case class ParquetReadHandle( // pairs a record iterator with the action that releases its source
+      schema: MessageType, // Parquet schema supplied by the creator (openRecords passes the footer schema)
+      records: Iterator[Group], // the records to consume
+      closer: () => Unit // release action; run by close()
+  ) {
+    def close(): Unit = closer() // delegates to `closer` each time it is called
+  }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileFormat.java b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileFormat.java
new file mode 100644
index 00000000000..190b367daec
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileFormat.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+
+public enum SmartFileFormat {
+    AUTO("Auto-detect"),
+    CSV("CSV"),
+    TSV("TSV"),
+    JSON("JSON"),
+    JSONL("JSONL"),
+    ARROW("Arrow"),
+    PARQUET("Parquet"),
+    EXCEL("Excel"),
+    IMAGE("Image"),
+    TEXT("Plain text");
+
+    private final String label;
+
+    SmartFileFormat(String label) {
+        this.label = label;
+    }
+
+    @JsonValue
+    public String getLabel() {
+        return label;
+    }
+
+    /** Accept either the enum name (e.g. "CSV") or the label (e.g. "Plain text"). */
+    @JsonCreator
+    public static SmartFileFormat fromString(String value) {
+        if (value == null) {
+            return null;
+        }
+        for (SmartFileFormat format : values()) {
+            if (format.name().equalsIgnoreCase(value) || format.label.equalsIgnoreCase(value)) {
+                return format;
+            }
+        }
+        throw new IllegalArgumentException("Unknown SmartFileFormat: " + value);
+    }
+
+    @Override
+    public String toString() {
+        return label;
+    }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileInferencer.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileInferencer.scala
new file mode 100644
index 00000000000..0a657639d65
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileInferencer.scala
@@ -0,0 +1,476 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.univocity.parsers.csv.{CsvFormat, CsvParser, CsvParserSettings}
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector.ipc.ArrowFileReader
+import org.apache.poi.ss.usermodel.{Cell, CellType, DateUtil, Sheet, WorkbookFactory}
+import org.apache.texera.amber.core.storage.DocumentFactory
+import org.apache.texera.amber.core.tuple.AttributeTypeUtils.inferSchemaFromRows
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.operator.source.scan.FolderInputResolver
+import org.apache.texera.amber.util.ArrowUtils
+import org.apache.texera.amber.util.JSONUtils.{JSONToMap, objectMapper}
+
+import java.io.{BufferedReader, InputStream, InputStreamReader}
+import java.net.URI
+import java.nio.charset.Charset
+import java.nio.file.{Files, StandardOpenOption}
+import scala.collection.mutable.ArrayBuffer
+import scala.jdk.CollectionConverters._
+import scala.util.Using
+
+/**
+  * Overrides supplied by the user. Each `Some(...)` value short-circuits the corresponding
+  * detection step; `None` means "let the inferencer decide".
+  */
+case class InferenceOverrides(
+    format: Option[SmartFileFormat] = None, // Some(AUTO) is treated like None: the format is detected
+    delimiter: Option[Char] = None, // CSV/TSV: replaces the sniffed delimiter
+    hasHeader: Option[Boolean] = None, // CSV/TSV: replaces the sniffed flag; Excel: defaults to true
+    sheetName: Option[String] = None, // Excel: an unknown name falls back to the first sheet
+    flatten: Option[Boolean] = None // JSON/JSONL: passed to JSONToMap; defaults to false
+)
+
+/**
+  * The full inference result. Carries the inferred schema along with the configuration
+  * the runtime executor needs to read the file the same way the inferencer did.
+  */
+case class InferenceResult(
+    format: SmartFileFormat, // resolved format
+    schema: Schema, // inferred schema
+    csvDelimiter: Option[String] = None, // set for CSV/TSV: the delimiter as a one-character string
+    csvHasHeader: Option[Boolean] = None, // set for CSV/TSV and also for Excel
+    sheetName: Option[String] = None, // set for Excel: the sheet that was actually read
+    availableSheetNames: List[String] = Nil, // set for Excel: every sheet name in the workbook
+    flatten: Option[Boolean] = None, // set for JSON/JSONL: the flatten flag used during inference
+    isFolder: Boolean = false, // true when the result describes a folder of files
+    fileCount: Int = 1 // number of files covered (folder size for folder results)
+)
+
+/**
+  * The single source of truth for "look at this file and decide how to read it."
+  * Both the operator descriptor (compile-time schema declaration) and the live
+  * preview REST endpoint route through this object so their behavior is identical.
+  */
+object SmartFileInferencer {
+
+  /** Bytes to read when sniffing format / delimiter / header. */
+  private val SampleByteCount = 64 * 1024
+
+  /** Rows to read when inferring types. Matches `ScanSourceOpDesc.INFER_READ_LIMIT`. */
+  private val InferRowLimit = 100
+
+  /** Detect the format from the URI path and a content sample of at most `SampleByteCount` bytes. */
+  def detect(uri: URI, encoding: Charset): SmartFileFormat = {
+    val pathHint = Some(uri.getPath)
+    FormatDetector.detect(pathHint, readSampleBytes(uri), encoding)
+  }
+
+  /** Full inference: format detection + schema, for a single file or a whole folder. */
+  def infer(uri: URI, encoding: Charset, overrides: InferenceOverrides): InferenceResult = {
+    // The resolver reports whether the URI is a folder and, if so, which files it holds.
+    val resolved = FolderInputResolver.resolve(uri)
+    // A plain file is inferred directly from `uri`; the resolver's file list is only
+    // consulted for folders.
+    if (!resolved.isFolder) inferSingle(uri, encoding, overrides)
+    else inferFolder(uri, resolved.files.map(_.uri), encoding, overrides)
+  }
+
+  private def inferFolder(
+      folderUri: URI,
+      files: List[URI],
+      encoding: Charset,
+      overrides: InferenceOverrides
+  ): InferenceResult = {
+    // A folder without files offers nothing to infer from.
+    if (files.isEmpty) {
+      throw new IllegalArgumentException(s"Folder $folderUri does not contain any readable files")
+    }
+    // Every file is inferred before any consistency check runs.
+    val perFile = files.map(inferSingle(_, encoding, overrides))
+    val reference = perFile.head
+    // All files must share the first file's format ...
+    if (perFile.exists(_.format != reference.format)) {
+      throw new IllegalArgumentException(
+        s"Folder $folderUri must contain files with the same detected format"
+      )
+    }
+    // ... and its schema, compared as ordered (name, type) pairs.
+    val referenceSignature = schemaSignature(reference.schema)
+    if (perFile.exists(result => schemaSignature(result.schema) != referenceSignature)) {
+      throw new IllegalArgumentException(
+        s"Folder $folderUri must contain files with the same inferred schema"
+      )
+    }
+
+    reference.copy(isFolder = true, fileCount = files.size)
+  }
+
+  private def inferSingle(uri: URI, encoding: Charset, overrides: InferenceOverrides): InferenceResult = {
+    // An explicit, non-AUTO override wins; otherwise the format is detected from the file.
+    val resolvedFormat = overrides.format match {
+      case Some(chosen) if chosen != SmartFileFormat.AUTO => chosen
+      case _ => FormatDetector.detect(Some(uri.getPath), readSampleBytes(uri), encoding)
+    }
+
+    resolvedFormat match {
+      case SmartFileFormat.CSV | SmartFileFormat.TSV =>
+        inferCsv(uri, encoding, resolvedFormat, overrides)
+      case SmartFileFormat.JSONL   => inferJsonl(uri, encoding, overrides)
+      case SmartFileFormat.JSON    => inferJson(uri, encoding, overrides)
+      case SmartFileFormat.ARROW   => inferArrow(uri)
+      case SmartFileFormat.PARQUET => inferParquet(uri)
+      case SmartFileFormat.EXCEL   => inferExcel(uri, overrides)
+      case SmartFileFormat.IMAGE   => inferImage()
+      case SmartFileFormat.TEXT    => inferText()
+      case SmartFileFormat.AUTO =>
+        throw new IllegalStateException("AUTO should have been resolved before dispatch")
+    }
+  }
+
+  private def schemaSignature(schema: Schema): List[(String, AttributeType)] =
+    schema.getAttributes.map(a => (a.getName, a.getType)) // ordered (name, type) pairs
+
+  // ---------------------------------------------------------------------------
+  // CSV / TSV
+  // ---------------------------------------------------------------------------
+
+  private def inferCsv(
+      uri: URI,
+      encoding: Charset,
+      format: SmartFileFormat,
+      overrides: InferenceOverrides
+  ): InferenceResult = {
+    // The declared format only picks the delimiter handed to the sniffer as its preference:
+    // a tab for TSV, a comma otherwise.
+    val preferredDelimiter = if (format == SmartFileFormat.TSV) '\t' else ','
+    val sampleText = readSampleText(uri, encoding)
+    val dialect = CSVDialectSniffer.sniff(sampleText, Some(preferredDelimiter))
+    // Explicit user overrides take precedence over whatever was sniffed.
+    val delimiter = overrides.delimiter.getOrElse(dialect.delimiter)
+    val hasHeader = overrides.hasHeader.getOrElse(dialect.hasHeader)
+    // The schema is inferred with the final dialect, which is also reported to the caller.
+    InferenceResult(
+      format = format,
+      schema = inferCsvSchema(uri, encoding, delimiter, hasHeader),
+      csvDelimiter = Some(delimiter.toString),
+      csvHasHeader = Some(hasHeader)
+    )
+  }
+
+  private def inferCsvSchema( // infers column names and types from the first InferRowLimit rows
+      uri: URI,
+      encoding: Charset,
+      delimiter: Char,
+      hasHeader: Boolean
+  ): Schema = {
+    val csvFormat = new CsvFormat()
+    csvFormat.setDelimiter(delimiter)
+    csvFormat.setLineSeparator("\n")
+    csvFormat.setComment('\u0000') // NOTE(review): presumably disables comment-line handling - confirm
+    val settings = new CsvParserSettings()
+    settings.setMaxCharsPerColumn(-1) // NOTE(review): -1 is assumed to lift the per-column cap - confirm
+    settings.setFormat(csvFormat)
+    settings.setHeaderExtractionEnabled(hasHeader) // the first row becomes the header when requested
+    settings.setNullValue("")
+
+    val parser = new CsvParser(settings)
+    val stream = openStream(uri)
+    val reader = new InputStreamReader(stream, encoding)
+    try {
+      parser.beginParsing(reader)
+      val rows = ArrayBuffer.empty[Array[String]]
+      var row = parser.parseNext()
+      var read = 0
+      while (row != null && read < InferRowLimit) { // keeps at most InferRowLimit rows
+        rows += row
+        read += 1
+        row = parser.parseNext() // a row parsed after the limit is reached is discarded
+      }
+      parser.stopParsing()
+      val attributeTypes = inferSchemaFromRows(rows.iterator.map(_.asInstanceOf[Array[Any]]))
+      val header = // parsed header names, or generated column-1..column-N when none are available
+        if (hasHeader)
+          Option(parser.getContext.headers())
+            .getOrElse((1 to attributeTypes.length).map(i => s"column-$i").toArray)
+        else
+          (1 to attributeTypes.length).map(i => s"column-$i").toArray
+      val pairs = header.indices.map { i => // the header decides the column count
+        val attributeType =
+          if (i < attributeTypes.length) attributeTypes(i) else AttributeType.STRING // no data: STRING
+        (header(i), attributeType)
+      }
+      pairs.foldLeft(Schema()) { case (s, (name, t)) => s.add(name, t) }
+    } finally reader.close() // closes the reader on success and on failure
+  }
+
+  // ---------------------------------------------------------------------------
+  // JSONL
+  // ---------------------------------------------------------------------------
+
+  private def inferJsonl(
+      uri: URI,
+      encoding: Charset,
+      overrides: InferenceOverrides
+  ): InferenceResult = {
+    val flatten = overrides.flatten.getOrElse(false)
+    val reader = new BufferedReader(new InputStreamReader(openStream(uri), encoding))
+    try {
+      // Sample the first InferRowLimit physical lines. Blank lines are skipped and lines
+      // whose JSON value is not an object contribute nothing, yet both still count
+      // toward the limit because `take` runs before the filters.
+      val sampledRows: List[Map[String, String]] = reader
+        .lines()
+        .iterator()
+        .asScala
+        .take(InferRowLimit)
+        .filter(line => line != null && line.trim.nonEmpty)
+        .map(line => objectMapper.readTree(line))
+        .filter(_.isObject)
+        .map(node => JSONToMap(node, flatten = flatten))
+        .toList
+      // Column order is the order in which field names are first seen across the
+      // sampled rows.
+      val orderedNames = sampledRows.flatMap(_.keys).distinct
+      InferenceResult(
+        format = SmartFileFormat.JSONL,
+        schema = buildJsonSchema(orderedNames, sampledRows),
+        flatten = Some(flatten)
+      )
+    } finally reader.close()
+  }
+
+  // ---------------------------------------------------------------------------
+  // JSON (single object or array of objects)
+  // ---------------------------------------------------------------------------
+
+  private def inferJson(
+      uri: URI,
+      encoding: Charset,
+      overrides: InferenceOverrides
+  ): InferenceResult = {
+    // Infers the schema of a JSON document that is either one object or an array of
+    // objects. `overrides.flatten` (default false) is forwarded to JSONToMap and echoed
+    // back in the result.
+    val flatten = overrides.flatten.getOrElse(false)
+    val reader = new InputStreamReader(openStream(uri), encoding)
+    try {
+      // The whole document is parsed up front; sampling happens on the parsed tree.
+      val root = objectMapper.readTree(reader)
+      // Candidate rows: the elements of an array, the root itself when it is an object,
+      // and nothing at all for any other kind of root value.
+      val candidates: Iterator[JsonNode] =
+        if (root.isArray) root.elements().asScala
+        else if (root.isObject) Iterator.single(root)
+        else Iterator.empty
+      // Only object nodes count toward the InferRowLimit sample; other array elements
+      // are skipped without consuming the budget.
+      val sampledRows: List[Map[String, String]] = candidates
+        .filter(_.isObject)
+        .take(InferRowLimit)
+        .map(node => JSONToMap(node, flatten = flatten))
+        .toList
+      // Column order is the order in which field names are first seen across the
+      // sampled rows.
+      val orderedNames = sampledRows.flatMap(_.keys).distinct
+
+      InferenceResult(
+        format = SmartFileFormat.JSON,
+        schema = buildJsonSchema(orderedNames, sampledRows),
+        flatten = Some(flatten)
+      )
+    } finally reader.close()
+  }
+
+  private def buildJsonSchema(orderedNames: List[String], rows: Seq[Map[String, String]]): Schema =
+    if (orderedNames.isEmpty) Schema()
+    else {
+      // Lay each row out in column order; a field missing from a row becomes null.
+      val table = rows.iterator.map(row => orderedNames.map(row.getOrElse(_, null)).toArray[Any])
+      val inferredTypes = inferSchemaFromRows(table)
+      val attributes = orderedNames.zipWithIndex.map {
+        case (name, i) => // a name without an inferred type falls back to STRING
+          new Attribute(name, if (i < inferredTypes.length) inferredTypes(i) else AttributeType.STRING)
+      }
+      Schema(attributes)
+    }
+
+  // ---------------------------------------------------------------------------
+  // Arrow
+  // ---------------------------------------------------------------------------
+
+  private def inferArrow(uri: URI): InferenceResult = {
+    val file = DocumentFactory.openReadonlyDocument(uri).asFile()
+    val schema = Using
+      .Manager { use =>
+        val allocator = use(new RootAllocator()) // registered first => closed last, after the reader
+        val channel = use(Files.newByteChannel(file.toPath, StandardOpenOption.READ))
+        val reader = use(new ArrowFileReader(channel, allocator))
+        ArrowUtils.toTexeraSchema(reader.getVectorSchemaRoot.getSchema)
+      } // keep the underlying failure as the cause instead of discarding it
+      .fold(e => throw new RuntimeException(s"Failed to read Arrow schema from $uri", e), s => s)
+    InferenceResult(format = SmartFileFormat.ARROW, schema = schema)
+  }
+
+  // ---------------------------------------------------------------------------
+  // Parquet
+  // ---------------------------------------------------------------------------
+
+  private def inferParquet(uri: URI): InferenceResult = {
+    val reader = ParquetUtils.openReader(DocumentFactory.openReadonlyDocument(uri).asFile())
+    try {
+      // Only the footer's schema is consulted here; this method reads no records.
+      val texeraSchema = ParquetUtils.toTexeraSchema(reader.getFooter.getFileMetaData.getSchema)
+      InferenceResult(format = SmartFileFormat.PARQUET, schema = texeraSchema)
+    } finally reader.close()
+  }
+
+  // ---------------------------------------------------------------------------
+  // Excel
+  // ---------------------------------------------------------------------------
+
+  private def inferExcel(uri: URI, overrides: InferenceOverrides): InferenceResult = {
+    val file = DocumentFactory.openReadonlyDocument(uri).asFile()
+    val workbook = WorkbookFactory.create(file, null, true) // read-only
+    try {
+      val sheetNames = (0 until workbook.getNumberOfSheets).map(workbook.getSheetName).toList
+      val targetSheet: Sheet = overrides.sheetName // requested sheet, else the first sheet
+        .flatMap(name => Option(workbook.getSheet(name))) // an unknown name yields None
+        .getOrElse(workbook.getSheetAt(0))
+      val hasHeader = overrides.hasHeader.getOrElse(true) // a header row is assumed unless overridden
+
+      val rowIter = targetSheet.iterator().asScala
+      val sampled = rowIter.take(InferRowLimit + 1).toList // one more than the limit: room for a header
+      if (sampled.isEmpty) { // empty sheet: report an empty schema but keep the sheet metadata
+        return InferenceResult(
+          format = SmartFileFormat.EXCEL,
+          schema = Schema(),
+          sheetName = Some(targetSheet.getSheetName),
+          availableSheetNames = sheetNames,
+          csvHasHeader = Some(hasHeader)
+        )
+      }
+
+      val columnCount = sampled.map(_.getLastCellNum.toInt).max // widest sampled row sets the width
+      val rowsAsStrings: List[Array[String]] = sampled.map { row =>
+        (0 until columnCount).map(c => cellToString(row.getCell(c))).toArray // missing cells -> null
+      }
+
+      val header: Array[String] =
+        if (hasHeader && rowsAsStrings.nonEmpty)
+          rowsAsStrings.head.zipWithIndex.map { // blank header cells get a generated name
+            case (s, i) => if (s == null || s.isEmpty) s"column-${i + 1}" else s
+          }
+        else (1 to columnCount).map(i => s"column-$i").toArray
+
+      val dataRows = if (hasHeader) rowsAsStrings.drop(1) else rowsAsStrings // header row is not data
+      val attributeTypes = inferSchemaFromRows(dataRows.iterator.map(_.asInstanceOf[Array[Any]]))
+
+      val schema = header.indices.foldLeft(Schema()) { (s, i) =>
+        val t = if (i < attributeTypes.length) attributeTypes(i) else AttributeType.STRING // fallback
+        s.add(header(i), t)
+      }
+
+      InferenceResult(
+        format = SmartFileFormat.EXCEL,
+        schema = schema,
+        sheetName = Some(targetSheet.getSheetName), // the sheet actually read, not the requested name
+        availableSheetNames = sheetNames,
+        csvHasHeader = Some(hasHeader) // the header flag is reported through the CSV-named field
+      )
+    } finally workbook.close() // closes the workbook on success and on failure
+  }
+
+  private def cellToString(cell: Cell): String = {
+    if (cell == null) return null
+    // If evaluation fails the cell stays FORMULA: use its cached result instead of recursing.
+    val resolved = if (cell.getCellType == CellType.FORMULA) safelyEvaluate(cell) else cell
+    val effectiveType =
+      if (resolved.getCellType == CellType.FORMULA) resolved.getCachedFormulaResultType
+      else resolved.getCellType
+    effectiveType match {
+      case CellType.STRING  => resolved.getStringCellValue
+      case CellType.BOOLEAN => String.valueOf(resolved.getBooleanCellValue)
+      case CellType.NUMERIC if DateUtil.isCellDateFormatted(resolved) =>
+        new java.sql.Timestamp(resolved.getDateCellValue.getTime).toString
+      case CellType.NUMERIC =>
+        val d = resolved.getNumericCellValue
+        if (d == d.toLong.toDouble) d.toLong.toString else d.toString // whole numbers print without ".0"
+      case _ => null // BLANK, _NONE, ERROR and anything else carry no text
+    }
+  }
+
+  private def safelyEvaluate(cell: Cell): Cell = {
+    try {
+      val workbook = cell.getSheet.getWorkbook // the evaluator is created from the cell's workbook
+      workbook.getCreationHelper.createFormulaEvaluator().evaluateInCell(cell)
+    } catch {
+      case _: Throwable => cell // best effort: any failure hands the same cell back
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Plain text
+  // ---------------------------------------------------------------------------
+
+  private def inferText(): InferenceResult = {
+    // Plain text exposes a single STRING column named "line".
+    val lineOnly = Schema(List(new Attribute("line", AttributeType.STRING)))
+    InferenceResult(format = SmartFileFormat.TEXT, schema = lineOnly)
+  }
+
+  private def inferImage(): InferenceResult = {
+    // Fixed schema for images: the binary payload plus format, width and height columns.
+    val imageSchema = Schema()
+      .add("image", AttributeType.BINARY)
+      .add("format", AttributeType.STRING)
+      .add("width", AttributeType.INTEGER)
+      .add("height", AttributeType.INTEGER)
+    InferenceResult(format = SmartFileFormat.IMAGE, schema = imageSchema)
+  }
+
+  // ---------------------------------------------------------------------------
+  // I/O helpers
+  // ---------------------------------------------------------------------------
+
+  private def openStream(uri: URI): InputStream = // opens a new stream per call; callers close it
+    DocumentFactory.openReadonlyDocument(uri).asInputStream()
+
+  private def readSampleBytes(uri: URI): Array[Byte] = {
+    val stream = openStream(uri)
+    try {
+      val buffer = new Array[Byte](SampleByteCount)
+      var filled = 0
+      var keepReading = true
+      // One read may deliver fewer bytes than asked for, so loop until full or exhausted.
+      while (keepReading && filled < buffer.length) {
+        val got = stream.read(buffer, filled, buffer.length - filled)
+        if (got > 0) filled += got else keepReading = false
+      }
+      if (filled < buffer.length) buffer.take(filled) else buffer
+    } finally stream.close()
+  }
+
+  private def readSampleText(uri: URI, charset: Charset): String = // decodes the byte sample as text
+    new String(readSampleBytes(uri), charset)
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDesc.scala
new file mode 100644
index 00000000000..e2101bd2808
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDesc.scala
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import com.fasterxml.jackson.annotation.{JsonInclude, JsonProperty, JsonPropertyDescription}
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.core.executor.OpExecWithClassName
+import org.apache.texera.amber.core.tuple.Schema
+import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity}
+import org.apache.texera.amber.core.workflow.{OutputPort, PhysicalOp, SchemaPropagationFunc}
+import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.operator.source.scan.ScanSourceOpDesc
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+
+import java.io.IOException
+import java.net.URI
+
+class SmartFileSourceOpDesc extends ScanSourceOpDesc {
+
+  @JsonProperty(defaultValue = "AUTO")
+  @JsonSchemaTitle("Format")
+  @JsonPropertyDescription("override automatic format detection")
+  var formatOverride: SmartFileFormat = SmartFileFormat.AUTO
+
+  @JsonProperty
+  @JsonSchemaTitle("Delimiter")
+  @JsonPropertyDescription("CSV/TSV delimiter (auto-detected if empty)")
+  @JsonInclude(JsonInclude.Include.NON_ABSENT)
+  var customDelimiter: Option[String] = None
+
+  @JsonProperty
+  @JsonSchemaTitle("Has Header")
+  @JsonPropertyDescription("first row contains column names (CSV/TSV/Excel)")
+  @JsonDeserialize(contentAs = classOf[java.lang.Boolean])
+  @JsonInclude(JsonInclude.Include.NON_ABSENT)
+  var hasHeader: Option[Boolean] = None
+
+  @JsonProperty
+  @JsonSchemaTitle("Excel Sheet Name")
+  @JsonPropertyDescription("for Excel files; leave empty to use the first sheet")
+  @JsonInclude(JsonInclude.Include.NON_ABSENT)
+  var sheetName: Option[String] = None
+
+  @JsonProperty
+  @JsonSchemaTitle("Flatten Nested JSON")
+  @JsonPropertyDescription("flatten nested JSON objects and arrays into dot-notation columns")
+  @JsonDeserialize(contentAs = classOf[java.lang.Boolean])
+  @JsonInclude(JsonInclude.Include.NON_ABSENT)
+  var flatten: Option[Boolean] = None
+
+  @JsonProperty(defaultValue = "false")
+  @JsonSchemaTitle("Include Source File")
+  @JsonPropertyDescription("append a source file column when reading folders")
+  var includeSourceFile: Boolean = false
+
+  @JsonProperty(defaultValue = "source_file")
+  @JsonSchemaTitle("Source File Column")
+  @JsonPropertyDescription("column name used when source file output is enabled")
+  var sourceFileAttribute: String = "source_file"
+
+  fileTypeName = Option("Smart")
+
+  override def operatorInfo: OperatorInfo = // a `def`: each call builds a fresh OperatorInfo
+    OperatorInfo(
+      userFriendlyName = "Smart Source",
+      operatorDescription =
+        "Auto-detects file format and schema for a file or a folder of similar files. Supports CSV, TSV, JSON, JSONL, Arrow, Parquet, Excel, images, and plain text.",
+      operatorGroupName = OperatorGroupConstants.INPUT_GROUP,
+      inputPorts = List.empty, // source operator: no inputs
+      outputPorts = List(OutputPort()) // one output port built with OutputPort's defaults
+    )
+
+  @throws[IOException]
+  override def getPhysicalOp(
+      workflowId: WorkflowIdentity,
+      executionId: ExecutionIdentity
+  ): PhysicalOp = {
+    // The executor is identified by class name and receives this descriptor as JSON.
+    val executor = OpExecWithClassName(
+      "org.apache.texera.amber.operator.source.scan.smart.SmartFileSourceOpExec",
+      objectMapper.writeValueAsString(this)
+    )
+    // operatorInfo is a def; read it once so every use below sees the same port lists.
+    val info = operatorInfo
+    PhysicalOp
+      .sourcePhysicalOp(workflowId, executionId, operatorIdentifier, executor)
+      .withInputPorts(info.inputPorts)
+      .withOutputPorts(info.outputPorts)
+      // The schema is computed each time the propagation function is invoked.
+      .withPropagateSchema(
+        SchemaPropagationFunc(_ => Map(info.outputPorts.head.id -> sourceSchema()))
+      )
+  }
+
+  override def sourceSchema(): Schema =
+    // Inference needs a resolved file; until then there is no schema and null is returned.
+    if (fileResolved()) withOptionalSourceFile(runInference().schema)
+    else null
+
+  /** Infers format and schema for `fileName`, using this descriptor's fields as overrides. */
+  def runInference(): InferenceResult =
+    SmartFileInferencer.infer(
+      new URI(fileName.get),
+      fileEncoding.getCharset,
+      InferenceOverrides(
+        // Option(...) turns a null formatOverride into None.
+        format = Option(formatOverride),
+        // Only the first character of a configured delimiter is used; "" acts like None.
+        delimiter = customDelimiter.flatMap(_.headOption),
+        hasHeader = hasHeader,
+        sheetName = sheetName,
+        flatten = flatten
+      )
+    )
+
+  def withOptionalSourceFile(schema: Schema): Schema = // adds the STRING column only when enabled
+    if (!includeSourceFile) schema
+    else schema.add(sourceFileAttribute, org.apache.texera.amber.core.tuple.AttributeType.STRING)
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExec.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExec.scala
new file mode 100644
index 00000000000..b6849bc0cd5
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExec.scala
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import com.fasterxml.jackson.databind.JsonNode
+import com.univocity.parsers.csv.{CsvFormat, CsvParser, CsvParserSettings}
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector.VectorSchemaRoot
+import org.apache.arrow.vector.ipc.ArrowFileReader
+import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory}
+import org.apache.texera.amber.core.executor.SourceOperatorExecutor
+import org.apache.texera.amber.core.storage.DocumentFactory
+import org.apache.texera.amber.core.tuple.{AttributeTypeUtils, Schema, TupleLike}
+import org.apache.texera.amber.operator.source.scan.FolderInputResolver
+import org.apache.texera.amber.util.{ArrowUtils, ImageFormatUtils, JSONUtils}
+import org.apache.texera.amber.util.JSONUtils.{JSONToMap, objectMapper}
+
+import java.io.{BufferedReader, ByteArrayInputStream, InputStreamReader}
+import java.net.URI
+import java.nio.file.{Files, StandardOpenOption}
+import javax.imageio.ImageIO
+import scala.collection.immutable.ArraySeq
+import scala.jdk.CollectionConverters._
+
+class SmartFileSourceOpExec(descString: String) extends SourceOperatorExecutor {
+
+  private val desc: SmartFileSourceOpDesc =
+    objectMapper.readValue(descString, classOf[SmartFileSourceOpDesc])
+
+  private var inference: InferenceResult = _
+  private var schema: Schema = _
+  private val resources = scala.collection.mutable.ListBuffer.empty[AutoCloseable]
+  private var tupleSource: Iterator[TupleLike] = Iterator.empty
+
+  private def closeableOf(fn: () => Unit): AutoCloseable = // lets a plain thunk join `resources`
+    () => fn()
+
+  override def open(): Unit = {
+    inference = desc.runInference() // inference is re-run here, in the executor
+    schema = desc.withOptionalSourceFile(inference.schema) // may add the source-file column
+    tupleSource = openReader() // lazy: each file is opened only when the iterator reaches it
+  }
+
+  override def produceTuple(): Iterator[TupleLike] = {
+    // Skip the optional offset first, then cap what remains with the optional limit.
+    val skipped = tupleSource.drop(desc.offset.getOrElse(0))
+    desc.limit.fold(skipped)(skipped.take)
+  }
+
+  override def close(): Unit = {
+    for (resource <- resources) { // registration order; one failing close must not stop the rest
+      try resource.close()
+      catch { case _: Throwable => /* swallow on shutdown */ }
+    }
+    resources.clear()
+  }
+
+  // ---------------------------------------------------------------------------
+  // Per-format readers
+  // ---------------------------------------------------------------------------
+
+  private def openReader(): Iterator[TupleLike] = { // one inferred format for every resolved file
+    def rowsOf(uri: URI): Iterator[TupleLike] = inference.format match {
+      case SmartFileFormat.CSV | SmartFileFormat.TSV => csvReader(uri)
+      case SmartFileFormat.JSONL                     => jsonlReader(uri)
+      case SmartFileFormat.JSON                      => jsonReader(uri)
+      case SmartFileFormat.ARROW                     => arrowReader(uri)
+      case SmartFileFormat.PARQUET                   => parquetReader(uri)
+      case SmartFileFormat.EXCEL                     => excelReader(uri)
+      case SmartFileFormat.IMAGE                     => imageReader(uri)
+      case SmartFileFormat.TEXT                      => textReader(uri)
+      case SmartFileFormat.AUTO =>
+        throw new IllegalStateException("AUTO should have been resolved by inferencer")
+    }
+    FolderInputResolver.resolve(new URI(desc.fileName.get)).files.iterator.flatMap { file =>
+      val rows = rowsOf(file.uri)
+      if (desc.includeSourceFile) rows.map(appendSourceFile(_, file.displayName)) else rows
+    }
+  }
+
+  private def appendSourceFile(tuple: TupleLike, displayName: String): TupleLike = // new last field
+    TupleLike(tuple.getFields :+ displayName)
+
+  // CSV / TSV ----------------------------------------------------------------
+
+  private def csvReader(uri: URI): Iterator[TupleLike] = {
+    // Delimiter precedence: inferred/overridden value, else tab for TSV, else comma.
+    val delimiter = inference.csvDelimiter
+      .flatMap(_.headOption)
+      .getOrElse(if (inference.format == SmartFileFormat.TSV) '\t' else ',')
+    val hasHeader = inference.csvHasHeader.getOrElse(true)
+    val stream = DocumentFactory.openReadonlyDocument(uri).asInputStream()
+    val reader = new InputStreamReader(stream, desc.fileEncoding.getCharset)
+    resources += reader
+
+    val format = new CsvFormat()
+    format.setDelimiter(delimiter)
+    format.setLineSeparator("\n")
+    // NOTE(review): '\u0000' presumably turns off comment-line handling; confirm in univocity docs.
+    format.setComment('\u0000')
+    val settings = new CsvParserSettings()
+    // NOTE(review): -1 presumably means "no per-column character limit"; confirm in univocity docs.
+    settings.setMaxCharsPerColumn(-1)
+    settings.setFormat(format)
+    settings.setHeaderExtractionEnabled(hasHeader)
+    settings.setNullValue("")
+    val parser = new CsvParser(settings)
+    parser.beginParsing(reader)
+    resources += closeableOf(() => parser.stopParsing())
+
+    // parseNext() returns null at end of input. Rows that cannot be coerced to the
+    // schema are skipped (best effort), but only for non-fatal errors: OutOfMemoryError,
+    // InterruptedException and similar must still propagate.
+    Iterator
+      .continually(parser.parseNext())
+      .takeWhile(_ != null)
+      .flatMap { row =>
+        try {
+          val fields = AttributeTypeUtils.parseFields(row.asInstanceOf[Array[Any]], schema)
+          Some(TupleLike(ArraySeq.unsafeWrapArray(fields): _*))
+        } catch {
+          case scala.util.control.NonFatal(_) => None
+        }
+      }
+  }
+
+  // JSONL --------------------------------------------------------------------
+
+  private def jsonlReader(uri: URI): Iterator[TupleLike] = {
+    val stream = DocumentFactory.openReadonlyDocument(uri).asInputStream()
+    val br = new BufferedReader(new InputStreamReader(stream, desc.fileEncoding.getCharset))
+    resources += br
+    val flatten = inference.flatten.getOrElse(false)
+    // Data columns only: the optional source-file column is appended later by openReader,
+    // so it must not be materialized here as an extra (null) field.
+    val names = inference.schema.getAttributeNames
+
+    // One JSON object per line; blank, non-object and unparsable lines are skipped.
+    br.lines().iterator().asScala
+      .filter(_.trim.nonEmpty)
+      .flatMap { line =>
+        try {
+          val node = objectMapper.readTree(line)
+          if (node.isObject) Some(buildTupleFromJsonObject(node, names, flatten)) else None
+        } catch {
+          case scala.util.control.NonFatal(_) => None // fatal JVM errors still propagate
+        }
+      }
+  }
+
+  // JSON ---------------------------------------------------------------------
+
+  private def jsonReader(uri: URI): Iterator[TupleLike] = {
+    val stream = DocumentFactory.openReadonlyDocument(uri).asInputStream()
+    val reader = new InputStreamReader(stream, desc.fileEncoding.getCharset)
+    resources += reader
+    val flatten = inference.flatten.getOrElse(false)
+    // Data columns only; openReader appends the optional source-file column itself.
+    val names = inference.schema.getAttributeNames
+
+    // Array root: one candidate per element; object root: itself; anything else: no rows.
+    val root = objectMapper.readTree(reader)
+    val candidates: Iterator[JsonNode] =
+      if (root.isArray) root.elements().asScala
+      else if (root.isObject) Iterator.single(root)
+      else Iterator.empty
+    // Non-object elements and failed conversions are skipped (non-fatal errors only).
+    candidates.filter(_.isObject).flatMap { node =>
+      try Some(buildTupleFromJsonObject(node, names, flatten))
+      catch { case scala.util.control.NonFatal(_) => None }
+    }
+  }
+
+  private def buildTupleFromJsonObject(
+      node: JsonNode,
+      names: List[String],
+      flatten: Boolean
+  ): TupleLike = {
+    // Columns absent from this object resolve to null before coercion to the schema type.
+    val byName = JSONToMap(node, flatten).withDefaultValue(null)
+    def coerce(name: String) =
+      AttributeTypeUtils.parseField(byName(name), schema.getAttribute(name).getType)
+    TupleLike(names.map(coerce): _*)
+  }
+
+  // Arrow --------------------------------------------------------------------
+
+  private def arrowReader(uri: URI): Iterator[TupleLike] = {
+    val file = DocumentFactory.openReadonlyDocument(uri).asFile()
+    val allocator = new RootAllocator()
+    val channel = Files.newByteChannel(file.toPath, StandardOpenOption.READ)
+    val arrowReader = new ArrowFileReader(channel, allocator)
+    val vectorRoot: VectorSchemaRoot = arrowReader.getVectorSchemaRoot
+    resources += vectorRoot
+    resources += arrowReader
+    resources += allocator
+    resources += closeableOf(() => channel.close())
+
+    new Iterator[TupleLike] {
+      private var idx = 0 // row cursor inside the currently loaded batch
+      override def hasNext: Boolean = {
+        // Keep loading until a batch with rows is found; an empty batch is not end-of-file.
+        while (idx >= vectorRoot.getRowCount && arrowReader.loadNextBatch()) idx = 0
+        idx < vectorRoot.getRowCount
+      }
+      override def next(): TupleLike = {
+        val tuple = ArrowUtils.getTexeraTuple(idx, vectorRoot)
+        idx += 1
+        tuple
+      }
+    }
+  }
+
+  // Parquet ------------------------------------------------------------------
+
+  private def parquetReader(uri: URI): Iterator[TupleLike] = {
+    val file = DocumentFactory.openReadonlyDocument(uri).asFile()
+    val handle = ParquetUtils.openRecords(file)
+    resources += closeableOf(() => handle.close())
+
+    val parquetSchema = handle.schema
+    // Data columns only; openReader appends the optional source-file column itself.
+    val attributeNames = inference.schema.getAttributeNames
+    val parquetIndex: Map[String, Int] =
+      (0 until parquetSchema.getFieldCount).map(i => parquetSchema.getType(i).getName -> i).toMap
+    handle.records.map { group =>
+      val values = attributeNames.map { name =>
+        parquetIndex.get(name) match {
+          case Some(i) =>
+            val raw = ParquetUtils.readField(group, i, parquetSchema)
+            try AttributeTypeUtils.parseField(raw, schema.getAttribute(name).getType)
+            catch { case scala.util.control.NonFatal(_) => raw } // keep raw if coercion fails
+          case None => null // column missing from this file
+        }
+      }
+      TupleLike(values: _*)
+    }
+  }
+
+  // Excel --------------------------------------------------------------------
+
+  private def excelReader(uri: URI): Iterator[TupleLike] = {
+    val file = DocumentFactory.openReadonlyDocument(uri).asFile()
+    val workbook: Workbook = WorkbookFactory.create(file, null, true)
+    resources += workbook
+    val sheet = inference.sheetName
+      .flatMap(name => Option(workbook.getSheet(name)))
+      .getOrElse(workbook.getSheetAt(0)) // unset or unknown name: first sheet
+    val hasHeader = inference.csvHasHeader.getOrElse(true)
+    // Data columns only: `schema` may also hold the source-file column, which openReader
+    // appends itself and which has no cell in the sheet.
+    val dataColumns = inference.schema.getAttributes
+    val rows = sheet.iterator().asScala
+    val dataRows = if (hasHeader) rows.drop(1) else rows
+
+    dataRows.map { row =>
+      val values = dataColumns.indices.map { i =>
+        val raw = readExcelCell(row.getCell(i))
+        try AttributeTypeUtils.parseField(raw, dataColumns(i).getType)
+        catch { case scala.util.control.NonFatal(_) => raw } // keep raw if coercion fails
+      }
+      TupleLike(values: _*)
+    }
+  }
+
+  // Converts a POI cell to a plain JVM value: String, Boolean, Timestamp (date-formatted
+  // numerics), Long/Double (other numerics), or null for absent/unsupported/failed cells.
+  private def readExcelCell(cell: org.apache.poi.ss.usermodel.Cell): Any = {
+    import org.apache.poi.ss.usermodel.{CellType, DateUtil}
+    // Whole-valued doubles are surfaced as Long, everything else as Double.
+    def boxNumber(d: Double): Any =
+      if (d == d.toLong.toDouble) java.lang.Long.valueOf(d.toLong)
+      else java.lang.Double.valueOf(d)
+    if (cell == null) null
+    else
+      cell.getCellType match {
+        case CellType.STRING  => cell.getStringCellValue
+        case CellType.BOOLEAN => java.lang.Boolean.valueOf(cell.getBooleanCellValue)
+        case CellType.NUMERIC if DateUtil.isCellDateFormatted(cell) =>
+          new java.sql.Timestamp(cell.getDateCellValue.getTime)
+        case CellType.NUMERIC => boxNumber(cell.getNumericCellValue)
+        case CellType.FORMULA =>
+          // Formulas are evaluated on demand; any evaluation failure yields null.
+          try {
+            val evaluated =
+              cell.getSheet.getWorkbook.getCreationHelper.createFormulaEvaluator().evaluate(cell)
+            evaluated.getCellType match {
+              case CellType.STRING  => evaluated.getStringValue
+              case CellType.BOOLEAN => java.lang.Boolean.valueOf(evaluated.getBooleanValue)
+              case CellType.NUMERIC => boxNumber(evaluated.getNumberValue)
+              case _                => null
+            }
+          } catch {
+            case _: Throwable => null
+          }
+        case _ => null
+      }
+  }
+
+  // Images -------------------------------------------------------------------
+
+  private def imageReader(uri: URI): Iterator[TupleLike] = {
+    val in = DocumentFactory.openReadonlyDocument(uri).asInputStream()
+    val bytes = try in.readAllBytes() finally in.close()
+    // ImageIO.read yields null when no reader can decode the bytes; width and height
+    // are then emitted as null while the raw bytes and format are still returned.
+    val decoded = Option(ImageIO.read(new ByteArrayInputStream(bytes)))
+    val format = ImageFormatUtils
+      .detectFormat(bytes)
+      .orElse(ImageFormatUtils.extensionFormat(uri.getPath))
+      .getOrElse("unknown")
+    val width = decoded.map(img => Int.box(img.getWidth)).orNull
+    val height = decoded.map(img => Int.box(img.getHeight)).orNull
+    Iterator.single(TupleLike(bytes, format, width, height))
+  }
+
+  // Plain text ---------------------------------------------------------------
+
+  private def textReader(uri: URI): Iterator[TupleLike] = {
+    val source = DocumentFactory.openReadonlyDocument(uri).asInputStream()
+    val reader = new BufferedReader(new InputStreamReader(source, desc.fileEncoding.getCharset))
+    resources += reader // released later by close()
+    reader.lines().iterator().asScala.map(text => TupleLike(text)) // one field per line
+  }
+
+  // Reference JSONUtils so its import stays used; JSONToMap/objectMapper are imported separately.
+  locally(JSONUtils)
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDescSpec.scala
new file mode 100644
index 00000000000..398f4de7729
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpDescSpec.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.fileSplit
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.core.workflow.PortIdentity
+import org.scalatest.flatspec.AnyFlatSpec
+
+class FileSplitOpDescSpec extends AnyFlatSpec {
+
+  "FileSplitOpDesc" should "propagate the input schema to every output port" in {
+    // The input has a "source_file" column, so schema propagation is expected to succeed.
+    val fileTagged = Schema(
+      List(
+        new Attribute("source_file", AttributeType.STRING),
+        new Attribute("value", AttributeType.INTEGER)
+      )
+    )
+    val propagated =
+      new FileSplitOpDesc().getExternalOutputSchemas(Map(PortIdentity() -> fileTagged))
+
+    assert(propagated.keySet == Set(PortIdentity(), PortIdentity(1)))
+    assert(propagated.values.forall(_ == fileTagged))
+  }
+
+  it should "reject inputs without a file identity column" in {
+    val untagged = Schema(List(new Attribute("value", AttributeType.INTEGER)))
+    val splitter = new FileSplitOpDesc()
+    // The error message is expected to mention both "source_file" and "filename".
+    val failure = intercept[IllegalArgumentException] {
+      splitter.getExternalOutputSchemas(Map(PortIdentity() -> untagged))
+    }
+    assert(failure.getMessage.contains("source_file"))
+    assert(failure.getMessage.contains("filename"))
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExecSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExecSpec.scala
new file mode 100644
index 00000000000..7b04ec961ba
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/fileSplit/FileSplitOpExecSpec.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.fileSplit
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, Tuple}
+import org.apache.texera.amber.core.workflow.PortIdentity
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+
+class FileSplitOpExecSpec extends AnyFlatSpec {
+
+  "FileSplitOpExec" should "keep rows from the same file on the same output port" in {
+    val exec = new FileSplitOpExec(objectMapper.writeValueAsString(new FileSplitOpDesc()))
+    val schema = Schema(
+      List(
+        new Attribute("source_file", AttributeType.STRING),
+        new Attribute("value", AttributeType.INTEGER)
+      )
+    )
+    def row(file: String, value: Int): Tuple = Tuple(schema, Array[Any](file, value))
+
+    exec.open()
+    // "a.csv" appears twice (first and third row); both rows must leave on the same
+    // port, while the other files are routed independently.
+    val routedPorts = List(row("a.csv", 1), row("b.csv", 2), row("a.csv", 3), row("c.csv", 4))
+      .flatMap(tuple => exec.processTupleMultiPort(tuple, 0).toList)
+      .map(_._2.get)
+    exec.close()
+
+    val expected = List(PortIdentity(), PortIdentity(1), PortIdentity(), PortIdentity())
+    assert(routedPorts == expected)
+  }
+
+  it should "auto-detect the filename column used by file scans" in {
+    val exec = new FileSplitOpExec(objectMapper.writeValueAsString(new FileSplitOpDesc()))
+    // No "source_file" column here: the file identity is carried by "filename" instead,
+    // and the descriptor is left at its defaults.
+    val schema = Schema(
+      List(
+        new Attribute("filename", AttributeType.STRING),
+        new Attribute("content", AttributeType.BINARY)
+      )
+    )
+    val image = Tuple(schema, Array[Any]("cat.png", Array[Byte](1, 2, 3)))
+
+    exec.open()
+    val port = exec.processTupleMultiPort(image, 0).next()._2
+    exec.close()
+
+    assert(port.contains(PortIdentity()))
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDescSpec.scala
index 4437c018bd5..b5906e4edfd 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/file/FileScanSourceOpDescSpec.scala
@@ -27,6 +27,9 @@ import org.apache.texera.amber.util.JSONUtils.objectMapper
 import org.scalatest.BeforeAndAfter
 import org.scalatest.flatspec.AnyFlatSpec
 
+import java.nio.file.Files
+import scala.jdk.CollectionConverters._
+
 class FileScanSourceOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
 
   var fileScanSourceOpDesc: FileScanSourceOpDesc = _
@@ -185,4 +188,60 @@ class FileScanSourceOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
     FileScanSourceOpExec.close()
   }
 
+  it should "read a folder of binary files and preserve relative file names" in {
+    val folder = Files.createTempDirectory("file-scan-image-folder-")
+    try {
+      // Two tiny payloads; only the names look like images, the bytes are arbitrary.
+      Files.write(folder.resolve("cat.png"), Array[Byte](1, 2, 3))
+      Files.write(folder.resolve("dog.png"), Array[Byte](4, 5, 6))
+      fileScanSourceOpDesc.setResolvedFileName(FileResolver.resolve(folder.toString))
+      fileScanSourceOpDesc.attributeType = FileAttributeType.BINARY
+      fileScanSourceOpDesc.outputFileName = true
+
+      val schema = fileScanSourceOpDesc.sourceSchema()
+      val exec = new FileScanSourceOpExec(objectMapper.writeValueAsString(fileScanSourceOpDesc))
+      exec.open()
+      val rows =
+        exec.produceTuple().map(_.asInstanceOf[SchemaEnforceable].enforceSchema(schema)).toList
+      exec.close()
+      val names = rows.map(_.getField[String]("filename"))
+      val payloads = rows.map(_.getField[Array[Byte]]("line").toList)
+      assert(names == List("cat.png", "dog.png"))
+      assert(payloads == List(List[Byte](1, 2, 3), List[Byte](4, 5, 6)))
+    } finally deleteRecursively(folder)
+  }
+
+  it should "preserve relative file names for line-based folder scans" in {
+    val folder = Files.createTempDirectory("file-scan-text-folder-")
+    try {
+      // One line per file, so each output row maps back to exactly one file.
+      Files.writeString(folder.resolve("a.txt"), "line-a\n")
+      Files.writeString(folder.resolve("b.txt"), "line-b\n")
+      fileScanSourceOpDesc.setResolvedFileName(FileResolver.resolve(folder.toString))
+      fileScanSourceOpDesc.attributeType = FileAttributeType.STRING
+      fileScanSourceOpDesc.outputFileName = true
+
+      val schema = fileScanSourceOpDesc.sourceSchema()
+      val exec = new FileScanSourceOpExec(objectMapper.writeValueAsString(fileScanSourceOpDesc))
+      exec.open()
+      val rows =
+        exec.produceTuple().map(_.asInstanceOf[SchemaEnforceable].enforceSchema(schema)).toList
+      exec.close()
+      val names = rows.map(_.getField[String]("filename"))
+      val lines = rows.map(_.getField[String]("line"))
+      assert(names == List("a.txt", "b.txt"))
+      assert(lines == List("line-a", "line-b"))
+    } finally deleteRecursively(folder)
+  }
+
+  private def deleteRecursively(path: java.nio.file.Path): Unit = {
+    val walk = Files.walk(path) // holds open directory handles until closed
+    try {
+      // Deepest paths first, so each directory is already empty when it is deleted.
+      val deepestFirst =
+        walk.iterator().asScala.toSeq.sortBy(_.getNameCount)(Ordering.Int.reverse)
+      deepestFirst.foreach(Files.deleteIfExists)
+    } finally walk.close()
+  }
+
 }
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSnifferSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSnifferSpec.scala
new file mode 100644
index 00000000000..82f349d2c2f
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/CSVDialectSnifferSpec.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.scalatest.flatspec.AnyFlatSpec
+
+class CSVDialectSnifferSpec extends AnyFlatSpec {
+
+  "CSVDialectSniffer" should "detect comma as delimiter for plain CSV" in {
+    // A header row of names followed by three data rows.
+    val sniffed = CSVDialectSniffer.sniff("id,name,age\n1,Ada,36\n2,Lin,29\n3,Bob,42\n")
+    assert(sniffed.delimiter == ',')
+    assert(sniffed.hasHeader)
+  }
+
+  it should "detect tab as delimiter for TSV-like content" in {
+    // Same table as the comma case, tab-separated.
+    val sniffed = CSVDialectSniffer.sniff("id\tname\tage\n1\tAda\t36\n2\tLin\t29\n3\tBob\t42\n")
+    assert(sniffed.delimiter == '\t')
+    assert(sniffed.hasHeader)
+  }
+
+  it should "detect semicolon as delimiter when commas are absent" in {
+    // Semicolon-separated variant; only the delimiter is asserted here.
+    val sniffed = CSVDialectSniffer.sniff("id;name;age\n1;Ada;36\n2;Lin;29\n3;Bob;42\n")
+    assert(sniffed.delimiter == ';')
+  }
+
+  it should "detect missing header when all rows look like data" in {
+    val sample = "1,Ada,36\n2,Lin,29\n3,Bob,42\n4,Eve,55\n"
+    val sniffed = CSVDialectSniffer.sniff(sample)
+    assert(sniffed.delimiter == ',')
+    // Row 1 has the same number/name/number shape as every later row, so nothing marks
+    // it as a header; the sniffer reports a header only when row 1 looks distinct.
+    assert(!sniffed.hasHeader)
+  }
+
+  it should "honor a preferred delimiter when the content is consistent with it" in {
+    // The preferred delimiter matches the content, so it is expected to be kept.
+    val sniffed = CSVDialectSniffer.sniff("a,b,c\n1,2,3\n4,5,6\n", preferred = Some(','))
+    assert(sniffed.delimiter == ',')
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetectorSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetectorSpec.scala
new file mode 100644
index 00000000000..cecc74034f5
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/FormatDetectorSpec.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.scalatest.flatspec.AnyFlatSpec
+
+import java.nio.charset.StandardCharsets
+
+class FormatDetectorSpec extends AnyFlatSpec {
+
+  private val utf8 = StandardCharsets.UTF_8
+
+  "FormatDetector" should "detect Parquet by magic bytes" in {
+    val bytes = "PAR1".getBytes(utf8) ++ Array.fill(20)(0.toByte)
+    assert(FormatDetector.detect(None, bytes, utf8) == SmartFileFormat.PARQUET)
+  }
+
+  it should "detect XLSX by ZIP magic bytes" in {
+    val bytes = Array[Byte](0x50, 0x4b, 0x03, 0x04, 0, 0, 0, 0)
+    assert(FormatDetector.detect(Some("foo.xlsx"), bytes, utf8) == SmartFileFormat.EXCEL)
+  }
+
+  it should "not classify a generic ZIP container as Excel" in {
+    val bytes = Array[Byte](0x50, 0x4b, 0x03, 0x04, 0, 0, 0, 0)
+    assert(FormatDetector.detect(Some("archive.zip"), bytes, utf8) == SmartFileFormat.TEXT)
+  }
+
+  it should "detect Arrow by ARROW1 magic" in {
+    val bytes = "ARROW1\u0000\u0000".getBytes(utf8)
+    assert(FormatDetector.detect(None, bytes, utf8) == SmartFileFormat.ARROW)
+  }
+
+  it should "detect TSV when content contains tabs and extension matches" in {
+    val bytes = "id\tname\tage\n1\tAda\t36\n2\tLin\t29\n".getBytes(utf8)
+    assert(FormatDetector.detect(Some("users.tsv"), bytes, utf8) == SmartFileFormat.TSV)
+  }
+
+  it should "classify tab-delimited content as CSV when the extension is .csv" in {
+    val bytes = "id\tname\tage\n1\tAda\t36\n2\tLin\t29\n".getBytes(utf8)
+    val detected = FormatDetector.detect(Some("misnamed.csv"), bytes, utf8)
+    // A .csv extension outranks content sniffing, so the tab-delimited body is not reported as TSV.
+    assert(detected == SmartFileFormat.CSV)
+  }
+
+  it should "fall back to content sniffing when extension is unknown" in {
+    val bytes = "id\tname\n1\tAda\n2\tLin\n".getBytes(utf8)
+    assert(FormatDetector.detect(Some("blob.bin"), bytes, utf8) == SmartFileFormat.TSV)
+  }
+
+  it should "detect JSONL when multiple lines start with {" in {
+    val bytes = "{\"a\":1}\n{\"a\":2}\n{\"a\":3}\n".getBytes(utf8)
+    assert(FormatDetector.detect(None, bytes, utf8) == SmartFileFormat.JSONL)
+  }
+
+  it should "detect JSON array when content starts with [" in {
+    val bytes = "[ {\"a\":1}, {\"a\":2} ]".getBytes(utf8)
+    assert(FormatDetector.detect(None, bytes, utf8) == SmartFileFormat.JSON)
+  }
+
+  it should "detect plain text when there are no delimiters" in {
+    val bytes = "hello world\nthis is text\n".getBytes(utf8)
+    assert(FormatDetector.detect(None, bytes, utf8) == SmartFileFormat.TEXT)
+  }
+
+  it should "prefer extension over content sniffing for CSV" in {
+    val bytes = "a,b,c\n1,2,3\n".getBytes(utf8)
+    assert(FormatDetector.detect(Some("data.csv"), bytes, utf8) == SmartFileFormat.CSV)
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDescSpec.scala
new file mode 100644
index 00000000000..ec092c69d22
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpDescSpec.scala
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.example.data.simple.SimpleGroupFactory
+import org.apache.parquet.hadoop.ParquetWriter
+import org.apache.parquet.hadoop.example.GroupWriteSupport
+import org.apache.parquet.schema.{MessageTypeParser, Type}
+import org.apache.poi.xssf.usermodel.XSSFWorkbook
+import org.apache.texera.amber.core.storage.FileResolver
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.TestOperators
+import org.scalatest.flatspec.AnyFlatSpec
+
+import java.awt.image.BufferedImage
+import java.io.{File, FileOutputStream}
+import javax.imageio.ImageIO
+import java.nio.file.Files
+import java.nio.charset.StandardCharsets
+import scala.jdk.CollectionConverters._
+
+class SmartFileSourceOpDescSpec extends AnyFlatSpec {
+
+  "SmartFileSourceOpDesc.operatorInfo" should "advertise the broader Smart Source name" in {
+    val desc = new SmartFileSourceOpDesc()
+
+    assert(desc.operatorInfo.userFriendlyName == "Smart Source")
+  }
+
+  "SmartFileSourceOpDesc" should "infer CSV format and schema from a CSV file" in {
+    val desc = new SmartFileSourceOpDesc()
+    desc.fileName = Some(TestOperators.CountrySalesSmallCsvPath)
+    desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+    val result = desc.runInference()
+    assert(result.format == SmartFileFormat.CSV)
+    assert(result.csvDelimiter.contains(","))
+    assert(result.csvHasHeader.contains(true))
+    assert(result.schema.getAttributes.length == 14)
+    assert(result.schema.getAttribute("Order ID").getType == AttributeType.INTEGER)
+  }
+
+  it should "infer JSONL format and schema from a JSONL file" in {
+    val desc = new SmartFileSourceOpDesc()
+    desc.fileName = Some(TestOperators.smallJsonLPath)
+    desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+    val result = desc.runInference()
+    assert(result.format == SmartFileFormat.JSONL)
+    assert(result.schema.getAttributes.nonEmpty)
+  }
+
+  it should "respect a formatOverride from the user" in {
+    val desc = new SmartFileSourceOpDesc()
+    desc.fileName = Some(TestOperators.CountrySalesSmallCsvPath)
+    desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+    desc.formatOverride = SmartFileFormat.CSV
+    desc.customDelimiter = Some(",")
+
+    val result = desc.runInference()
+    assert(result.format == SmartFileFormat.CSV)
+  }
+
+  it should "infer plain text format for a .txt file" in {
+    val desc = new SmartFileSourceOpDesc()
+    desc.fileName = Some(TestOperators.TestTextFilePath)
+    desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+    val result = desc.runInference()
+    assert(result.format == SmartFileFormat.TEXT)
+    assert(result.schema.getAttributeNames == List("line"))
+    assert(result.schema.getAttribute("line").getType == AttributeType.STRING)
+  }
+
+  it should "infer string columns for a header-only CSV file" in {
+    val tmp = Files.createTempFile("smartfile-header-only-", ".csv")
+    try {
+      Files.writeString(tmp, "id,name,score\n", StandardCharsets.UTF_8)
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(tmp.toFile.getAbsolutePath)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val result = desc.runInference()
+      assert(result.format == SmartFileFormat.CSV)
+      assert(result.schema.getAttributeNames == List("id", "name", "score"))
+      assert(result.schema.getAttributes.forall(_.getType == AttributeType.STRING))
+    } finally Files.deleteIfExists(tmp)
+  }
+
+  it should "infer one schema for a folder of similar CSV files" in {
+    val dir = Files.createTempDirectory("smartfile-folder-")
+    try {
+      Files.writeString(dir.resolve("2025-01.csv"), "id,name\n1,Ada\n", StandardCharsets.UTF_8)
+      Files.writeString(dir.resolve("2025-02.csv"), "id,name\n2,Lin\n", StandardCharsets.UTF_8)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val result = desc.runInference()
+      assert(result.format == SmartFileFormat.CSV)
+      assert(result.isFolder)
+      assert(result.fileCount == 2)
+      assert(result.schema.getAttributeNames == List("id", "name"))
+    } finally deleteRecursively(dir)
+  }
+
+  it should "infer image folders as image records" in {
+    val dir = Files.createTempDirectory("smartfile-image-folder-")
+    try {
+      writePng(dir.resolve("cat.png").toFile, width = 3, height = 2)
+      writePng(dir.resolve("dog.png").toFile, width = 4, height = 5)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val result = desc.runInference()
+      assert(result.format == SmartFileFormat.IMAGE)
+      assert(result.isFolder)
+      assert(result.fileCount == 2)
+      assert(result.schema.getAttributeNames == List("image", "format", "width", "height"))
+      assert(result.schema.getAttribute("image").getType == AttributeType.BINARY)
+      assert(result.schema.getAttribute("format").getType == AttributeType.STRING)
+      assert(result.schema.getAttribute("width").getType == AttributeType.INTEGER)
+      assert(result.schema.getAttribute("height").getType == AttributeType.INTEGER)
+    } finally deleteRecursively(dir)
+  }
+
+  it should "append a source file column when folder provenance is enabled" in {
+    val dir = Files.createTempDirectory("smartfile-folder-source-column-")
+    try {
+      Files.writeString(dir.resolve("2025-01.csv"), "id,name\n1,Ada\n", StandardCharsets.UTF_8)
+      Files.writeString(dir.resolve("2025-02.csv"), "id,name\n2,Lin\n", StandardCharsets.UTF_8)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+      desc.includeSourceFile = true
+
+      val schema = desc.sourceSchema()
+      assert(schema.getAttributeNames == List("id", "name", "source_file"))
+      assert(schema.getAttribute("source_file").getType == AttributeType.STRING)
+    } finally deleteRecursively(dir)
+  }
+
+  it should "reject folders that mix file formats" in {
+    val dir = Files.createTempDirectory("smartfile-mixed-folder-")
+    try {
+      Files.writeString(dir.resolve("part.csv"), "id,name\n1,Ada\n", StandardCharsets.UTF_8)
+      Files.writeString(dir.resolve("part.jsonl"), """{"id":2,"name":"Lin"}""" + "\n", StandardCharsets.UTF_8)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val err = intercept[IllegalArgumentException](desc.runInference())
+      assert(err.getMessage.contains("same detected format"))
+    } finally deleteRecursively(dir)
+  }
+
+  it should "reject empty folders" in {
+    val dir = Files.createTempDirectory("smartfile-empty-folder-")
+    try {
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val err = intercept[IllegalArgumentException](desc.runInference())
+      assert(err.getMessage.contains("does not contain any readable files"))
+    } finally deleteRecursively(dir)
+  }
+
+  it should "infer schema from a generated Excel file" in {
+    val tmp = Files.createTempFile("smartfile-test-", ".xlsx").toFile
+    try {
+      writeExcel(tmp)
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(tmp.getAbsolutePath)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val result = desc.runInference()
+      assert(result.format == SmartFileFormat.EXCEL)
+      val attrs = result.schema.getAttributes
+      assert(attrs.length == 3)
+      assert(attrs.head.getName == "id")
+      assert(attrs(1).getName == "name")
+      assert(attrs(2).getName == "score")
+      assert(attrs.head.getType == AttributeType.INTEGER)
+      assert(attrs(2).getType == AttributeType.DOUBLE)
+    } finally tmp.delete()
+  }
+
+  it should "infer schema from a generated Parquet file" in {
+    val tmp = Files.createTempFile("smartfile-test-", ".parquet").toFile
+    tmp.delete() // ParquetWriter wants to create the file itself
+    try {
+      writeParquet(tmp)
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(tmp.getAbsolutePath)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val result = desc.runInference()
+      assert(result.format == SmartFileFormat.PARQUET)
+      val attrs = result.schema.getAttributes
+      assert(attrs.length == 3)
+      assert(attrs.exists(_.getName == "id"))
+      assert(result.schema.getAttribute("id").getType == AttributeType.INTEGER)
+      assert(result.schema.getAttribute("name").getType == AttributeType.STRING)
+      assert(result.schema.getAttribute("score").getType == AttributeType.DOUBLE)
+    } finally tmp.delete()
+  }
+
+  private def writeExcel(out: File): Unit = {
+    val workbook = new XSSFWorkbook()
+    try {
+      val sheet = workbook.createSheet("Sheet1")
+      val header = sheet.createRow(0)
+      header.createCell(0).setCellValue("id")
+      header.createCell(1).setCellValue("name")
+      header.createCell(2).setCellValue("score")
+
+      val rows = Seq((1, "Ada", 36.5), (2, "Lin", 29.1), (3, "Bob", 42.0))
+      rows.zipWithIndex.foreach {
+        case ((id, name, score), i) =>
+          val row = sheet.createRow(i + 1)
+          row.createCell(0).setCellValue(id.toDouble)
+          row.createCell(1).setCellValue(name)
+          row.createCell(2).setCellValue(score)
+      }
+      val fos = new FileOutputStream(out)
+      try workbook.write(fos)
+      finally fos.close()
+    } finally workbook.close()
+  }
+
+  private def writePng(out: File, width: Int, height: Int): Unit = {
+    val image = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB)
+    ImageIO.write(image, "png", out)
+  }
+
+  private def writeParquet(out: File): Unit = {
+    val schemaStr =
+      """
+        |message simple {
+        |  required int32 id;
+        |  required binary name (UTF8);
+        |  required double score;
+        |}
+      """.stripMargin
+    val schema = MessageTypeParser.parseMessageType(schemaStr)
+    val conf = new Configuration(false)
+    conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem")
+    GroupWriteSupport.setSchema(schema, conf)
+
+    val factory = new SimpleGroupFactory(schema)
+    val writer = new ParquetWriter[org.apache.parquet.example.data.Group](
+      new Path(out.toURI),
+      new GroupWriteSupport(),
+      org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED,
+      ParquetWriter.DEFAULT_BLOCK_SIZE,
+      ParquetWriter.DEFAULT_PAGE_SIZE,
+      ParquetWriter.DEFAULT_PAGE_SIZE,
+      true,
+      false,
+      ParquetWriter.DEFAULT_WRITER_VERSION,
+      conf
+    )
+    try {
+      writer.write(factory.newGroup().append("id", 1).append("name", "Ada").append("score", 36.5d))
+      writer.write(factory.newGroup().append("id", 2).append("name", "Lin").append("score", 29.1d))
+    } finally writer.close()
+
+    // Avoid compiler unused-import warning for Type — keep an explicit reference here so that
+    // if MessageTypeParser ever changes its return type the compile fails loudly.
+    val _: Type = schema
+  }
+
+  private def deleteRecursively(path: java.nio.file.Path): Unit = {
+    // Files.walk holds open directory handles, so close the stream before deleting anything.
+    val walk = Files.walk(path)
+    val paths =
+      try walk.iterator().asScala.toSeq
+      finally walk.close()
+    // Deepest entries first, so every directory is already empty when it is removed.
+    paths.sortBy(_.getNameCount)(Ordering.Int.reverse).foreach(Files.deleteIfExists)
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExecSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExecSpec.scala
new file mode 100644
index 00000000000..13cc2d80cd1
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/scan/smart/SmartFileSourceOpExecSpec.scala
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.source.scan.smart
+
+import org.apache.texera.amber.core.storage.FileResolver
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+
+import java.awt.image.BufferedImage
+import java.io.File
+import javax.imageio.ImageIO
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import scala.jdk.CollectionConverters._
+
+class SmartFileSourceOpExecSpec extends AnyFlatSpec {
+
+  "SmartFileSourceOpExec" should "read a folder of similar CSV files as one source" in {
+    val dir = Files.createTempDirectory("smartfile-folder-exec-")
+    try {
+      Files.writeString(dir.resolve("2025-01.csv"), "id,name\n1,Ada\n", StandardCharsets.UTF_8)
+      Files.writeString(dir.resolve("2025-02.csv"), "id,name\n2,Lin\n", StandardCharsets.UTF_8)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+
+      val tuples = readAll(desc)
+
+      assert(tuples.size == 2)
+      assert(tuples.map(_.getFields(0)) == List(1, 2))
+      assert(tuples.map(_.getFields(1)) == List("Ada", "Lin"))
+    } finally deleteRecursively(dir)
+  }
+
+  it should "preserve the originating file for folder rows when enabled" in {
+    val dir = Files.createTempDirectory("smartfile-folder-source-column-exec-")
+    try {
+      Files.writeString(dir.resolve("2025-01.csv"), "id,name\n1,Ada\n", StandardCharsets.UTF_8)
+      Files.writeString(dir.resolve("2025-02.csv"), "id,name\n2,Lin\n", StandardCharsets.UTF_8)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+      desc.includeSourceFile = true
+
+      val tuples = readAll(desc)
+
+      assert(tuples.map(_.getFields.last) == List("2025-01.csv", "2025-02.csv"))
+    } finally deleteRecursively(dir)
+  }
+
+  it should "read image folders as image records with metadata" in {
+    val dir = Files.createTempDirectory("smartfile-image-folder-exec-")
+    try {
+      writePng(dir.resolve("cat.png").toFile, width = 3, height = 2)
+      writePng(dir.resolve("dog.png").toFile, width = 4, height = 5)
+
+      val desc = new SmartFileSourceOpDesc()
+      desc.fileName = Some(dir.toString)
+      desc.setResolvedFileName(FileResolver.resolve(desc.fileName.get))
+      desc.includeSourceFile = true
+
+      val tuples = readAll(desc)
+
+      assert(tuples.size == 2)
+      assert(tuples.map(_.getFields(0).asInstanceOf[Array[Byte]].nonEmpty) == List(true, true))
+      assert(tuples.map(_.getFields(1)) == List("png", "png"))
+      assert(tuples.map(_.getFields(2)) == List(3, 4))
+      assert(tuples.map(_.getFields(3)) == List(2, 5))
+      assert(tuples.map(_.getFields(4)) == List("cat.png", "dog.png"))
+    } finally deleteRecursively(dir)
+  }
+
+  // Runs the executor built from `desc` to completion and returns every tuple it produces.
+  // The executor is closed in a `finally` so a failed read cannot leave it open.
+  private def readAll(desc: SmartFileSourceOpDesc) = {
+    val exec = new SmartFileSourceOpExec(objectMapper.writeValueAsString(desc))
+    exec.open()
+    try exec.produceTuple().toList
+    finally exec.close()
+  }
+
+  private def deleteRecursively(path: java.nio.file.Path): Unit = {
+    // Files.walk holds open directory handles, so close the stream before deleting anything.
+    val walk = Files.walk(path)
+    val paths =
+      try walk.iterator().asScala.toSeq
+      finally walk.close()
+    // Deepest entries first, so every directory is already empty when it is removed.
+    paths.sortBy(_.getNameCount)(Ordering.Int.reverse).foreach(Files.deleteIfExists)
+  }
+
+  private def writePng(out: File, width: Int, height: Int): Unit = {
+    val image = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB)
+    ImageIO.write(image, "png", out)
+  }
+}
diff --git a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.ts b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.ts
index 2eda4b53bf6..89ae3fe95ff 100644
--- a/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.ts
+++ b/frontend/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/user-dataset-version-filetree/user-dataset-version-filetree.component.ts
@@ -58,6 +58,9 @@ export class UserDatasetVersionFiletreeComponent implements AfterViewInit {
   @Input()
   public isExpandAllAfterViewInit = false;
 
+  @Input()
+  public isDirectorySelectable = false;
+
   @ViewChild("tree") tree: any;
 
   @Output()
@@ -69,9 +72,13 @@ export class UserDatasetVersionFiletreeComponent implements AfterViewInit {
     actionMapping: {
       mouse: {
         click: (tree: any, node: any, $event: any) => {
+          const isDirectory = node.data.type === "directory";
+          if (isDirectory && this.isDirectorySelectable) {
+            this.selectedTreeNode.emit(node.data);
+          }
           if (node.hasChildren) {
             TREE_ACTIONS.TOGGLE_EXPANDED(tree, node, $event);
-          } else {
+          } else if (!isDirectory) {
             this.selectedTreeNode.emit(node.data);
           }
         },
diff --git a/frontend/src/app/workspace/component/dataset-file-selector/dataset-file-selector.component.ts b/frontend/src/app/workspace/component/dataset-file-selector/dataset-file-selector.component.ts
index 5de61b33860..55bb8450ae5 100644
--- a/frontend/src/app/workspace/component/dataset-file-selector/dataset-file-selector.component.ts
+++ b/frontend/src/app/workspace/component/dataset-file-selector/dataset-file-selector.component.ts
@@ -62,6 +62,7 @@ export class DatasetFileSelectorComponent extends FieldType<FieldTypeConfig> {
       nzData: {
         fileMode: true,
         selectedPath: this.formControl.getRawValue(),
+        allowDirectorySelection: this.props["allowFolderSelection"] === true,
       },
       nzBodyStyle: {
         resize: "both",
diff --git a/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.html b/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.html
index f8189ddb3ff..d6b43f767b7 100644
--- a/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.html
+++ b/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.html
@@ -55,6 +55,7 @@
 <texera-user-dataset-version-filetree
   [isExpandAllAfterViewInit]="true"
   [fileTreeNodes]="fileTree"
+  [isDirectorySelectable]="data.allowDirectorySelection === true"
   (selectedTreeNode)="onFileSelected($event)">
 </texera-user-dataset-version-filetree>
 <br />
diff --git a/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.ts b/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.ts
index 7f70792f937..9ec8a0809c4 100644
--- a/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.ts
+++ b/frontend/src/app/workspace/component/dataset-selection-modal/dataset-selection-modal.component.ts
@@ -53,9 +53,10 @@ import { ɵNzTransitionPatchDirective } from "ng-zorro-antd/core/transition-patc
   ],
 })
 export class DatasetSelectionModalComponent implements OnInit {
-  private readonly data = inject(NZ_MODAL_DATA) as {
+  public readonly data = inject(NZ_MODAL_DATA) as {
     fileMode: boolean;
     selectedPath?: string | null;
+    allowDirectorySelection?: boolean;
   };
 
   datasets: ReadonlyArray<DashboardDataset> = [];
diff --git a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
index 1f2c2963f29..ab8f203420c 100644
--- a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
+++ b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
@@ -88,6 +88,30 @@
   <p>{{ operatorDescription }}</p>
 </div>
 
+<div
+  *ngIf="currentOperatorSchema?.operatorType === smartFileScanType && smartFileInferenceLoading && !editingTitle"
+  class="smart-file-inference-summary">
+  Detecting file format...
+</div>
+
+<div
+  *ngIf="currentOperatorSchema?.operatorType === smartFileScanType && smartFileInferenceSummary && !editingTitle"
+  class="smart-file-inference-summary">
+  <span class="summary-label">Detected</span>
+  <span>{{ smartFileInferenceSummary.detectedFormat }}</span>
+  <span *ngIf="smartFileInferenceSummary.isFolder">
+    Folder: {{ smartFileInferenceSummary.fileCount }} files
+  </span>
+  <span *ngIf="formatSmartFileDelimiter(smartFileInferenceSummary.customDelimiter) as delimiter">
+    Delimiter: {{ delimiter }}
+  </span>
+  <span *ngIf="smartFileInferenceSummary.hasHeader !== null">
+    Header: {{ smartFileInferenceSummary.hasHeader ? "yes" : "no" }}
+  </span>
+  <span *ngIf="smartFileInferenceSummary.sheetName">Sheet: {{ smartFileInferenceSummary.sheetName }}</span>
+  <span>{{ smartFileInferenceSummary.schema.length }} columns</span>
+</div>
+
 <div
   id="customName"
   [hidden]="!editingTitle"
diff --git a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.scss b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.scss
index 4126a9ee1ce..325fe6e85be 100644
--- a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.scss
+++ b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.scss
@@ -73,3 +73,17 @@
     margin-bottom: 0;
   }
 }
+
+.smart-file-inference-summary {
+  margin: 0 16px 10px;
+  font-size: 12px;
+  line-height: 1.4;
+  color: rgba(0, 0, 0, 0.72);
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px 12px;
+
+  .summary-label {
+    font-weight: 600;
+  }
+}
diff --git a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts
index c7ab561f403..aa1ae61256d 100644
--- a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts
+++ b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts
@@ -61,6 +61,11 @@ import * as Y from "yjs";
 import { OperatorSchema } from "src/app/workspace/types/operator-schema.interface";
 import { AttributeType, PortSchema } from "../../../types/workflow-compiling.interface";
 import { GuiConfigService } from "../../../../common/service/gui-config.service";
+import {
+  SMART_FILE_SCAN_TYPE,
+  SmartFileInferenceResponse,
+  SmartFileInferenceService,
+} from "../../../service/smart-file-inference/smart-file-inference.service";
 import { NgIf } from "@angular/common";
 import { NzSpaceCompactItemDirective } from "ng-zorro-antd/space";
 import { NzButtonComponent } from "ng-zorro-antd/button";
@@ -112,6 +117,7 @@ Quill.register("modules/cursors", QuillCursors);
 })
 export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, OnDestroy {
   @Input() currentOperatorId?: string;
+  readonly smartFileScanType = SMART_FILE_SCAN_TYPE;
 
   currentOperatorSchema?: OperatorSchema;
 
@@ -163,6 +169,12 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
   // used to tear down subscriptions that takeUntil(teardownObservable)
   private teardownObservable: Subject<void> = new Subject();
 
+  /** Prevent duplicate inference calls for the same operator/file pair. */
+  private smartFileLastInferenceKey: string | undefined;
+  private smartFileInferenceByOperator = new Map<string, SmartFileInferenceResponse>();
+  public smartFileInferenceSummary?: SmartFileInferenceResponse;
+  public smartFileInferenceLoading = false;
+
   constructor(
     private formlyJsonschema: FormlyJsonschema,
     private workflowActionService: WorkflowActionService,
@@ -173,7 +185,8 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
     private changeDetectorRef: ChangeDetectorRef,
     private workflowVersionService: WorkflowVersionService,
     private workflowStatusSerivce: WorkflowStatusService,
-    private config: GuiConfigService
+    private config: GuiConfigService,
+    private smartFileInferenceService: SmartFileInferenceService
   ) {}
 
   ngOnChanges(changes: SimpleChanges): void {
@@ -243,6 +256,11 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
     this.setFormlyFormBinding(this.currentOperatorSchema.jsonSchema);
     this.formTitle = operator.customDisplayName ?? this.currentOperatorSchema.additionalMetadata.userFriendlyName;
     this.operatorDescription = this.currentOperatorSchema.additionalMetadata.operatorDescription;
+    this.smartFileInferenceSummary =
+      this.currentOperatorSchema.operatorType === SMART_FILE_SCAN_TYPE
+        ? this.smartFileInferenceByOperator.get(operator.operatorID)
+        : undefined;
+    this.smartFileInferenceLoading = false;
     /**
      * Important: make a deep copy of the initial property data object.
      * Prevent the form directly changes the value in the texera graph without going through workflow action service.
@@ -349,10 +367,120 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
         this.typeInferenceOnLambdaFunction(formData);
         this.workflowActionService.setOperatorProperty(this.currentOperatorId, cloneDeep(formData));
         this.listeningToChange = true;
+        this.runSmartFileInferenceIfNeeded(formData);
       }
     });
   }
 
+  /**
+   * For `SmartFileScan` operators, when the user picks a new file the backend can sniff the
+   * format, dialect, and schema and tell us what to prefill. This method only fires once per
+   * operator/fileName pair (so editing other fields doesn't re-trigger it; a failed request may
+   * be retried) and silently no-ops for any other operator type.
+   */
+  private runSmartFileInferenceIfNeeded(formData: Record<string, unknown>): void {
+    if (!this.currentOperatorId) return;
+    if (this.currentOperatorSchema?.operatorType !== SMART_FILE_SCAN_TYPE) return;
+    const fileName = formData?.["fileName"];
+    if (typeof fileName !== "string" || fileName.length === 0) return;
+    const operatorIdAtRequestTime = this.currentOperatorId;
+    const inferenceKey = `${operatorIdAtRequestTime}:${fileName}`;
+    if (inferenceKey === this.smartFileLastInferenceKey) return;
+    this.smartFileLastInferenceKey = inferenceKey;
+    this.smartFileInferenceByOperator.delete(operatorIdAtRequestTime);
+    this.smartFileInferenceSummary = undefined;
+    this.smartFileInferenceLoading = true;
+    // Forward only options that are explicitly set; "Auto-detect"/"AUTO" mean "no format override".
+    const formatOverride = formData["formatOverride"];
+    const requestFormat =
+      typeof formatOverride === "string" && formatOverride !== "Auto-detect" && formatOverride !== "AUTO"
+        ? formatOverride
+        : undefined;
+    const customDelimiter = formData["customDelimiter"];
+    const hasHeader = formData["hasHeader"];
+    const sheetName = formData["sheetName"];
+    const flatten = formData["flatten"];
+    const fileEncoding = formData["fileEncoding"];
+
+    this.smartFileInferenceService
+      .preview({
+        fileName,
+        fileEncoding: typeof fileEncoding === "string" ? fileEncoding : undefined,
+        formatOverride: requestFormat,
+        customDelimiter:
+          typeof customDelimiter === "string" && customDelimiter.length > 0 ? customDelimiter : undefined,
+        hasHeader: typeof hasHeader === "boolean" ? hasHeader : undefined,
+        sheetName: typeof sheetName === "string" && sheetName.length > 0 ? sheetName : undefined,
+        flatten: typeof flatten === "boolean" ? flatten : undefined,
+      })
+      .pipe(untilDestroyed(this))
+      .subscribe({
+        next: response => this.applySmartFileInference(operatorIdAtRequestTime, fileName, response),
+        error: (err: unknown) => {
+          // Only the most recent request may clear the dedup key and the spinner; an older
+          // request failing must not hide the loading state of a newer one still in flight.
+          if (this.smartFileLastInferenceKey === inferenceKey) {
+            this.smartFileLastInferenceKey = undefined;
+            if (this.currentOperatorId === operatorIdAtRequestTime) this.smartFileInferenceLoading = false;
+          }
+          // Surface as a non-blocking warning. Sniffing failure shouldn't break the workflow —
+          // the operator's own sourceSchema() call will re-attempt at compile time.
+          this.notificationService.warning(`Could not auto-detect file: ${this.smartFileInferenceErrorMessage(err)}`);
+        },
+      });
+  }
+
+  /**
+   * Applies an inference response to the operator that requested it: prefills the detected
+   * format/dialect properties, caches the response per operator, and refreshes the panel summary.
+   * Responses for a deleted operator or for a file that is no longer selected are dropped.
+   */
+  private applySmartFileInference(
+    operatorIdAtRequestTime: string,
+    fileNameAtRequestTime: string,
+    response: SmartFileInferenceResponse
+  ): void {
+    // The operator may have been deleted while the request was in flight; check existence explicitly.
+    const graph = this.workflowActionService.getTexeraGraph();
+    if (!graph.hasOperator(operatorIdAtRequestTime)) return;
+    const operator = graph.getOperator(operatorIdAtRequestTime);
+    // Drop stale responses — user may have already changed the file again.
+    if (operator.operatorProperties["fileName"] !== fileNameAtRequestTime) return;
+
+    const merged: Record<string, unknown> = { ...operator.operatorProperties };
+    merged["formatOverride"] = response.detectedFormat;
+    // Only overwrite the options the backend actually reported; null/undefined leaves the current value.
+    for (const key of ["customDelimiter", "hasHeader", "sheetName", "flatten"] as const) {
+      const value = response[key];
+      if (value !== null && value !== undefined) merged[key] = value;
+    }
+    const sourceFileColumnExists = response.schema.some(column => column.name.toLowerCase() === "source_file");
+    if (response.isFolder && !sourceFileColumnExists && merged["includeSourceFile"] === undefined) {
+      merged["includeSourceFile"] = true;
+    }
+    this.smartFileInferenceByOperator.set(operatorIdAtRequestTime, response);
+    if (this.currentOperatorId === operatorIdAtRequestTime) {
+      this.smartFileInferenceSummary = response;
+      this.smartFileInferenceLoading = false;
+    }
+    this.workflowActionService.setOperatorProperty(operatorIdAtRequestTime, merged);
+  }
+
+  /** Display form of a delimiter: "tab" / "space" for those characters, `undefined` for null, else as-is. */
+  public formatSmartFileDelimiter(delimiter: string | null): string | undefined {
+    if (delimiter === null) return undefined;
+    const isTab = delimiter === "\t";
+    return isTab ? "tab" : delimiter === " " ? "space" : delimiter;
+  }
+
+  /** Picks a readable message from an unknown error, preferring `err.error.message` over `err.message`. */
+  private smartFileInferenceErrorMessage(err: unknown): string {
+    if (typeof err !== "object" || err === null) return "unknown error";
+    const { error, message } = err as { error?: { message?: unknown }; message?: unknown };
+    const nested = error?.message;
+    return typeof nested === "string" ? nested : typeof message === "string" ? message : "unknown error";
+  }
+
   typeInferenceOnLambdaFunction(formData: any): void {
     if (!this.currentOperatorId?.includes("PythonLambdaFunction")) {
       return;
@@ -468,6 +596,12 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
       // if the title is fileName, then change it to custom autocomplete input template
       if (mappedField.key === "fileName") {
         mappedField.type = "inputautocomplete";
+        mappedField.props = {
+          ...mappedField.props,
+          allowFolderSelection:
+            this.currentOperatorSchema?.operatorType === this.smartFileScanType ||
+            this.currentOperatorSchema?.operatorType === "FileScan",
+        };
       }
 
       if (mappedField.key === "datasetVersionPath") {
diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.spec.ts b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.spec.ts
new file mode 100644
index 00000000000..1b5428892c7
--- /dev/null
+++ b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.spec.ts
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { isImageDataUrl } from "./result-table-cell.utils";
+
+describe("isImageDataUrl", () => {
+  it("should recognize supported image data URLs", () => {
+    for (const subtype of ["png", "jpeg", "gif", "webp"]) {
+      expect(isImageDataUrl(`data:image/${subtype};base64,AAAA`)).toBe(true);
+    }
+  });
+
+  it("should reject binary previews and non-image strings", () => {
+    expect(isImageDataUrl("<binary 1010...001, size = 4 bytes>")).toBe(false);
+    expect(isImageDataUrl("data:text/plain;base64,AAAA")).toBe(false);
+    expect(isImageDataUrl(42)).toBe(false);
+  });
+});
diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.ts b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.ts
new file mode 100644
index 00000000000..830551304b9
--- /dev/null
+++ b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-cell.utils.ts
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/** Prefix of a base64 data URL whose media type is PNG, JPEG, GIF, or WebP (case-insensitive). */
+const IMAGE_DATA_URL_PREFIX = /^data:image\/(?:png|jpeg|gif|webp);base64,/i;
+
+/**
+ * Type guard: narrows `value` to `string` when it is a string starting with a supported
+ * base64 image data URL prefix. Non-string values and other media types yield `false`.
+ */
+export function isImageDataUrl(value: unknown): value is string {
+  if (typeof value !== "string") return false;
+  return IMAGE_DATA_URL_PREFIX.test(value);
+}
diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html
index 5400d978ee3..6fc0b49dd89 100644
--- a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html
+++ b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html
@@ -161,7 +161,14 @@ <h5 class="rightAlign"><span [innerHTML]="compare(column.header, 'other')"></spa
             class="table-cell"
             nzEllipsis
             (click)="open(i, row)">
-            <span class="cell-content">{{ column.getCell(row) }}</span>
+            <img
+              *ngIf="isImageCell(row, column); else textCell"
+              class="image-thumbnail"
+              [src]="getImageCellSource(row, column)"
+              [alt]="column.header" />
+            <ng-template #textCell>
+              <span class="cell-content">{{ column.getCell(row) }}</span>
+            </ng-template>
             <button
               (click)="downloadData(currentResult[i][column.columnDef], i, columnIndex, column.columnDef); $event.stopPropagation()"
               nz-button
diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.scss b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.scss
index 6326b83eb2c..05b953a5fca 100644
--- a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.scss
+++ b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.scss
@@ -99,6 +99,14 @@ th.header-size {
   white-space: nowrap;
 }
 
+.image-thumbnail {
+  display: block;
+  width: 36px;
+  height: 36px;
+  object-fit: cover;
+  margin: 0 auto;
+}
+
 .download-button {
   position: absolute;
   right: 4px;
diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts
index 72a0dbbf72c..7681c8aa8ea 100644
--- a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts
+++ b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts
@@ -47,6 +47,7 @@ import { NzButtonComponent } from "ng-zorro-antd/button";
 import { NzWaveDirective } from "ng-zorro-antd/core/wave";
 import { ɵNzTransitionPatchDirective } from "ng-zorro-antd/core/transition-patch";
 import { NzIconDirective } from "ng-zorro-antd/icon";
+import { isImageDataUrl } from "./result-table-cell.utils";
 
 /**
  * The Component will display the result in an excel table format,
@@ -446,6 +447,14 @@ export class ResultTableFrameComponent implements OnInit, OnChanges {
     }));
   }
 
+  isImageCell(row: IndexableObject, column: TableColumn): boolean {
+    return isImageDataUrl(row[column.columnDef]);
+  }
+
+  getImageCellSource(row: IndexableObject, column: TableColumn): string {
+    return String(row[column.columnDef] ?? ""); // empty src instead of a TypeError on a null/undefined cell
+  }
+
   downloadData(data: any, rowIndex: number, columnIndex: number, columnName: string): void {
     const realRowNumber = (this.currentPageIndex - 1) * this.pageSize + rowIndex;
     const defaultFileName = `${columnName}_${realRowNumber}`;
diff --git a/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.html b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.html
new file mode 100644
index 00000000000..9cab40e6150
--- /dev/null
+++ b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.html
@@ -0,0 +1,136 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<aside
+  class="trace-panel"
+  *ngIf="trace as currentTrace">
+  <header class="trace-header">
+    <button
+      class="close-button"
+      type="button"
+      (click)="close()"
+      aria-label="Close visual trace">
+      <span
+        nz-icon
+        nzType="close"></span>
+    </button>
+
+    <div class="hero">
+      <div
+        class="hero-media"
+        *ngIf="currentTrace.heroImage">
+        <img
+          [src]="currentTrace.heroImage"
+          [alt]="currentTrace.heroImageAlt ?? currentTrace.title" />
+      </div>
+
+      <div class="hero-copy">
+        <div class="hero-kicker">Visual Journey</div>
+        <div class="hero-title">{{ currentTrace.title }}</div>
+        <div
+          class="hero-subtitle"
+          *ngIf="currentTrace.subtitle">
+          {{ currentTrace.subtitle }}
+        </div>
+      </div>
+
+      <div
+        class="hero-metric"
+        *ngIf="currentTrace.heroMetric">
+        <span>{{ currentTrace.heroMetric.label }}</span>
+        <strong>{{ currentTrace.heroMetric.value }}</strong>
+      </div>
+    </div>
+
+    <p
+      class="trace-summary"
+      *ngIf="currentTrace.summary">
+      {{ currentTrace.summary }}
+    </p>
+  </header>
+
+  <section class="filmstrip">
+    <div
+      class="film-frame"
+      *ngFor="let step of currentTrace.steps; let i = index">
+      <div class="film-index">{{ i + 1 }}</div>
+      <div
+        class="film-image"
+        [ngClass]="{'film-image--empty': !step.image}">
+        <img
+          *ngIf="step.image"
+          [src]="step.image"
+          [alt]="step.imageAlt ?? step.title" />
+        <span *ngIf="!step.image">{{ getKindLabel(step.kind) }}</span>
+      </div>
+    </div>
+  </section>
+
+  <section class="trace-steps">
+    <article
+      class="trace-step"
+      *ngFor="let step of currentTrace.steps; let i = index"
+      [ngClass]="'trace-step--' + (step.kind ?? 'step')">
+      <div class="trace-line">
+        <div class="trace-dot"></div>
+      </div>
+
+      <div class="step-media">
+        <img
+          *ngIf="step.image"
+          [src]="step.image"
+          [alt]="step.imageAlt ?? step.title" />
+        <span *ngIf="!step.image">{{ i + 1 }}</span>
+      </div>
+
+      <div class="step-body">
+        <div class="step-meta">
+          <span class="kind-chip">{{ getKindLabel(step.kind) }}</span>
+          <button
+            *ngIf="step.operatorId"
+            class="operator-chip"
+            type="button"
+            (click)="focusOperator(step)">
+            {{ getStepLabel(step) }}
+          </button>
+          <span
+            *ngIf="!step.operatorId"
+            class="operator-chip operator-chip--static">
+            {{ getStepLabel(step) }}
+          </span>
+        </div>
+
+        <h3>{{ step.title }}</h3>
+        <p *ngIf="step.detail">{{ step.detail }}</p>
+
+        <div
+          class="metric-row"
+          *ngIf="step.metrics?.length">
+          <div
+            class="metric-pill"
+            *ngFor="let metric of step.metrics">
+            <span>{{ metric.label }}</span>
+            <strong>{{ metric.value }}</strong>
+          </div>
+        </div>
+      </div>
+    </article>
+  </section>
+</aside>
+
diff --git a/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.scss b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.scss
new file mode 100644
index 00000000000..4adb8bade0c
--- /dev/null
+++ b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.scss
@@ -0,0 +1,346 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+:host {
+  position: fixed;
+  inset: 0;
+  z-index: 6;
+  pointer-events: none;
+}
+
+.trace-panel {
+  position: absolute;
+  top: 74px;
+  right: 14px;
+  bottom: 14px;
+  width: min(420px, calc(100vw - 28px));
+  background: #fff;
+  border: 1px solid #dfe5ec;
+  box-shadow: 0 18px 42px rgba(19, 29, 40, 0.18);
+  display: flex;
+  flex-direction: column;
+  overflow: hidden;
+  pointer-events: auto;
+}
+
+.trace-header {
+  padding: 18px 18px 14px;
+  border-bottom: 1px solid #e8edf3;
+  background:
+    linear-gradient(135deg, rgba(255, 244, 214, 0.9), rgba(232, 247, 255, 0.94)),
+    #fff;
+}
+
+.close-button {
+  position: absolute;
+  top: 12px;
+  right: 12px;
+  border: 0;
+  background: #fff;
+  width: 30px;
+  height: 30px;
+  display: grid;
+  place-items: center;
+  color: #253040;
+  cursor: pointer;
+}
+
+.hero {
+  display: grid;
+  grid-template-columns: 74px 1fr;
+  gap: 12px;
+  align-items: center;
+  padding-right: 34px;
+}
+
+.hero-media {
+  width: 74px;
+  height: 74px;
+  background: #fff;
+  border: 1px solid #d9e0e8;
+  display: grid;
+  place-items: center;
+}
+
+.hero-media img {
+  max-width: 64px;
+  max-height: 64px;
+  object-fit: contain;
+  image-rendering: pixelated;
+}
+
+.hero-copy {
+  min-width: 0;
+}
+
+.hero-kicker {
+  color: #46617f;
+  font-size: 11px;
+  text-transform: uppercase;
+  letter-spacing: 0;
+}
+
+.hero-title {
+  color: #17202d;
+  font-size: 20px;
+  font-weight: 700;
+  line-height: 1.2;
+}
+
+.hero-subtitle {
+  color: #556577;
+  font-size: 13px;
+  margin-top: 4px;
+}
+
+.hero-metric {
+  grid-column: 1 / -1;
+  justify-self: start;
+  display: inline-flex;
+  align-items: baseline;
+  gap: 8px;
+  margin-top: 10px;
+  padding: 7px 10px;
+  background: #17202d;
+  color: #fff;
+}
+
+.hero-metric span,
+.metric-pill span {
+  font-size: 11px;
+  color: inherit;
+  opacity: 0.74;
+}
+
+.hero-metric strong {
+  font-size: 18px;
+}
+
+.trace-summary {
+  margin: 12px 0 0;
+  color: #334255;
+  font-size: 13px;
+  line-height: 1.45;
+}
+
+.filmstrip {
+  display: flex;
+  gap: 10px;
+  padding: 14px 18px;
+  overflow-x: auto;
+  border-bottom: 1px solid #edf1f5;
+}
+
+.film-frame {
+  position: relative;
+  flex: 0 0 auto;
+  width: 72px;
+}
+
+.film-index {
+  position: absolute;
+  top: -5px;
+  left: -5px;
+  z-index: 1;
+  width: 20px;
+  height: 20px;
+  display: grid;
+  place-items: center;
+  background: #17202d;
+  color: #fff;
+  font-size: 11px;
+}
+
+.film-image {
+  width: 72px;
+  height: 72px;
+  border: 1px solid #dfe5ec;
+  background: #fff;
+  display: grid;
+  place-items: center;
+}
+
+.film-image img {
+  width: 100%;
+  height: 100%;
+  object-fit: contain;
+  image-rendering: pixelated;
+}
+
+.film-image--empty {
+  background: #f3f6f8;
+  color: #526375;
+  font-size: 11px;
+  text-align: center;
+}
+
+.trace-steps {
+  padding: 18px;
+  overflow-y: auto;
+}
+
+.trace-step {
+  position: relative;
+  display: grid;
+  grid-template-columns: 16px 68px 1fr;
+  gap: 12px;
+  min-height: 92px;
+  padding-bottom: 18px;
+}
+
+.trace-step:last-child {
+  padding-bottom: 0;
+}
+
+.trace-line {
+  position: relative;
+  display: flex;
+  justify-content: center;
+}
+
+.trace-line::after {
+  content: "";
+  position: absolute;
+  top: 18px;
+  bottom: -18px;
+  width: 2px;
+  background: #d9e0e8;
+}
+
+.trace-step:last-child .trace-line::after {
+  display: none;
+}
+
+.trace-dot {
+  position: relative;
+  z-index: 1;
+  width: 12px;
+  height: 12px;
+  margin-top: 6px;
+  background: #62768d;
+}
+
+.trace-step--source .trace-dot {
+  background: #1f8a70;
+}
+
+.trace-step--match .trace-dot {
+  background: #2b6cb0;
+}
+
+.trace-step--compute .trace-dot {
+  background: #d97706;
+}
+
+.trace-step--render .trace-dot {
+  background: #b83280;
+}
+
+.step-media {
+  width: 68px;
+  height: 68px;
+  border: 1px solid #dfe5ec;
+  background: #f7f9fb;
+  display: grid;
+  place-items: center;
+  color: #526375;
+  font-size: 16px;
+  font-weight: 700;
+}
+
+.step-media img {
+  width: 100%;
+  height: 100%;
+  object-fit: contain;
+  image-rendering: pixelated;
+}
+
+.step-body {
+  min-width: 0;
+}
+
+.step-meta {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+  margin-bottom: 6px;
+}
+
+.kind-chip,
+.operator-chip {
+  border: 1px solid #d9e0e8;
+  background: #fff;
+  color: #314255;
+  padding: 3px 7px;
+  font-size: 11px;
+  line-height: 1.2;
+}
+
+.operator-chip {
+  cursor: pointer;
+}
+
+.operator-chip--static {
+  cursor: default;
+}
+
+.step-body h3 {
+  margin: 0;
+  color: #17202d;
+  font-size: 14px;
+  font-weight: 700;
+  line-height: 1.35;
+}
+
+.step-body p {
+  margin: 5px 0 0;
+  color: #556577;
+  font-size: 12px;
+  line-height: 1.45;
+}
+
+.metric-row {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+  margin-top: 8px;
+}
+
+.metric-pill {
+  display: inline-flex;
+  align-items: baseline;
+  gap: 5px;
+  padding: 4px 7px;
+  background: #f3f6f8;
+  color: #253040;
+}
+
+.metric-pill strong {
+  font-size: 12px;
+}
+
+@media (max-width: 720px) {
+  .trace-panel {
+    top: 60px;
+    right: 8px;
+    bottom: 8px;
+    width: calc(100vw - 16px);
+  }
+}
+
diff --git a/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.spec.ts b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.spec.ts
new file mode 100644
index 00000000000..d5ff3f4f6ad
--- /dev/null
+++ b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.spec.ts
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { ComponentFixture, TestBed } from "@angular/core/testing";
+import { BehaviorSubject } from "rxjs";
+import { VisualTracePanelComponent } from "./visual-trace-panel.component";
+import { VisualTraceService } from "../../service/visual-trace/visual-trace.service";
+import { WorkflowActionService } from "../../service/workflow-graph/model/workflow-action.service";
+import { VisualTrace } from "../../types/visual-trace.interface";
+
+describe("VisualTracePanelComponent", () => {
+  let fixture: ComponentFixture<VisualTracePanelComponent>;
+  let component: VisualTracePanelComponent;
+  let traceSubject: BehaviorSubject<VisualTrace | undefined>;
+
+  beforeEach(async () => {
+    traceSubject = new BehaviorSubject<VisualTrace | undefined>(undefined);
+
+    await TestBed.configureTestingModule({
+      imports: [VisualTracePanelComponent],
+      providers: [
+        {
+          provide: VisualTraceService,
+          useValue: { trace$: traceSubject.asObservable(), closeTrace: vi.fn() },
+        },
+        {
+          provide: WorkflowActionService,
+          useValue: {
+            getTexeraGraph: () => ({
+              hasOperator: vi.fn().mockReturnValue(true),
+              getOperator: vi.fn().mockReturnValue({
+                operatorID: "op1",
+                operatorType: "PythonUDFV2",
+                customDisplayName: "Battle Logic",
+              }),
+            }),
+            highlightOperators: vi.fn(),
+          },
+        },
+      ],
+    }).compileComponents();
+
+    fixture = TestBed.createComponent(VisualTracePanelComponent);
+    component = fixture.componentInstance;
+    fixture.detectChanges();
+  });
+
+  it("renders a visual journey with hero media, metrics, and ordered steps", () => {
+    traceSubject.next({
+      title: "Charizard wins",
+      subtitle: "Fire matchup",
+      heroImage: "data:image/png;base64,abc",
+      heroMetric: { label: "Advantage", value: "2x" },
+      steps: [
+        {
+          title: "Loaded sprite",
+          operatorId: "op1",
+          image: "data:image/png;base64,abc",
+          metrics: [{ label: "Rows", value: "440" }],
+        },
+        { title: "Rendered result", kind: "render" },
+      ],
+    });
+    fixture.detectChanges();
+
+    const native = fixture.nativeElement as HTMLElement;
+    expect(native.querySelector(".trace-panel")).toBeTruthy();
+    expect(native.querySelector(".hero-title")?.textContent).toContain("Charizard wins");
+    expect(native.querySelector(".hero-media img")).toBeTruthy();
+    expect(native.querySelector(".hero-metric")?.textContent).toContain("2x");
+    expect(native.querySelector(".operator-chip")?.textContent).toContain("Battle Logic");
+    expect(native.querySelectorAll(".trace-step")).toHaveLength(2);
+    expect(native.textContent).toContain("Loaded sprite");
+    expect(native.textContent).toContain("Rendered result");
+  });
+
+  it("delegates close() to the visual trace service", () => {
+    component.close();
+    expect(TestBed.inject(VisualTraceService).closeTrace).toHaveBeenCalledTimes(1);
+  });
+});
+
diff --git a/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.ts b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.ts
new file mode 100644
index 00000000000..2962229517a
--- /dev/null
+++ b/frontend/src/app/workspace/component/visual-trace-panel/visual-trace-panel.component.ts
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { NgClass, NgFor, NgIf } from "@angular/common";
+import { Component, OnInit } from "@angular/core";
+import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy";
+import { NzIconDirective } from "ng-zorro-antd/icon";
+import { VisualTraceService } from "../../service/visual-trace/visual-trace.service";
+import { WorkflowActionService } from "../../service/workflow-graph/model/workflow-action.service";
+import { VisualTrace, VisualTraceStep, VisualTraceStepKind } from "../../types/visual-trace.interface";
+
+/**
+ * Side panel that renders the current visual trace published by `VisualTraceService`:
+ * a hero header, a filmstrip of step images, and the ordered list of steps.
+ */
+@UntilDestroy()
+@Component({
+  selector: "texera-visual-trace-panel",
+  templateUrl: "./visual-trace-panel.component.html",
+  styleUrls: ["./visual-trace-panel.component.scss"],
+  imports: [NgIf, NgFor, NgClass, NzIconDirective],
+})
+export class VisualTracePanelComponent implements OnInit {
+  /** Display label for each known step kind; anything else is shown as "Step". */
+  private static readonly KIND_LABELS: ReadonlyMap<string, string> = new Map([
+    ["source", "Source"],
+    ["match", "Match"],
+    ["compute", "Compute"],
+    ["render", "Render"],
+  ]);
+
+  /** Trace currently shown; the template renders nothing while this is undefined. */
+  public trace?: VisualTrace;
+
+  constructor(
+    private readonly visualTraceService: VisualTraceService,
+    private readonly workflowActionService: WorkflowActionService
+  ) {}
+
+  ngOnInit(): void {
+    this.visualTraceService.trace$.pipe(untilDestroyed(this)).subscribe(latest => (this.trace = latest));
+  }
+
+  public close(): void {
+    this.visualTraceService.closeTrace();
+  }
+
+  /** Highlights the step's operator in the workflow, if the step names one that still exists. */
+  public focusOperator(step: VisualTraceStep): void {
+    const operatorId = step.operatorId;
+    if (operatorId && this.workflowActionService.getTexeraGraph().hasOperator(operatorId)) {
+      this.workflowActionService.highlightOperators(false, operatorId);
+    }
+  }
+
+  /** Label precedence: explicit `operatorLabel`, then the operator's display name/type, then the kind label. */
+  public getStepLabel(step: VisualTraceStep): string {
+    if (step.operatorLabel) return step.operatorLabel;
+    const graph = this.workflowActionService.getTexeraGraph();
+    if (!step.operatorId || !graph.hasOperator(step.operatorId)) return this.getKindLabel(step.kind);
+    const operator = graph.getOperator(step.operatorId);
+    return operator.customDisplayName ?? operator.operatorType;
+  }
+
+  /** Maps a step kind to its chip text. */
+  public getKindLabel(kind?: VisualTraceStepKind): string {
+    return (kind !== undefined ? VisualTracePanelComponent.KIND_LABELS.get(kind) : undefined) ?? "Step";
+  }
+}
diff --git a/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.html b/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.html
index c092a4bf74a..0c759a0af5d 100644
--- a/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.html
+++ b/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.html
@@ -18,5 +18,7 @@
 -->
 
 <iframe
+  #visualizationFrame
   [srcdoc]="htmlData"
+  (load)="onVisualizationFrameLoad()"
   id="html-content"></iframe>
diff --git a/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.ts b/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.ts
index eb329c1c7f1..4602476e798 100644
--- a/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.ts
+++ b/frontend/src/app/workspace/component/visualization-panel-content/visualization-frame-content.component.ts
@@ -17,11 +17,22 @@
  * under the License.
  */
 
-import { AfterContentInit, Component, Input } from "@angular/core";
+import { AfterContentInit, Component, ElementRef, HostListener, Input, ViewChild } from "@angular/core";
 import { DomSanitizer } from "@angular/platform-browser";
 import { WorkflowResultService } from "../../service/workflow-result/workflow-result.service";
 import { auditTime, filter } from "rxjs/operators";
 import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy";
+import { VisualTraceService } from "../../service/visual-trace/visual-trace.service";
+import {
+  buildStructuralVisualTrace,
+  buildVisualTraceBridgeScript,
+  extractVisualTraceSelectionFromElement,
+  findVisualTraceElement,
+  parseVisualTraceMessage,
+  parseVisualTracePayloadAttribute,
+  parseVisualTraceSelectionMessage,
+} from "../../service/visual-trace/visual-trace.utils";
+import { WorkflowActionService } from "../../service/workflow-graph/model/workflow-action.service";
 
 @UntilDestroy()
 @Component({
@@ -32,13 +43,17 @@ import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy";
 export class VisualizationFrameContentComponent implements AfterContentInit {
   // operatorId: string = inject(NZ_MODAL_DATA).operatorId;
   @Input() operatorId?: string;
+  @ViewChild("visualizationFrame") visualizationFrame?: ElementRef<HTMLIFrameElement>;
   // progressive visualization update and redraw interval in milliseconds
   public static readonly UPDATE_INTERVAL_MS = 2000;
   htmlData: any = "";
+  private removeFrameClickListener?: () => void;
 
   constructor(
     private workflowResultService: WorkflowResultService,
-    private sanitizer: DomSanitizer
+    private sanitizer: DomSanitizer,
+    private visualTraceService: VisualTraceService,
+    private workflowActionService: WorkflowActionService
   ) {}
 
   ngAfterContentInit() {
@@ -79,9 +94,77 @@ export class VisualizationFrameContentComponent implements AfterContentInit {
     const firstDiv = doc.body.querySelector("div");
     if (firstDiv) firstDiv.style.height = "100%";
 
+    const bridgeScript = doc.createElement("script");
+    bridgeScript.textContent = buildVisualTraceBridgeScript();
+    doc.body.appendChild(bridgeScript);
+
     const serializer = new XMLSerializer();
     const newHtmlString = serializer.serializeToString(doc);
 
     this.htmlData = this.sanitizer.bypassSecurityTrustHtml(newHtmlString); // this line bypasses angular security
   }
+
+  /** Opens a trace for messages from this component's iframe; fails closed when the frame window is unavailable. */
+  @HostListener("window:message", ["$event"])
+  handleWindowMessage(event: MessageEvent): void {
+    const frameWindow = this.visualizationFrame?.nativeElement.contentWindow;
+    if (!frameWindow || event.source !== frameWindow) {
+      return;
+    }
+    const trace = parseVisualTraceMessage(event.data);
+    if (trace) {
+      this.visualTraceService.openTrace(trace);
+      return;
+    }
+    const selection = parseVisualTraceSelectionMessage(event.data);
+    if (selection && this.operatorId) {
+      this.openStructuralTrace(selection);
+    }
+  }
+
+  /** Re-attaches the click handler to the iframe document each time the frame fires its load event. */
+  onVisualizationFrameLoad(): void {
+    this.removeFrameClickListener?.();
+
+    const frameDocument = this.visualizationFrame?.nativeElement.contentDocument;
+    if (!frameDocument) {
+      return;
+    }
+
+    const onFrameClick = (clickEvent: MouseEvent): void => {
+      const clicked = findVisualTraceElement(clickEvent.target);
+      if (!clicked) {
+        return;
+      }
+      // A rich payload on the element takes precedence over the graph-derived fallback.
+      const richTrace = parseVisualTracePayloadAttribute(clicked.getAttribute("data-texera-trace"));
+      if (richTrace) {
+        this.visualTraceService.openTrace(richTrace);
+        return;
+      }
+      const fallback = extractVisualTraceSelectionFromElement(clicked);
+      if (fallback && this.operatorId) {
+        this.openStructuralTrace(fallback);
+      }
+    };
+
+    frameDocument.addEventListener("click", onFrameClick);
+    this.removeFrameClickListener = () => frameDocument.removeEventListener("click", onFrameClick);
+  }
+
+  /** Builds a trace from the workflow graph around this component's operator and opens it if one was built. */
+  private openStructuralTrace(selection: { title?: string; image?: string; imageAlt?: string }): void {
+    const targetOperatorId = this.operatorId;
+    if (!targetOperatorId) return;
+
+    const texeraGraph = this.workflowActionService.getTexeraGraph();
+    const trace = buildStructuralVisualTrace(selection, targetOperatorId, {
+      hasOperator: id => texeraGraph.hasOperator(id),
+      getOperator: id => texeraGraph.getOperator(id),
+      getInputOperatorIds: id => texeraGraph.getInputLinksByOperatorId(id).map(({ source }) => source.operatorID),
+    });
+    if (trace) {
+      this.visualTraceService.openTrace(trace);
+    }
+  }
 }
diff --git a/frontend/src/app/workspace/component/workspace.component.html b/frontend/src/app/workspace/component/workspace.component.html
index c54446fb318..3d7dd8c9cb3 100644
--- a/frontend/src/app/workspace/component/workspace.component.html
+++ b/frontend/src/app/workspace/component/workspace.component.html
@@ -37,4 +37,5 @@
   *ngIf="copilotEnabled"
   [agentIdToActivate]="agentIdToActivate"></texera-agent-panel>
 <texera-property-editor></texera-property-editor>
+<texera-visual-trace-panel></texera-visual-trace-panel>
 <ng-template #codeEditor></ng-template>
diff --git a/frontend/src/app/workspace/component/workspace.component.ts b/frontend/src/app/workspace/component/workspace.component.ts
index 9968c26f647..f3af0802ba9 100644
--- a/frontend/src/app/workspace/component/workspace.component.ts
+++ b/frontend/src/app/workspace/component/workspace.component.ts
@@ -61,6 +61,7 @@ import { LeftPanelComponent } from "./left-panel/left-panel.component";
 import { AgentPanelComponent } from "./agent/agent-panel/agent-panel.component";
 import { PropertyEditorComponent } from "./property-editor/property-editor.component";
 import { FormlyRepeatDndComponent } from "../../common/formly/repeat-dnd/repeat-dnd.component";
+import { VisualTracePanelComponent } from "./visual-trace-panel/visual-trace-panel.component";
 
 export const SAVE_DEBOUNCE_TIME_IN_MS = 5000;
 
@@ -83,6 +84,7 @@ export const SAVE_DEBOUNCE_TIME_IN_MS = 5000;
     NgIf,
     AgentPanelComponent,
     PropertyEditorComponent,
+    VisualTracePanelComponent,
     FormlyRepeatDndComponent,
   ],
 })
diff --git a/frontend/src/app/workspace/service/smart-file-inference/smart-file-inference.service.ts b/frontend/src/app/workspace/service/smart-file-inference/smart-file-inference.service.ts
new file mode 100644
index 00000000000..2c48806aec4
--- /dev/null
+++ b/frontend/src/app/workspace/service/smart-file-inference/smart-file-inference.service.ts
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { HttpClient } from "@angular/common/http";
+import { Injectable } from "@angular/core";
+import { Observable } from "rxjs";
+import { AppSettings } from "../../../common/app-setting";
+/** One entry of an inference response's `schema`: a name and a type, both carried as strings. */
+export interface SmartFileInferenceColumn {
+  name: string;
+  type: string;
+}
+/** Response type that `SmartFileInferenceService.preview` declares for the preview endpoint. */
+export interface SmartFileInferenceResponse {
+  detectedFormat: string;
+  schema: SmartFileInferenceColumn[];
+  customDelimiter: string | null;
+  hasHeader: boolean | null;
+  sheetName: string | null;
+  availableSheetNames: string[];
+  flatten: boolean | null;
+  isFolder: boolean;
+  fileCount: number;
+}
+/** Request body accepted by `SmartFileInferenceService.preview`; only `fileName` is required. */
+export interface SmartFileInferenceRequest {
+  fileName: string;
+  fileEncoding?: string;
+  formatOverride?: string;
+  customDelimiter?: string;
+  hasHeader?: boolean;
+  sheetName?: string;
+  flatten?: boolean;
+}
+
+/** Operator type string registered in LogicalOp.scala. */
+export const SMART_FILE_SCAN_TYPE = "SmartFileScan";
+
+/**
+ * Talks to the backend `POST /api/file-inference/preview` endpoint that backs the
+ * SmartFileScan operator. The endpoint runs the same inference path the operator
+ * uses at workflow compile time, so what the user sees in the property panel is
+ * exactly what the workflow will produce for either one file or one folder.
+ */
+@Injectable({
+  providedIn: "root",
+})
+export class SmartFileInferenceService {
+  constructor(private http: HttpClient) {}
+
+  /** Sends `request` as the POST body to the preview endpoint and returns the typed response observable. */
+  preview(request: SmartFileInferenceRequest): Observable<SmartFileInferenceResponse> {
+    const previewUrl = `${AppSettings.getApiEndpoint()}/file-inference/preview`;
+    const response$ = this.http.post<SmartFileInferenceResponse>(previewUrl, request);
+    return response$;
+  }
+}
diff --git a/frontend/src/app/workspace/service/visual-trace/visual-trace.service.ts b/frontend/src/app/workspace/service/visual-trace/visual-trace.service.ts
new file mode 100644
index 00000000000..6e8a72e2203
--- /dev/null
+++ b/frontend/src/app/workspace/service/visual-trace/visual-trace.service.ts
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { BehaviorSubject } from "rxjs";
+import { VisualTrace } from "../../types/visual-trace.interface";
+
+@Injectable({
+  providedIn: "root",
+})
+export class VisualTraceService {
+  private readonly traceSubject = new BehaviorSubject<VisualTrace | undefined>(undefined);
+  public readonly trace$ = this.traceSubject.asObservable();
+  /** Publishes `trace` as the current value of `trace$`. */
+  public openTrace(trace: VisualTrace): void {
+    this.traceSubject.next(trace);
+  }
+  /** Publishes `undefined` on `trace$`, i.e. no trace is open. */
+  public closeTrace(): void {
+    this.traceSubject.next(undefined);
+  }
+}
+
diff --git a/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.spec.ts b/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.spec.ts
new file mode 100644
index 00000000000..7005c898018
--- /dev/null
+++ b/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.spec.ts
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import {
+  buildStructuralVisualTrace,
+  extractVisualTraceSelectionFromElement,
+  findVisualTraceElement,
+  parseVisualTraceMessage,
+  parseVisualTracePayloadAttribute,
+  parseVisualTraceSelectionMessage,
+} from "./visual-trace.utils";
+
+describe("parseVisualTraceMessage", () => {
+  it("accepts a valid visual trace message", () => {
+    expect(
+      parseVisualTraceMessage({
+        type: "texera-visual-trace",
+        payload: {
+          title: "Charizard wins",
+          heroImage: "data:image/png;base64,abc",
+          steps: [
+            {
+              title: "Loaded sprite",
+              kind: "source",
+              metrics: [{ label: "Rows", value: "440" }],
+            },
+          ],
+        },
+      })
+    ).toEqual({
+      title: "Charizard wins",
+      heroImage: "data:image/png;base64,abc",
+      steps: [
+        {
+          title: "Loaded sprite",
+          kind: "source",
+          metrics: [{ label: "Rows", value: "440" }],
+        },
+      ],
+    });
+  });
+  // Rejected inputs: a non-object, a wrong message type, a payload without steps, a step without a title.
+  it("rejects malformed or incomplete trace messages", () => {
+    expect(parseVisualTraceMessage(undefined)).toBeUndefined();
+    expect(parseVisualTraceMessage({ type: "other", payload: {} })).toBeUndefined();
+    expect(parseVisualTraceMessage({ type: "texera-visual-trace", payload: { title: "Missing steps" } })).toBeUndefined();
+    expect(
+      parseVisualTraceMessage({
+        type: "texera-visual-trace",
+        payload: {
+          title: "Bad step",
+          steps: [{ detail: "No title" }],
+        },
+      })
+    ).toBeUndefined();
+  });
+});
+
+describe("parseVisualTraceSelectionMessage", () => {
+  it("accepts a valid fallback selection message", () => {
+    expect(
+      parseVisualTraceSelectionMessage({
+        type: "texera-visual-trace-selection",
+        payload: {
+          title: "Charizard",
+          image: "data:image/png;base64,abc",
+          imageAlt: "Charizard sprite",
+        },
+      })
+    ).toEqual({
+      title: "Charizard",
+      image: "data:image/png;base64,abc",
+      imageAlt: "Charizard sprite",
+    });
+  });
+  // A payload with neither a title nor an image is not a usable selection.
+  it("rejects malformed selection messages", () => {
+    expect(parseVisualTraceSelectionMessage(undefined)).toBeUndefined();
+    expect(parseVisualTraceSelectionMessage({ type: "other", payload: {} })).toBeUndefined();
+    expect(parseVisualTraceSelectionMessage({ type: "texera-visual-trace-selection", payload: {} })).toBeUndefined();
+  });
+});
+
+describe("buildStructuralVisualTrace", () => {
+  it("builds an upstream workflow journey when a visualization only reports the clicked image", () => {
+    const operators = {
+      source: { operatorID: "source", operatorType: "Smart Source", customDisplayName: "Pokemon Images" },
+      udf: { operatorID: "udf", operatorType: "Python UDF", customDisplayName: "Map sprites" },
+      visualizer: { operatorID: "visualizer", operatorType: "HTML Visualizer" },
+    };
+    const inputs = {
+      source: [],
+      udf: ["source"],
+      visualizer: ["udf"],
+    };
+    // Expected step order is upstream-first: source, then udf, then the visualizer itself.
+    expect(
+      buildStructuralVisualTrace(
+        { title: "Charizard", image: "data:image/png;base64,abc", imageAlt: "Charizard sprite" },
+        "visualizer",
+        {
+          hasOperator: (operatorId: string) => operatorId in operators,
+          getOperator: (operatorId: string) => operators[operatorId as keyof typeof operators],
+          getInputOperatorIds: (operatorId: string) => inputs[operatorId as keyof typeof inputs],
+        }
+      )
+    ).toEqual({
+      title: "Charizard",
+      subtitle: "Workflow path to HTML Visualizer",
+      summary:
+        "Auto-built from the upstream workflow graph. Add a trace payload in the visualization for row-level details.",
+      heroImage: "data:image/png;base64,abc",
+      heroImageAlt: "Charizard sprite",
+      heroMetric: { label: "Steps", value: "3" },
+      steps: [
+        {
+          title: "Pokemon Images",
+          operatorId: "source",
+          operatorLabel: "Pokemon Images",
+          kind: "source",
+        },
+        {
+          title: "Map sprites",
+          operatorId: "udf",
+          operatorLabel: "Map sprites",
+          kind: "compute",
+        },
+        {
+          title: "HTML Visualizer",
+          operatorId: "visualizer",
+          operatorLabel: "HTML Visualizer",
+          kind: "render",
+          image: "data:image/png;base64,abc",
+          imageAlt: "Charizard sprite",
+        },
+      ],
+    });
+  });
+  // With the target absent from the graph, getOperator must never be reached.
+  it("returns undefined when the visualizer operator is missing", () => {
+    expect(
+      buildStructuralVisualTrace(
+        { title: "Charizard", image: "data:image/png;base64,abc" },
+        "missing",
+        {
+          hasOperator: () => false,
+          getOperator: () => {
+            throw new Error("should not be called");
+          },
+          getInputOperatorIds: () => [],
+        }
+      )
+    ).toBeUndefined();
+  });
+});
+
+describe("visual trace DOM helpers", () => {
+  it("reads a rich trace payload from an element attribute", () => {
+    expect(
+      parseVisualTracePayloadAttribute(
+        JSON.stringify({
+          title: "Charizard wins",
+          steps: [{ title: "Rendered card" }],
+        })
+      )
+    ).toEqual({
+      title: "Charizard wins",
+      steps: [{ title: "Rendered card" }],
+    });
+  });
+  // The clicked badge holds no image itself; its parent card does, so the card is the expected match.
+  it("finds an image-bearing ancestor and extracts a fallback selection", () => {
+    const card = document.createElement("div");
+    card.className = "pokemon-side";
+    card.innerHTML = `
+      <div class="winner-badge">WINNER</div>
+      <img src="data:image/png;base64,abc" alt="Charizard" />
+      <div class="pokemon-name">Charizard</div>
+    `;
+    const badge = card.querySelector(".winner-badge");
+    expect(badge).not.toBeNull();
+    const traceElement = findVisualTraceElement(badge);
+
+    expect(traceElement).toBe(card);
+    expect(extractVisualTraceSelectionFromElement(traceElement as Element)).toEqual({
+      title: "Charizard",
+      image: "data:image/png;base64,abc",
+      imageAlt: "Charizard",
+    });
+  });
+  // Same lookup, but the elements are created by an iframe's document rather than the host document.
+  it("accepts element-like click targets from iframe documents", () => {
+    const frame = document.createElement("iframe");
+    document.body.appendChild(frame);
+    const frameDocument = frame.contentDocument as Document;
+    const card = frameDocument.createElement("div");
+    card.innerHTML = `
+      <div class="winner-badge">WINNER</div>
+      <img src="data:image/png;base64,abc" alt="Charizard" />
+    `;
+    frameDocument.body.appendChild(card);
+
+    const badge = card.querySelector(".winner-badge");
+    expect(findVisualTraceElement(badge)).toBe(card);
+
+    frame.remove();
+  });
+});
diff --git a/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.ts b/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.ts
new file mode 100644
index 00000000000..d94bde9723e
--- /dev/null
+++ b/frontend/src/app/workspace/service/visual-trace/visual-trace.utils.ts
@@ -0,0 +1,293 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import {
+  VisualTrace,
+  VisualTraceMetric,
+  VisualTraceSelection,
+  VisualTraceStep,
+  VisualTraceStepKind,
+} from "../../types/visual-trace.interface";
+
+const TRACE_MESSAGE_TYPE = "texera-visual-trace";
+const TRACE_SELECTION_MESSAGE_TYPE = "texera-visual-trace-selection";
+const VALID_STEP_KINDS = new Set<VisualTraceStepKind>(["source", "match", "compute", "render"]);
+/** The operator fields that are read when a structural trace is built. */
+export interface VisualTraceGraphOperator {
+  operatorID: string;
+  operatorType: string;
+  customDisplayName?: string;
+}
+/** Read-only graph queries that `buildStructuralVisualTrace` depends on. */
+export interface VisualTraceGraphReader {
+  hasOperator(operatorId: string): boolean;
+  getOperator(operatorId: string): VisualTraceGraphOperator;
+  getInputOperatorIds(operatorId: string): string[];
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === "object"; // arrays pass too: they are objects
+}
+
+function isElementLike(value: EventTarget | null): value is Element {
+  // Duck-typed rather than `instanceof Element`, so elements created by an iframe's document are accepted too.
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  const candidate = value as Partial<Element>;
+  return (
+    candidate.nodeType === 1 &&
+    typeof candidate.matches === "function" &&
+    typeof candidate.querySelector === "function"
+  );
+}
+
+/** Returns a metric when `value` is an object whose `label` and `value` are both strings, else undefined. */
+function parseMetric(value: unknown): VisualTraceMetric | undefined {
+  if (!isRecord(value)) {
+    return undefined;
+  }
+  const { label, value: metricValue } = value;
+  // Both fields must be strings; anything else invalidates the whole metric.
+  return typeof label === "string" && typeof metricValue === "string" ? { label, value: metricValue } : undefined;
+}
+
+function parseStep(value: unknown): VisualTraceStep | undefined {
+  if (!isRecord(value) || typeof value.title !== "string") {
+    return undefined;
+  }
+  // Optional text fields survive only as strings; any other type is normalised to undefined.
+  const text = (field: unknown): string | undefined => (typeof field === "string" ? field : undefined);
+  const kind = text(value.kind) as VisualTraceStepKind | undefined;
+  // Malformed metrics are dropped one by one instead of invalidating the step.
+  const metrics = Array.isArray(value.metrics)
+    ? value.metrics.map(parseMetric).filter((metric): metric is VisualTraceMetric => metric !== undefined)
+    : undefined;
+  return {
+    title: value.title,
+    detail: text(value.detail),
+    operatorId: text(value.operatorId),
+    operatorLabel: text(value.operatorLabel),
+    image: text(value.image),
+    imageAlt: text(value.imageAlt),
+    kind: kind !== undefined && VALID_STEP_KINDS.has(kind) ? kind : undefined,
+    metrics,
+  };
+}
+
+/** Validates a posted message and converts it into a VisualTrace; returns undefined for anything malformed. */
+export function parseVisualTraceMessage(message: unknown): VisualTrace | undefined {
+  if (!isRecord(message) || message.type !== TRACE_MESSAGE_TYPE || !isRecord(message.payload)) {
+    return undefined;
+  }
+  const { title, subtitle, summary, heroImage, heroImageAlt, heroMetric, steps: rawSteps } = message.payload;
+  if (typeof title !== "string" || !Array.isArray(rawSteps)) {
+    return undefined;
+  }
+  // Reject the whole trace when it has no steps or when any step fails to parse.
+  const steps = rawSteps.map(parseStep);
+  if (steps.length === 0 || !steps.every((step): step is VisualTraceStep => step !== undefined)) {
+    return undefined;
+  }
+  const text = (field: unknown): string | undefined => (typeof field === "string" ? field : undefined);
+  return {
+    title,
+    subtitle: text(subtitle),
+    summary: text(summary),
+    heroImage: text(heroImage),
+    heroImageAlt: text(heroImageAlt),
+    heroMetric: parseMetric(heroMetric),
+    steps,
+  };
+}
+
+/** Reads a fallback selection from a posted message; it must carry at least a non-empty title or image. */
+export function parseVisualTraceSelectionMessage(message: unknown): VisualTraceSelection | undefined {
+  if (!isRecord(message) || message.type !== TRACE_SELECTION_MESSAGE_TYPE || !isRecord(message.payload)) {
+    return undefined;
+  }
+  const text = (field: unknown): string | undefined => (typeof field === "string" ? field : undefined);
+  const { title, image, imageAlt } = message.payload;
+  const selection: VisualTraceSelection = { title: text(title), image: text(image), imageAlt: text(imageAlt) };
+  // Empty strings count as missing, so a selection carrying only `imageAlt` is rejected.
+  if (!selection.title && !selection.image) {
+    return undefined;
+  }
+  return selection;
+}
+
+/**
+ * Builds a trace from graph structure alone: the target operator plus every operator reachable through
+ * its inputs. Returns undefined when the target operator is not in the graph.
+ */
+export function buildStructuralVisualTrace(
+  selection: VisualTraceSelection,
+  targetOperatorId: string,
+  graph: VisualTraceGraphReader
+): VisualTrace | undefined {
+  if (!graph.hasOperator(targetOperatorId)) {
+    return undefined;
+  }
+
+  // Depth-first, post-order walk: an operator is recorded only after its inputs have been visited.
+  const seen = new Set<string>();
+  const ordered: string[] = [];
+  function collect(operatorId: string): void {
+    if (seen.has(operatorId) || !graph.hasOperator(operatorId)) {
+      return;
+    }
+    seen.add(operatorId);
+    graph.getInputOperatorIds(operatorId).forEach(collect);
+    ordered.push(operatorId);
+  }
+  collect(targetOperatorId);
+
+  const labelOf = (op: VisualTraceGraphOperator): string => op.customDisplayName ?? op.operatorType;
+  // The target is always the render step; any other operator without inputs is a source.
+  const steps = ordered.map((operatorId): VisualTraceStep => {
+    const label = labelOf(graph.getOperator(operatorId));
+    const isTarget = operatorId === targetOperatorId;
+    const hasInputs = graph.getInputOperatorIds(operatorId).length > 0;
+    return {
+      title: label,
+      operatorId,
+      operatorLabel: label,
+      kind: isTarget ? "render" : hasInputs ? "compute" : "source",
+      image: isTarget ? selection.image : undefined,
+      imageAlt: isTarget ? selection.imageAlt : undefined,
+    };
+  });
+
+  return {
+    title: selection.title ?? "Selected result",
+    subtitle: `Workflow path to ${labelOf(graph.getOperator(targetOperatorId))}`,
+    summary: "Auto-built from the upstream workflow graph. Add a trace payload in the visualization for row-level details.",
+    heroImage: selection.image,
+    heroImageAlt: selection.imageAlt,
+    heroMetric: { label: "Steps", value: String(steps.length) },
+    steps,
+  };
+}
+
+/** Parses the JSON text of a trace payload attribute into a trace; undefined when absent or invalid. */
+export function parseVisualTracePayloadAttribute(value: string | null): VisualTrace | undefined {
+  let trace: VisualTrace | undefined;
+  if (value) {
+    try {
+      const payload: unknown = JSON.parse(value);
+      trace = parseVisualTraceMessage({ type: TRACE_MESSAGE_TYPE, payload });
+    } catch {
+      // Unparseable JSON (or any other failure) is treated like a missing payload.
+    }
+  }
+  return trace;
+}
+
+/** Returns the nearest ancestor-or-self of `target` that has a trace payload or an image, else undefined. */
+export function findVisualTraceElement(target: EventTarget | null): Element | undefined {
+  // Stop at the body of the element's OWN document: elements inside an iframe never reach the host `document.body`.
+  for (let el = isElementLike(target) ? target : null; el && el !== el.ownerDocument.body; el = el.parentElement) {
+    if (el.hasAttribute("data-texera-trace") || el.matches("img") || el.querySelector("img")) {
+      return el;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Derives a fallback selection (title, image source, alt text) from `element` or the first image inside it.
+ */
+export function extractVisualTraceSelectionFromElement(element: Element): VisualTraceSelection | undefined {
+  const image = element.matches("img") ? element : element.querySelector("img");
+  if (image?.tagName !== "IMG") {
+    return undefined;
+  }
+  const alt = image.getAttribute("alt") ?? undefined;
+  const src = image.getAttribute("src") ?? undefined;
+  // Title preference: text of an explicit title element, then the image's alt text.
+  const titleText = element.querySelector("[data-texera-trace-title], .pokemon-name")?.textContent?.trim();
+  const title = titleText || alt || undefined;
+  return title || src ? { title, image: src, imageAlt: alt || title } : undefined;
+}
+/** Source of the in-frame script that posts clicked traces or selections to the parent via postMessage. */
+export function buildVisualTraceBridgeScript(): string {
+  return `
+(() => {
+  const TRACE_MESSAGE_TYPE = "texera-visual-trace";
+  const TRACE_SELECTION_MESSAGE_TYPE = "texera-visual-trace-selection";
+  const emitTrace = payload => window.parent.postMessage({ type: TRACE_MESSAGE_TYPE, payload }, "*");
+  const emitSelection = payload => window.parent.postMessage({ type: TRACE_SELECTION_MESSAGE_TYPE, payload }, "*");
+  const parseTrace = value => {
+    try {
+      return JSON.parse(value);
+    } catch {
+      return undefined;
+    }
+  };
+
+  const findFallbackElement = target => {
+    let element = target instanceof Element ? target : null;
+    while (element && element !== document.body) {
+      if (element.hasAttribute("data-texera-trace")) {
+        return element;
+      }
+      if (element.matches("img") || element.querySelector("img")) {
+        return element;
+      }
+      element = element.parentElement;
+    }
+    return null;
+  };
+
+  const buildFallbackSelection = element => {
+    const image = element.matches("img") ? element : element.querySelector("img");
+    if (!image) {
+      return undefined;
+    }
+    const titleElement = element.querySelector("[data-texera-trace-title], .pokemon-name");
+    const title = titleElement?.textContent?.trim() || image.getAttribute("alt") || undefined;
+    return {
+      title,
+      image: image.getAttribute("src") || undefined,
+      imageAlt: image.getAttribute("alt") || title,
+    };
+  };
+
+  document.addEventListener("click", event => {
+    const element = findFallbackElement(event.target);
+    if (!element) {
+      return;
+    }
+    const payload = parseTrace(element.getAttribute("data-texera-trace"));
+    if (payload) {
+      emitTrace(payload);
+      return;
+    }
+    const selection = buildFallbackSelection(element);
+    if (selection) {
+      emitSelection(selection);
+    }
+  });
+
+  window.texera = window.texera || {};
+  window.texera.showTrace = emitTrace;
+})();
+`;
+}
diff --git a/frontend/src/app/workspace/types/visual-trace.interface.ts b/frontend/src/app/workspace/types/visual-trace.interface.ts
new file mode 100644
index 00000000000..0af072ec34f
--- /dev/null
+++ b/frontend/src/app/workspace/types/visual-trace.interface.ts
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+export type VisualTraceStepKind = "source" | "match" | "compute" | "render"; // allowed values of VisualTraceStep.kind
+/** A label/value pair of strings; referenced by VisualTrace.heroMetric and VisualTraceStep.metrics. */
+export interface VisualTraceMetric {
+  label: string; // presumably the metric's display name — TODO confirm against the rendering component
+  value: string; // string-typed, so producers must format numbers before building the object
+}
+/** One element of VisualTrace.steps; `title` is the only required field. */
+export interface VisualTraceStep {
+  title: string;
+  detail?: string; // presumably longer descriptive text for the step — TODO confirm in the renderer
+  operatorId?: string; // presumably the id of the workflow operator behind this step — verify against the consumer
+  operatorLabel?: string; // presumably a human-readable name for that operator — verify against the consumer
+  image?: string; // NOTE(review): looks like an image URL/src, like VisualTraceSelection.image — confirm
+  imageAlt?: string; // presumably the alt text for `image`
+  kind?: VisualTraceStepKind; // "source" | "match" | "compute" | "render"
+  metrics?: VisualTraceMetric[]; // zero or more label/value pairs
+}
+/** Trace with required `title` and `steps`; presumably the payload of window.texera.showTrace — TODO confirm. */
+export interface VisualTrace {
+  title: string;
+  subtitle?: string;
+  summary?: string;
+  heroImage?: string; // NOTE(review): looks like an image URL/src for a header image — confirm
+  heroImageAlt?: string; // presumably the alt text for `heroImage`
+  heroMetric?: VisualTraceMetric; // a single label/value pair
+  steps: VisualTraceStep[]; // required, though the type permits an empty array
+}
+/** Mirrors the fields of the object built by buildFallbackSelection in the trace click script (presumably its type). */
+export interface VisualTraceSelection {
+  title?: string; // there: trimmed title-element text, else the image's alt attribute
+  image?: string; // there: the image's src attribute
+  imageAlt?: string; // there: the image's alt attribute, else the title
+}
diff --git a/frontend/src/assets/operator_images/FileSplit.png b/frontend/src/assets/operator_images/FileSplit.png
new file mode 100644
index 0000000000000000000000000000000000000000..f6e57404cbe41e3c92c35d4b7dc17458aeefeeeb
GIT binary patch
literal 1657
zcmeAS@N?(olHy`uVBq!ia0vp^2_VeD1|%QND7OGojKx9jP7LeL$-D$|0#YM9(|mmy
zw18|51|~)!24;{FAY^FIWMBca85q=nGy?<k0!D=F0w%a@!UARl8>DdeyTSmVA(5FO
z5hW46K32*3xq68y`AMmI6}bf<1q?P7RzPNMYDuC(MQ%=Bu~mhw5?F;5kPQ;nS5g2g
zDap1~itr6kaLzAERWQ>t&@)i7<5EyiuqjGOvkG!?gK7uzY?U%fN(!v>^~=l4^~#O)
z@{7{-4J|D#^$m>ljf`}GDs+o0^GXscbn}XpA%?)raY-#sF3Kz@$;{7F0GXSZlwVq6
ztE2?72o50bEXhnm*pycc^%l^B`XCv7Lp=k1xY<ChHXu<e|Dw!Ppv@rj?F?<eDpACs
z_SooyEJtz-#HV0UpjYj<Z1my5X2&HN_mG`|fu+dP#WAFU@$DSzysHi(ZOp9sjH?;8
z9mwW9ASaNcz$?3m$HcLvNo(RJ7A_I46%DEkqA!aUHYzDBc<}Lmkl&Mtd6L?*rXT-5
zvG17N?{kl>)8}3Pc4?9ei;DviT9Cx#9%!~o#kkS<L_YH`;a|+V*lhWxrYJf5_+Ryl
z>G^S^r}nRN`WAfq@@F4o#Xp%<(-=5brix4QNH!E^ajtx+{5Q0pagFK<Yp)EBGUG2L
z;o1`fg$`|BlFFhSXfQ>P>(KT!NX*O@-4$$U%0UV?jW(v%x0uiSZd!f8{X+Q#{R{U4
z=C?oFb?r;n%-DlQC4rnd(*xxf)to)4dnH5q)v6NTk0le*TBK9>7t}8_b^JCpl|41r
ziKAZl)#{wy>tVAN^n2Zztx)3^ZW-nqm$!(met{i-dTPG2zQa%dFt3Mv85vTiJ{9b;
zs$9sP>m2*x{^MI)d$y)jTI?;E+WztE0rhY{)7YuCt=Z2jg4GoN+^n#hSu}BL1E1sr
z=0~jZ0zs!lRqR;i7w+6WH?ne{<kZ{T&TI+tv%S-<zb!eq@94=1jPFYFy((<7e?0jm
zX_eC8&AG6Aa^0H~_fK89lCwOz(7Nnx*1T=fLTxdX?Q-6kN%P((XUx0CU%S7v^z8Qp
zG28ifgFjzl&wsvCYhPcFjz71hbk@Dkyc<Gy$=~{VMRB`H28-RI6Yn~bolMTB3$Kpl
zsPibAK9}36xM|@h87;kXRleX8Q9K1RHnG<`ulC{+n!w0)DE*pI15dyVhlg*jGHIC|
z@SO5i^NaMAW$~(q4t1PL{lCWG$ApciCw9)*oGp9c$NLMxf}gwQPFekC<7dxV^Nuwv
zsW{xZo9Do@{gVH7zVq;GX#ODlB0OV-y<VYh>fOytZCmT69Fv`-Ii+^R9ri~tu@@R=
zmrUww;F-|(cEyy`hQJw@?y4{^Nw(HXl}r#>R$_jV;qlE$p(Sw_g=OOBpS+V9f9BEk
zO+S|Zkute8|5NaS2T@O+YW-$2dNsYVf9VI--bj!ApUUPf{uJ}(?7!~F{R{^elpmJq
zSG#m#p}X+xe9oov=VD7O8Z4RPL$hvi<jXK*RoU##RQ_OE|9M;95224cQ_fg3os~NH
zz3bA~7%>OC&8AG(wYW2`|9<;J`O$gj;*&YO8+Z<}?Umnm%v4fwBlo4x+icwuyieSf
z&8<A;te%<Yv!l_gb>D<TjX(9iD1Ff}0lDcy_Uf~eiL6uKl{;>of0-lapf3me6W@%t
z7k)p@e$^d$XZ}up-ct8k4%PGG8u`VC3?B+_QMs|P^1X?B{d%W2(Uz`D+k0CBAKCTA
z`f?v{I~`CL-c;>==Ron>MbFF_majg`b7+OP=^@z*7fhynu#BDRqUO;5?8~L=vsmnn
zO1%3%JpJjWT&(e9W@Gtg;Xiu4XO1_`m#i?iKKTBUMuE+5{WQ^pee+en&voiwnlW#G
zzxKbz_Xl;>7Cine=Tj)V@h_|Rb;eJB9AiEzPp&i6t}DFlJ5jps%_J2i1ttVCC}sN3
YxNSlC#}C?h*FXiir>mdKI;Vst0HBhi7XSbN

literal 0
HcmV?d00001

diff --git a/frontend/src/assets/operator_images/SmartFileScan.png b/frontend/src/assets/operator_images/SmartFileScan.png
new file mode 100644
index 0000000000000000000000000000000000000000..b1266bd66561bd2c4c6029daa985f4a60ae1b396
GIT binary patch
literal 6977
zcmds6cT|(xmOn`lP!R|ypj5+EP>Lc72!zCIC>OYZpcFwtiYQV<dIBU|3jxGf5JaSj
zh$tNal`2wHq^NX+DBaKu7<!U9@vS#=XRVp}@2&OLgq8JuIeUNm?6dc8|Gs?=F;*7F
zTQ=?31OV9bhY7(N03^Ic0zrOw`5D@|3NQSwX2t}-<^5$;=iC8c$KoG^BgaD@%nY8o
z?lckB%bfVoHY@BIvgg*pJ+X=AF%L&ySa_=xT&Pc!i93mUj6m><sT{ekdnn%&@fY8B
z!*8dbQz%0Xc4}5qPflF6xG&)&Gx7e3ht4xvZ;7I_j+dLGuUhOa%>9SUJ5N^Xz3HPR
zf8K1}8S6CLJUv|%u(%#c0d66Q;Vx~nD|2D(hyD1VkN*!o9U=K{WhmY^cLGq#o<5Ru
zQ8{NkCO-4T{Barzben>ogkZ8<A3HY(ir-s{SOu<R9XnrhVY-a^mci}qy@TD6+Nqh}
zC|gS=T6$!zMx$3yXl5E!>JO~AP=P+CMcqga_x@QNO@{lvwLK?IQ0x76$`-GolUx=x
z)R-{sHDp_>$I`#x|6{YKfsKO{aVLgbfo5_p{)j%|jt=vVqmoJ*8J4rU#92#gOFrpc
zKuei6{l1@^=7}Q9*1gX1l>2~LOGg)DS*li64ryb~RSTH`Ts4+L5gO@oxkR-fkL?*I
z^fL%lu;VmIrK%*=^~yI*V=JD9z;GO2gjnf$D1nwgHJ?dZC~Pi2zL0k|^vS#c+W#oy
zuuF?5V(UH7OEJ1WD9tQKMdoYhEq<J;HmF{Y>J+a_R75H(BZ?4rJuFu2N}^8lztCiX
zqFvDR9C32m`H>u|`zY|n%lWq&lEWfrn)ywzP<~=8OkWwPEQaXF{*FuO-G*)%TwI#^
z?3^5GTswjIB#zh`117D5G-`2)TyCMY?7O+CCH*=V=*d+$UQ>UPZ1A-Lg-kKyv*JT$
zkW}(n<cEGX0JW!m<Vc9X{<nO1Np&A~N?{S(&zIy<H;52Ai0HT)eB743g}zvazcL2+
z?GCX(<MY+-+N27B$UKoZ-mlWhU4L%zjtId9`hMn2x1dFFT*fSY=y@$)_q*+sp!f3e
zri47=U9yBfcdQ4gBVJeMPdS;hYk*tuc>e$j#omdO`VHxln&``|WL(%>gx%lajG-La
z3A*<<HkWb)B1kR@ThMh1qF}1%!(#2T?UOT6QGo=IyO9!Pe|C-DCA17(GsRF6`08|3
zE7)^Soi%%*-Iy3)jfT&BfzL#?y7-9*2>~>I6VfH#wwDufs9fNhnraJiVPZ5)KeR`(
zjYAt+KVPJaC^Ay<=2q?)K)sPzY{wn$t`wv$%X%UwC3zAb=wi<+S<&)5(vb6%51+ko
zh9zHJzmK~b3{TP37!dpREWSsB?I~XQG<E&QSlse%yCFE)N>o!ZQ&Q1&Z;Pu}@onr&
z_R`t5B`;frYG@{oO2u)573af8$Z1=y^m-$|maI<||3ZstgS(u@_L;W9Fw6FvqXL-W
zZ~C<8=>D)AfA8s+s;*DPH4bSFEk`c8I86`IH#|~6D&PLZkLR2_e!rq?u5H2lK#N<+
zVd*7zbeUXudx7CDjKMwQ@cnVit^vV$T{;r&qYmaC4hAR4xhqbK=K@zXH+7g{C=ZoS
zfaf>5gr8>}*naCzEk$8trC`>%cfMI(XpQ7CjqPC)ts(KLell@Niu1XdljdDF$}Jwr
ziQ4O-@o4Yw-~~~(wmLIty{3N~Iz8sC!RH&9=VYXD=mL4l?`VY3E46JPrS?X8?36$T
zl>jd1bs%nr%Hp>5h~T5Bfd9ldqzk2oY2(iZLOL+%{m7hV1b=$;290x@LioSV`#q-_
zHn`dCLl7FU{@=BS51co&M^Z2scT<j7zX0}(Pf-OcS=8lYgz)e8<G{X|p^C1yOM#er
zLyDCZ1l6%g@S;B(b;fNk<}rrI)}bdHs6f<9V31OLj(b5t@Ctq<DTzxO*l-2yucJQb
zhoiO`zqlz<egKX4a?}L{T6H}l_RY~^Vu(k_p91^6Octt}8ATOEJhJJBmdBlOF0%6W
zX@UwH5Eo_boN*mGS{&CU^vzT$JzbLV++$Sv4m7^jr_6Cw>vJc6&0--*YeSJon7t_b
zH_)xzG7?izt2M>B5OoQS_!$T6vl_h?2?RU4@eqlL(i|dIq$nr`tR4Y*AIB~r)24>a
zxV~$OniD)hSq#uh0rt^(aZ{kg`)H3a*tHwkYmRi0)jgFr7NT`=22ccDN(CuFQgKa7
zm~K{GT6b^g$Xd!F5i4N~rBwlR?_3Lij&a2b{G1D`m+#n&p_~>6QwfD536ZmlxQu85
zsD@7Ni3@L)WO?-UJ8v$MfGV)Uj+3)jLeRyD1FLO4&R)R*0}6GqP#IeMKCe~%%%E3^
z7FwTJQ+F1P@J;}erZtTI$ntAF!261ziy~y4YY$5q;|51R7IQ?rb<y|=IOJvQsDvpI
z?;#ro4}ZD<w9*ALq<MVBzUH^)oKZ*sQ@g;c=bskq%DEe?MG<+?VDi(Z0n`9T(!_|b
z$QGGlrs6H)z2>a22VHkkkg~-dve^I^c^S`Fv;&fEiw{reVnqeE7@*~nJQL-^-7K^f
zUB}|-p8chZGUh9iK@=S^s}Lw>@7w=`0PMtp^z*=f7&7N8N&vkXyME0n6^fx~Z=`G?
z2K*8l?nF{W0rocT4D{860FW61*B&nPKv!dp_!3gUWK{ph#2Iqb?_!9tT>zL`@Fl<&
z1<#jo695Ga0pBMGK!GS?Y$pJr(r7DTB%oZNQt}J$69EN7TDEM<k;~k#Zj4`Fe^yD>
zW2x$(0ABvE_<kMORz%#LV1BKvy*P_|oR&4s8|ZnrfP2LbD6HPclJVFLz+|mz3FG2e
zOK!8WCIaAhIE3$K3GxIciZq{GW8R=BdccSn_(pH*6#%GIw)U26ImD_d0EC8bOSDJ-
zy!V`Y*4Uuc0urKmT7%lhhv{Tr{k=OVw?zR+ieMbgV#>=UQ~f=W%7_#MLe+mX{YDdx
zv~+QOMLa)rp8KIWu28+H-j$J?vBU@JP6YpXe3*tiMZ<9du`K1Q{yj%0tZ8++4siKm
zVgOkE4*NV)6BwwC8v#(>i2z{NZvbRmfs5ot?%!Ccs}qH<I330cmm#kNKF|%=xuexq
zRkhrP#)s8*l8t2&)qICa6@_Q%(iKBhDl8LkG?G%kG8U+qMqJGCXMDKYnN|1L(cZiH
z>USfjd%g_|&6Z6yDZxz<(L>VO7M(UUqt_0|3@eM&m!kCLMG@KcJ^40;)pGREbq6}i
z`yR;HA9;9?>W5`JeQ4eHNN&UJ`zg-U=JZJQp?l{a%Uwl^gRM=Uap6O+<<&2=?rXS1
zUc1xjX61sU{2n^iwZM$)Ic<sM7fP`4RBw3qdBTQv<j6*_>DzKkjtvc<-YxMDih-?v
zZBBCu%Q?MJeI$erT<thlZVhGUyO;P+!Q5>~^l)IHVZVdldA;jGt}!L*psoC$SmcIX
zK>11pDHvjk|5FWC&xVHoW%?_pnx`~My6nrFsXHa4QH{iYj53?4JfAbS`PFWqe0@!r
z?bT^yY)wO;GPMrKM2l2wv!r0dm9f8^<`<_}%Zz#wFO&lp`!rst`4){XtKEyfxkVIg
zeGexZ)~3nCh7659R~q}-1|#Vwdw8_oUMmlln0=wN5`+2OGF;gr%|O1{*+4d@&57D)
zq#_|PkZXK7O}aq1aoJvNai$L~utfK6O;1CEt=(!Qo{J&ba;S`(s<5p7r5Lq>Em%kX
zcO#Y4<gD$btuOC6J6*R>vc!rx+GpQxe0%ZGZbnwI8wp!Z>$0ZtNn?QWgYI?Ls&Gd5
zhl8-Devb*%4SPN=@$gWEsOP>9O@xSpi*p0p%FsBYJ-f|dfXoBbBNs(-WY_<gZCx79
zTf*@2bRC+IA&+@KA4u)Y!V?Jas>6BTcQ3`+9J?tm1;+h!Br8aO%-`l(DZVdNH=QVC
zuMHD!u6<JD^Enup#6zDS#GRkB+++l-1Pg0Fnq!f9ll9M+lpCRge3B<MCY|n?eiQ_o
z&P0ARv=u5}*)Vr3))4OL@DdO_E~g-@_FJ=xw($@A_+j<Wfzvz~<(;c1CHw``P~Rra
zfWX-{^JNxmTFf)GX0Nu}yQZFU{|Svqw$_1l^}>hV_Cz?W<$(Ruqj%wtf=;)(S1adD
z&s-O_8~K`FX$VkRy2oQKSIpixriZk~N8Ywp9$|f4zB{m_J7{`I+^sCMeTH-@!}QEM
z+f6xI*6|^8bn(yewy-dpVBp3jC9*!hv!OLf*yg%x9B_({Sv@!X<+Nnx)-;e&1T*4V
zDPIpx6+_ng*V5PjPC6H~{wLB&xDGNd@^~UAjeVX+QY__;va5~3Ji>vzqfcIUpRPRb
z`6Xns+JA(AWt~g7vI}e)>rCFvjQ8VqyiQk}=)6$atSv|GCZ*KCWHjx|k!I%Sq{qEn
z`9)w5DSsm!F<6QJAf0rz!b7#9mYf+&Yub^GFlpcJ+O;?DJ59o}q&g&d#djyCJxA`c
zngzrStjAO$-=xZ180>a3M4hAz`yLQmNolP-CNV0#3uVt7pfDK#DZE2swp`S&$J|&a
z8R(3{CPr4Su}d7}yLw0hkf9>V{My?(6^j~qH1iMEshLmnEd3kn7!eM`b>}SUrNH^6
zmSk7Ep2XjP+cT{bKJGfg@<cy|hB5+yKjSS=moSbTK^>FcZh_t89ozr*qRh(jp#NoA
z#CH?|pE*5Hsu;QOHZyeMPYYT}S=GYhRHmb`@XN1vYjPjUf$piM7-IOUxO^;JZ8I~;
zvN=~%(zX0!EKC98qf6_xXGb**Y~vtVHoRtQJ1*qR!FmX489L<;zq!po`8TtF^*i$T
zpI8uh;nm039{RZ^ZywYoe?h&0|M*^q>q2&J4-IzcmG`*8nq!$1Wy2We9IBPtm#!<X
z+4{0GtK(>@g<Ss2tLIwP8u~VuY8;b~XRiet`ZUn?^a>>c<%HyMr}b%h3%%u}x)g4A
zDJS2-Xv%)3u90h9iDk(c@<{*h9MqvS6RdQj_LA24xTFoOo^syv?wa1wyK6S>$J}9^
zhpu$>$gZ26D;|tVvPgw5Fx2k$(zOMB*u*J7tU&fWyCc8T?dDrpw2#?q0S;-#iH0(5
zB{~l#Ze^$CJb~f;u`1o#sd%&Z>iIc@`gs{?k~3lY1{E7$c!PbC5f*C-@u<%C=4!gC
z#>zPN$#MEBJ&y)KIWBpM&o+2r^AO7EwuisR46gl65P+{i{7&~oVhnX%MMLHmy>e7h
zzVfF=w;42|ctYgzp2~0EGCLLbgOiHl(U=lug~9Fkil^f87TU-BVI<E~8QdrGiKYCI
zQ^4q^uHU!eq?!&Kz)M0eix#dC8E6yliQqW|1~cUpP_(2^nf*`l6Sg@i&N{~B5IAFl
zBj~<EIcufVqe%wMM5r-R9-)_|JF4cW)s<=brQFYsp&<oYi6d_4meW79zvLa7JvBa(
zS7&Ln$u0234gtSGHKggI{~|{*blAjd&T8q019ileKTI>P_ZW}Z`O>EXFK#ry-ZhM^
zm<vKE+qwI*qdB80;Wx(KdHr96`F9jm^cc$dX)ZW7FEzN<d~zexwFA7vml{6yl}1QE
z{R$Ud7Lg}0Ap2X!)@%L#I<#%+{CBq)%G{vVPWG4^dTs=isER~m4pH^TD(t2Cor7mk
zl!AzOU@roTMCifyLi4ID-P`n}361UOJGUk(3OOg>4!x(z!ceZAFly6h#i?b~hA<^l
z47_1K0&cg|im{r0^cwF)-$^Q%ocoeGaq2;K+sHU|*_^PLT$O4#(z_f(z4yN4o>!{~
z*>gfsa%ghp2YhRS$;!+;**Q6qLE2h>uVktI?iBRWS#(x)ZITjB6fH6lrmwH7PxqCP
zUZXoS99MGn@-AmqPt#lrCdeK~dYwxNv+MPJ1vM3soUV|c#H}uIFQ+mCp4E&e_T!qO
z{X%yegpX)4Z^_c*n3>52=H(C164n*k;W5K}?y6bqM2T_M9paNne||vuvzRnm_JXuV
z8-Am0AigxT>^{96n@W82tV_`q9upjxSG_;CL$60i!qRZm=IF=dr?Nh&CfKiMFG=#+
zYfS*WV@nsb8$;S6_SOW<+Mv&Dbm@SxT^)9xI^|!&s$|*~vS;tc0q&ZB+_H9Gwj+Bs
zp4vLJoGFV|M^>2w&=E2duAG+6E|cPhkC>sE*%$m!*DJG>S~`;lK;cmUZaOWR98}z4
zgVxwq{NXZ1eXy<K8cz8%OCh5S&CA*h?ZbmOYHiiJ^T8UeV9dPz*m+7{OmTbrQ1QxV
zpDnXUlP4@~fmv^CQ1k1~03cMQPjY4TG%yOkZ9?3%%Ekshy4`q-CwQgm$o^WB*z-dI
zjwg;8mq1Pc&(4W7-a!|C`fR=s+;E3|-%~{t#EjxzcsRN<i>b?@oYn|d=K>HaiD_?5
zi_wp)syE|A4o^<^)ogjbL6Ab|U&xZD=`+y9KK|hY&DQOx{gd-8A%dz(j^6@QEy1Ht
zxm7_7dJvrG{oa+Jp5=x0)pWNoq{3ZQ@>Is0E})#~N^zsdQ?Gg$lv(-3M-<W*wx$A;
z4BnDHC?*bZCW7&l6Yk`T!CDyZw=Vg1#kA_5hv6CFZ&v3;PAy2+4j>TS3WZJqPjM{|
zICIy4iGm|>mD@^l;0N|6vM<e`qV}flM09LGba<M<leYU%-}YEadYBf*`O+&~dcInl
zQ>khlgAbps_L4p4<txBvZ@6fpN^mz*Uv8-(eRs=C0+SWl72H#;zgCf?zd#=$S1N&3
zpJWzS#Yq4ZN!ZD)es=f?GJN}lL+^~i;0+8rv9r*r@7@ka>C^7|E>#9FzhB;G<Zgpr
z>9h)(9O%Px{T}<hL2j+<{n$j;uGNC8n=g*ufxAIYCO&i(11TAYf_kD%v$4q@eWSxp
z;RhaSqR}dTZvGBw%~90#Z`9CG|IERYp^O`zn37!m_8cSO+U2a)R2i2a9uA|W&BO2B
zDz=xDt-E_1hTHH0jv8*h4Hxs4$TaT3l)R(f`%t1Pk!}!L--n>6EOGRf{V4Dx!6>q2
zF0m8^l7z-&CAmH~fpURL?yZoLxIF@h$%_7u-7%e-FgcUfKJybwnBih-=(LX+K;J<#
zU$-vnm8!xGK!-S-u7viWN-D|MEIBrGZ5~euKY-c&YCUV8(I}nv_Ob5F^7!N^bAoj6
z0NAAjv|byv2`{V3QzAt{$~2=C6=x|M{_SZ#C5f21${Yz{`RD-CmOEfgtzE8mcA^Br
z2St$+18m#xPF#g_xChVJ2XnV2{sUIl;b78AXCq^}ac(g}0hs;)emX4Px+AGb8x#I%
z=_&Hyq`DBunBc=BR}S<bM!UI)Nkzm>yyugM*4H&|Y<Y@-7}(euInuD%p@F$eo|qX-
zk83Vm+W<P;AYQXufCU%T;mNzLK<TnY@aY>Vgv2kxlZ!a8+LlETe`j!Mez@L7pjH~a
zl0oN${MyI^JZ&T8%Dg^Fe{ZTjO9mv+fXS0PK{eZix!^YiAa-d=9^}3d6#Z{od@FTS
zMCDE^-S|#aLNZWJlC%~9MOQXwii?5l+Yy>9KH$gH+_MW|h1opQmZUfw56hb{l>BmC
zGlsy2kE!~ZL$a6%<6I;JTDKz;PUc`2$lO6Q@~eqJ#*z2tAmIbkjZ~9n%+W(@q!J}E
zuIrYjCZ}_rEi-G&H+#7{Fc2mqH|V(PoDwVc8|r!#HPn)D{ZZP+-jZ>}VNZ0Qw%(gK
p?)OG+)si_Q>fi4FIptTlBFD~rMk#BR%k!N3!_b0I@Vm=j{{hmAuu%X2

literal 0
HcmV?d00001