From 1906d5c891fa622287158ec5e803d0033a5f12b7 Mon Sep 17 00:00:00 2001
From: JacobChamie <jacobchamie@gmail.com>
Date: Sat, 4 Apr 2026 14:35:03 -0700
Subject: [PATCH 1/2] Improve PDF to Markdown conversion and add backend engine

---
 .claude/settings.local.json                   |    9 +
 .../backend_mac/LocalPDF_Studio_api.deps.json |    0
 assets/backend_mac/LocalPDF_Studio_api.dll    |  Bin
 .../LocalPDF_Studio_api.runtimeconfig.json    |    0
 ..._Studio_api.staticwebassets.endpoints.json |    0
 assets/backend_mac/PdfSharp.BarCodes.dll      |  Bin
 assets/backend_mac/PdfSharp.Charting.dll      |  Bin
 assets/backend_mac/PdfSharp.Cryptography.dll  |  Bin
 assets/backend_mac/PdfSharp.Quality.dll       |  Bin
 assets/backend_mac/PdfSharp.Shared.dll        |  Bin
 assets/backend_mac/PdfSharp.Snippets.dll      |  Bin
 assets/backend_mac/PdfSharp.System.dll        |  Bin
 assets/backend_mac/PdfSharp.WPFonts.dll       |  Bin
 assets/backend_mac/PdfSharp.dll               |  Bin
 .../backend_mac/appsettings.Development.json  |    0
 assets/backend_mac/appsettings.json           |    0
 package-lock.json                             |    4 +-
 .../localpdf_studio_python.py                 |   11 +-
 .../localpdf_studio_python/pdf_to_markdown.py |  696 +++++++++++
 .../requirements-pdf-to-markdown.txt          |    6 +
 src/main/main.js                              |  214 +++-
 src/preload/preload.js                        |    4 +
 src/renderer/index.html                       |    1 +
 src/renderer/locales/bn/bn.json               |   39 +
 src/renderer/locales/chi/chi.json             |   39 +
 src/renderer/locales/en/en.json               |   40 +-
 src/renderer/locales/jp/jp.json               |   39 +
 .../tools/pdfToMarkdown/pdfToMarkdown.css     |  356 ++++++
 .../tools/pdfToMarkdown/pdfToMarkdown.html    |  187 +++
 .../tools/pdfToMarkdown/pdfToMarkdown.js      | 1087 +++++++++++++++++
 .../pdfToMarkdown/pdfToMarkdownFixture.css    |  255 ++++
 .../pdfToMarkdown/pdfToMarkdownFixture.html   |   92 ++
 .../pdfToMarkdown/pdfToMarkdownFixture.js     |  173 +++
 33 files changed, 3244 insertions(+), 8 deletions(-)
 create mode 100644 .claude/settings.local.json
 mode change 100644 => 100755 assets/backend_mac/LocalPDF_Studio_api.deps.json
 mode change 100644 => 100755 assets/backend_mac/LocalPDF_Studio_api.dll
 mode change 100644 => 100755 assets/backend_mac/LocalPDF_Studio_api.runtimeconfig.json
 mode change 100644 => 100755 assets/backend_mac/LocalPDF_Studio_api.staticwebassets.endpoints.json
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.BarCodes.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.Charting.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.Cryptography.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.Quality.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.Shared.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.Snippets.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.System.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.WPFonts.dll
 mode change 100644 => 100755 assets/backend_mac/PdfSharp.dll
 mode change 100644 => 100755 assets/backend_mac/appsettings.Development.json
 mode change 100644 => 100755 assets/backend_mac/appsettings.json
 create mode 100644 scripts/localpdf_studio_python/pdf_to_markdown.py
 create mode 100644 scripts/localpdf_studio_python/requirements-pdf-to-markdown.txt
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdown.css
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdown.html
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdown.js
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.css
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.html
 create mode 100644 src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.js

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 00000000..42f275a7
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(python3:*)",
+      "Bash(node --input-type=module --eval ':*)",
+      "Bash(node:*)"
+    ]
+  }
+}
diff --git a/assets/backend_mac/LocalPDF_Studio_api.deps.json b/assets/backend_mac/LocalPDF_Studio_api.deps.json
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/LocalPDF_Studio_api.dll b/assets/backend_mac/LocalPDF_Studio_api.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/LocalPDF_Studio_api.runtimeconfig.json b/assets/backend_mac/LocalPDF_Studio_api.runtimeconfig.json
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/LocalPDF_Studio_api.staticwebassets.endpoints.json b/assets/backend_mac/LocalPDF_Studio_api.staticwebassets.endpoints.json
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.BarCodes.dll b/assets/backend_mac/PdfSharp.BarCodes.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.Charting.dll b/assets/backend_mac/PdfSharp.Charting.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.Cryptography.dll b/assets/backend_mac/PdfSharp.Cryptography.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.Quality.dll b/assets/backend_mac/PdfSharp.Quality.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.Shared.dll b/assets/backend_mac/PdfSharp.Shared.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.Snippets.dll b/assets/backend_mac/PdfSharp.Snippets.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.System.dll b/assets/backend_mac/PdfSharp.System.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.WPFonts.dll b/assets/backend_mac/PdfSharp.WPFonts.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/PdfSharp.dll b/assets/backend_mac/PdfSharp.dll
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/appsettings.Development.json b/assets/backend_mac/appsettings.Development.json
old mode 100644
new mode 100755
diff --git a/assets/backend_mac/appsettings.json b/assets/backend_mac/appsettings.json
old mode 100644
new mode 100755
diff --git a/package-lock.json b/package-lock.json
index cea70c99..0dce6ec0 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "localpdf-studio",
-  "version": "2.0.0",
+  "version": "3.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "localpdf-studio",
-      "version": "2.0.0",
+      "version": "3.0.0",
       "license": "AGPL-3.0",
       "dependencies": {
         "@pdf-lib/fontkit": "^1.1.1",
diff --git a/scripts/localpdf_studio_python/localpdf_studio_python.py b/scripts/localpdf_studio_python/localpdf_studio_python.py
index cb5c03f5..44354d06 100644
--- a/scripts/localpdf_studio_python/localpdf_studio_python.py
+++ b/scripts/localpdf_studio_python/localpdf_studio_python.py
@@ -1,7 +1,7 @@
 # localpdf_studio_python.py
 # Single entry point for all LocalPDF Studio Python features.
 # Usage: localpdf_studio_python <command> [args...]
-# Commands: watermark, extract_images, convert_pdf_images, grayscale, redact
+# Commands: watermark, extract_images, convert_pdf_images, grayscale, redact, pdf_to_markdown
 
 import sys
 import json
@@ -9,7 +9,7 @@
 
 def main():
     if len(sys.argv) < 2:
-        print(json.dumps({"success": False, "error": "No command specified. Available: watermark, extract_images, convert_pdf_images, grayscale, redact"}))
+        print(json.dumps({"success": False, "error": "No command specified. Available: watermark, extract_images, convert_pdf_images, grayscale, redact, pdf_to_markdown"}))
         sys.exit(1)
 
     command = sys.argv[1]
@@ -31,8 +31,11 @@ def main():
     elif command == "redact":
         from redact_pdf import main as _main
         _main()
+    elif command == "pdf_to_markdown":
+        from pdf_to_markdown import main as _main
+        _main()
     else:
-        print(json.dumps({"success": False, "error": f"Unknown command: '{command}'. Available: watermark, extract_images, convert_pdf_images, grayscale, redact"}))
+        print(json.dumps({"success": False, "error": f"Unknown command: '{command}'. Available: watermark, extract_images, convert_pdf_images, grayscale, redact, pdf_to_markdown"}))
         sys.exit(1)
 
 
@@ -810,4 +813,4 @@ def _make_module(name, main_func):
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/scripts/localpdf_studio_python/pdf_to_markdown.py b/scripts/localpdf_studio_python/pdf_to_markdown.py
new file mode 100644
index 00000000..99bcf907
--- /dev/null
+++ b/scripts/localpdf_studio_python/pdf_to_markdown.py
@@ -0,0 +1,696 @@
+import base64
+import io
+import json
+import os
+import re
+import sys
+from collections import Counter
+
+
+SUPERSCRIPT_FLAG = 1  # bit masks ANDed with a span's "flags" value in _format_span
+ITALIC_FLAG = 2
+MONO_FLAG = 8
+BOLD_FLAG = 16
+# Leading list marker such as "(1)", "2.", "iv)" or "a."; group "rest" is the text after it.
+LIST_MARKER_RE = re.compile(
+    r"^(?P<marker>(?:\((?:\d+|[ivxlcdmIVXLCDM]{1,8}|[A-Za-z])\)|(?:\d+|[ivxlcdmIVXLCDM]{1,8}|[A-Za-z])[.)]))\s*(?P<rest>.*)$"
+)
+SENTENCE_END_RE = re.compile(r"[.!?:;…\"')\]>]$")  # text ends with closing punctuation, a quote or a closing bracket
+LOWERCASE_START_RE = re.compile(r"^[a-z]")  # text starts with an ASCII lowercase letter
+CONTINUATION_START_RE = re.compile(r"^[A-Za-z0-9(\"'`]")  # text starts with an ASCII letter/digit, "(", a quote or a backtick
+
+
+def _progress(stage, value, page=None, total_pages=None):
+    payload = {"stage": stage, "value": value}
+    if page is not None:
+        payload["page"] = page
+    if total_pages is not None:
+        payload["totalPages"] = total_pages
+    sys.stderr.write("PROGRESS_JSON:" + json.dumps(payload) + "\n")
+    sys.stderr.flush()
+
+
+def _load_dependencies():
+    missing = []
+    modules = {}
+
+    try:
+        import fitz
+        modules["fitz"] = fitz
+    except Exception:
+        missing.append("PyMuPDF (fitz)")
+
+    try:
+        import pdfplumber
+        modules["pdfplumber"] = pdfplumber
+    except Exception:
+        missing.append("pdfplumber")
+
+    try:
+        import pandas as pd
+        modules["pd"] = pd
+    except Exception:
+        missing.append("pandas")
+
+    try:
+        import pytesseract
+        modules["pytesseract"] = pytesseract
+    except Exception:
+        missing.append("pytesseract")
+
+    try:
+        import spacy
+        modules["spacy"] = spacy
+    except Exception:
+        missing.append("spacy")
+
+    return modules, missing
+
+
+def _load_nlp(spacy_mod):
+    try:
+        nlp = spacy_mod.blank("en")
+        if "sentencizer" not in nlp.pipe_names:
+            nlp.add_pipe("sentencizer")
+        return nlp
+    except Exception:
+        return None
+
+
+def _escape_md(text):
+    return (
+        text.replace("\\", "\\\\")
+        .replace("*", "\\*")
+        .replace("_", "\\_")
+        .replace("`", "\\`")
+        .replace("[", "\\[")
+        .replace("|", "\\|")
+    )
+
+
+def _normalize_ws(text):
+    return re.sub(r"\s+", " ", text or "").strip()
+
+
+def _compute_base_font_size(pages):
+    freq = Counter()
+    for page in pages:
+        for line in page["lines"]:
+            for span in line["spans"]:
+                text = _normalize_ws(span.get("text", ""))
+                if not text:
+                    continue
+                size = round(float(span.get("size", 0)) * 2) / 2
+                freq[size] += len(text)
+    if not freq:
+        return 12.0
+    return max(freq.items(), key=lambda kv: kv[1])[0]
+
+
+def _heading_levels_from_fonts(pages, base_font_size):
+    larger = Counter()
+    for page in pages:
+        for line in page["lines"]:
+            size = round(line["font_size"] * 2) / 2
+            if size > base_font_size:
+                larger[size] += max(1, len(line["plain_text"]))
+
+    sizes = [size for size, _ in sorted(larger.items(), key=lambda kv: (-kv[0], -kv[1]))]
+    heading_map = {}
+    for idx, size in enumerate(sizes[:3]):
+        heading_map[size] = idx + 1
+    return heading_map
+
+
+def _marker_body(marker):
+    return marker.strip().lstrip("(").rstrip(")").rstrip(".)").lower()
+
+
+def _is_roman(body):
+    return bool(body) and bool(re.fullmatch(r"[ivxlcdm]+", body))
+
+
+def _list_kind(marker):
+    """Classify a list marker as "numeric", "roman", "alpha" or "ambiguous" (one roman-numeral letter)."""
+    body = _marker_body(marker)
+    if re.fullmatch(r"\d+", body):
+        return "numeric"
+    if len(body) == 1:
+        # Only i/v/x/l/c/d/m can be either a letter or a roman numeral; "a", "b", ... are plain alpha.
+        return "ambiguous" if body in "ivxlcdm" else "alpha"
+    return "roman" if _is_roman(body) else "alpha"
+
+
+def _detect_list(line):
+    text = line["plain_text"]
+    match = LIST_MARKER_RE.match(text)
+    if not match:
+        return None
+    marker = match.group("marker")
+    rest = match.group("rest").strip()
+    return {
+        "marker": marker,
+        "rest": rest,
+        "kind": _list_kind(marker)
+    }
+
+
+def _format_span(text, flags):
+    """Escape ``text`` and wrap it in the Markdown markers selected by ``flags``.
+
+    Edge whitespace stays outside the markers: ``**bold **`` is not emphasis
+    in CommonMark because the closing delimiter must not follow whitespace.
+    """
+    core = text.strip() if text else ""
+    if not core:
+        return text or ""
+    lead = text[: len(text) - len(text.lstrip())]
+    trail = text[len(text.rstrip()):]
+    core = _escape_md(core)
+    if flags & MONO_FLAG:
+        core = f"`{core}`"
+    elif flags & BOLD_FLAG and flags & ITALIC_FLAG:
+        core = f"***{core}***"
+    elif flags & BOLD_FLAG:
+        core = f"**{core}**"
+    elif flags & ITALIC_FLAG:
+        core = f"*{core}*"
+    return lead + (f"<sup>{core}</sup>" if flags & SUPERSCRIPT_FLAG else core) + trail
+
+
+def _join_spans(spans):
+    """Concatenate a line's spans into ``(plain_text, styled_markdown)``.
+
+    Span positions are read from ``bbox`` (the key ``_line_from_raw`` reads); ``x0``/``x1`` are a fallback.
+    """
+    plain = styled = ""
+    prev_end = None
+    for span in spans or []:
+        text = span.get("text", "")
+        if not text:
+            continue
+        box = span.get("bbox") or (span.get("x0", 0), 0, span.get("x1", span.get("x0", 0)), 0)
+        x0, x1 = float(box[0]), float(box[2])
+        # A horizontal gap of at least 22% of the font size (minimum 2 units) counts as a word break.
+        wide_gap = prev_end is not None and x0 - prev_end >= max(float(span.get("size", 10)) * 0.22, 2)
+        if wide_gap and not plain.endswith(" "):
+            plain += " "
+            styled += " "
+        plain += text
+        styled += _format_span(text, int(span.get("flags", 0)))
+        prev_end = x1
+    return _normalize_ws(plain), styled.strip()
+
+
+def _line_from_raw(line):
+    spans = []
+    xs = []
+    ys = []
+    font_sizes = []
+    for span in line.get("spans", []):
+        text = span.get("text", "")
+        if not text or not text.strip():
+            continue
+        spans.append(span)
+        xs.extend([float(span.get("bbox", [0, 0, 0, 0])[0]), float(span.get("bbox", [0, 0, 0, 0])[2])])
+        ys.extend([float(span.get("bbox", [0, 0, 0, 0])[1]), float(span.get("bbox", [0, 0, 0, 0])[3])])
+        font_sizes.append(float(span.get("size", 0)))
+
+    if not spans:
+        return None
+
+    plain_text, styled_text = _join_spans(spans)
+    if not plain_text:
+        return None
+
+    return {
+        "spans": spans,
+        "plain_text": plain_text,
+        "text": styled_text or _escape_md(plain_text),
+        "x0": min(xs),
+        "x1": max(xs),
+        "y0": min(ys),
+        "y1": max(ys),
+        "font_size": sum(font_sizes) / len(font_sizes),
+    }
+
+
+def _bbox_intersects(a, b):
+    return not (a[2] <= b[0] or a[0] >= b[2] or a[3] <= b[1] or a[1] >= b[3])
+
+
+def _extract_page_lines(page):
+    raw = page.get_text("dict")
+    lines = []
+    for block in raw.get("blocks", []):
+        if block.get("type") != 0:
+            continue
+        for line in block.get("lines", []):
+            item = _line_from_raw(line)
+            if item:
+                lines.append(item)
+    lines.sort(key=lambda item: (item["y0"], item["x0"]))
+    return lines
+
+
+def _table_to_markdown(pd, rows):
+    """Render extracted ``rows`` as a pipe table; returns "" when there are fewer than 2 columns.
+
+    The table is built by hand because ``DataFrame.to_markdown`` needs the optional
+    ``tabulate`` package, which requirements-pdf-to-markdown.txt does not list. Pipes in
+    cells are escaped so they cannot split a column; ``pd`` is kept for call compatibility.
+    """
+    max_cols = max((len(row) for row in rows), default=0)
+    if max_cols < 2:
+        return ""
+    normalized = [
+        [_normalize_ws(c).replace("|", "\\|") if c is not None else "" for c in row]
+        + [""] * (max_cols - len(row))
+        for row in rows
+    ]
+    header = normalized[0] if any(normalized[0]) else [f"Column {i + 1}" for i in range(max_cols)]
+    table = [header, ["---"] * max_cols] + normalized[1:]
+    return "\n".join("| " + " | ".join(row) + " |" for row in table)
+
+
+def _extract_tables(pdfplumber_page, pd):
+    tables = []
+    try:
+        found = pdfplumber_page.find_tables()
+    except Exception:
+        found = []
+
+    for idx, table in enumerate(found):
+        rows = table.extract() or []
+        if len(rows) < 2:
+            continue
+        markdown = _table_to_markdown(pd, rows)
+        if not markdown:
+            continue
+        x0, top, x1, bottom = table.bbox
+        tables.append({
+            "type": "table",
+            "text": markdown,
+            "bbox": (float(x0), float(top), float(x1), float(bottom)),
+            "x0": float(x0),
+            "y0": float(top),
+            "sort_y": float(top),
+            "sort_x": float(x0),
+            "table_index": idx,
+        })
+    return tables
+
+
+def _extract_images(fitz_doc, page_index, asset_prefix):
+    page = fitz_doc[page_index]
+    images = []
+    refs = []
+    seen_xrefs = set()
+
+    for img_index, img in enumerate(page.get_images(full=True)):
+        xref = img[0]
+        if xref in seen_xrefs:
+            continue
+        seen_xrefs.add(xref)
+        try:
+            data = fitz_doc.extract_image(xref)
+        except Exception:
+            continue
+        ext = data.get("ext", "png")
+        name = f"{asset_prefix}-page-{page_index + 1:03d}-img-{len(images) + 1:02d}.{ext}"
+        images.append({
+            "filename": name,
+            "mimeType": f"image/{'jpeg' if ext in ('jpg', 'jpeg') else ext}",
+            "data": base64.b64encode(data["image"]).decode("ascii"),
+        })
+        refs.append({
+            "type": "image",
+            "text": f"![Figure {len(images)}](assets/{name})",
+            "sort_y": float(page.rect.height) + (len(refs) + 1) * 10,
+            "sort_x": 0.0,
+        })
+    return images, refs
+
+
+def _ocr_page(page, fitz_mod, pytesseract_mod):
+    pix = page.get_pixmap(matrix=fitz_mod.Matrix(2.5, 2.5), alpha=False)
+    try:
+        from PIL import Image
+    except Exception as exc:
+        raise RuntimeError(f"Pillow is required for OCR fallback: {exc}")
+    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+    return _normalize_ws(pytesseract_mod.image_to_string(image))
+
+
+def _detect_heading(line, heading_map):
+    rounded = round(line["font_size"] * 2) / 2
+    level = heading_map.get(rounded)
+    if not level:
+        return None
+    return {
+        "type": f"h{level}",
+        "text": line["text"],
+        "plain_text": line["plain_text"],
+        "x0": line["x0"],
+        "y0": line["y0"],
+        "sort_y": line["y0"],
+        "sort_x": line["x0"],
+    }
+
+
+def _line_to_element(line, heading_map):
+    heading = _detect_heading(line, heading_map)
+    if heading:
+        return heading
+
+    list_info = _detect_list(line)
+    if list_info:
+        marker = list_info["marker"]
+        prefix = "1. " if marker[0].isdigit() else f"- {marker} "
+        return {
+            "type": "list",
+            "text": list_info["rest"],
+            "plain_text": list_info["rest"],
+            "marker": marker,
+            "list_kind": list_info["kind"],
+            "prefix": prefix,
+            "x0": line["x0"],
+            "content_x": line["x0"] + max(12, line["font_size"] * 1.2),
+            "y0": line["y0"],
+            "sort_y": line["y0"],
+            "sort_x": line["x0"],
+        }
+
+    return {
+        "type": "paragraph",
+        "text": line["text"],
+        "plain_text": line["plain_text"],
+        "x0": line["x0"],
+        "y0": line["y0"],
+        "sort_y": line["y0"],
+        "sort_x": line["x0"],
+    }
+
+
+def _heal_sentences(elements, nlp):
+    out = []
+    for el in elements:
+        if el["type"] != "paragraph":
+            out.append(el)
+            continue
+
+        prev = out[-1] if out else None
+        if prev and prev["type"] == "list":
+            if (
+                CONTINUATION_START_RE.match(el["plain_text"])
+                and (
+                    LOWERCASE_START_RE.match(el["plain_text"])
+                    or el["x0"] >= prev.get("content_x", prev["x0"]) - 12
+                    or not SENTENCE_END_RE.search(prev["plain_text"])
+                )
+            ):
+                joiner = "" if prev["text"].endswith("-") else " "
+                if prev["text"].endswith("-"):
+                    prev["text"] = prev["text"][:-1] + el["text"]
+                    prev["plain_text"] = prev["plain_text"][:-1] + el["plain_text"]
+                else:
+                    prev["text"] += joiner + el["text"]
+                    prev["plain_text"] += joiner + el["plain_text"]
+                continue
+
+        if prev and prev["type"] == "paragraph":
+            if prev["plain_text"].endswith("-") and LOWERCASE_START_RE.match(el["plain_text"]):
+                prev["text"] = prev["text"][:-1] + el["text"]
+                prev["plain_text"] = prev["plain_text"][:-1] + el["plain_text"]
+                continue
+
+            join_candidate = prev["plain_text"] + " " + el["plain_text"]
+            should_join = False
+            if nlp is not None:
+                try:
+                    doc = nlp(join_candidate)
+                    should_join = len(list(doc.sents)) <= 1
+                except Exception:
+                    should_join = False
+
+            if should_join or (
+                not SENTENCE_END_RE.search(prev["plain_text"])
+                and LOWERCASE_START_RE.match(el["plain_text"])
+            ):
+                prev["text"] += " " + el["text"]
+                prev["plain_text"] += " " + el["plain_text"]
+                continue
+
+        out.append(dict(el))
+    return out
+
+
+def _list_resolved_kind(element, same_level, parent):
+    if element["list_kind"] != "ambiguous":
+        return element["list_kind"]
+    if same_level and same_level.get("resolved_kind") == "alpha":
+        return "alpha"
+    if same_level and same_level.get("resolved_kind") == "roman":
+        return "roman"
+    if parent and parent.get("resolved_kind") == "alpha":
+        return "roman"
+    return "alpha"
+
+
+def _matching_depth(stack, element):
+    for depth in range(len(stack) - 1, -1, -1):
+        entry = stack[depth]
+        if not entry:
+            continue
+        if entry["resolved_kind"] == "alpha" and element["list_kind"] in ("alpha", "ambiguous"):
+            return depth
+        if entry["resolved_kind"] == "numeric" and element["list_kind"] == "numeric":
+            return depth
+        if entry["resolved_kind"] == "roman" and element["list_kind"] in ("roman", "ambiguous"):
+            return depth
+    return None
+
+
+def _resolve_list_depth(element, stack, previous_list):
+    if not stack:
+        return 0, _list_resolved_kind(element, None, None)
+
+    depth = len(stack) - 1
+    while depth > 0 and element["x0"] < stack[depth]["x0"] - 6:
+        depth -= 1
+
+    top = stack[depth]
+    if top and element["x0"] > top.get("content_x", top["x0"]) + 8:
+        return depth + 1, _list_resolved_kind(element, None, top)
+
+    match = _matching_depth(stack, element)
+    if match is not None:
+        return match, _list_resolved_kind(element, stack[match], stack[match - 1] if match > 0 else None)
+
+    if previous_list and previous_list["resolved_kind"] == "alpha" and element["list_kind"] in ("roman", "ambiguous"):
+        return previous_list["depth"] + 1, _list_resolved_kind(element, None, previous_list)
+
+    return 0, _list_resolved_kind(element, stack[0], None)
+
+
+def _remove_edge_artifacts(page_markdowns):
+    """Drop repeated header/footer lines and bare page numbers from each page's Markdown.
+
+    A page's first line is removed when more than one page starts with it; its
+    last line is removed when more than one page ends with it or when it equals
+    the page's 1-based position in ``page_markdowns``. Interior blank lines are
+    kept: they are the block separators emitted by ``_render_elements``, and
+    without them a paragraph after a list or table can be absorbed into that block.
+    Returns the non-empty cleaned page strings.
+    """
+    # Each part is stripped first, so the first/last entries are real text lines.
+    pages = [part.strip().split("\n") for part in page_markdowns]
+    first_counts = Counter(_normalize_ws(lines[0]) for lines in pages if lines[0])
+    last_counts = Counter(_normalize_ws(lines[-1]) for lines in pages if lines[-1])
+    cleaned = []
+    for idx, lines in enumerate(pages, start=1):
+        if lines[0] and first_counts[_normalize_ws(lines[0])] > 1:
+            lines.pop(0)
+        last = _normalize_ws(lines[-1]) if lines else ""
+        if last and (last == str(idx) or last_counts[last] > 1):
+            lines.pop()
+        if "".join(lines).strip():
+            cleaned.append("\n".join(lines).strip())
+    return cleaned
+
+
+def _render_elements(elements):
+    lines = []
+    prev_type = None
+    stack = []
+    previous_list = None
+
+    for element in elements:
+        if element["type"] in ("h1", "h2", "h3"):
+            if prev_type:
+                lines.append("")
+            level = int(element["type"][1])
+            lines.append("#" * level + " " + element["text"])
+            stack = []
+            previous_list = None
+        elif element["type"] == "list":
+            depth, resolved_kind = _resolve_list_depth(element, stack, previous_list)
+            indent = "  " * depth
+            lines.append(indent + element["prefix"] + element["text"])
+            rendered = dict(element)
+            rendered["depth"] = depth
+            rendered["resolved_kind"] = resolved_kind
+            stack = stack[:depth]
+            stack.append(rendered)
+            previous_list = rendered
+        elif element["type"] == "table":
+            if prev_type:
+                lines.append("")
+            lines.append(element["text"])
+            stack = []
+            previous_list = None
+        else:
+            if prev_type and prev_type != "paragraph":
+                lines.append("")
+            lines.append(element["text"])
+            if prev_type != "list":
+                stack = []
+                previous_list = None
+        prev_type = element["type"]
+    return "\n".join(lines)
+
+
+def convert_pdf_to_markdown(payload):
+    modules, missing = _load_dependencies()
+    if missing:
+        return {
+            "success": False,
+            "error": "Missing Python dependencies: " + ", ".join(missing),
+            "markdown": "",
+            "assets": [],
+            "engine": "python"
+        }
+
+    fitz = modules["fitz"]
+    pdfplumber = modules["pdfplumber"]
+    pd = modules["pd"]
+    pytesseract = modules["pytesseract"]
+    nlp = _load_nlp(modules["spacy"])
+
+    input_path = payload["filePath"]
+    options = payload.get("options", {})
+    asset_prefix = re.sub(r"[^a-zA-Z0-9]+", "-", os.path.splitext(os.path.basename(input_path))[0]).strip("-").lower() or "document"
+
+    include_images = bool(options.get("includeImages", True))
+    detect_headings = bool(options.get("detectHeadings", True))
+    detect_tables = bool(options.get("detectTables", True))
+    detect_formatting = bool(options.get("detectFormatting", True))
+    ocr_fallback = bool(options.get("ocrFallback", False))
+    heal_paragraphs = bool(options.get("healParagraphs", True))
+
+    _progress("loading", 3)
+    fitz_doc = fitz.open(input_path)
+    plumber_doc = pdfplumber.open(input_path)
+    total_pages = len(fitz_doc)
+
+    try:
+        _progress("analyzing", 8, total_pages=total_pages)
+        page_models = []
+        for page_index in range(total_pages):
+            page = fitz_doc[page_index]
+            lines = _extract_page_lines(page)
+            page_models.append({"page_index": page_index, "lines": lines})
+
+        base_font_size = _compute_base_font_size(page_models)
+        heading_map = _heading_levels_from_fonts(page_models, base_font_size) if detect_headings else {}
+
+        assets = []
+        page_markdowns = []
+
+        for page_index in range(total_pages):
+            page_num = page_index + 1
+            _progress("page", 15 + int((page_index / max(total_pages, 1)) * 80), page=page_num, total_pages=total_pages)
+            fitz_page = fitz_doc[page_index]
+            plumber_page = plumber_doc.pages[page_index]
+
+            text_lines = page_models[page_index]["lines"]
+            tables = _extract_tables(plumber_page, pd) if detect_tables else []
+            table_bboxes = [table["bbox"] for table in tables]
+
+            text_present = bool(text_lines)
+            if ocr_fallback and not text_present:
+                ocr_text = _ocr_page(fitz_page, fitz, pytesseract)
+                if ocr_text:
+                    page_markdowns.append(ocr_text)
+                continue
+
+            elements = list(tables)
+            seen_positions = set()
+
+            for line in text_lines:
+                line_bbox = (line["x0"], line["y0"], line["x1"], line["y1"])
+                if any(_bbox_intersects(line_bbox, bbox) for bbox in table_bboxes):
+                    continue
+
+                dedup_key = (round(line["x0"], 1), round(line["y0"], 1), line["plain_text"])
+                if dedup_key in seen_positions:
+                    continue
+                seen_positions.add(dedup_key)
+
+                element = _line_to_element(line, heading_map)
+                if not detect_formatting:
+                    element["text"] = _escape_md(element.get("plain_text", element["text"]))
+                elements.append(element)
+
+            if include_images:
+                page_assets, image_refs = _extract_images(fitz_doc, page_index, asset_prefix)
+                assets.extend(page_assets)
+                elements.extend(image_refs)
+
+            elements.sort(key=lambda item: (item.get("sort_y", 0), item.get("sort_x", 0)))
+            if heal_paragraphs:
+                elements = _heal_sentences(elements, nlp)
+            page_markdown = _render_elements(elements).strip()
+            if page_markdown:
+                page_markdowns.append(page_markdown)
+
+        _progress("assembling", 98, total_pages=total_pages)
+        cleaned_pages = _remove_edge_artifacts(page_markdowns)
+        return {
+            "success": True,
+            "markdown": "\n\n".join(cleaned_pages),
+            "assets": assets,
+            "engine": "python",
+            "meta": {
+                "baseFontSize": base_font_size,
+                "pageCount": total_pages
+            }
+        }
+    finally:
+        plumber_doc.close()
+        fitz_doc.close()
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(json.dumps({"success": False, "error": "Expected a JSON payload file path"}))
+        return 1
+
+    try:
+        with open(sys.argv[1], "r", encoding="utf-8") as handle:
+            payload = json.load(handle)
+    except Exception as exc:
+        print(json.dumps({"success": False, "error": f"Failed to read payload: {exc}"}))
+        return 1
+
+    try:
+        result = convert_pdf_to_markdown(payload)
+        print(json.dumps(result))
+        return 0 if result.get("success") else 1
+    except Exception as exc:
+        print(json.dumps({"success": False, "error": str(exc), "markdown": "", "assets": [], "engine": "python"}))
+        return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/localpdf_studio_python/requirements-pdf-to-markdown.txt b/scripts/localpdf_studio_python/requirements-pdf-to-markdown.txt
new file mode 100644
index 00000000..8c7d51b6
--- /dev/null
+++ b/scripts/localpdf_studio_python/requirements-pdf-to-markdown.txt
@@ -0,0 +1,6 @@
+PyMuPDF
+pdfplumber
+pandas
+pytesseract
+spacy
+Pillow
diff --git a/src/main/main.js b/src/main/main.js
index 96e8b95b..c1152f9f 100644
--- a/src/main/main.js
+++ b/src/main/main.js
@@ -24,6 +24,7 @@
 const { app, BrowserWindow, dialog, ipcMain, shell, Menu } = require('electron/main');
 const path = require('path');
 const fs = require('fs');
+const os = require('os');
 const { spawn } = require('child_process');
 const { autoUpdater } = require('electron-updater');
 const { PDFDocument, PDFName, PDFRawStream } = require('pdf-lib');
@@ -37,6 +38,94 @@ let isDownloading = false;
 let lastUpdateStatus = { status: 'No updates checked yet.', details: '' };
 let openFileQueue = [];
 
+// Resolve how to launch the Python helper. Packaged builds run the bundled per-platform
+// executable under process.resourcesPath; development runs the in-repo script with the
+// system interpreter. Returns { command, baseArgs }; throws on an unsupported platform.
+function getPythonToolLaunchConfig() {
+    if (app.isPackaged) {
+        const bundledExecutables = {
+            win32: 'assets/backend_win/scripts/localpdf_studio_python.exe',
+            linux: 'assets/backend_linux/scripts/localpdf_studio_python',
+            darwin: 'assets/backend_mac/scripts/localpdf_studio_python'
+        };
+        const relativeExecutable = bundledExecutables[process.platform];
+        if (!relativeExecutable) {
+            throw new Error(`Unsupported platform for Python helper: ${process.platform}`);
+        }
+
+        const executablePath = path.join(process.resourcesPath, relativeExecutable);
+        return { command: executablePath, baseArgs: [] };
+    }
+
+    const scriptPath = path.join(app.getAppPath(), 'scripts/localpdf_studio_python/localpdf_studio_python.py');
+    // The python.org Windows installer puts `python` (not `python3`) on PATH.
+    const command = process.platform === 'win32' ? 'python' : 'python3';
+    return { command, baseArgs: [scriptPath] };
+}
+
+// Run one Python helper command: `payload` is written to a temp JSON file whose path is passed
+// after `commandName`; `PROGRESS_JSON:` stderr lines are relayed to `progressChannel`; resolves
+// with the JSON printed on stdout, rejects on spawn failure, failed exit, or unparsable output.
+async function runPythonJsonCommand(commandName, payload, event, progressChannel) {
+    const { command, baseArgs } = getPythonToolLaunchConfig();
+    const payloadPath = path.join(os.tmpdir(), `localpdf-studio-${commandName}-${Date.now()}-${Math.random().toString(36).slice(2)}.json`);
+    fs.writeFileSync(payloadPath, JSON.stringify(payload), 'utf-8');
+
+    return await new Promise((resolve, reject) => {
+        const child = spawn(command, [...baseArgs, commandName, payloadPath], { stdio: ['ignore', 'pipe', 'pipe'] });
+        // Decode in the stream: per-chunk toString() garbles a multi-byte character split across chunks.
+        child.stdout.setEncoding('utf8');
+        child.stderr.setEncoding('utf8');
+
+        let stdout = '';
+        let stderr = '';
+        let stderrBuffer = '';
+
+        const cleanup = () => {
+            try { fs.unlinkSync(payloadPath); } catch {}
+        };
+
+        child.stdout.on('data', text => { stdout += text; });
+
+        child.stderr.on('data', text => {
+            stderr += text;
+            stderrBuffer += text;
+            const lines = stderrBuffer.split(/\r?\n/);
+            stderrBuffer = lines.pop() || '';
+
+            for (const line of lines) {
+                if (line.startsWith('PROGRESS_JSON:') && event?.sender && progressChannel) {
+                    try {
+                        const progress = JSON.parse(line.slice('PROGRESS_JSON:'.length));
+                        event.sender.send(progressChannel, progress);
+                    } catch (err) {
+                        console.warn('Failed to parse python progress update:', err);
+                    }
+                }
+            }
+        });
+
+        child.on('error', err => {
+            cleanup();
+            reject(err);
+        });
+
+        child.on('close', code => {
+            cleanup();
+            try {
+                const result = JSON.parse(stdout || '{}');
+                if (code === 0 || result.success) {
+                    resolve(result);
+                } else {
+                    reject(new Error(result.error || stderr || `Python command failed with code ${code}`));
+                }
+            } catch (err) {
+                reject(new Error(`Failed to parse python output: ${err.message}\n${stdout}\n${stderr}`));
+            }
+        });
+    });
+}
+
 // Helper to send or queue file paths to renderer
 function queueOrSendOpenFile(filePath) {
     try {
@@ -660,6 +749,129 @@ ipcMain.handle('save-text-file', async (event, { filename, text }) => {
     }
 });
 
+// IPC: ask where to save, write `text` as the Markdown file, then write each asset
+// ({ filename, data: base64 }) into an `assets/` folder next to it. Returns { success, path?, error? }.
+ipcMain.handle('save-markdown-file', async (event, { filename, text, sourcePath, assets = [] }) => {
+    const sourceDir   = sourcePath ? path.dirname(sourcePath) : undefined;
+    const defaultPath = sourceDir ? path.join(sourceDir, filename) : filename;
+    const { filePath, canceled } = await dialog.showSaveDialog({
+        defaultPath,
+        filters: [
+            { name: 'Markdown Files', extensions: ['md'] },
+            { name: 'All Files', extensions: ['*'] }
+        ]
+    });
+    if (canceled || !filePath) return { success: false };
+
+    try {
+        fs.writeFileSync(filePath, Buffer.from(text, 'utf-8'));
+        if (Array.isArray(assets) && assets.length) {
+            const assetDir = path.join(path.dirname(filePath), 'assets');
+            fs.mkdirSync(assetDir, { recursive: true });
+            for (const asset of assets) {
+                if (!asset?.filename || !asset?.data) continue;
+                // basename() drops directory parts so a crafted name cannot write outside assetDir.
+                fs.writeFileSync(path.join(assetDir, path.basename(asset.filename)), Buffer.from(asset.data, 'base64'));
+            }
+        }
+        return { success: true, path: filePath };
+    } catch (err) {
+        console.error('Failed to save markdown file:', err);
+        return { success: false, error: err.message };
+    }
+});
+
+// IPC: convert the PDF at `filePath` through the Python helper, relaying its progress on
+// 'pdf-to-markdown-progress'. Helper failures are returned as { success: false, error, ... }.
+ipcMain.handle('convert-pdf-to-markdown', async (event, { filePath, options = {} }) => {
+    const request = { filePath, options };
+    try {
+        const conversion = await runPythonJsonCommand('pdf_to_markdown', request, event, 'pdf-to-markdown-progress');
+        return conversion;
+    } catch (err) {
+        console.error('Python PDF to Markdown conversion failed:', err);
+        const message = err.message;
+        return { success: false, error: message, markdown: '', assets: [], engine: 'python' };
+    }
+});
+
+// IPC: extract the image XObjects referenced by each page's resources in the PDF at `filePath`.
+// Returns { success, images } with one { pageNum, name, data, mimeType } per image (data is base64).
+// NOTE(review): stream bytes are passed through without re-encoding — confirm consumers accept the image/png label on non-DCT streams.
+ipcMain.handle('extract-pdf-images', async (event, { filePath }) => {
+    try {
+        const pdfBytes = fs.readFileSync(filePath);
+        const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true });
+
+        const images = [];
+        const pages = pdfDoc.getPages();
+
+        for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
+            const page = pages[pageIndex];
+            const { node } = page;
+
+            let resources;
+            try {
+                resources = node.Resources();
+            } catch {
+                continue;
+            }
+            if (!resources) continue;
+
+            let xObjectDict;
+            try {
+                xObjectDict = resources.lookup(PDFName.of('XObject'));
+            } catch {
+                continue;
+            }
+            if (!xObjectDict || typeof xObjectDict.keys !== 'function') continue;
+
+            const keys = xObjectDict.keys();
+            for (const key of keys) {
+                let xobj;
+                try {
+                    xobj = xObjectDict.lookup(key);
+                } catch {
+                    continue;
+                }
+                // Images are streams, and a stream has no lookup() of its own: its entries
+                // (Subtype, Filter) live on `dict`. Calling xobj.lookup() threw and skipped every image.
+                if (!(xobj instanceof PDFRawStream)) continue;
+
+                let subtype;
+                try {
+                    subtype = xobj.dict.lookup(PDFName.of('Subtype'));
+                } catch {
+                    continue;
+                }
+                if (!subtype || subtype.toString() !== '/Image') continue;
+
+                let filter;
+                try {
+                    filter = xobj.dict.lookup(PDFName.of('Filter'));
+                } catch {
+                    filter = null;
+                }
+                const filterStr = filter ? filter.toString() : '';
+                const mimeType = (filterStr.includes('DCTDecode') || filterStr.includes('JFIF')) ? 'image/jpeg' : 'image/png';
+
+                const data = Buffer.from(xobj.contents).toString('base64');
+                images.push({
+                    pageNum: pageIndex + 1,
+                    name: key.toString().replace('/', ''),
+                    data,
+                    mimeType
+                });
+            }
+        }
+
+        return { success: true, images };
+    } catch (err) {
+        console.error('Failed to extract PDF images:', err);
+        return { success: false, error: err.message, images: [] };
+    }
+});
+
 ipcMain.handle('save-json-file', async (event, { filename, json }) => {
     const { filePath, canceled } = await dialog.showSaveDialog({
         defaultPath: filename,
@@ -1294,4 +1506,4 @@ ipcMain.handle('build-fillable-pdf', async (event, { mode, pages, existingPdfPat
 
 function sanitizeName(name) {
     return (name || 'field').replace(/[^a-zA-Z0-9_\-.]/g, '_').substring(0, 64);
-}
\ No newline at end of file
+}
diff --git a/src/preload/preload.js b/src/preload/preload.js
index 162f42dc..6ae64301 100644
--- a/src/preload/preload.js
+++ b/src/preload/preload.js
@@ -69,4 +69,8 @@ contextBridge.exposeInMainWorld('electronAPI', {
     onTesseractProgress: (callback) => ipcRenderer.on('tesseract-progress', (event, progress) => callback(progress)),
     saveImageFile: (filename, buffer) => ipcRenderer.invoke('save-image-file', { filename, buffer }),
     buildFillablePdf: (options) => ipcRenderer.invoke('build-fillable-pdf', options),
+    saveMarkdownFile: (filename, text, sourcePath, assets) => ipcRenderer.invoke('save-markdown-file', { filename, text, sourcePath, assets }),
+    extractPdfImages: (filePath) => ipcRenderer.invoke('extract-pdf-images', { filePath }),
+    convertPdfToMarkdown: (filePath, options) => ipcRenderer.invoke('convert-pdf-to-markdown', { filePath, options }),
+    onPdfToMarkdownProgress: (callback) => ipcRenderer.on('pdf-to-markdown-progress', (event, progress) => callback(progress)),
 });
diff --git a/src/renderer/index.html b/src/renderer/index.html
index f11552f9..c3938266 100644
--- a/src/renderer/index.html
+++ b/src/renderer/index.html
@@ -64,6 +64,7 @@
                         <a href="./tools/pdfToPdfa/pdfToPdfa.html" data-i18n="tools.pdf-to-pdfa">PDF to PDF/A Converter</a>
                         <a href="./tools/splitPdfVertical/splitPdfVertical.html" data-i18n="tools.split-pdf-vertical">Split PDF (Vertical)</a>
                         <a href="./tools/imageEditor/imageEditor.html" data-i18n="tools.image-editor">Image Editor</a>
+                        <a href="./tools/pdfToMarkdown/pdfToMarkdown.html" data-i18n="tools.pdf-to-markdown">PDF to Markdown</a>
                     </div>
                 </div>
             </div>
diff --git a/src/renderer/locales/bn/bn.json b/src/renderer/locales/bn/bn.json
index 0aadab95..7ee03f04 100644
--- a/src/renderer/locales/bn/bn.json
+++ b/src/renderer/locales/bn/bn.json
@@ -29,6 +29,8 @@
     "image-editor": "ইমেজ এডিটর",
     "pdf-to-pdfa": "পিডিএফ থেকে PDF/A কনভার্টার",
     "split-pdf-vertical": "পিডিএফ বিভক্ত করুন (উল্লম্বভাবে)"
+,
+    "pdf-to-markdown": "পিডিএফ থেকে Markdown কনভার্টার"
   },
   "error": {
     "empty-message": "কোন পিডিএফ খোলা নেই। শুরু করতে \"পিডিএফ রিডার খুলুন\" ক্লিক করুন!"
@@ -1074,6 +1076,43 @@
     "dropped-error2": "ড্রপ করা ফাইল প্রসেস করার সময় একটি ত্রুটি ঘটেছে।",
     "drop-valid-pdf": "একটি বৈধ PDF ফাইল ড্রপ করুন।"
   },
+  "pdfToMarkdown": {
+    "page-title": "পিডিএফ থেকে Markdown কনভার্টার",
+    "options-title": "Conversion Options",
+    "heading-options": "Heading Detection",
+    "detect-headings": "Semantic heading detection (font-size ratio mapping)",
+    "detect-headings-help": "Computes document base font size and maps larger text to # / ## / ### levels.",
+    "table-options": "Table Extraction",
+    "detect-tables": "Detect and render tables as GitHub-Flavored Markdown",
+    "detect-tables-help": "Uses spatial column alignment to detect grids and output pipe-delimited tables.",
+    "format-options": "Inline Formatting",
+    "detect-formatting": "Detect bold, italic, and monospace text",
+    "detect-formatting-help": "Reads font name flags to wrap text in **bold**, *italic*, and monospace markers.",
+    "image-options": "Image Extraction",
+    "include-images": "Extract and embed images as data URIs",
+    "include-images-help": "Extracts embedded images from the PDF and links them inline in the Markdown output.",
+    "ocr-options": "OCR Fallback",
+    "ocr-fallback": "OCR fallback for scanned / image-only pages",
+    "ocr-fallback-help": "Detects pages with no extractable text and runs Tesseract OCR automatically.",
+    "paragraph-options": "Paragraph Healing",
+    "heal-paragraphs": "Repair PDF line-wrap artifacts into natural paragraphs",
+    "heal-paragraphs-help": "Joins broken lines using sentence-boundary heuristics to restore paragraph flow.",
+    "convert-btn": "Convert to Markdown",
+    "progress-title": "Converting PDF to Markdown",
+    "progress-init": "Initializing...",
+    "cancel-btn": "Cancel"
+  },
+  "pdfToMarkdownJS": {
+    "selecting": "Selecting PDF...",
+    "initializing": "Initializing...",
+    "empty-result": "No text content could be extracted from this PDF.",
+    "saved": "Markdown file saved successfully.",
+    "cancelled": "Conversion was cancelled.",
+    "error": "Conversion failed: ",
+    "drop-one": "Please drop only one PDF file.",
+    "drop-pdf": "Please drop a valid PDF file.",
+    "drop-failed": "Failed to save dropped file."
+  },
   "splitPdfVertical": {
     "tool-title": "PDF বিভক্ত করুন (উল্লম্বভাবে)",
     "pdf-preview": "PDF প্রিভিউ",
diff --git a/src/renderer/locales/chi/chi.json b/src/renderer/locales/chi/chi.json
index 0d8ec79a..6bd8783c 100644
--- a/src/renderer/locales/chi/chi.json
+++ b/src/renderer/locales/chi/chi.json
@@ -29,6 +29,8 @@
     "image-editor": "图片编辑器",
     "pdf-to-pdfa": "PDF 转 PDF/A 转换器",
     "split-pdf-vertical": "拆分 PDF（垂直）"
+,
+    "pdf-to-markdown": "PDF 转 Markdown"
   },
   "error": {
     "empty-message": "未打开PDF。点击\"打开PDF阅读器\"开始使用！"
@@ -1074,6 +1076,43 @@
     "dropped-error2": "处理拖放的文件时发生错误。",
     "drop-valid-pdf": "请拖放一个有效的 PDF 文件。"
   },
+  "pdfToMarkdown": {
+    "page-title": "PDF 转 Markdown",
+    "options-title": "Conversion Options",
+    "heading-options": "Heading Detection",
+    "detect-headings": "Semantic heading detection (font-size ratio mapping)",
+    "detect-headings-help": "Computes document base font size and maps larger text to # / ## / ### levels.",
+    "table-options": "Table Extraction",
+    "detect-tables": "Detect and render tables as GitHub-Flavored Markdown",
+    "detect-tables-help": "Uses spatial column alignment to detect grids and output pipe-delimited tables.",
+    "format-options": "Inline Formatting",
+    "detect-formatting": "Detect bold, italic, and monospace text",
+    "detect-formatting-help": "Reads font name flags to wrap text in **bold**, *italic*, and monospace markers.",
+    "image-options": "Image Extraction",
+    "include-images": "Extract and embed images as data URIs",
+    "include-images-help": "Extracts embedded images from the PDF and links them inline in the Markdown output.",
+    "ocr-options": "OCR Fallback",
+    "ocr-fallback": "OCR fallback for scanned / image-only pages",
+    "ocr-fallback-help": "Detects pages with no extractable text and runs Tesseract OCR automatically.",
+    "paragraph-options": "Paragraph Healing",
+    "heal-paragraphs": "Repair PDF line-wrap artifacts into natural paragraphs",
+    "heal-paragraphs-help": "Joins broken lines using sentence-boundary heuristics to restore paragraph flow.",
+    "convert-btn": "Convert to Markdown",
+    "progress-title": "Converting PDF to Markdown",
+    "progress-init": "Initializing...",
+    "cancel-btn": "Cancel"
+  },
+  "pdfToMarkdownJS": {
+    "selecting": "Selecting PDF...",
+    "initializing": "Initializing...",
+    "empty-result": "No text content could be extracted from this PDF.",
+    "saved": "Markdown file saved successfully.",
+    "cancelled": "Conversion was cancelled.",
+    "error": "Conversion failed: ",
+    "drop-one": "Please drop only one PDF file.",
+    "drop-pdf": "Please drop a valid PDF file.",
+    "drop-failed": "Failed to save dropped file."
+  },
   "splitPdfVertical": {
     "tool-title": "拆分 PDF（垂直）",
     "pdf-preview": "PDF 预览",
diff --git a/src/renderer/locales/en/en.json b/src/renderer/locales/en/en.json
index 32661340..43d8cf7d 100644
--- a/src/renderer/locales/en/en.json
+++ b/src/renderer/locales/en/en.json
@@ -28,7 +28,8 @@
     "fillable-pdf-builder": "Fillable PDF Builder",
     "image-editor": "Image Editor",
     "pdf-to-pdfa": "PDF to PDF/A Converter",
-    "split-pdf-vertical": "Split PDF (Vertical)"
+    "split-pdf-vertical": "Split PDF (Vertical)",
+    "pdf-to-markdown": "PDF to Markdown"
   },
   "error": {
     "empty-message": "No PDFs open. Click \"Open PDF Reader\" to get started!"
@@ -1085,6 +1086,43 @@
     "split-option2-helpTxt": "Drag the slider or type a value between 1 and 99.",
     "split-btn": "Split PDF (Vertical)"
   },
+  "pdfToMarkdown": {
+    "page-title": "PDF to Markdown",
+    "options-title": "Conversion Options",
+    "heading-options": "Heading Detection",
+    "detect-headings": "Semantic heading detection (font-size ratio mapping)",
+    "detect-headings-help": "Computes document base font size and maps larger text to # / ## / ### levels.",
+    "table-options": "Table Extraction",
+    "detect-tables": "Detect and render tables as GitHub-Flavored Markdown",
+    "detect-tables-help": "Uses spatial column alignment to detect grids and output pipe-delimited tables.",
+    "format-options": "Inline Formatting",
+    "detect-formatting": "Detect bold, italic, and monospace text",
+    "detect-formatting-help": "Reads font name flags to wrap text in **bold**, *italic*, and `mono` markers.",
+    "image-options": "Image Extraction",
+    "include-images": "Extract and embed images as data URIs",
+    "include-images-help": "Extracts embedded images from the PDF and links them inline in the Markdown output.",
+    "ocr-options": "OCR Fallback",
+    "ocr-fallback": "OCR fallback for scanned / image-only pages",
+    "ocr-fallback-help": "Detects pages with no extractable text and runs Tesseract OCR automatically. Requires an internet connection once per language to download the model.",
+    "paragraph-options": "Paragraph Healing",
+    "heal-paragraphs": "Repair PDF line-wrap artifacts into natural paragraphs",
+    "heal-paragraphs-help": "Joins broken lines using sentence-boundary heuristics to restore paragraph flow.",
+    "convert-btn": "Convert to Markdown",
+    "progress-title": "Converting PDF to Markdown",
+    "progress-init": "Initializing...",
+    "cancel-btn": "Cancel"
+  },
+  "pdfToMarkdownJS": {
+    "selecting": "Selecting PDF...",
+    "initializing": "Initializing...",
+    "empty-result": "No text content could be extracted from this PDF.",
+    "saved": "Markdown file saved successfully.",
+    "cancelled": "Conversion was cancelled.",
+    "error": "Conversion failed: ",
+    "drop-one": "Please drop only one PDF file.",
+    "drop-pdf": "Please drop a valid PDF file.",
+    "drop-failed": "Failed to save dropped file."
+  },
   "splitPdfVerticalJS": {
     "selecting-pdf": "Selecting PDF...",
     "loading-preview": "Loading preview...",
diff --git a/src/renderer/locales/jp/jp.json b/src/renderer/locales/jp/jp.json
index 1301d7d5..89d1ae0b 100644
--- a/src/renderer/locales/jp/jp.json
+++ b/src/renderer/locales/jp/jp.json
@@ -29,6 +29,8 @@
     "image-editor": "画像エディタ",
     "pdf-to-pdfa": "PDF から PDF/A への変換",
     "split-pdf-vertical": "PDF を分割（垂直）"
+,
+    "pdf-to-markdown": "PDF から Markdown への変換"
   },
   "error": {
     "empty-message": "PDFが開かれていません。「PDFリーダーを開く」をクリックして開始してください！"
@@ -1074,6 +1076,43 @@
     "dropped-error2": "ドロップされたファイルの処理中にエラーが発生しました。",
     "drop-valid-pdf": "有効な PDF ファイルをドロップしてください。"
   },
+  "pdfToMarkdown": {
+    "page-title": "PDF から Markdown への変換",
+    "options-title": "Conversion Options",
+    "heading-options": "Heading Detection",
+    "detect-headings": "Semantic heading detection (font-size ratio mapping)",
+    "detect-headings-help": "Computes document base font size and maps larger text to # / ## / ### levels.",
+    "table-options": "Table Extraction",
+    "detect-tables": "Detect and render tables as GitHub-Flavored Markdown",
+    "detect-tables-help": "Uses spatial column alignment to detect grids and output pipe-delimited tables.",
+    "format-options": "Inline Formatting",
+    "detect-formatting": "Detect bold, italic, and monospace text",
+    "detect-formatting-help": "Reads font name flags to wrap text in **bold**, *italic*, and monospace markers.",
+    "image-options": "Image Extraction",
+    "include-images": "Extract and embed images as data URIs",
+    "include-images-help": "Extracts embedded images from the PDF and links them inline in the Markdown output.",
+    "ocr-options": "OCR Fallback",
+    "ocr-fallback": "OCR fallback for scanned / image-only pages",
+    "ocr-fallback-help": "Detects pages with no extractable text and runs Tesseract OCR automatically.",
+    "paragraph-options": "Paragraph Healing",
+    "heal-paragraphs": "Repair PDF line-wrap artifacts into natural paragraphs",
+    "heal-paragraphs-help": "Joins broken lines using sentence-boundary heuristics to restore paragraph flow.",
+    "convert-btn": "Convert to Markdown",
+    "progress-title": "Converting PDF to Markdown",
+    "progress-init": "Initializing...",
+    "cancel-btn": "Cancel"
+  },
+  "pdfToMarkdownJS": {
+    "selecting": "Selecting PDF...",
+    "initializing": "Initializing...",
+    "empty-result": "No text content could be extracted from this PDF.",
+    "saved": "Markdown file saved successfully.",
+    "cancelled": "Conversion was cancelled.",
+    "error": "Conversion failed: ",
+    "drop-one": "Please drop only one PDF file.",
+    "drop-pdf": "Please drop a valid PDF file.",
+    "drop-failed": "Failed to save dropped file."
+  },
   "splitPdfVertical": {
     "tool-title": "PDF を分割（垂直）",
     "pdf-preview": "PDF プレビュー",
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.css b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.css
new file mode 100644
index 00000000..de42e8b0
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.css
@@ -0,0 +1,356 @@
+/**
+ * LocalPDF Studio - Offline PDF Toolkit
+ * ======================================
+ *
+ * @author      Md. Alinur Hossain <alinur1160@gmail.com>
+ * @license     AGPL 3.0 (GNU Affero General Public License version 3)
+ * @website     https://alinur1.github.io/LocalPDF_Studio_Website/
+ * @repository  https://github.com/Alinur1/LocalPDF_Studio
+ *
+ * Copyright (c) 2025 Md. Alinur Hossain. All rights reserved.
+ *
+ * Architecture:
+ * - Frontend: Electron + HTML/CSS/JS
+ * - Backend: ASP.NET Core Web API, Python
+ * - PDF Engine: PdfSharp + Mozilla PDF.js
+**/
+
+
+/* src/renderer/tools/pdfToMarkdown/pdfToMarkdown.css */
+
+:root {
+    --tool-title-color: #ecf0f1;
+    --select-btn-color: #ecf0f1;
+    --select-btn-bg: #2c3e50;
+    --select-btn-border: #3498db;
+    --select-btn-hover-bg: #34495e;
+    --select-btn-hover-border: #3498db;
+    --selected-file-bg: #2c3e50;
+    --selected-file-border: #18222e;
+    --selected-file-color: #ecf0f1;
+    --pdf-name-color: #ecf0f1;
+    --pdf-size-color: #bdc3c7;
+    --remove-btn-bg: #e74c3c;
+    --remove-btn-hover: #c0392b;
+    --options-bg: #2c3e50;
+    --options-border: #18222e;
+    --options-title-color: #ecf0f1;
+    --options-title-border: #34495e;
+    --option-group-title-color: #ecf0f1;
+    --option-label-color: #bdc3c7;
+    --options-input-bg: #1c2833;
+    --options-input-border: #34495e;
+    --options-input-color: #ecf0f1;
+    --options-input-focus-border: #3498db;
+    --checkbox-accent: #3498db;
+    --action-btn-bg: #27ae60;
+    --action-btn-hover: #1e8449;
+    --progress-overlay-bg: rgba(0, 0, 0, 0.85);
+    --progress-content-bg: #1c2833;
+    --progress-content-border: #34495e;
+    --progress-title-color: #ecf0f1;
+    --progress-bar-bg: #34495e;
+    --progress-bar-fill: #3498db;
+    --progress-info-color: #bdc3c7;
+    --progress-cancel-bg: #e74c3c;
+    --progress-cancel-hover: #c0392b;
+    --preview-bg: #2c3e50;
+    --preview-border: #18222e;
+    --preview-header-color: #ecf0f1;
+    --preview-text-color: #bdc3c7;
+    --preview-code-bg: #1c2833;
+}
+
+[data-theme="light"] {
+    --tool-title-color: #2c3e50;
+    --select-btn-color: #2c3e50;
+    --select-btn-bg: #f0f3f4;
+    --select-btn-border: #2980b9;
+    --select-btn-hover-bg: #dce1e7;
+    --select-btn-hover-border: #2980b9;
+    --selected-file-bg: #f0f3f4;
+    --selected-file-border: #bdc3c7;
+    --selected-file-color: #2c3e50;
+    --pdf-name-color: #2c3e50;
+    --pdf-size-color: #7f8c8d;
+    --remove-btn-bg: #e74c3c;
+    --remove-btn-hover: #c0392b;
+    --options-bg: #f0f3f4;
+    --options-border: #bdc3c7;
+    --options-title-color: #2c3e50;
+    --options-title-border: #bdc3c7;
+    --option-group-title-color: #2c3e50;
+    --option-label-color: #7f8c8d;
+    --options-input-bg: #ffffff;
+    --options-input-border: #bdc3c7;
+    --options-input-color: #2c3e50;
+    --options-input-focus-border: #2980b9;
+    --checkbox-accent: #2980b9;
+    --action-btn-bg: #27ae60;
+    --action-btn-hover: #1e8449;
+    --progress-content-bg: #ffffff;
+    --progress-content-border: #bdc3c7;
+    --progress-title-color: #2c3e50;
+    --progress-bar-bg: #bdc3c7;
+    --progress-info-color: #7f8c8d;
+    --preview-bg: #f0f3f4;
+    --preview-border: #bdc3c7;
+    --preview-header-color: #2c3e50;
+    --preview-text-color: #2c3e50;
+    --preview-code-bg: #e8ecf0;
+}
+
+* {
+    box-sizing: border-box;
+}
+
+body {
+    margin: 0;
+    padding: 0;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+}
+
+#app {
+    min-height: 100vh;
+    display: flex;
+    flex-direction: column;
+}
+
+.tool-container {
+    flex: 1;
+    padding: 20px;
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+    max-width: 900px;
+    margin: 0 auto;
+    width: 100%;
+}
+
+/* File Selection */
+.file-selection-area {
+    width: 100%;
+}
+
+.select-pdf-btn {
+    width: 100%;
+    padding: 32px 20px;
+    background: var(--select-btn-bg);
+    color: var(--select-btn-color);
+    border: 2px dashed var(--select-btn-border);
+    border-radius: 8px;
+    font-size: 15px;
+    cursor: pointer;
+    transition: background 0.2s, border-color 0.2s;
+    text-align: center;
+}
+
+.select-pdf-btn:hover {
+    background: var(--select-btn-hover-bg);
+    border-color: var(--select-btn-hover-border);
+}
+
+.select-hint {
+    color: var(--pdf-size-color);
+    font-size: 12px;
+}
+
+.selected-file-info {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    background: var(--selected-file-bg);
+    border: 1px solid var(--selected-file-border);
+    border-radius: 8px;
+    padding: 12px 16px;
+    color: var(--selected-file-color);
+}
+
+.pdf-name {
+    margin: 0;
+    font-size: 14px;
+    font-weight: 600;
+    color: var(--pdf-name-color);
+    word-break: break-all;
+}
+
+.pdf-size {
+    margin: 4px 0 0;
+    font-size: 12px;
+    color: var(--pdf-size-color);
+}
+
+.remove-btn {
+    background: var(--remove-btn-bg);
+    border: none;
+    border-radius: 6px;
+    color: white;
+    cursor: pointer;
+    padding: 6px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    flex-shrink: 0;
+    transition: background 0.2s;
+}
+
+.remove-btn:hover {
+    background: var(--remove-btn-hover);
+}
+
+/* Options Panel */
+.options-container {
+    background: var(--options-bg);
+    border: 1px solid var(--options-border);
+    border-radius: 8px;
+    padding: 20px;
+}
+
+.options-container h2 {
+    margin: 0 0 16px;
+    font-size: 16px;
+    color: var(--options-title-color);
+    border-bottom: 1px solid var(--options-title-border);
+    padding-bottom: 10px;
+}
+
+.option-group {
+    margin-bottom: 16px;
+}
+
+.option-group:last-child {
+    margin-bottom: 0;
+}
+
+.option-group h3 {
+    margin: 0 0 10px;
+    font-size: 14px;
+    color: var(--option-group-title-color);
+}
+
+.checkbox-row {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    margin-bottom: 8px;
+}
+
+.checkbox-row input[type="checkbox"] {
+    accent-color: var(--checkbox-accent);
+    width: 16px;
+    height: 16px;
+    cursor: pointer;
+}
+
+.checkbox-row label {
+    font-size: 14px;
+    color: var(--option-label-color);
+    cursor: pointer;
+    user-select: none;
+}
+
+.help-text {
+    font-size: 12px;
+    color: var(--option-label-color);
+    display: block;
+    margin-top: 4px;
+    margin-left: 26px;
+}
+
+/* Action Buttons */
+.action-buttons {
+    display: flex;
+    gap: 12px;
+    justify-content: flex-start;
+}
+
+.action-btn {
+    padding: 10px 24px;
+    border: none;
+    border-radius: 6px;
+    font-size: 14px;
+    font-weight: 600;
+    cursor: pointer;
+    transition: background 0.2s, opacity 0.2s;
+    background: var(--action-btn-bg);
+    color: white;
+}
+
+.action-btn:hover:not(:disabled) {
+    background: var(--action-btn-hover);
+}
+
+.action-btn:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
+}
+
+/* Progress Modal */
+.progress-overlay {
+    position: fixed;
+    inset: 0;
+    background: var(--progress-overlay-bg);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 1000;
+}
+
+.progress-content {
+    background: var(--progress-content-bg);
+    border: 1px solid var(--progress-content-border);
+    border-radius: 12px;
+    padding: 32px;
+    min-width: 360px;
+    max-width: 480px;
+    width: 90%;
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+}
+
+.progress-title {
+    margin: 0;
+    font-size: 18px;
+    font-weight: 600;
+    color: var(--progress-title-color);
+    text-align: center;
+}
+
+.progress-bar-container {
+    background: var(--progress-bar-bg);
+    border-radius: 8px;
+    height: 10px;
+    overflow: hidden;
+}
+
+.progress-bar-fill {
+    height: 100%;
+    background: var(--progress-bar-fill);
+    border-radius: 8px;
+    transition: width 0.3s ease;
+    width: 0%;
+}
+
+.progress-info {
+    font-size: 13px;
+    color: var(--progress-info-color);
+    text-align: center;
+    min-height: 20px;
+}
+
+.progress-cancel-btn {
+    background: var(--progress-cancel-bg);
+    color: white;
+    border: none;
+    border-radius: 6px;
+    padding: 8px 20px;
+    font-size: 13px;
+    cursor: pointer;
+    transition: background 0.2s;
+    align-self: center;
+}
+
+.progress-cancel-btn:hover {
+    background: var(--progress-cancel-hover);
+}
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.html b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.html
new file mode 100644
index 00000000..2183deb8
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.html
@@ -0,0 +1,187 @@
+<!--
+ * LocalPDF Studio - Offline PDF Toolkit
+ * ======================================
+ *
+ * @author      Md. Alinur Hossain <alinur1160@gmail.com>
+ * @license     AGPL 3.0 (GNU Affero General Public License version 3)
+ * @website     https://alinur1.github.io/LocalPDF_Studio_Website/
+ * @repository  https://github.com/Alinur1/LocalPDF_Studio
+ *
+ * Copyright (c) 2025 Md. Alinur Hossain. All rights reserved.
+ *
+ * Architecture:
+ * - Frontend: Electron + HTML/CSS/JS
+ * - Backend: ASP.NET Core Web API, Python
+ * - PDF Engine: PdfSharp + Mozilla PDF.js
+-->
+
+
+<!-- src/renderer/tools/pdfToMarkdown/pdfToMarkdown.html -->
+
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>LocalPDF Studio - PDF to Markdown</title>
+    <meta http-equiv="Content-Security-Policy" content="
+    default-src 'self';
+    script-src 'self';
+    style-src 'self' 'unsafe-inline';
+    img-src 'self' data:;
+    connect-src 'self' http://localhost:*;">
+    <link rel="stylesheet" href="../../styles/main.css">
+    <link rel="stylesheet" href="./pdfToMarkdown.css">
+    <script type="module" src="../../utils/themeManager.js"></script>
+</head>
+
+<body>
+    <div id="app">
+        <div class="top-bar">
+            <a href="../../index.html" class="top-btn" data-i18n="common.back-btn">← Back</a>
+            <h1 class="tool-title" data-i18n="pdfToMarkdown.page-title">PDF to Markdown</h1>
+            <a href="./pdfToMarkdownFixture.html" class="top-btn">Fixture</a>
+        </div>
+
+        <main class="tool-container">
+
+            <!-- File Selection -->
+            <div class="file-selection-area">
+                <button id="select-pdf-btn" class="select-pdf-btn">
+                    <span data-i18n="common.btn-select-pdf">Click to Select a PDF File or drop it here</span>
+                    <p><small class="select-hint" data-i18n="common.supported-formats-pdf">Supports PDF</small></p>
+                </button>
+                <div id="selected-file-info" class="selected-file-info" style="display: none;">
+                    <div style="flex-grow: 1;">
+                        <p id="pdf-name" class="pdf-name"></p>
+                        <p id="pdf-size" class="pdf-size"></p>
+                    </div>
+                    <button id="remove-pdf-btn" class="remove-btn" title="Remove file">
+                        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"
+                            fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round"
+                            stroke-linejoin="round">
+                            <path d="M18 6 6 18"/>
+                            <path d="m6 6 12 12"/>
+                        </svg>
+                    </button>
+                </div>
+            </div>
+
+            <!-- Conversion Options -->
+            <div class="options-container">
+                <h2 data-i18n="pdfToMarkdown.options-title">Conversion Options</h2>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.heading-options">Heading Detection</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="detect-headings" checked>
+                        <label for="detect-headings" data-i18n="pdfToMarkdown.detect-headings">
+                            Semantic heading detection (font-size ratio mapping)
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.detect-headings-help">
+                        Computes document base font size and maps larger text to # / ## / ### levels.
+                    </small>
+                </div>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.table-options">Table Extraction</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="detect-tables" checked>
+                        <label for="detect-tables" data-i18n="pdfToMarkdown.detect-tables">
+                            Detect and render tables as GitHub-Flavored Markdown
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.detect-tables-help">
+                        Uses spatial column alignment to detect grids and output pipe-delimited tables.
+                    </small>
+                </div>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.format-options">Inline Formatting</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="detect-formatting" checked>
+                        <label for="detect-formatting" data-i18n="pdfToMarkdown.detect-formatting">
+                            Detect bold, italic, and monospace text
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.detect-formatting-help">
+                        Reads font name flags to wrap text in **bold**, *italic*, and `mono` markers.
+                    </small>
+                </div>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.image-options">Image Extraction</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="include-images" checked>
+                        <label for="include-images" data-i18n="pdfToMarkdown.include-images">
+                            Extract and embed images as data URIs
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.include-images-help">
+                        Extracts embedded images from the PDF and links them inline in the Markdown output.
+                    </small>
+                </div>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.ocr-options">OCR Fallback</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="ocr-fallback">
+                        <label for="ocr-fallback" data-i18n="pdfToMarkdown.ocr-fallback">
+                            OCR fallback for scanned / image-only pages
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.ocr-fallback-help">
+                        Detects pages with no extractable text and runs Tesseract OCR automatically.
+                        Requires internet connection once per language to download the model.
+                    </small>
+                </div>
+
+                <div class="option-group">
+                    <h3 data-i18n="pdfToMarkdown.paragraph-options">Paragraph Healing</h3>
+                    <div class="checkbox-row">
+                        <input type="checkbox" id="heal-paragraphs" checked>
+                        <label for="heal-paragraphs" data-i18n="pdfToMarkdown.heal-paragraphs">
+                            Repair PDF line-wrap artifacts into natural paragraphs
+                        </label>
+                    </div>
+                    <small class="help-text" data-i18n="pdfToMarkdown.heal-paragraphs-help">
+                        Joins broken lines using sentence-boundary heuristics to restore paragraph flow.
+                    </small>
+                </div>
+            </div>
+
+            <!-- Convert Button -->
+            <div class="action-buttons">
+                <button id="convert-btn" class="action-btn" disabled data-i18n="pdfToMarkdown.convert-btn">
+                    Convert to Markdown
+                </button>
+            </div>
+
+        </main>
+    </div>
+
+    <!-- Progress Modal -->
+    <div id="progress-modal" class="progress-overlay" style="display: none;">
+        <div class="progress-content">
+            <h3 class="progress-title" data-i18n="pdfToMarkdown.progress-title">Converting PDF to Markdown</h3>
+
+            <div class="progress-bar-container">
+                <div class="progress-bar-fill" id="progress-fill"></div>
+            </div>
+
+            <div class="progress-info" id="progress-info" data-i18n="pdfToMarkdown.progress-init">
+                Initializing...
+            </div>
+
+            <button class="progress-cancel-btn" id="cancel-btn" data-i18n="pdfToMarkdown.cancel-btn">
+                Cancel
+            </button>
+        </div>
+    </div>
+
+    <script type="module" src="./pdfToMarkdown.js"></script>
+</body>
+
+</html>
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.js b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.js
new file mode 100644
index 00000000..f5698584
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdown.js
@@ -0,0 +1,1087 @@
+/**
+ * LocalPDF Studio - Offline PDF Toolkit
+ * ======================================
+ *
+ * @author      Md. Alinur Hossain <alinur1160@gmail.com>
+ * @license     AGPL 3.0 (GNU Affero General Public License version 3)
+ * @website     https://alinur1.github.io/LocalPDF_Studio_Website/
+ * @repository  https://github.com/Alinur1/LocalPDF_Studio
+ *
+ * Copyright (c) 2025 Md. Alinur Hossain. All rights reserved.
+ *
+ * Architecture:
+ * - Frontend: Electron + HTML/CSS/JS
+ * - Backend: ASP.NET Core Web API, Python
+ * - PDF Engine: PdfSharp + Mozilla PDF.js
+**/
+
+
+// src/renderer/tools/pdfToMarkdown/pdfToMarkdown.js
+
+import * as pdfjsLib from '../../../pdf/build/pdf.mjs';
+import customAlert from '../../utils/customAlert.js';
+import { initializeGlobalDragDrop } from '../../utils/globalDragDrop.js';
+import i18n from '../../utils/i18n.js';
+import loadingUI from '../../utils/loading.js';
+import tesseractOcr from '../../utils/tesseractOcr.js';
+import { ThemeManager } from '../../utils/themeManager.js';
+
+pdfjsLib.GlobalWorkerOptions.workerSrc = '../../../pdf/build/pdf.worker.mjs';
+
+// ── Constants ──────────────────────────────────────────────────────────────
+const LINE_Y_TOLERANCE_FACTOR = 0.55;
+const H1_RATIO  = 1.5;
+const H2_RATIO  = 1.2;
+const H3_RATIO  = 1.08;
+// Table detection: minimum meaningful column gap as a fraction of page width
+const TABLE_COL_GAP_MIN = 0.06;   // 6% of page width between columns
+const TABLE_COL_ALIGN_TOL = 8;    // px: x-position tolerance for column alignment
+const TABLE_MIN_ROWS = 2;
+const TABLE_MIN_COLS = 2;
+
+// ── Font helpers ───────────────────────────────────────────────────────────
+
+function getFontSize(transform) {
+    const sy = Math.sqrt(transform[2] * transform[2] + transform[3] * transform[3]);
+    return sy > 0 ? sy : Math.abs(transform[0]);
+}
+
+function getFontStyle(fontName) {
+    const fn = (fontName || '').toLowerCase();
+    return {
+        bold:  /bold|heavy|black|demi|semibold/.test(fn),
+        italic: /italic|oblique|slant/.test(fn),
+        mono:  /mono|courier|consol|typewriter|code|fixed|letter/.test(fn)
+    };
+}
+
+// ── Calibration ────────────────────────────────────────────────────────────
+
+function computeBaseFontSize(samples) { // samples: [{ size, len }] -> most common font size weighted by len (12 if none)
+    if (samples.length === 0) return 12;
+    const freq = {}; // size rounded to the nearest 0.5 (as a string key) -> total characters at that size
+    for (const { size, len } of samples) {
+        const key = (Math.round(size * 2) / 2).toFixed(1);
+        freq[key] = (freq[key] || 0) + len;
+    }
+    const [[bestKey] = []] = Object.entries(freq).sort((a, b) => b[1] - a[1]); // key of the top [key, count] entry
+    return bestKey ? parseFloat(bestKey) : 12;
+}
+
+// ── Markdown escaping ──────────────────────────────────────────────────────
+// Only escape characters that would actually alter rendering in body text.
+// Periods, parens, hyphens etc. are safe inside paragraphs.
+function escapeInline(str) {
+    return str
+        .replace(/\\/g, '\\\\')
+        .replace(/\*/g, '\\*')
+        .replace(/_/g, '\\_')
+        .replace(/`/g, '\\`')
+        .replace(/\[/g, '\\[')
+        .replace(/\|/g, '\\|');
+}
+
+function wrapFormatting(str, bold, italic, mono) { // raw run text -> escaped text wrapped in `code` / emphasis markers
+    if (!str) return '';
+    // Keep edge whitespace outside the markers: CommonMark does not treat "**x **" or "** **" as emphasis.
+    const [, lead, core, trail] = str.match(/^(\s*)([\s\S]*?)(\s*)$/);
+    if (!core) return str;
+    if (mono) return lead + '`' + core + '`' + trail;
+    const mark = (bold ? '**' : '') + (italic ? '*' : '');
+    return lead + mark + escapeInline(core) + mark + trail;
+}
+
+// ── Item enrichment ────────────────────────────────────────────────────────
+
+function enrichItem(raw) {
+    return {
+        str:      raw.str || '',
+        x:        raw.transform[4],
+        y:        raw.transform[5],
+        width:    raw.width  || 0,
+        height:   raw.height || 0,
+        fontSize: getFontSize(raw.transform),
+        fontName: raw.fontName || '',
+        style:    getFontStyle(raw.fontName),
+        hasEOL:   raw.hasEOL || false
+    };
+}
+
+// ── Line grouping ──────────────────────────────────────────────────────────
+
+function groupIntoLines(items) {
+    if (items.length === 0) return [];
+
+    // Sort top-to-bottom (PDF y is baseline, increases upward → sort descending)
+    const sorted = [...items].sort((a, b) => {
+        const dy = b.y - a.y;
+        if (Math.abs(dy) > 1) return dy;
+        return a.x - b.x;
+    });
+
+    const lines = [];
+    let cur = [sorted[0]];
+    let baseY = sorted[0].y;
+    let lineH  = Math.max(sorted[0].fontSize, 6);
+
+    for (let i = 1; i < sorted.length; i++) {
+        const item = sorted[i];
+        const tol  = Math.max(lineH, item.fontSize, 6) * LINE_Y_TOLERANCE_FACTOR;
+        if (Math.abs(item.y - baseY) <= tol) {
+            cur.push(item);
+        } else {
+            lines.push(cur.slice().sort((a, b) => a.x - b.x));
+            cur    = [item];
+            baseY  = item.y;
+            lineH  = Math.max(item.fontSize, 6);
+        }
+    }
+    if (cur.length) lines.push(cur.sort((a, b) => a.x - b.x));
+    return lines;
+}
+
+// ── Inter-item spacing ─────────────────────────────────────────────────────
+// PDF text items may omit space characters; infer spaces from the gap
+// between (x + width) of the previous item and x of the current item.
+
+function joinItems(line, detectFmt) {
+    if (line.length === 0) return '';
+
+    let result = '';
+    let prevEndX = null;
+
+    for (const item of line) {
+        if (!item.str) continue;
+
+        const text = detectFmt
+            ? wrapFormatting(item.str, item.style.bold, item.style.italic, item.style.mono)
+            : escapeInline(item.str);
+
+        if (prevEndX !== null) {
+            const gap = item.x - prevEndX;
+            // A word space is roughly 0.25–0.35 × fontSize wide.
+            // If the gap is positive and no space already at the boundary, inject one.
+            const minSpaceGap = Math.max(item.fontSize * 0.2, 2);
+            const needsSpace  = gap >= minSpaceGap
+                && !result.endsWith(' ')
+                && !text.startsWith(' ');
+            if (needsSpace) result += ' ';
+        }
+
+        result += text;
+        prevEndX = item.x + item.width;
+    }
+    return result.trim();
+}
+
+// ── Table detection ────────────────────────────────────────────────────────
+// Strategy: a table row must have items spread across at least TABLE_MIN_COLS
+// distinct column bands with meaningful gaps (≥ TABLE_COL_GAP_MIN × pageWidth).
+// At least TABLE_MIN_ROWS such rows with matching column positions = a table.
+
+function buildColumnBands(line, pageWidth) {
+    const items = line.filter(i => i.str.trim());
+    if (items.length < TABLE_MIN_COLS) return null;
+
+    const gapMin = pageWidth * TABLE_COL_GAP_MIN;
+    const bands  = [{ x: items[0].x, end: items[0].x + items[0].width }];
+
+    for (let i = 1; i < items.length; i++) {
+        const gap = items[i].x - bands[bands.length - 1].end;
+        if (gap >= gapMin) {
+            bands.push({ x: items[i].x, end: items[i].x + items[i].width });
+        } else {
+            bands[bands.length - 1].end = Math.max(
+                bands[bands.length - 1].end, items[i].x + items[i].width
+            );
+        }
+    }
+    return bands.length >= TABLE_MIN_COLS ? bands : null;
+}
+
+function bandsMatch(a, b) {
+    if (!a || !b) return false;
+    const minLen = Math.min(a.length, b.length);
+    const maxLen = Math.max(a.length, b.length);
+    let matches  = 0;
+    for (let i = 0; i < minLen; i++) {
+        if (Math.abs(a[i].x - b[i].x) <= TABLE_COL_ALIGN_TOL) matches++;
+    }
+    return matches / maxLen >= 0.6;
+}
+
+function detectTables(lines, pageWidth) {
+    const tables = [];
+    const bandSigs = lines.map(l => buildColumnBands(l, pageWidth));
+
+    let start = -1;
+    let prevBands = null;
+
+    const flush = (end) => {
+        if (start !== -1 && end - start + 1 >= TABLE_MIN_ROWS) {
+            tables.push({ startLine: start, endLine: end });
+        }
+        start = -1; prevBands = null;
+    };
+
+    for (let i = 0; i < lines.length; i++) {
+        const bands = bandSigs[i];
+        if (!bands) { flush(i - 1); continue; }
+
+        if (prevBands && bandsMatch(prevBands, bands)) {
+            if (start === -1) start = i - 1;
+            prevBands = bands;
+        } else {
+            flush(i - 1);
+            prevBands = bands;
+        }
+    }
+    flush(lines.length - 1);
+    return tables;
+}
+
+// ── Table renderer ─────────────────────────────────────────────────────────
+
+function renderTable(lines, tbl, pageWidth) { // lines[tbl.startLine..tbl.endLine] -> GFM pipe table ('' if no columns found)
+    const tblLines = lines.slice(tbl.startLine, tbl.endLine + 1);
+
+    // Derive unified column positions (leftmost x of each band)
+    const allBands = tblLines
+        .map(l => buildColumnBands(l, pageWidth))
+        .filter(Boolean);
+    if (!allBands.length) return '';
+
+    // Merge band starts into global columns
+    const allXs = allBands.flatMap(b => b.map(band => band.x));
+    const colXs  = clusterValues(allXs, TABLE_COL_ALIGN_TOL * 2);
+    const numCols = colXs.length;
+
+    const rows = tblLines.map(line => {
+        const row = Array(numCols).fill('');
+        const items = line.filter(i => i.str.trim());
+        for (const item of items) {
+            // Use the right-most column that starts at or before the item. Nearest-start
+            // matching moved the trailing words of a wide cell into the next column.
+            let best = 0;
+            for (let c = 1; c < colXs.length; c++) {
+                if (colXs[c] <= item.x + TABLE_COL_ALIGN_TOL) best = c;
+            }
+            const sep = row[best] ? ' ' : '';
+            row[best] += sep + item.str.trim().replace(/\|/g, '\\|');
+        }
+        return row;
+    });
+
+    if (!rows.length) return '';
+    const fmtRow = r => '| ' + r.join(' | ') + ' |';
+    const header = rows[0];
+    const sep    = header.map(() => '---');
+    return [fmtRow(header), fmtRow(sep), ...rows.slice(1).map(fmtRow)].join('\n');
+}
+
+function clusterValues(vals, tol) {
+    if (!vals.length) return [];
+    const sorted = [...new Set(vals.map(v => Math.round(v)))].sort((a, b) => a - b);
+    const clusters = [[sorted[0]]];
+    for (let i = 1; i < sorted.length; i++) {
+        if (sorted[i] - clusters[clusters.length - 1][0] <= tol) {
+            clusters[clusters.length - 1].push(sorted[i]);
+        } else {
+            clusters.push([sorted[i]]);
+        }
+    }
+    return clusters.map(c => Math.min(...c));
+}
+
+// ── List detection ─────────────────────────────────────────────────────────
+// Bullet: explicit bullet characters.
+// Ordered: numeric (1. 1) 1/) (1) …), Roman numeral (i. i) ii. ii) (i) …),
+//          or single-letter (a. a) b. b) (a) …).
+
+const BULLET_RE = /^(?<marker>[•●○◦▪▸▹·*+])(?=\s|\S|$)/;
+
+// Ordered markers detected:
+//   • numeric:       1.  1)  1/  1.)  (1)
+//   • Roman numeral: i.  i)  ii.  iv)  (i)  (iv)
+//   • single letter: a.  a)  b.  b)  a.)  (a)
+const ORDERED_RE = /^(?<marker>(?:\((?:\d+|[ivxlcdmIVXLCDM]{1,6}|[a-zA-Z])\)|(?:\d+|[ivxlcdmIVXLCDM]{1,6}|[a-zA-Z])(?:\.\)|[.)\/])))/;
+
+function buildListProbeText(line, maxItems = 4, maxChars = 24) {
+    let text = '';
+    let seenText = false;
+
+    for (const item of line) {
+        let piece = item.str || '';
+        if (!seenText) {
+            piece = piece.trimStart();
+            if (!piece) continue;
+            seenText = true;
+        }
+        if (!piece) continue;
+
+        text += piece;
+        if (text.length >= maxChars) break;
+        if (--maxItems <= 0) break;
+    }
+
+    return text;
+}
+
+function stripLeadingMarker(line, markerLen) {
+    const rest = [];
+    let remaining = markerLen;
+    let seenText = false;
+
+    for (const item of line) {
+        let str = item.str || '';
+        if (!seenText) {
+            str = str.trimStart();
+            if (!str) continue;
+            seenText = true;
+        }
+
+        if (remaining > 0) {
+            if (remaining >= str.length) {
+                remaining -= str.length;
+                continue;
+            }
+            str = str.slice(remaining);
+            remaining = 0;
+        }
+
+        if (str) rest.push({ ...item, str });
+    }
+
+    return rest;
+}
+
+function getListPrefix(listInfo) {
+    if (listInfo.type === 'ul') return '- ';
+    if (/^\d/.test(listInfo.marker)) return '1. ';
+    if (/^\(\d+\)$/.test(listInfo.marker)) return '- ' + listInfo.marker + ' ';
+    return '- ' + listInfo.marker + ' ';
+}
+
+function getMarkerBody(marker) {
+    return marker
+        .replace(/^\(/, '')
+        .replace(/\)$/, '')
+        .replace(/[.)\/]+$/g, '')
+        .toLowerCase();
+}
+
+function isRomanBody(body) {
+    return /^[ivxlcdm]+$/i.test(body);
+}
+
+function getListKind(listInfo) {
+    if (listInfo.type === 'ul') return 'bullet';
+    const body = getMarkerBody(listInfo.marker);
+    if (/^\d+$/.test(body)) return 'numeric';
+    if (body.length === 1 && /^[a-z]$/i.test(body)) return 'ambiguous';
+    if (isRomanBody(body)) return 'roman';
+    return 'alpha';
+}
+
+function detectList(line) {
+    if (!line.length) return null;
+    const str = buildListProbeText(line);
+    if (!str) return null;
+
+    const bm = str.match(BULLET_RE);
+    if (bm) return { type: 'ul', marker: bm.groups?.marker || bm[0], matchLen: bm[0].length };
+
+    if (str.startsWith('-')) {
+        const first = (line[0]?.str || '').trimStart();
+        if (first === '-' || first === '- ' || first.startsWith('-\t')) {
+            return { type: 'ul', marker: '-', matchLen: 1 };
+        }
+    }
+
+    const om = str.match(ORDERED_RE);
+    if (om) return { type: 'ol', marker: om.groups?.marker || om[0], matchLen: om[0].length };
+
+    return null;
+}
+
+// ── Line → element ─────────────────────────────────────────────────────────
+
+function lineToElement(line, baseFontSize, pageLeftMargin, detectFmt, detectHeadings) {
+    if (!line.length) return { type: 'empty' };
+
+    const text = joinItems(line, detectFmt);
+    if (!text.trim()) return { type: 'empty' };
+
+    // Dominant font size (weighted by character count)
+    let totalChars = 0, weightedSz = 0;
+    for (const item of line) {
+        const l = item.str.length;
+        totalChars += l;
+        weightedSz += item.fontSize * l;
+    }
+    const domSz = totalChars > 0 ? weightedSz / totalChars : baseFontSize;
+
+    // Heading detection
+    if (detectHeadings && baseFontSize > 0) {
+        const ratio = domSz / baseFontSize;
+        if (ratio >= H1_RATIO) return { type: 'h1', text };
+        if (ratio >= H2_RATIO) return { type: 'h2', text };
+        if (ratio >= H3_RATIO) return { type: 'h3', text };
+    }
+
+    // List detection
+    const listInfo = detectList(line);
+    if (listInfo) {
+        const restLine = stripLeadingMarker(line, listInfo.matchLen);
+        const content = restLine.length ? joinItems(restLine, detectFmt) : '';
+        const prefix = getListPrefix(listInfo);
+        return {
+            type: 'list',
+            text: content.trim(),
+            prefix,
+            marker: listInfo.marker,
+            listKind: getListKind(listInfo),
+            x: line[0]?.x || 0,
+            contentX: restLine[0]?.x || line[0]?.x || 0
+        };
+    }
+
+    return {
+        type:     'paragraph',
+        text,
+        x:        line[0]?.x || 0,
+        y:        line[0]?.y || 0,
+        fontSize: domSz
+    };
+}
+
+// ── Paragraph healing ──────────────────────────────────────────────────────
+// Rejoin lines broken by PDF right-margin wrapping.
+// Heuristic: previous line doesn't end with sentence-final punctuation
+// AND the next line starts with a lowercase letter → merge.
+
+const SENTENCE_END_RE    = /[.!?:;…""')\]>]$/;
+const STARTS_LOWERCASE_RE = /^[a-z]/;
+const STARTS_CONTINUATION_RE = /^[A-Za-z0-9("'`]/;
+
+function joinBrokenWord(prevText, nextText) {
+    if (!prevText.endsWith('-')) return null;
+    if (!/^[a-z]/.test(nextText)) return null;
+    return prevText.slice(0, -1) + nextText;
+}
+
+function healParagraphs(elements) { // -> new element array with wrapped paragraph lines merged; inputs are not mutated
+    const out = [];
+    for (const el of elements) {
+        if (el.type !== 'paragraph') { out.push({ ...el }); continue; } // copy: `prev.text` is rewritten below
+
+        const prev = out[out.length - 1];
+        const hyphenJoin = ['paragraph', 'list'].includes(prev?.type) ? joinBrokenWord(prev.text, el.text) : null;
+        if (hyphenJoin) {
+            prev.text = hyphenJoin;
+            continue;
+        }
+
+        if (
+            prev?.type === 'list' &&
+            STARTS_CONTINUATION_RE.test(el.text) &&
+            (
+                STARTS_LOWERCASE_RE.test(el.text) ||
+                el.x >= (prev.contentX || prev.x || 0) - 12 ||
+                !SENTENCE_END_RE.test(prev.text)
+            )
+        ) {
+            // Continuation of the list item: drop a trailing hyphen and join directly, else join with a space.
+            prev.text = prev.text.endsWith('-')
+                ? prev.text.slice(0, -1) + el.text
+                : prev.text + ' ' + el.text;
+            continue;
+        }
+
+        if (
+            prev?.type === 'paragraph' &&
+            !SENTENCE_END_RE.test(prev.text) &&
+            STARTS_LOWERCASE_RE.test(el.text)
+        ) {
+            // Merge: don't add a redundant space if prev ends with one
+            const joiner = prev.text.endsWith(' ') ? '' : ' ';
+            prev.text += joiner + el.text;
+        } else {
+            out.push({ ...el });
+        }
+    }
+    return out;
+}
+
+function normalizeEdgeLine(str) {
+    return str.replace(/\s+/g, ' ').trim();
+}
+
+function removeRepeatedEdgeArtifacts(pageParts) {
+    const firstCounts = new Map();
+    const lastCounts = new Map();
+
+    for (const part of pageParts) {
+        const lines = part.split('\n').filter(line => line.trim());
+        if (!lines.length) continue;
+        const first = normalizeEdgeLine(lines[0]);
+        const last = normalizeEdgeLine(lines[lines.length - 1]);
+        firstCounts.set(first, (firstCounts.get(first) || 0) + 1);
+        lastCounts.set(last, (lastCounts.get(last) || 0) + 1);
+    }
+
+    return pageParts.map((part, index) => {
+        const lines = part.split('\n');
+
+        while (lines.length && !lines[0].trim()) lines.shift();
+        while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
+
+        if (lines.length) {
+            const first = normalizeEdgeLine(lines[0]);
+            if (firstCounts.get(first) > 1) lines.shift();
+        }
+
+        if (lines.length) {
+            const last = normalizeEdgeLine(lines[lines.length - 1]);
+            const expectedPageNumber = String(index + 1);
+            if (last === expectedPageNumber || lastCounts.get(last) > 1) lines.pop();
+        }
+
+        return lines.join('\n').trim();
+    }).filter(Boolean);
+}
+
+// ── Element renderer ───────────────────────────────────────────────────────
+
+function romanToInt(str) {
+    const vals = { i: 1, v: 5, x: 10, l: 50, c: 100, d: 500, m: 1000 };
+    let total = 0;
+    let prev = 0;
+    for (let i = str.length - 1; i >= 0; i--) {
+        const cur = vals[str[i].toLowerCase()] || 0;
+        total += cur < prev ? -cur : cur;
+        prev = cur;
+    }
+    return total;
+}
+
+function markerOrderValue(el, resolvedKind) {
+    const body = getMarkerBody(el.marker || '');
+    if (!body) return null;
+    if (resolvedKind === 'numeric' && /^\d+$/.test(body)) return parseInt(body, 10);
+    if (resolvedKind === 'alpha' && /^[a-z]$/i.test(body)) return body.toLowerCase().charCodeAt(0) - 96;
+    if (resolvedKind === 'roman' && isRomanBody(body)) return romanToInt(body);
+    return null;
+}
+
+function chooseResolvedKind(el, parentEntry, sameLevelEntry) {
+    if (el.listKind !== 'ambiguous') return el.listKind;
+    if (sameLevelEntry?.resolvedKind === 'alpha') return 'alpha';
+    if (sameLevelEntry?.resolvedKind === 'roman') return 'roman';
+    if (parentEntry?.resolvedKind === 'alpha') return 'roman';
+    return 'alpha';
+}
+
+function findMatchingDepth(stack, el) {
+    for (let depth = stack.length - 1; depth >= 0; depth--) {
+        const entry = stack[depth];
+        if (!entry) continue;
+
+        if (entry.resolvedKind === 'alpha' && (el.listKind === 'alpha' || el.listKind === 'ambiguous')) {
+            return depth;
+        }
+        if (entry.resolvedKind === 'numeric' && el.listKind === 'numeric') {
+            return depth;
+        }
+        if (entry.resolvedKind === 'roman' && (el.listKind === 'roman' || el.listKind === 'ambiguous')) {
+            return depth;
+        }
+    }
+    return null;
+}
+
+function resolveListDepth(el, stack, prevList) {
+    if (!stack.length) {
+        return { depth: 0, resolvedKind: chooseResolvedKind(el, null, null) };
+    }
+
+    let depth = stack.length - 1;
+    while (depth > 0 && el.x < stack[depth].x - 6) depth--;
+
+    const top = stack[depth];
+    if (top && el.x > (top.contentX || top.x) + 8) {
+        const resolvedKind = chooseResolvedKind(el, top, null);
+        return { depth: depth + 1, resolvedKind };
+    }
+
+    const matchingDepth = findMatchingDepth(stack, el);
+    if (matchingDepth !== null) {
+        const sameLevelEntry = stack[matchingDepth];
+        const resolvedKind = chooseResolvedKind(el, stack[matchingDepth - 1], sameLevelEntry);
+        return { depth: matchingDepth, resolvedKind };
+    }
+
+    if (
+        prevList?.resolvedKind === 'alpha' &&
+        (el.listKind === 'roman' || el.listKind === 'ambiguous')
+    ) {
+        const resolvedKind = chooseResolvedKind(el, prevList, null);
+        return { depth: prevList.depth + 1, resolvedKind };
+    }
+
+    return { depth: 0, resolvedKind: chooseResolvedKind(el, null, stack[0]) };
+}
+
+function renderElements(elements) { // Joins typed elements (h1/h2/h3/list/paragraph/table) into one Markdown string.
+    const lines = [];
+    let prevType = null; // type of the element handled in the previous iteration; null before the first one
+    const listStack = []; // listStack[d] = most recently rendered list item at depth d
+    let prevList = null; // most recently rendered list item, handed to resolveListDepth
+
+    for (const el of elements) {
+        switch (el.type) {
+            case 'h1':
+                if (prevType) lines.push(''); // blank separator line, except before the very first element
+                lines.push('# ' + el.text);
+                listStack.length = 0; // a heading discards any list context
+                prevList = null;
+                break;
+            case 'h2':
+                if (prevType) lines.push('');
+                lines.push('## ' + el.text);
+                listStack.length = 0;
+                prevList = null;
+                break;
+            case 'h3':
+                if (prevType) lines.push('');
+                lines.push('### ' + el.text);
+                listStack.length = 0;
+                prevList = null;
+                break;
+            case 'list': {
+                const { depth, resolvedKind } = resolveListDepth(el, listStack, prevList);
+                const indent = '  '.repeat(depth); // two spaces per nesting level
+                lines.push(indent + el.prefix + el.text);
+                const rendered = { ...el, depth, resolvedKind, orderValue: markerOrderValue(el, resolvedKind) };
+                listStack.length = depth; // resize the stack to `depth` entries before storing this item at that index
+                listStack[depth] = rendered;
+                prevList = rendered;
+                break;
+            }
+            case 'paragraph':
+                if (prevType && prevType !== 'paragraph') lines.push(''); // consecutive paragraphs get no blank line between them
+                lines.push(el.text);
+                if (prevType !== 'list') { // a paragraph directly after a list item keeps the list context alive
+                    listStack.length = 0;
+                    prevList = null;
+                }
+                break;
+            case 'table':
+                if (prevType) lines.push('');
+                lines.push(el.text);
+                listStack.length = 0;
+                prevList = null;
+                break;
+        }
+        prevType = el.type; // recorded even for types the switch does not handle
+    }
+    return lines.join('\n');
+}
+
+// ── OCR fallback ────────────────────────────────────────────────────────────
+
+async function ocrFallback(page) { // Rasterizes `page` onto a canvas and returns the trimmed OCR text, or '' when OCR fails.
+    const scale    = 2.5;
+    const viewport = page.getViewport({ scale });
+    const canvas   = document.createElement('canvas');
+    canvas.width   = viewport.width;
+    canvas.height  = viewport.height;
+    const ctx      = canvas.getContext('2d');
+    ctx.fillStyle  = 'white'; // paint a white background before the page is drawn on top
+    ctx.fillRect(0, 0, canvas.width, canvas.height);
+    await page.render({ canvasContext: ctx, viewport }).promise; // outside the try: a render failure propagates to the caller
+
+    try {
+        await tesseractOcr.initialize('eng'); // presumably the OCR language code — confirm against tesseractOcr
+        const results = await tesseractOcr.processCanvasBatch([canvas], 0); // NOTE(review): meaning of the second argument is not visible here
+        if (results?.[0]?.success) return (results[0].text || '').trim();
+    } catch (e) {
+        console.warn('OCR fallback error:', e); // OCR errors are logged and swallowed; the page then yields ''
+    }
+    return '';
+}
+
+// ── Main converter ─────────────────────────────────────────────────────────
+
+export const DEFAULT_PDF_TO_MARKDOWN_OPTIONS = { // defaults; convertPdfToMarkdown spreads caller options over these
+    detectHeadings: true, // forwarded to lineToElement
+    detectTables: true, // when false, detectTables() is skipped and no table regions are used
+    detectFormatting: true, // forwarded to lineToElement
+    includeImages: true, // extract images via electronAPI.extractPdfImages and append them after each page
+    ocrFallback: false, // run OCR on pages that have fewer than 5 non-empty text items
+    healParagraphs: true // run healParagraphs() on each page's elements before rendering
+};
+
+export async function convertPdfToMarkdown(filePath, options = {}, onProgress) {
+    const mergedOptions = {
+        ...DEFAULT_PDF_TO_MARKDOWN_OPTIONS,
+        ...options
+    };
+    const progress = (pct, msg) => onProgress?.(pct, msg);
+
+    progress(3, 'Loading PDF…');
+    const pdfDoc   = await pdfjsLib.getDocument(`file://${filePath}`).promise;
+    const numPages = pdfDoc.numPages;
+
+    // Pass 1: calibrate base font size
+    progress(8, 'Calibrating font sizes…');
+    const samples = [];
+    for (let p = 1; p <= numPages; p++) {
+        const page    = await pdfDoc.getPage(p);
+        const content = await page.getTextContent();
+        for (const item of content.items) {
+            if (!item.str?.trim()) continue;
+            const sz = getFontSize(item.transform);
+            if (sz > 0) samples.push({ size: sz, len: item.str.length });
+        }
+    }
+    const baseFontSize = computeBaseFontSize(samples);
+
+    // Optional image extraction
+    let imagesByPage = {};
+    if (mergedOptions.includeImages) {
+        progress(12, 'Extracting images…');
+        try {
+            const res = await window.electronAPI.extractPdfImages(filePath);
+            if (res?.success) {
+                for (const img of res.images) {
+                    (imagesByPage[img.pageNum] ||= []).push(img);
+                }
+            }
+        } catch (e) {
+            console.warn('Image extraction skipped:', e.message);
+        }
+    }
+
+    // Pass 2: convert pages
+    const parts = [];
+    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
+        const pct = 15 + Math.round(((pageNum - 1) / numPages) * 80);
+        progress(pct, `Page ${pageNum} of ${numPages}…`);
+
+        const page     = await pdfDoc.getPage(pageNum);
+        const viewport = page.getViewport({ scale: 1 });
+        const content  = await page.getTextContent();
+        const rawItems = content.items.filter(i => i.str?.trim());
+
+        // OCR on scanned pages
+        if (mergedOptions.ocrFallback && rawItems.length < 5) {
+            progress(pct, `Page ${pageNum}: scanned — running OCR…`);
+            const ocrText = await ocrFallback(page);
+            if (ocrText) parts.push(ocrText);
+            continue;
+        }
+
+        const items         = rawItems.map(enrichItem);
+        const pageWidth     = viewport.width;
+        const pageLeftMargin = Math.min(...items.map(i => i.x));
+        const lines          = groupIntoLines(items);
+
+        // Detect tables
+        const tableRegions = mergedOptions.detectTables ? detectTables(lines, pageWidth) : [];
+        const tableLineSet  = new Set();
+        for (const t of tableRegions) {
+            for (let li = t.startLine; li <= t.endLine; li++) tableLineSet.add(li);
+        }
+
+        // Build elements list
+        const elements  = [];
+        let tableIdx    = 0;
+
+        for (let li = 0; li < lines.length; li++) {
+            // Check if a table starts at this line
+            while (tableIdx < tableRegions.length && tableRegions[tableIdx].startLine === li) {
+                const t   = tableRegions[tableIdx];
+                const tmd = renderTable(lines, t, pageWidth);
+                if (tmd) elements.push({ type: 'table', text: tmd });
+                li = t.endLine;
+                tableIdx++;
+                break;
+            }
+            if (tableLineSet.has(li)) continue;
+
+            const el = lineToElement(
+                lines[li], baseFontSize, pageLeftMargin,
+                mergedOptions.detectFormatting, mergedOptions.detectHeadings
+            );
+            if (el.type !== 'empty') elements.push(el);
+        }
+
+        const final = mergedOptions.healParagraphs ? healParagraphs(elements) : elements;
+        const pageMd = renderElements(final);
+        if (pageMd.trim()) parts.push(pageMd);
+
+        // Append images
+        if (mergedOptions.includeImages && imagesByPage[pageNum]) {
+            imagesByPage[pageNum].forEach((img, idx) => {
+                parts.push(`\n![Figure ${idx + 1}](data:${img.mimeType};base64,${img.data})\n`);
+            });
+        }
+    }
+
+    await pdfDoc.destroy();
+    progress(98, 'Assembling document…');
+    return removeRepeatedEdgeArtifacts(parts).join('\n\n');
+}
+
+export async function convertPdfToMarkdownWithFallback(filePath, options = {}, onProgress) { // Tries the electronAPI backend first, then the renderer engine; resolves to { markdown, assets, engine }.
+    try {
+        if (window.electronAPI?.convertPdfToMarkdown) {
+            const result = await window.electronAPI.convertPdfToMarkdown(filePath, options);
+            if (result?.success && result.markdown?.trim()) { // a successful but blank result also falls through to the renderer
+                return {
+                    markdown: result.markdown,
+                    assets: result.assets || [],
+                    engine: result.engine || 'python'
+                };
+            }
+            if (result?.error) {
+                throw new Error(result.error); // caught just below, so the backend error is only logged
+            }
+        }
+    } catch (err) {
+        console.warn('Falling back to renderer PDF to Markdown engine:', err);
+    }
+
+    const markdown = await convertPdfToMarkdown(filePath, options, onProgress); // onProgress is only used on this path
+    return { markdown, assets: [], engine: 'renderer' };
+}
+
+// ── UI ─────────────────────────────────────────────────────────────────────
+
+async function initPdfToMarkdownTool() {
+    await i18n.init();
+    ThemeManager.init();
+
+    const selectPdfBtn    = document.getElementById('select-pdf-btn');
+    const removePdfBtn    = document.getElementById('remove-pdf-btn');
+    const convertBtn      = document.getElementById('convert-btn');
+    const selectedFileInfo = document.getElementById('selected-file-info');
+    const pdfNameEl       = document.getElementById('pdf-name');
+    const pdfSizeEl       = document.getElementById('pdf-size');
+
+    const progressModal   = document.getElementById('progress-modal');
+    const progressFill    = document.getElementById('progress-fill');
+    const progressInfo    = document.getElementById('progress-info');
+    const cancelBtn       = document.getElementById('cancel-btn');
+
+    const detectHeadingsChk   = document.getElementById('detect-headings');
+    const detectTablesChk     = document.getElementById('detect-tables');
+    const detectFormattingChk = document.getElementById('detect-formatting');
+    const includeImagesChk    = document.getElementById('include-images');
+    const ocrFallbackChk      = document.getElementById('ocr-fallback');
+    const healParagraphsChk   = document.getElementById('heal-paragraphs');
+
+    let selectedFile     = null;
+    let droppedFilePath  = null;
+    let cancelled        = false;
+    let backendActive    = false;
+
+    window.electronAPI.onPdfToMarkdownProgress?.((progress) => {
+        if (!backendActive || cancelled) return;
+
+        const pct = typeof progress?.value === 'number' ? progress.value : 0;
+        progressFill.style.width = `${pct}%`;
+
+        if (progress?.stage === 'page' && progress?.page && progress?.totalPages) {
+            progressInfo.textContent = `Page ${progress.page} of ${progress.totalPages}...`;
+        } else if (progress?.stage === 'loading') {
+            progressInfo.textContent = 'Loading PDF...';
+        } else if (progress?.stage === 'analyzing') {
+            progressInfo.textContent = 'Analyzing document structure...';
+        } else if (progress?.stage === 'assembling') {
+            progressInfo.textContent = 'Assembling document...';
+        }
+    });
+
+    const updateConvertBtn = () => { convertBtn.disabled = !selectedFile; };
+
+    function handleFileSelected(file) {
+        selectedFile = file;
+        pdfNameEl.textContent = file.name;
+        pdfSizeEl.textContent = `(${(file.size / 1024 / 1024).toFixed(2)} MB)`;
+        selectPdfBtn.style.display  = 'none';
+        selectedFileInfo.style.display = 'flex';
+        updateConvertBtn();
+    }
+
+    function clearFile() {
+        selectedFile = null;
+        droppedFilePath = null;
+        selectPdfBtn.style.display  = 'block';
+        selectedFileInfo.style.display = 'none';
+        updateConvertBtn();
+    }
+
+    async function cleanupDropped() {
+        if (droppedFilePath) {
+            try { await window.electronAPI.deleteFile(droppedFilePath); } catch {}
+            droppedFilePath = null;
+        }
+    }
+
+    selectPdfBtn.addEventListener('click', async () => {
+        loadingUI.show(i18n.t('pdfToMarkdownJS.selecting'));
+        const files = await window.electronAPI.selectPdfs();
+        loadingUI.hide();
+        if (files?.length > 0) {
+            const fp   = files[0];
+            const info = await window.electronAPI.getFileInfo(fp);
+            handleFileSelected({ path: fp, name: fp.split(/[\\/]/).pop(), size: info.size || 0 });
+        }
+    });
+
+    removePdfBtn.addEventListener('click', async () => {
+        await cleanupDropped();
+        clearFile();
+    });
+
+    const backBtn = document.querySelector('a[href="../../index.html"]');
+    if (backBtn) {
+        backBtn.addEventListener('click', async (e) => {
+            e.preventDefault();
+            await cleanupDropped();
+            window.location.href = '../../index.html';
+        });
+    }
+
+    cancelBtn.addEventListener('click', () => { cancelled = true; });
+
+    convertBtn.addEventListener('click', async () => {
+        if (!selectedFile) return;
+
+        const options = {
+            detectHeadings:   detectHeadingsChk.checked,
+            detectTables:     detectTablesChk.checked,
+            detectFormatting: detectFormattingChk.checked,
+            includeImages:    includeImagesChk.checked,
+            ocrFallback:      ocrFallbackChk.checked,
+            healParagraphs:   healParagraphsChk.checked
+        };
+
+        cancelled = false;
+        progressModal.style.display = 'flex';
+        progressFill.style.width    = '0%';
+        progressInfo.textContent    = i18n.t('pdfToMarkdownJS.initializing');
+
+        try {
+            backendActive = true;
+            const conversion = await convertPdfToMarkdownWithFallback(
+                selectedFile.path,
+                options,
+                (pct, msg) => {
+                    if (cancelled) throw new Error('cancelled');
+                    progressFill.style.width = `${pct}%`;
+                    progressInfo.textContent  = msg;
+                }
+            );
+            backendActive = false;
+            if (cancelled) throw new Error('cancelled');
+            const markdown = conversion.markdown;
+
+            progressModal.style.display = 'none';
+
+            if (!markdown.trim()) {
+                await customAlert.alert(
+                    i18n.t('alerts.warning'),
+                    i18n.t('pdfToMarkdownJS.empty-result'),
+                    [i18n.t('common.ok')]
+                );
+                return;
+            }
+
+            const baseName = selectedFile.name.replace(/\.pdf$/i, '') + '.md';
+            const saveResult   = await window.electronAPI.saveMarkdownFile(
+                baseName, markdown, selectedFile.path, conversion.assets || []
+            );
+
+            if (saveResult?.success) {
+                await customAlert.alert(
+                    i18n.t('alerts.success'),
+                    i18n.t('pdfToMarkdownJS.saved'),
+                    [i18n.t('common.ok')]
+                );
+            }
+        } catch (err) {
+            backendActive = false;
+            progressModal.style.display = 'none';
+            if (err.message === 'cancelled') {
+                await customAlert.alert(
+                    i18n.t('alerts.warning'),
+                    i18n.t('pdfToMarkdownJS.cancelled'),
+                    [i18n.t('common.ok')]
+                );
+            } else {
+                console.error('PDF to Markdown conversion failed:', err);
+                await customAlert.alert(
+                    i18n.t('alerts.error'),
+                    i18n.t('pdfToMarkdownJS.error') + err.message,
+                    [i18n.t('common.ok')]
+                );
+            }
+        }
+    });
+
+    // Drag-and-drop
+    initializeGlobalDragDrop({
+        onFilesDropped: async (files) => {
+            if (files.length > 1) {
+                await customAlert.alert(
+                    i18n.t('alerts.notice'),
+                    i18n.t('pdfToMarkdownJS.drop-one'),
+                    [i18n.t('common.ok')]
+                );
+                return;
+            }
+            const file = files[0];
+            if (!file.name.toLowerCase().endsWith('.pdf')) {
+                await customAlert.alert(
+                    i18n.t('alerts.notice'),
+                    i18n.t('pdfToMarkdownJS.drop-pdf'),
+                    [i18n.t('common.ok')]
+                );
+                return;
+            }
+            await cleanupDropped();
+            const buffer = await file.arrayBuffer();
+            const result = await window.electronAPI.saveDroppedFile({ name: file.name, buffer });
+            if (result.success) {
+                droppedFilePath = result.filePath;
+                handleFileSelected({ path: result.filePath, name: file.name, size: file.size || 0 });
+            } else {
+                await customAlert.alert(
+                    i18n.t('alerts.error'),
+                    i18n.t('pdfToMarkdownJS.drop-failed'),
+                    [i18n.t('common.ok')]
+                );
+            }
+        },
+        onInvalidFiles: async () => {
+            await customAlert.alert(
+                i18n.t('alerts.notice'),
+                i18n.t('pdfToMarkdownJS.drop-pdf'),
+                [i18n.t('common.ok')]
+            );
+        }
+    });
+
+    updateConvertBtn();
+}
+
+if (document.getElementById('convert-btn')) { // only boot the UI on a page that has the convert button
+    document.addEventListener('DOMContentLoaded', () => {
+        initPdfToMarkdownTool().catch(err => {
+            console.error('Failed to initialize PDF to Markdown tool:', err); // init failures are logged, not rethrown
+        });
+    });
+}
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.css b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.css
new file mode 100644
index 00000000..72d60763
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.css
@@ -0,0 +1,255 @@
+:root {
+    --fixture-bg: #102033;
+    --fixture-panel: rgba(10, 22, 37, 0.82);
+    --fixture-border: rgba(120, 156, 187, 0.25);
+    --fixture-text: #e8f0f5;
+    --fixture-muted: #98a9b8;
+    --fixture-accent: #3d8bfd;
+    --fixture-accent-strong: #2f74d8;
+    --fixture-card: rgba(18, 30, 47, 0.88);
+    --fixture-code: #0b1725;
+    --fixture-good: #1f9d63;
+}
+
+[data-theme="light"] {
+    --fixture-bg: #f3f5f7;
+    --fixture-panel: rgba(255, 255, 255, 0.9);
+    --fixture-border: rgba(40, 60, 80, 0.14);
+    --fixture-text: #1d2a38;
+    --fixture-muted: #586574;
+    --fixture-accent: #2368cc;
+    --fixture-accent-strong: #1857b0;
+    --fixture-card: rgba(255, 255, 255, 0.96);
+    --fixture-code: #eef3f7;
+    --fixture-good: #188053;
+}
+
+* {
+    box-sizing: border-box;
+}
+
+body {
+    margin: 0;
+    color: var(--fixture-text);
+    font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
+    background:
+        radial-gradient(circle at top left, rgba(61, 139, 253, 0.14), transparent 28%),
+        linear-gradient(180deg, rgba(9, 18, 30, 0.18), transparent 30%),
+        var(--fixture-bg);
+}
+
+.fixture-shell {
+    width: min(1220px, calc(100% - 32px));
+    margin: 0 auto 40px;
+    display: grid;
+    gap: 18px;
+}
+
+.fixture-panel,
+.fixture-card {
+    background: var(--fixture-panel);
+    border: 1px solid var(--fixture-border);
+    border-radius: 18px;
+    backdrop-filter: blur(12px);
+    box-shadow: 0 18px 40px rgba(0, 0, 0, 0.12);
+}
+
+.fixture-panel {
+    padding: 22px;
+}
+
+.fixture-panel h2,
+.fixture-card h3,
+.fixture-card h4 {
+    margin: 0;
+}
+
+.fixture-copy,
+.fixture-path,
+.fixture-progress-text,
+.fixture-status,
+.label {
+    color: var(--fixture-muted);
+}
+
+.option-grid {
+    margin-top: 18px;
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+    gap: 12px;
+}
+
+.option-grid label {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    padding: 12px 14px;
+    border-radius: 12px;
+    background: var(--fixture-card);
+    border: 1px solid var(--fixture-border);
+}
+
+.fixture-actions {
+    margin-top: 18px;
+    display: flex;
+    gap: 12px;
+    flex-wrap: wrap;
+}
+
+.action-btn,
+.secondary-btn,
+.run-btn,
+.save-btn {
+    border: none;
+    border-radius: 12px;
+    padding: 11px 16px;
+    font-size: 14px;
+    font-weight: 600;
+    cursor: pointer;
+}
+
+.action-btn,
+.run-btn {
+    color: white;
+    background: linear-gradient(135deg, var(--fixture-accent), var(--fixture-accent-strong));
+}
+
+.secondary-btn,
+.save-btn {
+    color: var(--fixture-text);
+    background: transparent;
+    border: 1px solid var(--fixture-border);
+}
+
+.save-btn:disabled {
+    cursor: not-allowed;
+    opacity: 0.45;
+}
+
+.fixture-status {
+    margin: 16px 0 0;
+    font-size: 14px;
+}
+
+.fixture-list {
+    display: grid;
+    gap: 18px;
+}
+
+.fixture-card {
+    padding: 20px;
+    background: var(--fixture-card);
+}
+
+.fixture-card-header,
+.fixture-card-actions,
+.fixture-summary {
+    display: flex;
+    gap: 12px;
+}
+
+.fixture-card-header {
+    justify-content: space-between;
+    align-items: flex-start;
+}
+
+.fixture-title {
+    font-size: 20px;
+}
+
+.fixture-path {
+    margin: 6px 0 0;
+    font-size: 13px;
+    word-break: break-all;
+}
+
+.fixture-progress {
+    margin-top: 16px;
+}
+
+.fixture-progress-bar {
+    height: 10px;
+    background: rgba(255, 255, 255, 0.08);
+    border-radius: 999px;
+    overflow: hidden;
+}
+
+.fixture-progress-fill {
+    height: 100%;
+    width: 0%;
+    background: linear-gradient(90deg, var(--fixture-good), var(--fixture-accent));
+    transition: width 0.2s ease;
+}
+
+.fixture-progress-text {
+    margin: 8px 0 0;
+    font-size: 13px;
+}
+
+.fixture-summary {
+    margin-top: 16px;
+    flex-wrap: wrap;
+}
+
+.fixture-summary > div {
+    min-width: 110px;
+    padding: 10px 12px;
+    border-radius: 12px;
+    background: rgba(255, 255, 255, 0.04);
+    border: 1px solid var(--fixture-border);
+}
+
+.fixture-summary span {
+    display: block;
+}
+
+.fixture-summary .label {
+    font-size: 12px;
+}
+
+.fixture-panels {
+    margin-top: 16px;
+    display: grid;
+    grid-template-columns: 280px 1fr;
+    gap: 16px;
+}
+
+.fixture-subpanel {
+    min-height: 280px;
+    display: flex;
+    flex-direction: column;
+    gap: 10px;
+}
+
+.list-preview,
+.markdown-output {
+    flex: 1;
+    margin: 0;
+    width: 100%;
+    border: 1px solid var(--fixture-border);
+    border-radius: 14px;
+    background: var(--fixture-code);
+    color: var(--fixture-text);
+    padding: 14px;
+    font: 13px/1.55 "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
+}
+
+.list-preview {
+    overflow: auto;
+    white-space: pre-wrap;
+}
+
+.markdown-output {
+    resize: vertical;
+    min-height: 320px;
+}
+
+@media (max-width: 880px) {
+    .fixture-panels {
+        grid-template-columns: 1fr;
+    }
+
+    .fixture-card-header {
+        flex-direction: column;
+    }
+}
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.html b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.html
new file mode 100644
index 00000000..5eaaa02c
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.html
@@ -0,0 +1,92 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>PDF to Markdown Fixture</title>
+    <meta http-equiv="Content-Security-Policy" content="
+    default-src 'self';
+    script-src 'self';
+    style-src 'self' 'unsafe-inline';
+    img-src 'self' data:;
+    connect-src 'self' http://localhost:*;">
+    <link rel="stylesheet" href="../../styles/main.css">
+    <link rel="stylesheet" href="./pdfToMarkdownFixture.css">
+    <script type="module" src="../../utils/themeManager.js"></script>
+</head>
+<body>
+    <div id="app">
+        <div class="top-bar">
+            <a href="./pdfToMarkdown.html" class="top-btn">Back To Tool</a>
+            <h1 class="tool-title">PDF to Markdown Fixture</h1>
+        </div>
+
+        <main class="fixture-shell">
+            <section class="fixture-panel">
+                <h2>Fixture Options</h2>
+                <p class="fixture-copy">
+                    Runs the same converter as the production PDF-to-Markdown tool and preloads your two sample PDFs.
+                </p>
+
+                <div class="option-grid">
+                    <label><input type="checkbox" id="detect-headings" checked> Detect headings</label>
+                    <label><input type="checkbox" id="detect-tables" checked> Detect tables</label>
+                    <label><input type="checkbox" id="detect-formatting" checked> Detect formatting</label>
+                    <label><input type="checkbox" id="include-images" checked> Include images</label>
+                    <label><input type="checkbox" id="ocr-fallback"> OCR fallback</label>
+                    <label><input type="checkbox" id="heal-paragraphs" checked> Heal paragraphs</label>
+                </div>
+
+                <div class="fixture-actions">
+                    <button id="run-all-btn" class="action-btn">Run All Fixtures</button>
+                    <button id="select-pdf-btn" class="secondary-btn">Add PDF</button>
+                </div>
+
+                <p id="fixture-status" class="fixture-status">Ready.</p>
+            </section>
+
+            <section id="fixture-list" class="fixture-list"></section>
+        </main>
+    </div>
+
+    <template id="fixture-card-template">
+        <article class="fixture-card">
+            <div class="fixture-card-header">
+                <div>
+                    <h3 class="fixture-title"></h3>
+                    <p class="fixture-path"></p>
+                </div>
+                <div class="fixture-card-actions">
+                    <button class="run-btn">Run</button>
+                    <button class="save-btn" disabled>Save Markdown</button>
+                </div>
+            </div>
+
+            <div class="fixture-progress">
+                <div class="fixture-progress-bar"><div class="fixture-progress-fill"></div></div>
+                <p class="fixture-progress-text">Waiting to run.</p>
+            </div>
+
+            <div class="fixture-summary">
+                <div><span class="label">Headings</span><span class="summary-headings">0</span></div>
+                <div><span class="label">Tables</span><span class="summary-tables">0</span></div>
+                <div><span class="label">Lists</span><span class="summary-lists">0</span></div>
+                <div><span class="label">Output lines</span><span class="summary-lines">0</span></div>
+            </div>
+
+            <div class="fixture-panels">
+                <section class="fixture-subpanel">
+                    <h4>Detected List Lines</h4>
+                    <pre class="list-preview">No output yet.</pre>
+                </section>
+                <section class="fixture-subpanel">
+                    <h4>Markdown Output</h4>
+                    <textarea class="markdown-output" spellcheck="false" placeholder="Markdown will appear here..."></textarea>
+                </section>
+            </div>
+        </article>
+    </template>
+
+    <script type="module" src="./pdfToMarkdownFixture.js"></script>
+</body>
+</html>
diff --git a/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.js b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.js
new file mode 100644
index 00000000..e74fee6d
--- /dev/null
+++ b/src/renderer/tools/pdfToMarkdown/pdfToMarkdownFixture.js
@@ -0,0 +1,173 @@
+import { convertPdfToMarkdownWithFallback, DEFAULT_PDF_TO_MARKDOWN_OPTIONS } from './pdfToMarkdown.js';
+import { ThemeManager } from '../../utils/themeManager.js';
+
+const FIXTURE_PDFS = [ // fixtures that get a card as soon as the page loads
+    {
+        label: 'CS170 Homework',
+        path: '/Users/jacobchamie/Documents/cs170hw01.pdf' // NOTE(review): absolute path into one developer's home directory — will not exist on other machines
+    },
+    {
+        label: 'ArXiv Sample',
+        path: '/Users/jacobchamie/Downloads/2604.02248v1.pdf' // NOTE(review): same machine-specific path concern
+    }
+];
+
+const LIST_LINE_RE = /^\s*(?:- |\d+\. )/; // optional indent, then "- " or "<digits>. "
+
+function getOptions() { // Reads the six option checkboxes and returns them layered over the converter defaults.
+    return {
+        ...DEFAULT_PDF_TO_MARKDOWN_OPTIONS,
+        detectHeadings: document.getElementById('detect-headings').checked,
+        detectTables: document.getElementById('detect-tables').checked,
+        detectFormatting: document.getElementById('detect-formatting').checked,
+        includeImages: document.getElementById('include-images').checked,
+        ocrFallback: document.getElementById('ocr-fallback').checked,
+        healParagraphs: document.getElementById('heal-paragraphs').checked
+    };
+}
+
+function summarizeMarkdown(markdown) {
+    const lines = markdown.split(/\r?\n/);
+    return {
+        headingCount: lines.filter(line => /^#{1,3}\s/.test(line)).length,
+        tableCount: lines.filter(line => /^\|/.test(line)).length,
+        listLines: lines.filter(line => LIST_LINE_RE.test(line)),
+        lineCount: lines.length
+    };
+}
+
+function slugifyName(name) { // Turns a label or file name into a lowercase, dash-separated slug.
+    return name
+        .replace(/\.pdf$/i, '') // drop a trailing .pdf extension
+        .replace(/[^a-z0-9]+/gi, '-') // collapse every run of non-alphanumerics into a single dash
+        .replace(/^-+|-+$/g, '') // trim dashes at both ends
+        .toLowerCase();
+}
+
+function setGlobalStatus(message) { // Shows `message` in the page-level #fixture-status line.
+    document.getElementById('fixture-status').textContent = message;
+}
+
+function createFixtureCard(file) {
+    const template = document.getElementById('fixture-card-template');
+    const node = template.content.firstElementChild.cloneNode(true);
+
+    node.querySelector('.fixture-title').textContent = file.label;
+    node.querySelector('.fixture-path').textContent = file.path;
+
+    const refs = {
+        root: node,
+        runBtn: node.querySelector('.run-btn'),
+        saveBtn: node.querySelector('.save-btn'),
+        progressFill: node.querySelector('.fixture-progress-fill'),
+        progressText: node.querySelector('.fixture-progress-text'),
+        markdownOutput: node.querySelector('.markdown-output'),
+        listPreview: node.querySelector('.list-preview'),
+        headings: node.querySelector('.summary-headings'),
+        tables: node.querySelector('.summary-tables'),
+        lists: node.querySelector('.summary-lists'),
+        lines: node.querySelector('.summary-lines')
+    };
+
+    let latestMarkdown = '';
+    let latestAssets = [];
+
+    async function runFixture() {
+        refs.runBtn.disabled = true;
+        refs.saveBtn.disabled = true;
+        refs.progressFill.style.width = '0%';
+        refs.progressText.textContent = 'Starting conversion...';
+        setGlobalStatus(`Running fixture for ${file.label}...`);
+
+        try {
+            const result = await convertPdfToMarkdownWithFallback(file.path, getOptions(), (pct, msg) => {
+                refs.progressFill.style.width = `${pct}%`;
+                refs.progressText.textContent = msg;
+            });
+            const markdown = result.markdown;
+
+            latestMarkdown = markdown;
+            latestAssets = result.assets || [];
+            refs.markdownOutput.value = markdown;
+
+            const summary = summarizeMarkdown(markdown);
+            refs.headings.textContent = String(summary.headingCount);
+            refs.tables.textContent = String(summary.tableCount);
+            refs.lists.textContent = String(summary.listLines.length);
+            refs.lines.textContent = String(summary.lineCount);
+            refs.listPreview.textContent = summary.listLines.length
+                ? summary.listLines.slice(0, 80).join('\n')
+                : 'No markdown list lines detected.';
+
+            refs.progressFill.style.width = '100%';
+            refs.progressText.textContent = 'Finished.';
+            refs.saveBtn.disabled = !markdown.trim();
+            setGlobalStatus(`Finished ${file.label}.`);
+        } catch (err) {
+            refs.progressText.textContent = `Failed: ${err.message}`;
+            refs.listPreview.textContent = 'Conversion failed.';
+            setGlobalStatus(`Fixture failed for ${file.label}: ${err.message}`);
+            console.error('Fixture conversion failed:', err);
+        } finally {
+            refs.runBtn.disabled = false;
+        }
+    }
+
+    async function saveFixtureOutput() {
+        if (!latestMarkdown.trim()) return;
+        const filename = `${slugifyName(file.label)}-fixture.md`;
+        const result = await window.electronAPI.saveMarkdownFile(filename, latestMarkdown, file.path, latestAssets);
+        if (result?.success) {
+            setGlobalStatus(`Saved ${filename}.`);
+        }
+    }
+
+    refs.runBtn.addEventListener('click', runFixture);
+    refs.saveBtn.addEventListener('click', saveFixtureOutput);
+
+    return {
+        element: node,
+        runFixture
+    };
+}
+
+async function addSelectedPdfs() { // Opens the PDF picker and maps the chosen paths to { label, path } fixtures ([] when nothing was chosen).
+    const paths = await window.electronAPI.selectPdfs();
+    if (!paths?.length) return [];
+
+    return paths.map(path => ({
+        label: path.split(/[\\/]/).pop(), // file name: the last segment after "/" or "\"
+        path
+    }));
+}
+
+document.addEventListener('DOMContentLoaded', async () => { // Page bootstrap: theme, built-in fixture cards, and the two toolbar buttons.
+    ThemeManager.init();
+
+    const fixtureList = document.getElementById('fixture-list');
+    const cards = []; // every card created so far, in insertion order; "Run All" iterates this
+
+    function appendFixtures(files) {
+        for (const file of files) {
+            const card = createFixtureCard(file);
+            cards.push(card);
+            fixtureList.appendChild(card.element);
+        }
+    }
+
+    appendFixtures(FIXTURE_PDFS);
+
+    document.getElementById('run-all-btn').addEventListener('click', async () => {
+        for (const card of cards) {
+            // Keep order deterministic so it is easier to compare outputs.
+            await card.runFixture(); // sequential on purpose: each run finishes before the next starts
+        }
+    });
+
+    document.getElementById('select-pdf-btn').addEventListener('click', async () => {
+        const files = await addSelectedPdfs();
+        if (!files.length) return;
+        appendFixtures(files);
+        setGlobalStatus(`Added ${files.length} PDF fixture${files.length === 1 ? '' : 's'}.`);
+    });
+});

From de534d6ba686b19687be5b29ffb8b79ae55f43cb Mon Sep 17 00:00:00 2001
From: JacobChamie <jacobchamie@gmail.com>
Date: Sat, 4 Apr 2026 14:41:33 -0700
Subject: [PATCH 2/2] remove unnecessary files

---
 .claude/settings.local.json | 9 ---------
 .gitignore                  | 1 +
 2 files changed, 1 insertion(+), 9 deletions(-)
 delete mode 100644 .claude/settings.local.json

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
deleted file mode 100644
index 42f275a7..00000000
--- a/.claude/settings.local.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(python3:*)",
-      "Bash(node --input-type=module --eval ':*)",
-      "Bash(node:*)"
-    ]
-  }
-}
diff --git a/.gitignore b/.gitignore
index 65651567..5bfef504 100644
--- a/.gitignore
+++ b/.gitignore
@@ -590,3 +590,4 @@ FodyWeavers.xsd
 /build
 /.flatpak-builder
 *.traineddata
+.claude/