From e113d820a3d8a53c9e06aabd0e97e0e331de1ced Mon Sep 17 00:00:00 2001
From: Sean Moss <smoss117@gmail.com>
Date: Tue, 16 Jun 2026 20:50:18 -0400
Subject: [PATCH 1/2] Load oversized bill document text lazily

Enhance the bill summary component to load oversized document text lazily,
with a loading state and spinner for a better user experience. Update the
database functions to store chunked document text in Firestore so that bills
exceeding the document size limit are handled properly. Add tests for the
document text storage behavior.

---
 components/bill/Summary.tsx               |  49 ++++++--
 components/db/bills.ts                    |  27 +++++
 functions/src/bills/bills.test.ts         | 108 +++++++++++++++++-
 functions/src/bills/bills.ts              | 132 ++++++++++++++++------
 functions/src/bills/contentBlocks.test.ts | 126 +++++++++++++++++++++
 functions/src/bills/contentBlocks.ts      |  95 ++++++++++++++++
 functions/src/bills/types.ts              |   3 +
 tests/unit/billSummary.test.tsx           | 102 +++++++++++++++++
 8 files changed, 600 insertions(+), 42 deletions(-)
 create mode 100644 functions/src/bills/contentBlocks.test.ts
 create mode 100644 functions/src/bills/contentBlocks.ts
 create mode 100644 tests/unit/billSummary.test.tsx
diff --git a/components/bill/Summary.tsx b/components/bill/Summary.tsx
index bb08b15d2..084eee341 100644
--- a/components/bill/Summary.tsx
+++ b/components/bill/Summary.tsx
@@ -4,7 +4,17 @@ import { useCallback, useEffect, useState } from "react"
 import type { ModalProps } from "react-bootstrap"
 import styled, { ThemeConsumer } from "styled-components"
 import { useMediaQuery } from "usehooks-ts"
-import { Button, Col, Container, Image, Modal, Row, Stack } from "../bootstrap"
+import {
+  Button,
+  Col,
+  Container,
+  Image,
+  Modal,
+  Row,
+  Spinner,
+  Stack
+} from "../bootstrap"
+import { getBillDocumentText } from "../db"
 import { firestore } from "../firebase"
 import * as links from "../links"
 import {
@@ -113,9 +123,30 @@ export const Summary = ({
 }: BillProps & { className?: string }) => {
   const [showBillDetails, setShowBillDetails] = useState(false)
   const [showFullSummary, setShowFullSummary] = useState(false)
-  const handleShowBillDetails = () => setShowBillDetails(true)
+  // Long bills store their text in a `contentBlocks` subcollection rather than
+  // inline; reassemble it lazily when the modal opens.
+  const [billText, setBillText] = useState<string | undefined>(
+    bill?.content?.DocumentText
+  )
+  const [loadingBillText, setLoadingBillText] = useState(false)
+  const hasBillText =
+    !!bill?.content?.DocumentText ||
+    (bill?.content?.DocumentTextBlockCount ?? 0) > 0
+
+  const handleShowBillDetails = useCallback(async () => {
+    setShowBillDetails(true)
+    if (billText !== undefined || !bill) return
+    setLoadingBillText(true)
+    try {
+      setBillText(await getBillDocumentText(bill.court, bill.id, bill.content))
+    } catch (e) {
+      // No caller awaits a click handler, so report the failure here.
+      console.warn(`Failed to load document text for bill ${bill.id}`, e)
+    } finally {
+      setLoadingBillText(false)
+    }
+  }, [bill, billText])
   const handleHideBillDetails = () => setShowBillDetails(false)
-  const billText = bill?.content?.DocumentText
   const hearingIds = bill?.hearingIds
   const isBallotMeasure =
     bill?.currentCommittee?.id === currentBallotInitiativeCommittee
@@ -129,7 +160,7 @@ export const Summary = ({
         <TitleFormat>
           {bill.content.Title}
           <div className="d-flex justify-content-end">
-            {billText ? (
+            {hasBillText ? (
               <StyledButton
                 variant="link"
                 className="m-1"
@@ -156,9 +187,13 @@ export const Summary = ({
               <Modal.Title>{bill?.id}</Modal.Title>
             </Modal.Header>
             <Modal.Body className="bg-white">
-              <FormattedBillDetails>
-                {bill?.content?.DocumentText}
-              </FormattedBillDetails>
+              {loadingBillText ? (
+                <div className="d-flex justify-content-center">
+                  <Spinner animation="border" role="status" />
+                </div>
+              ) : (
+                <FormattedBillDetails>{billText}</FormattedBillDetails>
+              )}
             </Modal.Body>
           </Modal>
         </TitleFormat>
diff --git a/components/db/bills.ts b/components/db/bills.ts
index 4ea11d6bf..3342eb12b 100644
--- a/components/db/bills.ts
+++ b/components/db/bills.ts
@@ -3,6 +3,7 @@ import {
   getDocs,
   limit,
   orderBy,
+  query,
   Timestamp,
   where
 } from "firebase/firestore"
@@ -34,6 +35,9 @@ export type BillContent = {
   LegislationTypeName: string
   Pinslip: string
   DocumentText?: string
+  /** Present only when DocumentText was too large to store inline and was
+   * chunked into the bill's `contentBlocks` subcollection. */
+  DocumentTextBlockCount?: number
 }
 
 export type BillTopic = {
@@ -87,6 +91,29 @@ export async function getBill(
   return bill as any
 }
 
+/**
+ * Look up the complete text of a bill. Inline `DocumentText` wins when set;
+ * otherwise a bill carrying a `DocumentTextBlockCount` has its text rebuilt by
+ * joining its `contentBlocks` documents in ascending `index` order.
+ */
+export async function getBillDocumentText(
+  court: number,
+  id: string,
+  content: BillContent
+): Promise<string | undefined> {
+  const inlineText = content.DocumentText
+  if (inlineText) return inlineText
+  if (!content.DocumentTextBlockCount) return undefined
+
+  const path = `/generalCourts/${court}/bills/${id}/contentBlocks`
+  const ordered = query(collection(firestore, path), orderBy("index"))
+  const snapshot = await getDocs(ordered)
+  if (snapshot.empty) return undefined
+
+  const parts = snapshot.docs.map(block => (block.data().text as string) ?? "")
+  return parts.join("")
+}
+
 export async function listBillsByHearingDate(
   limitCount: number
 ): Promise<Bill[]> {
diff --git a/functions/src/bills/bills.test.ts b/functions/src/bills/bills.test.ts
index 1d99b8712..4646b607b 100644
--- a/functions/src/bills/bills.test.ts
+++ b/functions/src/bills/bills.test.ts
@@ -1,11 +1,37 @@
+jest.mock("firebase-functions", () => ({
+  logger: { info: jest.fn(), warn: jest.fn() },
+  https: {}
+}))
 jest.mock("../malegislature", () => ({
   getDocument: jest.fn(),
-  getDocumentPdf: jest.fn()
+  getDocumentPdf: jest.fn(),
+  getBillHistory: jest.fn(),
+  getSimilarBills: jest.fn()
 }))
 jest.mock("./pdfText", () => ({
   extractBillTextFromPdf: jest.fn()
 }))
+// Avoid evaluating the real scraper (and Firebase init) when importing ./bills.
+jest.mock("../scraper", () => ({
+  createScraper: jest.fn(() => ({ fetchBatch: {}, startBatches: {} }))
+}))
+jest.mock("../firebase", () => ({
+  Timestamp: { fromMillis: jest.fn(() => "TS0") },
+  FieldValue: { delete: jest.fn(() => "__DELETE__") }
+}))
+jest.mock("./contentBlocks", () => ({
+  planDocumentTextStorage: jest.fn(),
+  writeDocumentTextBlocks: jest.fn(),
+  clearDocumentTextBlocks: jest.fn()
+}))
 
+import { FieldValue } from "../firebase"
+import { fetchBillResource } from "./bills"
+import {
+  clearDocumentTextBlocks,
+  planDocumentTextStorage,
+  writeDocumentTextBlocks
+} from "./contentBlocks"
 import { getDocumentWithPdfTextFallback } from "./documentTextFallback"
 import { extractBillTextFromPdf } from "./pdfText"
 
@@ -80,3 +106,83 @@ describe("getDocumentWithPdfTextFallback", () => {
     })
   })
 })
+
+describe("fetchBillResource document text storage", () => {
+  const mockedApi = jest.requireMock("../malegislature") as {
+    getDocument: jest.Mock
+    getBillHistory: jest.Mock
+    getSimilarBills: jest.Mock
+  }
+  const mockedPlan = planDocumentTextStorage as jest.MockedFunction<
+    typeof planDocumentTextStorage
+  >
+  const mockedWrite = writeDocumentTextBlocks as jest.MockedFunction<
+    typeof writeDocumentTextBlocks
+  >
+  const mockedClear = clearDocumentTextBlocks as jest.MockedFunction<
+    typeof clearDocumentTextBlocks
+  >
+
+  beforeEach(() => {
+    jest.resetAllMocks()
+    mockedApi.getBillHistory.mockResolvedValue([])
+    mockedApi.getSimilarBills.mockResolvedValue([])
+    mockedApi.getDocument.mockResolvedValue({
+      DocumentText: "bill text",
+      Cosponsors: []
+    })
+    ;(FieldValue.delete as jest.Mock).mockReturnValue("__DELETE__")
+    mockedWrite.mockResolvedValue(undefined)
+    mockedClear.mockResolvedValue(undefined)
+  })
+
+  it("keeps text inline when it fits", async () => {
+    mockedPlan.mockReturnValue({ inline: "bill text" })
+
+    const { content } = (await fetchBillResource(194, "H1")) as any
+
+    expect(mockedWrite).not.toHaveBeenCalled()
+    expect(mockedClear).not.toHaveBeenCalled()
+    expect(content.DocumentText).toBe("bill text")
+    expect(content).not.toHaveProperty("DocumentTextBlockCount")
+  })
+
+  it("chunks oversized text into blocks and drops the inline copy", async () => {
+    mockedPlan.mockReturnValue({ blocks: ["a", "b"] })
+
+    const { content } = (await fetchBillResource(194, "H5500")) as any
+
+    expect(mockedWrite).toHaveBeenCalledWith(194, "H5500", ["a", "b"])
+    expect(content.DocumentTextBlockCount).toBe(2)
+    expect(content.DocumentText).toBe("__DELETE__")
+  })
+
+  it("drops text without a count when block writing fails", async () => {
+    mockedPlan.mockReturnValue({ blocks: ["a"] })
+    mockedWrite.mockRejectedValue(new Error("write failed"))
+
+    const { content } = (await fetchBillResource(194, "H5500", {
+      content: { DocumentTextBlockCount: 3 }
+    })) as any
+
+    expect(mockedClear).toHaveBeenCalledWith(194, "H5500")
+    expect(content.DocumentText).toBe("__DELETE__")
+    expect(content.DocumentTextBlockCount).toBe("__DELETE__")
+  })
+
+  it("clears stale blocks when a previously chunked bill now fits inline", async () => {
+    mockedPlan.mockReturnValue({ inline: "small" })
+    mockedApi.getDocument.mockResolvedValue({
+      DocumentText: "small",
+      Cosponsors: []
+    })
+
+    const { content } = (await fetchBillResource(194, "H5500", {
+      content: { DocumentTextBlockCount: 2 }
+    })) as any
+
+    expect(mockedClear).toHaveBeenCalledWith(194, "H5500")
+    expect(content.DocumentText).toBe("small")
+    expect(content.DocumentTextBlockCount).toBe("__DELETE__")
+  })
+})
diff --git a/functions/src/bills/bills.ts b/functions/src/bills/bills.ts
index 58aeb8a71..736bb5558 100644
--- a/functions/src/bills/bills.ts
+++ b/functions/src/bills/bills.ts
@@ -1,13 +1,110 @@
 import { isString } from "lodash"
 import { logger } from "firebase-functions"
 import { logFetchError } from "../common"
+import { DocumentData, FieldValue } from "../firebase"
 import * as api from "../malegislature"
 import { createScraper } from "../scraper"
+import {
+  clearDocumentTextBlocks,
+  planDocumentTextStorage,
+  writeDocumentTextBlocks
+} from "./contentBlocks"
 import { getDocumentWithPdfTextFallback } from "./documentTextFallback"
 import { Bill, MISSING_TIMESTAMP } from "./types"
 
 export { getDocumentWithPdfTextFallback } from "./documentTextFallback"
 
+/**
+ * Fetch a bill's content, history, and similar bills, and assemble the resource
+ * to store. Counts and timestamps carry over from `current` when present.
+ * Oversized bill text (past Firestore's 1 MiB document limit, e.g. budget
+ * bills) is moved to the `contentBlocks` subcollection by `storeDocumentText`.
+ */
+export async function fetchBillResource(
+  court: number,
+  id: string,
+  current?: DocumentData
+): Promise<Partial<Bill>> {
+  const { content, pdfTextExtraction } = await getDocumentWithPdfTextFallback(
+    court,
+    id
+  )
+  const history = await api
+    .getBillHistory(court, id)
+    .catch(logFetchError("bill history", id))
+    .then(history => history ?? [])
+  // Most of our time is spent fetching similar bills
+  const similar = await api
+    .getSimilarBills(court, id)
+    .catch(logFetchError("similar bills", id))
+    .then(bills => bills?.map(b => b.BillNumber).filter(isString) ?? [])
+
+  if (content.DocumentText == null && pdfTextExtraction) {
+    logger.info(
+      `No bill text extracted from PDF for ${court}/${id}: ${pdfTextExtraction.status}`
+    )
+  }
+
+  await storeDocumentText(court, id, content, current)
+
+  const resource: Partial<Bill> = {
+    content,
+    history,
+    similar,
+    cosponsorCount: content.Cosponsors.length,
+    testimonyCount: current?.testimonyCount ?? 0,
+    endorseCount: current?.endorseCount ?? 0,
+    neutralCount: current?.neutralCount ?? 0,
+    opposeCount: current?.opposeCount ?? 0,
+    latestTestimonyAt: current?.latestTestimonyAt ?? MISSING_TIMESTAMP,
+    nextHearingAt: current?.nextHearingAt ?? MISSING_TIMESTAMP
+  }
+
+  return resource
+}
+
+/**
+ * Mutates `content` in place to reflect how the bill's text is stored: inline
+ * for normal bills, or chunked into the `contentBlocks` subcollection when it
+ * would overflow the document. Inline copies / stale counts are removed with a
+ * delete sentinel because the main document is written with `{ merge: true }`.
+ */
+async function storeDocumentText(
+  court: number,
+  id: string,
+  content: any,
+  current?: DocumentData
+): Promise<void> {
+  const plan = planDocumentTextStorage(content.DocumentText ?? undefined),
+    hadBlocks = !!current?.content?.DocumentTextBlockCount
+
+  if (plan.blocks) {
+    try {
+      await writeDocumentTextBlocks(court, id, plan.blocks)
+      content.DocumentTextBlockCount = plan.blocks.length
+    } catch (e) {
+      logger.warn(
+        `Failed to write content blocks for ${court}/${id}: ${
+          e instanceof Error ? e.message : String(e)
+        }`
+      )
+      // Fall back to dropping the text entirely so the bill still scrapes (the
+      // UI falls back to the PDF download link). Always clear the subcollection:
+      // a failed write can leave partial blocks even if none existed before.
+      await clearDocumentTextBlocks(court, id).catch(() => undefined)
+      // Only emit the delete sentinel when a stale count is actually stored.
+      if (hadBlocks) content.DocumentTextBlockCount = FieldValue.delete()
+    }
+    // The inline copy must never be written to the size-limited main document.
+    content.DocumentText = FieldValue.delete()
+  } else if (hadBlocks) {
+    // Text now fits inline (or is absent) but this bill previously stored
+    // blocks — remove the stale chunks and clear the count.
+    await clearDocumentTextBlocks(court, id)
+    content.DocumentTextBlockCount = FieldValue.delete()
+  }
+}
+
 /**
  * There are around 8000 documents. With 8 batches per day, 20 parallel
  * scrapers, and 50 documents per batch, we will process all documents once per
@@ -21,40 +118,7 @@ export const { fetchBatch: fetchBillBatch, startBatches: startBillBatches } =
     startBatchSchedule: "every 3 hours",
     fetchBatchTimeout: 240,
     startBatchTimeout: 240,
-    fetchResource: async (court: number, id: string, current) => {
-      const { content, pdfTextExtraction } =
-        await getDocumentWithPdfTextFallback(court, id)
-      const history = await api
-        .getBillHistory(court, id)
-        .catch(logFetchError("bill history", id))
-        .then(history => history ?? [])
-      // Most of our time is spent fetching similar bills
-      const similar = await api
-        .getSimilarBills(court, id)
-        .catch(logFetchError("similar bills", id))
-        .then(bills => bills?.map(b => b.BillNumber).filter(isString) ?? [])
-
-      if (content.DocumentText == null && pdfTextExtraction) {
-        logger.info(
-          `No bill text extracted from PDF for ${court}/${id}: ${pdfTextExtraction.status}`
-        )
-      }
-
-      const resource: Partial<Bill> = {
-        content,
-        history,
-        similar,
-        cosponsorCount: content.Cosponsors.length,
-        testimonyCount: current?.testimonyCount ?? 0,
-        endorseCount: current?.endorseCount ?? 0,
-        neutralCount: current?.neutralCount ?? 0,
-        opposeCount: current?.opposeCount ?? 0,
-        latestTestimonyAt: current?.latestTestimonyAt ?? MISSING_TIMESTAMP,
-        nextHearingAt: current?.nextHearingAt ?? MISSING_TIMESTAMP
-      }
-
-      return resource
-    },
+    fetchResource: fetchBillResource,
     listIds: (court: number) =>
       api.listDocuments({ court }).then(docs => docs.map(d => d.BillNumber))
   })
diff --git a/functions/src/bills/contentBlocks.test.ts b/functions/src/bills/contentBlocks.test.ts
new file mode 100644
index 000000000..d084eecc3
--- /dev/null
+++ b/functions/src/bills/contentBlocks.test.ts
@@ -0,0 +1,126 @@
+const mockSet = jest.fn()
+const mockClose = jest.fn().mockResolvedValue(undefined)
+const mockDoc = jest.fn((id: string) => ({ id }))
+const mockRecursiveDelete = jest.fn().mockResolvedValue(undefined)
+const mockCollection = jest.fn(() => ({ doc: mockDoc }))
+const mockBulkWriter = jest.fn(() => ({ set: mockSet, close: mockClose }))
+
+jest.mock("../firebase", () => ({
+  db: {
+    collection: mockCollection,
+    recursiveDelete: mockRecursiveDelete,
+    bulkWriter: mockBulkWriter
+  }
+}))
+
+import {
+  chunkDocumentText,
+  clearDocumentTextBlocks,
+  MAX_BLOCK_BYTES,
+  MAX_INLINE_TEXT_BYTES,
+  planDocumentTextStorage,
+  writeDocumentTextBlocks
+} from "./contentBlocks"
+
+const byteLength = (s: string) => Buffer.byteLength(s, "utf8")
+
+describe("chunkDocumentText", () => {
+  it("keeps small text in a single chunk", () => {
+    expect(chunkDocumentText("hello")).toEqual(["hello"])
+  })
+
+  it("splits ASCII text into byte-bounded chunks that rejoin exactly", () => {
+    const text = "a".repeat(MAX_BLOCK_BYTES + 100)
+    const chunks = chunkDocumentText(text)
+
+    expect(chunks.length).toBe(2)
+    chunks.forEach(c =>
+      expect(byteLength(c)).toBeLessThanOrEqual(MAX_BLOCK_BYTES)
+    )
+    expect(chunks.join("")).toBe(text)
+  })
+
+  it("never splits a multi-byte code point", () => {
+    // '€' is 3 UTF-8 bytes and does not divide evenly into the byte budget, so a
+    // naive byte split would land mid-character.
+    const text = "€".repeat(MAX_BLOCK_BYTES)
+    const chunks = chunkDocumentText(text)
+
+    expect(chunks.length).toBeGreaterThan(1)
+    chunks.forEach(c => {
+      expect(byteLength(c)).toBeLessThanOrEqual(MAX_BLOCK_BYTES)
+      // A broken code point would surface as a replacement char on re-decode.
+      expect(c).not.toContain("�")
+    })
+    expect(chunks.join("")).toBe(text)
+  })
+
+  it("never splits a surrogate pair (emoji)", () => {
+    const text = "😀".repeat(300_000) // 4 bytes each → exceeds the byte budget
+    const chunks = chunkDocumentText(text)
+
+    expect(chunks.length).toBeGreaterThan(1)
+    chunks.forEach(c =>
+      expect(byteLength(c)).toBeLessThanOrEqual(MAX_BLOCK_BYTES)
+    )
+    expect(chunks.join("")).toBe(text)
+  })
+})
+
+describe("planDocumentTextStorage", () => {
+  it("returns inline (undefined) when there is no text", () => {
+    expect(planDocumentTextStorage(undefined)).toEqual({ inline: undefined })
+  })
+
+  it("keeps text at or under the inline limit inline", () => {
+    const text = "a".repeat(MAX_INLINE_TEXT_BYTES)
+    expect(planDocumentTextStorage(text)).toEqual({ inline: text })
+  })
+
+  it("chunks text above the inline limit", () => {
+    const text = "a".repeat(MAX_INLINE_TEXT_BYTES + 1)
+    const plan = planDocumentTextStorage(text)
+
+    expect(plan.inline).toBeUndefined()
+    expect(plan.blocks).toBeDefined()
+    expect(plan.blocks!.join("")).toBe(text)
+  })
+})
+
+describe("writeDocumentTextBlocks", () => {
+  beforeEach(() => jest.clearAllMocks())
+
+  it("deletes existing blocks then writes new ordered chunks", async () => {
+    await writeDocumentTextBlocks(194, "H5500", ["a", "b", "c"])
+
+    expect(mockCollection).toHaveBeenCalledWith(
+      "/generalCourts/194/bills/H5500/contentBlocks"
+    )
+    expect(mockRecursiveDelete).toHaveBeenCalled()
+    expect(mockSet).toHaveBeenCalledTimes(3)
+    expect(mockSet).toHaveBeenNthCalledWith(
+      1,
+      { id: "0" },
+      { index: 0, text: "a" }
+    )
+    expect(mockSet).toHaveBeenNthCalledWith(
+      3,
+      { id: "2" },
+      { index: 2, text: "c" }
+    )
+    expect(mockClose).toHaveBeenCalled()
+  })
+})
+
+describe("clearDocumentTextBlocks", () => {
+  beforeEach(() => jest.clearAllMocks())
+
+  it("recursively deletes the subcollection", async () => {
+    await clearDocumentTextBlocks(194, "H5500")
+
+    expect(mockCollection).toHaveBeenCalledWith(
+      "/generalCourts/194/bills/H5500/contentBlocks"
+    )
+    expect(mockRecursiveDelete).toHaveBeenCalled()
+  })
+})
diff --git a/functions/src/bills/contentBlocks.ts b/functions/src/bills/contentBlocks.ts
new file mode 100644
index 000000000..c5dab21b5
--- /dev/null
+++ b/functions/src/bills/contentBlocks.ts
@@ -0,0 +1,95 @@
+import { db } from "../firebase"
+
+/**
+ * Max UTF-8 bytes of bill text kept inline on the bill document. Above this the
+ * whole bill document risks exceeding Firestore's 1 MiB (1,048,576 byte) limit,
+ * so the text is chunked into the `contentBlocks` subcollection instead. Leaves
+ * ~148 KB of headroom for the rest of the document (history, cosponsors, etc.).
+ */
+export const MAX_INLINE_TEXT_BYTES = 900_000
+
+/**
+ * Max UTF-8 bytes of `text` stored in a single content block document. Keeps
+ * each block document comfortably under the 1 MiB limit.
+ */
+export const MAX_BLOCK_BYTES = 900_000
+
+export type DocumentTextStoragePlan =
+  | { inline: string | undefined; blocks?: undefined }
+  | { inline?: undefined; blocks: string[] }
+
+/**
+ * Choose the storage strategy for a bill's `DocumentText`. Missing text, or
+ * text of at most {@link MAX_INLINE_TEXT_BYTES} UTF-8 bytes, is kept inline;
+ * longer text is chunked for the `contentBlocks` subcollection. This function
+ * performs no I/O — see {@link writeDocumentTextBlocks} for the writes.
+ */
+export function planDocumentTextStorage(
+  text: string | undefined
+): DocumentTextStoragePlan {
+  if (text != null && byteLength(text) > MAX_INLINE_TEXT_BYTES) {
+    return { blocks: chunkDocumentText(text) }
+  }
+  return { inline: text }
+}
+
+/**
+ * Break `text` into consecutive pieces of at most {@link MAX_BLOCK_BYTES} UTF-8
+ * bytes each without cutting a code point, so `chunks.join("") === text`.
+ */
+export function chunkDocumentText(text: string): string[] {
+  const pieces: string[] = []
+  let pending = ""
+  let pendingBytes = 0
+
+  // for...of steps through whole code points, so surrogate pairs stay together.
+  for (const symbol of text) {
+    const size = byteLength(symbol)
+    const wouldOverflow = pendingBytes + size > MAX_BLOCK_BYTES
+    if (pending !== "" && wouldOverflow) {
+      pieces.push(pending)
+      pending = ""
+      pendingBytes = 0
+    }
+    pending += symbol
+    pendingBytes += size
+  }
+  if (pending !== "") pieces.push(pending)
+  return pieces
+}
+
+function billContentBlocksRef(court: number, id: string) {
+  return db.collection(`/generalCourts/${court}/bills/${id}/contentBlocks`)
+}
+
+/**
+ * Replace a bill's content blocks with `blocks`, deleting any existing chunks
+ * first so a shrinking bill does not leave stale blocks behind. Each block is
+ * stored as `{ index, text }`. Rejects if any individual block write fails.
+ */
+export async function writeDocumentTextBlocks(
+  court: number,
+  id: string,
+  blocks: string[]
+): Promise<void> {
+  const ref = billContentBlocksRef(court, id)
+  await db.recursiveDelete(ref)
+  const writer = db.bulkWriter()
+  // Per-write failures surface on each set() promise, so await them with close().
+  const writes = blocks.map((text, index) =>
+    writer.set(ref.doc(String(index)), { index, text })
+  )
+  await Promise.all([...writes, writer.close()])
+}
+
+/** Delete any content blocks for a bill (used when its text now fits inline). */
+export async function clearDocumentTextBlocks(
+  court: number,
+  id: string
+): Promise<void> {
+  await db.recursiveDelete(billContentBlocksRef(court, id))
+}
+
+function byteLength(s: string): number {
+  return Buffer.byteLength(s, "utf8")
+}
diff --git a/functions/src/bills/types.ts b/functions/src/bills/types.ts
index c75e8787a..95a4bcfd1 100644
--- a/functions/src/bills/types.ts
+++ b/functions/src/bills/types.ts
@@ -46,6 +46,9 @@ export const BillContent = Record({
   Title: String,
   PrimarySponsor: Nullable(Record({ Name: String })),
   DocumentText: Maybe(String),
+  // Set only when DocumentText is too large to store inline and is instead
+  // chunked into the bill's `contentBlocks` subcollection. See contentBlocks.ts.
+  DocumentTextBlockCount: Maybe(Number),
   Cosponsors: Array(Record({ Name: Maybe(String) }))
 })
 
diff --git a/tests/unit/billSummary.test.tsx b/tests/unit/billSummary.test.tsx
new file mode 100644
index 000000000..e1e6e9a6a
--- /dev/null
+++ b/tests/unit/billSummary.test.tsx
@@ -0,0 +1,102 @@
+import "@testing-library/jest-dom"
+import { render, screen, fireEvent, waitFor } from "@testing-library/react"
+import { Summary } from "components/bill/Summary"
+import { getBillDocumentText } from "components/db"
+import type { Bill } from "components/db/bills"
+import { Timestamp } from "firebase/firestore"
+
+// jsdom lacks matchMedia, which Summary's useMediaQuery relies on.
+Object.defineProperty(window, "matchMedia", {
+  writable: true,
+  value: jest.fn().mockImplementation(query => ({
+    matches: false,
+    media: query,
+    onchange: null,
+    addListener: jest.fn(),
+    removeListener: jest.fn(),
+    addEventListener: jest.fn(),
+    removeEventListener: jest.fn(),
+    dispatchEvent: jest.fn()
+  }))
+})
+
+jest.mock("next-i18next", () => ({
+  useTranslation: () => ({ t: (key: string) => key })
+}))
+
+// Keep every real db export, but stub the block reassembly so the test does not
+// touch Firestore.
+jest.mock("components/db", () => ({
+  __esModule: true,
+  ...jest.requireActual("components/db"),
+  getBillDocumentText: jest.fn()
+}))
+
+const mockedGetBillDocumentText = getBillDocumentText as jest.MockedFunction<
+  typeof getBillDocumentText
+>
+
+const makeBill = (content: Partial<Bill["content"]>): Bill =>
+  ({
+    id: "H5500",
+    court: 194,
+    content: {
+      Title: "An Act making appropriations",
+      BillNumber: "H5500",
+      DocketNumber: "HD1",
+      GeneralCourtNumber: 194,
+      Cosponsors: [],
+      LegislationTypeName: "Bill",
+      Pinslip: "",
+      ...content
+    },
+    cosponsorCount: 0,
+    testimonyCount: 0,
+    endorseCount: 0,
+    opposeCount: 0,
+    neutralCount: 0,
+    fetchedAt: new Timestamp(0, 0),
+    history: []
+  } as unknown as Bill)
+
+describe("Summary bill text", () => {
+  beforeEach(() => jest.clearAllMocks())
+
+  it("reassembles chunked text from content blocks when the modal opens", async () => {
+    mockedGetBillDocumentText.mockResolvedValue("Reassembled full bill text")
+    const bill = makeBill({ DocumentTextBlockCount: 2 })
+
+    render(<Summary bill={bill} />)
+
+    fireEvent.click(screen.getByRole("button", { name: "bill.view_bill" }))
+
+    await waitFor(() =>
+      expect(screen.getByText("Reassembled full bill text")).toBeInTheDocument()
+    )
+    expect(mockedGetBillDocumentText).toHaveBeenCalledWith(
+      194,
+      "H5500",
+      bill.content
+    )
+  })
+
+  it("shows inline text without reading content blocks", () => {
+    const bill = makeBill({ DocumentText: "Inline bill text" })
+
+    render(<Summary bill={bill} />)
+
+    fireEvent.click(screen.getByRole("button", { name: "bill.view_bill" }))
+
+    expect(screen.getByText("Inline bill text")).toBeInTheDocument()
+    expect(mockedGetBillDocumentText).not.toHaveBeenCalled()
+  })
+
+  it("falls back to a PDF download link when there is no text or blocks", () => {
+    render(<Summary bill={makeBill({})} />)
+
+    expect(
+      screen.queryByRole("button", { name: "bill.view_bill" })
+    ).not.toBeInTheDocument()
+    expect(screen.getByText("bill.download_pdf")).toBeInTheDocument()
+  })
+})

From f9532306231912ac8b7ce5a8b420fb00528827dc Mon Sep 17 00:00:00 2001
From: Sean Moss <smoss117@gmail.com>
Date: Tue, 16 Jun 2026 21:32:40 -0400
Subject: [PATCH 2/2] Update .prettierignore to include additional directories
 for exclusion: .claude/ and .serena/

---
 .prettierignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.prettierignore b/.prettierignore
index aaf25be4b..633f6ffd5 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -17,3 +17,5 @@ llm
 playwright-report
 CLAUDE.md
 .cursor/
+.claude/
+.serena/