-
Notifications
You must be signed in to change notification settings - Fork 312
feat(web): binary file attachments for Ask #1375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: whoisthey/text-file-attachments
Are you sure you want to change the base?
Changes from all commits
f893212
7d7138a
d31052a
a078097
5011cae
93bd8ae
ac42b73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| import { AttachmentStatus, PrismaClient } from "@sourcebot/db"; | ||
| import { createLogger, env } from "@sourcebot/shared"; | ||
| import { unlink } from "fs/promises"; | ||
| import path from "path"; | ||
| import { setIntervalAsync } from "./utils.js"; | ||
|
|
||
| const BATCH_SIZE = 1_000; | ||
| const ONE_HOUR_MS = 60 * 60 * 1000; | ||
|
|
||
| const logger = createLogger('attachment-pruner'); | ||
|
|
||
| /** | ||
| * Periodically deletes PENDING (uploaded-but-never-linked) attachment blobs | ||
| * older than the configured TTL, along with their stored bytes. These are the | ||
| * orphans produced when a user selects a file in the chat box but never sends | ||
| * the message. COMMITTED attachments are never touched here; their byte | ||
| * lifecycle is handled by the chat-delete sweep in the web app. | ||
| * | ||
| * @note Mirrors the local-FS layout used by `LocalFsStorageBackend` in the web | ||
| * package (`DATA_CACHE_DIR/attachments/<storageKey>`). When an S3 driver is | ||
| * added (Followup B), this deletion path must be generalized accordingly. | ||
| */ | ||
| export class AttachmentPruner { | ||
| private interval?: NodeJS.Timeout; | ||
| private readonly attachmentsDir = path.join(env.DATA_CACHE_DIR, 'attachments'); | ||
|
|
||
| constructor(private db: PrismaClient) {} | ||
|
|
||
| /** | ||
| * Starts orphan pruning: runs one prune immediately, then schedules another every hour. | ||
| * When SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS is zero or negative, only logs that pruning is disabled and returns. | ||
| */ | ||
| startScheduler() { | ||
| const ttlHours = env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS; | ||
| if (ttlHours <= 0) { | ||
| // Report the configured value: this guard also covers negative settings, not only 0. | ||
| logger.info(`SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS is ${ttlHours}, attachment orphan pruning is disabled.`); | ||
| return; | ||
| } | ||
|
|
||
| logger.debug(`Attachment pruner started. Pruning PENDING attachments older than ${ttlHours} hours.`); | ||
|
|
||
| // Run immediately on startup, then every hour. | ||
| // The startup run is fire-and-forget, so its rejection is caught and logged here | ||
| // instead of surfacing as an unhandled promise rejection. | ||
| void this.pruneOrphanedAttachments().catch((error: unknown) => { | ||
| logger.warn(`Initial attachment orphan prune failed: ${error}`); | ||
| }); | ||
| this.interval = setIntervalAsync(() => this.pruneOrphanedAttachments(), ONE_HOUR_MS); | ||
| } | ||
|
|
||
| /** | ||
| * Cancels the hourly prune timer, if one was scheduled, and forgets its handle. | ||
| * Returns without doing anything when no timer is set. | ||
| */ | ||
| async dispose() { | ||
| const timer = this.interval; | ||
| if (!timer) { | ||
| return; | ||
| } | ||
|
|
||
| clearInterval(timer); | ||
| this.interval = undefined; | ||
| } | ||
|
|
||
| /** | ||
| * Deletes PENDING attachments created before the TTL cutoff, in batches of up to BATCH_SIZE. | ||
| * For each batch the stored file of every attachment is unlinked first, then the rows are deleted by id. | ||
| * | ||
| * NOTE(review): rows are selected once and later deleted by id only, without re-checking status; | ||
| * a row is also deleted when unlinking its bytes failed with a non-ENOENT error. Confirm both are acceptable. | ||
| */ | ||
| private async pruneOrphanedAttachments() { | ||
| // Attachments created before this instant are eligible for pruning. | ||
| const cutoff = new Date(Date.now() - env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS * ONE_HOUR_MS); | ||
| let totalDeleted = 0; | ||
|
|
||
| while (true) { | ||
| // Fetch the next batch of stale PENDING rows; only id and storageKey are needed below. | ||
| const batch = await this.db.attachment.findMany({ | ||
| where: { | ||
| status: AttachmentStatus.PENDING, | ||
| createdAt: { lt: cutoff }, | ||
| }, | ||
| select: { id: true, storageKey: true }, | ||
| take: BATCH_SIZE, | ||
| }); | ||
|
|
||
| if (batch.length === 0) { | ||
| break; | ||
| } | ||
|
|
||
| // Unlink the stored files concurrently. A missing file (ENOENT) is ignored; | ||
| // any other failure is logged as a warning and does not abort the batch. | ||
| await Promise.all(batch.map(async (attachment) => { | ||
| try { | ||
| await unlink(path.join(this.attachmentsDir, attachment.storageKey)); | ||
| } catch (error) { | ||
| if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') { | ||
| logger.warn(`Failed to delete bytes for orphaned attachment ${attachment.id}: ${error}`); | ||
| } | ||
| } | ||
| })); | ||
|
|
||
| // Delete the database rows for every attachment in this batch. | ||
| const result = await this.db.attachment.deleteMany({ | ||
| where: { id: { in: batch.map((attachment) => attachment.id) } }, | ||
| }); | ||
|
Comment on lines
+55
to
+80
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗄️ Data Integrity & Integration | 🟠 Major | 🏗️ Heavy lift Re-check that a batch is still orphaned before deleting bytes. This loop selects rows once as old 🤖 Prompt for AI Agents |
||
| totalDeleted += result.count; | ||
|
|
||
| // A batch shorter than BATCH_SIZE means the query returned every remaining match. | ||
| if (batch.length < BATCH_SIZE) { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| if (totalDeleted > 0) { | ||
| logger.debug(`Pruned ${totalDeleted} orphaned PENDING attachment(s).`); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| -- CreateEnum | ||
| -- Status values for an attachment row; new rows default to 'PENDING' (see the "status" column below). | ||
| CREATE TYPE "AttachmentStatus" AS ENUM ('PENDING', 'COMMITTED'); | ||
|
|
||
| -- CreateTable | ||
| -- One row per uploaded file: owning org, uploader, file metadata and status. | ||
| -- NOTE(review): "storageKey" presumably locates the stored bytes; confirm against the storage backend. | ||
| CREATE TABLE "Attachment" ( | ||
| "id" TEXT NOT NULL, | ||
| "orgId" INTEGER NOT NULL, | ||
| "storageKey" TEXT NOT NULL, | ||
| "filename" TEXT NOT NULL, | ||
| "mediaType" TEXT NOT NULL, | ||
| "sizeBytes" INTEGER NOT NULL, | ||
| "checksum" TEXT NOT NULL, | ||
| "uploadedById" TEXT NOT NULL, | ||
| "status" "AttachmentStatus" NOT NULL DEFAULT 'PENDING', | ||
| "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, | ||
|
|
||
| CONSTRAINT "Attachment_pkey" PRIMARY KEY ("id") | ||
| ); | ||
|
|
||
| -- CreateTable | ||
| -- Join table linking a chat to an attachment. | ||
| CREATE TABLE "ChatAttachment" ( | ||
| "id" TEXT NOT NULL, | ||
| "chatId" TEXT NOT NULL, | ||
| "attachmentId" TEXT NOT NULL, | ||
| "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, | ||
|
|
||
| CONSTRAINT "ChatAttachment_pkey" PRIMARY KEY ("id") | ||
| ); | ||
|
|
||
| -- CreateIndex | ||
| -- Composite index for queries that filter attachments by status and creation time. | ||
| CREATE INDEX "Attachment_status_createdAt_idx" ON "Attachment"("status", "createdAt"); | ||
|
|
||
| -- CreateIndex | ||
| -- Supports looking up the chat links of a given attachment. | ||
| CREATE INDEX "ChatAttachment_attachmentId_idx" ON "ChatAttachment"("attachmentId"); | ||
|
|
||
| -- CreateIndex | ||
| -- A given attachment can be linked to a given chat at most once. | ||
| CREATE UNIQUE INDEX "ChatAttachment_chatId_attachmentId_key" ON "ChatAttachment"("chatId", "attachmentId"); | ||
|
|
||
| -- AddForeignKey | ||
| -- Deleting an Org deletes its Attachment rows. | ||
| ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_orgId_fkey" FOREIGN KEY ("orgId") REFERENCES "Org"("id") ON DELETE CASCADE ON UPDATE CASCADE; | ||
|
|
||
| -- AddForeignKey | ||
| -- Deleting a User deletes every Attachment row they uploaded, whatever its status. | ||
| -- NOTE(review): confirm this is intended for attachments already linked to chats. | ||
| ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_uploadedById_fkey" FOREIGN KEY ("uploadedById") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; | ||
|
Comment on lines
+42
to
+43
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🗄️ Data Integrity & Integration | 🟠 Major | 🏗️ Heavy lift
Keep committed attachments when the uploader is deleted.
Line 43's `ON DELETE CASCADE` on `Attachment_uploadedById_fkey` deletes every attachment row uploaded by a user when that user is deleted, including attachments already linked to chats; the cascade on `ChatAttachment_attachmentId_fkey` then removes those chat links as well.
Suggested direction:
- "uploadedById" TEXT NOT NULL,
+ "uploadedById" TEXT,
-ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_uploadedById_fkey" FOREIGN KEY ("uploadedById") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;
+ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_uploadedById_fkey" FOREIGN KEY ("uploadedById") REFERENCES "User"("id") ON DELETE SET NULL ON UPDATE CASCADE;
Update the Prisma model alongside the migration so historical chats can still resolve their attachments after uploader deletion.
🤖 Prompt for AI Agents |
||
|
|
||
| -- AddForeignKey | ||
| -- Deleting a Chat deletes its ChatAttachment link rows. | ||
| ALTER TABLE "ChatAttachment" ADD CONSTRAINT "ChatAttachment_chatId_fkey" FOREIGN KEY ("chatId") REFERENCES "Chat"("id") ON DELETE CASCADE ON UPDATE CASCADE; | ||
|
|
||
| -- AddForeignKey | ||
| -- Deleting an Attachment deletes the ChatAttachment rows that reference it. | ||
| ALTER TABLE "ChatAttachment" ADD CONSTRAINT "ChatAttachment_attachmentId_fkey" FOREIGN KEY ("attachmentId") REFERENCES "Attachment"("id") ON DELETE CASCADE ON UPDATE CASCADE; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🩺 Stability & Availability | 🟠 Major | ⚡ Quick win
Handle the startup prune promise.
Line 39 kicks off `pruneOrphanedAttachments()` without awaiting or catching it. Any DB/filesystem failure there becomes an unhandled rejection, and this backend already exits on `unhandledRejection`, so the first prune can take the worker down during startup.
🤖 Prompt for AI Agents