diff --git a/functions/package.json b/functions/package.json index 0f6ef6ce6..21b337856 100644 --- a/functions/package.json +++ b/functions/package.json @@ -13,13 +13,13 @@ }, "main": "lib/index.js", "dependencies": { + "@google-cloud/aiplatform": "^3.9.0", "@google-cloud/firestore": "^5.0.2", "@google-cloud/pubsub": "^3.0.1", "assemblyai": "^4.9.0", "axios": "^0.25.0", "date-fns": "^2.30.0", "firebase-admin": "^12.0.0", - "@google-cloud/aiplatform": "^3.9.0", "firebase-functions": "^5.1.1", "fluent-ffmpeg": "^2.1.3", "fuse.js": "6.5.3", @@ -34,6 +34,7 @@ "runtypes": "6.6.0", "ssl-root-cas": "^1.3.1", "typesense": "^1.2.2", + "unzipper": "^0.12.3", "zod": "^3.20.2" }, "devDependencies": { @@ -43,6 +44,7 @@ "@types/luxon": "^2.0.9", "@types/object-hash": "^2.2.1", "@types/pdf-parse": "1.1.5", + "@types/unzipper": "^0.10.10", "copyfiles": "^2.4.1", "firebase-functions-test": "^0.3.3", "firebase-tools": "^13.18.0", diff --git a/functions/src/index.ts b/functions/src/index.ts index 641255bf4..a39f68888 100644 --- a/functions/src/index.ts +++ b/functions/src/index.ts @@ -60,6 +60,8 @@ export { export { transcription } from "./webhooks" +export { matchOcpfMembers } from "./ocpf/matchOcpfMembers" + export * from "./triggerPubsubFunction" // Export the health check last so it is loaded last. 
diff --git a/functions/src/ocpf/matchOcpfMembers.ts b/functions/src/ocpf/matchOcpfMembers.ts new file mode 100644 index 000000000..eaf335568 --- /dev/null +++ b/functions/src/ocpf/matchOcpfMembers.ts @@ -0,0 +1,275 @@ +import * as functions from "firebase-functions" +import { getAuth } from "firebase-admin/auth" +import axios from "axios" +import unzipper from "unzipper" +import { db } from "../firebase" +import { currentGeneralCourt } from "../shared" +import { MemberContent } from "../members/types" +import { + OcpfFilerRow, + OcpfMemberMapping, + OcpfMemberMappingEntry, + OcpfMemberMappingFlags, + OcpfMemberMappingFlagsEntry +} from "./types" + +export const matchOcpfMembers = functions.https.onRequest(async (req, res) => { + if (req.method !== "POST") { + res.status(405).send("Method Not Allowed. Use POST.") + return + } + if (process.env.FUNCTIONS_EMULATOR !== "true") { + const authHeader = req.headers.authorization + if (!authHeader?.startsWith("Bearer ")) { + res.status(401).send("Unauthorized") + return + } + try { + const decoded = await getAuth().verifyIdToken(authHeader.slice(7)) + if (decoded["role"] !== "admin") { + res.status(403).send("Forbidden") + return + } + } catch { + res.status(401).send("Unauthorized") + return + } + } + + const filers = await downloadAndParseFilers() + const members = await loadMembers() + + const existingMappingDoc = await db.doc("/config/ocpfMemberMapping").get() + const existingMapping = (existingMappingDoc.data() ?? 
{}) as OcpfMemberMapping + + const mapping: OcpfMemberMapping = {} + const unmatched: OcpfMemberMappingFlagsEntry[] = [] + const ambiguous: OcpfMemberMappingFlagsEntry[] = [] + + for (const member of members) { + const lastName = extractLastName(member.Name) + const branch = member.Branch + + if (!branch || (branch !== "Senate" && branch !== "House")) continue + + const lastNameAndBranchMatches = filers.filter( + f => + f.lastName.toLowerCase() === lastName.toLowerCase() && + f.officeSought === branch + ) + + // Narrow by first name: compare first word of each (e.g. "Daniel" from "Daniel J. Ryan" + // vs "Daniel" from "Daniel Joseph"). If none align, falls back to matches by last name and branch. + const mapleFirstName = member.Name.trim().split(/\s+/)[0].toLowerCase() + const firstNameMatches = lastNameAndBranchMatches.filter( + f => f.firstName.trim().split(/\s+/)[0].toLowerCase() === mapleFirstName + ) + const candidates = + firstNameMatches.length > 0 ? firstNameMatches : lastNameAndBranchMatches + + if (firstNameMatches.length === 1) { + const entry: OcpfMemberMappingEntry = { + cpfId: candidates[0].cpfId, + name: member.Name + } + mapping[member.MemberCode] = entry + + // Matching was likely fixed manually + } else if (member.MemberCode in existingMapping) { + continue + + // Single last name match but first name didn't align. Flag rather than auto-match, + // since the OCPF filer may be a different person (e.g. original member changed office sought, + // and another person with same last name is running for original office). + } else if (candidates.length === 1 && firstNameMatches.length === 0) { + ambiguous.push({ memberCode: member.MemberCode, name: member.Name }) + functions.logger.warn( + "Ambiguous OCPF match. 
Single last name match but first name did not align.", + { + memberCode: member.MemberCode, + name: member.Name, + district: member.District, + branch, + ocpfFirstName: candidates[0].firstName, + ocpfLastName: candidates[0].lastName, + ocpfDistrict: candidates[0].district, + ocpfOfficeSought: candidates[0].officeSought + } + ) + } else if (candidates.length === 0) { + unmatched.push({ memberCode: member.MemberCode, name: member.Name }) + functions.logger.warn("No OCPF match.", { + memberCode: member.MemberCode, + name: member.Name, + district: member.District, + branch + }) + } else { + ambiguous.push({ memberCode: member.MemberCode, name: member.Name }) + functions.logger.warn("Ambiguous OCPF match.", { + memberCode: member.MemberCode, + name: member.Name, + district: member.District, + branch, + candidates: candidates.map(c => ({ + cpfId: c.cpfId, + firstName: c.firstName, + lastName: c.lastName, + district: c.district, + officeSought: c.officeSought + })) + }) + } + } + + const flags: OcpfMemberMappingFlags = { unmatched, ambiguous } + + await db.doc("/config/ocpfMemberMapping").set(mapping, { merge: true }) + await db.doc("/config/ocpfMemberMappingFlags").set(flags) + + functions.logger.info("matchOcpfMembers complete", { + matched: Object.keys(mapping).length, + unmatched: unmatched.length, + ambiguous: ambiguous.length + }) + + res.status(200).json({ + results: { + matched: Object.keys(mapping).length, + unmatched: unmatched.length, + ambiguous: ambiguous.length + }, + unmatched_members: unmatched, + ambiguous_members: ambiguous + }) +}) + +async function downloadAndParseFilers(): Promise<OcpfFilerRow[]> { + const response = await axios.get( + "https://ocpf2.blob.core.windows.net/downloads/data2/ocpf-filers.zip", + { responseType: "arraybuffer" } + ) + + const buffer = Buffer.from(response.data as ArrayBuffer) + functions.logger.info("Downloaded ocpf-filers.zip", { + status: response.status, + contentType: response.headers["content-type"], + bytes: buffer.length, + 
firstBytes: buffer.subarray(0, 4).toString("hex") // should be 504b0304 for a valid ZIP + }) + const directory = await unzipper.Open.buffer(buffer) + const entry = directory.files.find( + f => f.type === "File" && f.path.toLowerCase().endsWith(".txt") + ) + if (!entry) throw new Error("No .txt file found inside ocpf-filers.zip") + + const content = await entry.buffer() + const text = content.toString("utf8") + const lines = text.split(/\r?\n/) + + const rawHeaders = lines[0].split("\t").map(h => h.trim()) + functions.logger.info("OCPF filers headers", { headers: rawHeaders }) + + const colIndex = buildColumnIndex(rawHeaders, [ + "cpfId", + "lastName", + "firstName", + "officeSought", + "district", + "closedDate" + ]) + + // Values in the file are wrapped in double quotes — strip them after splitting + const col = (cols: string[], idx: number) => + (cols[idx] ?? "").trim().replace(/^"|"$/g, "") + + const filers: OcpfFilerRow[] = [] + for (let i = 1; i < lines.length; i++) { + const line = lines[i] + if (!line.trim()) continue + + const cols = line.split("\t") + const closedDate = col(cols, colIndex.closedDate) + const officeSought = col(cols, colIndex.officeSought) + + if (closedDate !== "") continue + if (officeSought !== "Senate" && officeSought !== "House") continue + + filers.push({ + cpfId: parseInt(col(cols, colIndex.cpfId), 10), + lastName: col(cols, colIndex.lastName), + firstName: col(cols, colIndex.firstName), + officeSought, + district: col(cols, colIndex.district), + closedDate + }) + } + + functions.logger.info("Parsed active state legislators from OCPF", { + count: filers.length + }) + return filers +} + +const COLUMN_ALIASES: Record<string, string[]> = { + cpfId: ["cpf_id"], + lastName: ["candidate_last_name"], + firstName: ["candidate_first_name"], + officeSought: ["office_type_sought"], + district: ["district_name_sought"], + closedDate: ["closed_date"] +} + +function buildColumnIndex( + headers: string[], + fields: string[] +): Record<string, number> { + const normalized = 
headers.map(h => h.toLowerCase().replace(/\s+/g, "_")) + const index: Record<string, number> = {} + + for (const field of fields) { + const aliases = COLUMN_ALIASES[field] ?? [field.toLowerCase()] + const found = aliases.findIndex(alias => + normalized.some((h, i) => { + if (h === alias) { + index[field] = i + return true + } + return false + }) + ) + if (found === -1 && !(field in index)) { + throw new Error( + `Required column '${field}' not found in OCPF filers file. ` + + `Headers: ${headers.join(", ")}` + ) + } + } + + return index +} + +const GENERATIONAL_SUFFIXES = new Set(["jr", "sr", "ii", "iii", "iv", "v"]) + +function extractLastName(fullName: string): string { + const parts = fullName.trim().split(/\s+/) + while (parts.length > 1) { + const last = parts[parts.length - 1].toLowerCase().replace(/[.,]/g, "") + if (GENERATIONAL_SUFFIXES.has(last)) parts.pop() + else break + } + return parts[parts.length - 1].replace(/[,.]$/, "") +} + +async function loadMembers(): Promise<MemberContent[]> { + const snapshot = await db + .collection(`/generalCourts/${currentGeneralCourt}/members`) + .get() + + return snapshot.docs + .map(doc => { + const data = doc.data() + return data?.content as MemberContent | undefined + }) + .filter((c): c is MemberContent => !!c) +} diff --git a/functions/src/ocpf/types.ts b/functions/src/ocpf/types.ts new file mode 100644 index 000000000..ae61e2ce9 --- /dev/null +++ b/functions/src/ocpf/types.ts @@ -0,0 +1,41 @@ +// One active filer row parsed from ocpf-filers.txt +export interface OcpfFilerRow { + cpfId: number + lastName: string + firstName: string + officeSought: string // "Senate" | "House" + district: string + closedDate: string // empty string = active +} + +// Firestore: /config/ocpfMemberMapping +// memberCode → { cpfId, name }, e.g. { "SND1": { cpfId: 15031, name: "Sal N. 
DiDomenico" } } +export interface OcpfMemberMappingEntry { + cpfId: number + name: string +} + +export type OcpfMemberMapping = Record<string, OcpfMemberMappingEntry> + +export interface OcpfMemberMappingFlagsEntry { + memberCode: string + name: string +} + +// Firestore: /config/ocpfMemberMappingFlags +export interface OcpfMemberMappingFlags { + unmatched: OcpfMemberMappingFlagsEntry[] + ambiguous: OcpfMemberMappingFlagsEntry[] +} + +export interface FinanceBreakdownEntry { + count: number + amount: number +} + +export interface MembersFinanceBreakdown { + individual: FinanceBreakdownEntry + committee: FinanceBreakdownEntry + union: FinanceBreakdownEntry + unitemized: { amount: number } +} diff --git a/functions/yarn.lock b/functions/yarn.lock index a4b30343e..c4190ab58 100644 --- a/functions/yarn.lock +++ b/functions/yarn.lock @@ -1594,6 +1594,13 @@ resolved "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz" integrity sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw== +"@types/unzipper@^0.10.10": + version "0.10.11" + resolved "https://registry.yarnpkg.com/@types/unzipper/-/unzipper-0.10.11.tgz#2a605ae639fc20ee6886be0f7d28dc61c1e6d3d3" + integrity sha512-D25im2zjyMCcgL9ag6N46+wbtJBnXIr7SI4zHf9eJD2Dw2tEB5e+p5MYkrxKIVRscs5QV0EhtU9rgXSPx90oJg== + dependencies: + "@types/node" "*" + "@types/yargs-parser@*": version "21.0.3" resolved "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz" @@ -2008,7 +2015,7 @@ bl@^4.1.0: inherits "^2.0.4" readable-stream "^3.4.0" -bluebird@^3.7.2: +bluebird@^3.7.2, bluebird@~3.7.2: version "3.7.2" resolved "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz" integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== @@ -2828,6 +2835,13 @@ dunder-proto@^1.0.1: es-errors "^1.3.0" gopd "^1.2.0" +duplexer2@~0.1.4: + version "0.1.4" + resolved 
"https://registry.yarnpkg.com/duplexer2/-/duplexer2-0.1.4.tgz#8b12dab878c0d69e3e7891051662a32fc6bddcc1" + integrity sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA== + dependencies: + readable-stream "^2.0.2" + duplexify@^4.0.0, duplexify@^4.1.3: version "4.1.3" resolved "https://registry.npmjs.org/duplexify/-/duplexify-4.1.3.tgz" @@ -3524,6 +3538,15 @@ fs-extra@^10.1.0: jsonfile "^6.0.1" universalify "^2.0.0" +fs-extra@^11.2.0: + version "11.3.5" + resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-11.3.5.tgz#07a44eff40bea53e719909a532f91a23bf0769ff" + integrity sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg== + dependencies: + graceful-fs "^4.2.0" + jsonfile "^6.0.1" + universalify "^2.0.0" + fs-minipass@^2.0.0: version "2.1.0" resolved "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz" @@ -3922,7 +3945,7 @@ graceful-fs@4.2.10: resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== -graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.1.9, graceful-fs@^4.2.0, graceful-fs@^4.2.6, graceful-fs@^4.2.9: +graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.1.9, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.6, graceful-fs@^4.2.9: version "4.2.11" resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz" integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== @@ -6475,7 +6498,7 @@ react-is@^18.0.0: resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.3.1.tgz#e83557dc12eae63a99e003a46388b1dcbb44db7e" integrity sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg== -readable-stream@^2.0.5, readable-stream@~2.3.6: +readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@~2.3.6: version "2.3.8" 
resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz" integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== @@ -7041,7 +7064,16 @@ string-length@^4.0.1: char-regex "^1.0.2" strip-ansi "^6.0.0" -"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0": + version "4.2.3" + resolved "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + +string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: version "4.2.3" resolved "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -7078,7 +7110,14 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": + version "6.0.1" + resolved "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + +strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -7517,6 +7556,17 @@ untildify@^4.0.0: resolved "https://registry.npmjs.org/untildify/-/untildify-4.0.0.tgz" integrity sha512-KK8xQ1mkzZeg9inewmFVDNkg3l5LUhoq9kN6iWYB/CC9YMG8HA+c1Q8HwDe6dEX7kErrEVNVBO3fWsVq5iDgtw== +unzipper@^0.12.3: + version 
"0.12.3" + resolved "https://registry.yarnpkg.com/unzipper/-/unzipper-0.12.3.tgz#31958f5eed7368ed8f57deae547e5a673e984f87" + integrity sha512-PZ8hTS+AqcGxsaQntl3IRBw65QrBI6lxzqDEL7IAo/XCEqRTKGfOX56Vea5TH9SZczRVxuzk1re04z/YjuYCJA== + dependencies: + bluebird "~3.7.2" + duplexer2 "~0.1.4" + fs-extra "^11.2.0" + graceful-fs "^4.2.2" + node-int64 "^0.4.0" + update-browserslist-db@^1.0.13: version "1.0.13" resolved "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz" @@ -7763,7 +7813,7 @@ wordwrap@^1.0.0: resolved "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz" integrity sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": version "7.0.0" resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -7781,6 +7831,15 @@ wrap-ansi@^6.0.1: string-width "^4.1.0" strip-ansi "^6.0.0" +wrap-ansi@^7.0.0: + version "7.0.0" + resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + wrap-ansi@^8.1.0: version "8.1.0" resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz"