From a35afded09cb5b12f0e7d25db9d713ab281e3e1d Mon Sep 17 00:00:00 2001 From: kiminkim724 Date: Wed, 10 Dec 2025 22:49:33 -0500 Subject: [PATCH 1/3] Initial version of Hearing Backfill --- functions/src/events/scrapeEvents.ts | 3 + functions/src/hearings/backfillHearings.ts | 124 +++++++++++++++++++++ functions/src/hearings/index.ts | 1 + functions/src/index.ts | 4 + 4 files changed, 132 insertions(+) create mode 100644 functions/src/hearings/backfillHearings.ts create mode 100644 functions/src/hearings/index.ts diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index e5d2512f2..23c915436 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -414,3 +414,6 @@ class HearingScraper extends EventScraper { export const scrapeSpecialEvents = new SpecialEventsScraper().function export const scrapeSessions = new SessionScraper().function export const scrapeHearings = new HearingScraper().function +const hearingScraper = new HearingScraper() +export const getEvent = hearingScraper.getEvent.bind(hearingScraper) +export const listEvents = hearingScraper.listEvents.bind(hearingScraper) diff --git a/functions/src/hearings/backfillHearings.ts b/functions/src/hearings/backfillHearings.ts new file mode 100644 index 000000000..e68a904f8 --- /dev/null +++ b/functions/src/hearings/backfillHearings.ts @@ -0,0 +1,124 @@ +import * as functions from "firebase-functions" +import { Number, Record, Union } from "runtypes" +import { getEvent, listEvents } from "../events" +import { db } from "../firebase" +import { logFetchError } from "../common" +import { DateTime } from "luxon" + +const EventIdHearingRequest = Record({ + EventId: Number +}) +const cutoffDaysHearingRequest = Record({ + cutoffDays: Number +}) + +const backfillHearingRequest = Union( + EventIdHearingRequest, + cutoffDaysHearingRequest +) + +export const backfillHearingTranscription = functions.https.onCall( + async data => { + const checked = backfillHearingRequest.check(data) + if ("EventId" in checked) { + const event = await getEvent({ EventId: checked.EventId }) + if (!event) { + throw new functions.https.HttpsError( + "not-found", + `Event with ID ${checked.EventId} not found` + ) + } + console.log(`Backfilling event ${event.id}`) + await db.doc(`/events/${event.id}`).set(event, { merge: true }) + return { success: true, eventId: event.id } + } else if ("cutoffDays" in checked) { + const cutoffDays = checked.cutoffDays + const list = await listEvents().catch(logFetchError("event list")) + + if (!list) return + + const writer = db.bulkWriter() + const upcomingOrRecentCutoff = DateTime.now().minus({ days: cutoffDays }) + let processedCount = 0 + + for (let item of list) { + const id = (item as any)?.EventId, + event = await getEvent(item).catch(logFetchError("event", id)) + + if (!event) continue + if (event.startsAt.toMillis() < upcomingOrRecentCutoff.toMillis()) break + + writer.set(db.doc(`/events/${event.id}`), event, { merge: true }) + console.log(`Backfilling event ${event.id}`) + processedCount++ + } + await writer.close() + + return { success: true, processedCount } + } else { + throw new functions.https.HttpsError( + "invalid-argument", + "Invalid request data" + ) + } + } +) + +export const backfillHearingTranscriptionHttp = functions.https.onRequest( + async (req, res) => { + try { + const checked = backfillHearingRequest.check(req.body) + if ("EventId" in checked) { + try { + const event = await getEvent({ EventId: checked.EventId }) + if (!event) { + res + .status(404) + .send({ error: `Event with ID ${checked.EventId} not found` }) + return + } + console.log(`Backfilling event ${event.id}`) + await db.doc(`/events/${event.id}`).set(event, { merge: true }) + res.status(200).send({ eventId: event.id }) + } catch (error) { + console.error("Error fetching event:", error) + res.status(500).send({ error: "Error fetching event" }) + } + } else if ("cutoffDays" in checked) { + const cutoffDays = checked.cutoffDays + const list = await listEvents().catch(logFetchError("event list")) + + if (!list) { + res.status(404).send({ error: "No events found" }) + return + } + + const writer = db.bulkWriter() + const upcomingOrRecentCutoff = DateTime.now().minus({ + days: cutoffDays + }) + let processedCount = 0 + + for (let item of list) { + const id = (item as any)?.EventId, + event = await getEvent(item).catch(logFetchError("event", id)) + + if (!event) continue + if (event.startsAt.toMillis() < upcomingOrRecentCutoff.toMillis()) + break + + writer.set(db.doc(`/events/${event.id}`), event, { merge: true }) + console.log(`Backfilling event ${event.id}`) + processedCount++ + } + await writer.close() + + res.status(200).send({ processedCount }) + } else { + res.status(400).send({ error: "Invalid request data" }) + } + } catch (err) { + res.status(400).send({ error: err || "Invalid request" }) + } + } +) diff --git a/functions/src/hearings/index.ts b/functions/src/hearings/index.ts new file mode 100644 index 000000000..0ad8e98c8 --- /dev/null +++ b/functions/src/hearings/index.ts @@ -0,0 +1 @@ +export * from "./backfillHearings" diff --git a/functions/src/index.ts b/functions/src/index.ts index 4d61004de..7e25b88a3 100644 --- a/functions/src/index.ts +++ b/functions/src/index.ts @@ -20,6 +20,10 @@ export { syncHearingToSearchIndex, upgradeHearingSearchIndex } from "./hearings/search" +export { + backfillHearingTranscriptionHttp, + backfillHearingTranscription +} from "./hearings" export { createMemberSearchIndex, fetchMemberBatch, From 7be10de9904814a769e3ffb18b51cf32881bfdf6 Mon Sep 17 00:00:00 2001 From: kiminkim724 Date: Wed, 10 Dec 2025 22:54:19 -0500 Subject: [PATCH 2/3] Remove duplicate HearingScraper --- functions/src/events/scrapeEvents.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index 23c915436..63d70a548 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -413,7 +413,7 @@ class HearingScraper extends EventScraper { export const scrapeSpecialEvents = new SpecialEventsScraper().function export const scrapeSessions = new SessionScraper().function -export const scrapeHearings = new HearingScraper().function const hearingScraper = new HearingScraper() +export const scrapeHearings = hearingScraper.function export const getEvent = hearingScraper.getEvent.bind(hearingScraper) export const listEvents = hearingScraper.listEvents.bind(hearingScraper) From d7a1c3fb2a71498d93c10cfe6f09068cf55dbe20 Mon Sep 17 00:00:00 2001 From: kiminkim724 Date: Tue, 16 Dec 2025 20:40:01 -0500 Subject: [PATCH 3/3] Remove undefined committeeChairs --- functions/src/events/scrapeEvents.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts index 63d70a548..811fdb8ee 100644 --- a/functions/src/events/scrapeEvents.ts +++ b/functions/src/events/scrapeEvents.ts @@ -359,7 +359,7 @@ class HearingScraper extends EventScraper { host.GeneralCourtNumber, host.CommitteeCode ) - : undefined + : [] if (await shouldScrapeVideo(EventId)) { try {