From 213a324e00262699207d419adfcd1559712d83c6 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Wed, 27 Nov 2024 17:44:13 -0800 Subject: [PATCH] Added comment-groups.csv export. Colin will be using this data (or some filtered version of it) to pass to an LLM when it wants to summarize things. The code uses the summarized data from the PCA json blob instead of computing things from the raw comments and votes tables. The latter approach results in numbers that don't match up exactly with the data that appears on the HTML version of the report (our numbers are a little higher, so the Clojure backend is filtering out some votes/voters that we are not). We want the LLM to see the exact same data that's on the HTML page because it might refer to specific numbers and we want those numbers to be exactly the same as the numbers the user sees. --- server/src/routes/export.ts | 167 +++++++++++++++++++++++++++++++ server/src/server.ts | 60 +---------- server/src/utils/participants.ts | 64 ++++++++++++ 3 files changed, 232 insertions(+), 59 deletions(-) create mode 100644 server/src/utils/participants.ts diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index dab965da8c..97c02ce984 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -10,6 +10,7 @@ import { getZinvite, getZidForRid } from "../utils/zinvite"; import { getPca } from "../utils/pca"; import fail from "../utils/fail"; import logger from "../utils/logger"; +import { getPidsForGid } from "../utils/participants"; type Formatters = Record string>; const sep = "\n"; @@ -296,6 +297,168 @@ export async function sendParticipantVotesSummary(zid: number, res: Response) { ); } +type CommentGroupStats = { + tid: number; + txt: string; + total_votes: number; + total_agrees: number; + total_disagrees: number; + total_passes: number; + group_stats: Record< + number, + { + votes: number; + agrees: number; + disagrees: number; + passes: number; + } + >; +}; + +type GroupVoteStats = { + votes: Record< + number, + { + A: number; // agrees + D: number; // disagrees + S: number; // sum of all votes (agrees + disagrees + passes) + } + >; +}; + +async function sendCommentGroupsSummary(zid: number, res: Response) { + // Get PCA data to identify groups and get groupVotes + const pca = await getPca(zid); + if (!pca?.asPOJO) { + throw new Error("polis_error_no_pca_data"); + } + + const groupClusters = pca.asPOJO["group-clusters"] as Record; + const groupIds = Object.keys(groupClusters).map(Number); + const groupVotes = pca.asPOJO["group-votes"] as Record< + number, + GroupVoteStats + >; + + // Load comment texts + const commentRows = (await pgQueryP_readOnly( + "SELECT tid, txt FROM comments WHERE zid = ($1)", + [zid] + )) as { tid: number; txt: string }[]; + const commentTexts = new Map(commentRows.map((row) => [row.tid, row.txt])); + + // Initialize stats map + const commentStats = new Map(); + + // Process each group's votes + for (const groupId of groupIds) { + const groupVoteStats = groupVotes[groupId]; + if (!groupVoteStats?.votes) continue; + + // Process each comment's votes for this group + for (const [tidStr, votes] of Object.entries(groupVoteStats.votes)) { + const tid = parseInt(tidStr); + + // Initialize stats for this comment if we haven't seen it before + if (!commentStats.has(tid)) { + const groupStats: Record< + number, + { votes: number; agrees: number; disagrees: number; passes: number } + > = {}; + for (const gid of groupIds) { + groupStats[gid] = { votes: 0, agrees: 0, disagrees: 0, passes: 0 }; + } + + commentStats.set(tid, { + tid: tid, + txt: commentTexts.get(tid) || "", + total_votes: 0, + total_agrees: 0, + total_disagrees: 0, + total_passes: 0, + group_stats: groupStats, + }); + } + + // Get the stats object for this comment + const stats = commentStats.get(tid)!; + const groupStats = stats.group_stats[groupId]; + + // Update group stats + groupStats.agrees = votes.A; + groupStats.disagrees = votes.D; + groupStats.votes = votes.S; // S is the total number of votes + groupStats.passes = votes.S - (votes.A + votes.D); // Calculate passes from the sum + } + } + + // Calculate totals for each comment + for (const stats of commentStats.values()) { + stats.total_agrees = Object.values(stats.group_stats).reduce( + (sum, g) => sum + g.agrees, + 0 + ); + stats.total_disagrees = Object.values(stats.group_stats).reduce( + (sum, g) => sum + g.disagrees, + 0 + ); + stats.total_passes = Object.values(stats.group_stats).reduce( + (sum, g) => sum + g.passes, + 0 + ); + stats.total_votes = Object.values(stats.group_stats).reduce( + (sum, g) => sum + g.votes, + 0 + ); + } + + // Format and send CSV + res.setHeader("content-type", "text/csv"); + + // Create headers + const headers = [ + "comment-id", + "comment", + "total-votes", + "total-agrees", + "total-disagrees", + "total-passes", + ]; + for (const groupId of groupIds) { + const groupLetter = String.fromCharCode(97 + groupId); // 97 is 'a' in ASCII + headers.push( + `group-${groupLetter}-votes`, + `group-${groupLetter}-agrees`, + `group-${groupLetter}-disagrees`, + `group-${groupLetter}-passes` + ); + } + res.write(headers.join(",") + sep); + + // Write data rows + for (const stats of commentStats.values()) { + const row = [ + stats.tid, + formatEscapedText(stats.txt), + stats.total_votes, + stats.total_agrees, + stats.total_disagrees, + stats.total_passes, + ]; + for (const groupId of groupIds) { + const groupStats = stats.group_stats[groupId]; + row.push( + groupStats.votes, + groupStats.agrees, + groupStats.disagrees, + groupStats.passes + ); + } + res.write(row.join(",") + sep); + } + res.end(); +} + export async function handle_GET_reportExport( req: { p: { rid: string; report_type: string }; @@ -329,6 +492,10 @@ export async function handle_GET_reportExport( await sendParticipantVotesSummary(zid, res); break; + case "comment-groups.csv": + await sendCommentGroupsSummary(zid, res); + break; + default: fail(res, 404, "polis_error_data_unknown_report"); break; diff --git a/server/src/server.ts b/server/src/server.ts index b811fb8b6e..22765b662f 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -51,6 +51,7 @@ import Config from "./config"; import fail from "./utils/fail"; import { PcaCacheItem, getPca, fetchAndCacheLatestPcaData } from "./utils/pca"; import { getZinvite, getZinvites, getZidForRid } from "./utils/zinvite"; +import { getBidIndexToPidMapping, getPidsForGid } from "./utils/participants"; import { handle_GET_reportExport } from "./routes/export"; @@ -1720,27 +1721,6 @@ function initializePolisHelpers() { // return res.end(); } - function getBidIndexToPidMapping(zid: number, math_tick: number) { - math_tick = math_tick || -1; - return pgQueryP_readOnly( - "select * from math_bidtopid where zid = ($1) and math_env = ($2);", - [zid, Config.mathEnv] - // Argument of type '(rows: string | any[]) => any' is not assignable to parameter of type '(value: unknown) => any'. - // Types of parameters 'rows' and 'value' are incompatible. - // Type 'unknown' is not assignable to type 'string | any[]'. - // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - ).then((rows: string | any[]) => { - if (!rows || !rows.length) { - // Could actually be a 404, would require more work to determine that. - return new Error("polis_err_get_pca_results_missing"); - } else if (rows[0].data.math_tick <= math_tick) { - return new Error("polis_err_get_pca_results_not_new"); - } else { - return rows[0].data; - } - }); - } function handle_GET_bidToPid( req: { p: { zid: any; math_tick: any } }, res: { @@ -10994,44 +10974,6 @@ Thanks for using Polis! ); } - function getPidsForGid(zid: any, gid: number, math_tick: number) { - return Promise.all([ - getPca(zid, math_tick), - getBidIndexToPidMapping(zid, math_tick), - ]).then(function (o: ParticipantOption[]) { - if (!o[0] || !o[0].asPOJO) { - return []; - } - o[0] = o[0].asPOJO; - let clusters = o[0]["group-clusters"]; - let indexToBid = o[0]["base-clusters"].id; // index to bid - let bidToIndex = []; - for (let i = 0; i < indexToBid.length; i++) { - bidToIndex[indexToBid[i]] = i; - } - let indexToPids = o[1].bidToPid; // actually index to [pid] - let cluster = clusters[gid]; - if (!cluster) { - return []; - } - let members = cluster.members; // bids - let pids: any[] = []; - for (var i = 0; i < members.length; i++) { - let bid = members[i]; - let index = bidToIndex[bid]; - let morePids = indexToPids[index]; - Array.prototype.push.apply(pids, morePids); - } - pids = pids.map(function (x) { - return parseInt(x); - }); - pids.sort(function (a, b) { - return a - b; - }); - return pids; - }); - } - function geoCodeWithGoogleApi(locationString: string) { let googleApiKey = process.env.GOOGLE_API_KEY; let address = encodeURI(locationString); diff --git a/server/src/utils/participants.ts b/server/src/utils/participants.ts new file mode 100644 index 0000000000..8db6d9f0f3 --- /dev/null +++ b/server/src/utils/participants.ts @@ -0,0 +1,64 @@ +import { getPca } from "./pca"; +import { ParticipantOption } from "../d"; +import { queryP_readOnly as pgQueryP_readOnly } from "../db/pg-query"; +import Config from "../config"; + +export function getBidIndexToPidMapping(zid: number, math_tick: number) { + math_tick = math_tick || -1; + return pgQueryP_readOnly( + "select * from math_bidtopid where zid = ($1) and math_env = ($2);", + [zid, Config.mathEnv] + // Argument of type '(rows: string | any[]) => any' is not assignable to parameter of type '(value: unknown) => any'. + // Types of parameters 'rows' and 'value' are incompatible. + // Type 'unknown' is not assignable to type 'string | any[]'. + // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + ).then((rows: string | any[]) => { + if (!rows || !rows.length) { + // Could actually be a 404, would require more work to determine that. + return new Error("polis_err_get_pca_results_missing"); + } else if (rows[0].data.math_tick <= math_tick) { + return new Error("polis_err_get_pca_results_not_new"); + } else { + return rows[0].data; + } + }); +} + +export function getPidsForGid(zid: any, gid: number, math_tick: number) { + return Promise.all([ + getPca(zid, math_tick), + getBidIndexToPidMapping(zid, math_tick), + ]).then(function (o: ParticipantOption[]) { + if (!o[0] || !o[0].asPOJO) { + return []; + } + o[0] = o[0].asPOJO; + let clusters = o[0]["group-clusters"]; + let indexToBid = o[0]["base-clusters"].id; // index to bid + let bidToIndex = []; + for (let i = 0; i < indexToBid.length; i++) { + bidToIndex[indexToBid[i]] = i; + } + let indexToPids = o[1].bidToPid; // actually index to [pid] + let cluster = clusters[gid]; + if (!cluster) { + return []; + } + let members = cluster.members; // bids + let pids: any[] = []; + for (var i = 0; i < members.length; i++) { + let bid = members[i]; + let index = bidToIndex[bid]; + let morePids = indexToPids[index]; + Array.prototype.push.apply(pids, morePids); + } + pids = pids.map(function (x) { + return parseInt(x); + }); + pids.sort(function (a, b) { + return a - b; + }); + return pids; + }); +}