From 239fe7fa5d49d7750dae540dad09a1f4bb67df36 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Wed, 11 Sep 2024 14:47:45 -0700 Subject: [PATCH 01/13] Switch to non-native Postgres client. And add a "streaming" API for making database queries, which streams the results from the database to Node as they are generated by Postgres. This allows Node to process the rows one by one (and garbage collect in between), which is much easier on the VM when we need to do big queries that summarize data (or just format it and incrementally spit it out an HTTP response). --- server/package-lock.json | 113 +++++++++++++++++++++++++++++++++----- server/package.json | 2 +- server/src/db/pg-query.ts | 46 ++++++++++++++-- 3 files changed, 141 insertions(+), 20 deletions(-) diff --git a/server/package-lock.json b/server/package-lock.json index 92428cdfb..a4cef6ccb 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -35,7 +35,7 @@ "p3p": "~0.0.2", "pg": "~8.8.0", "pg-connection-string": "~2.5.0", - "pg-native": "~3.0.1", + "pg-query-stream": "^4.6.0", "replacestream": "~4.0.0", "request": "~2.88.2", "request-promise": "~4.2.6", @@ -2513,6 +2513,8 @@ "version": "1.5.0", "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "optional": true, + "peer": true, "dependencies": { "file-uri-to-path": "1.0.0" } @@ -6370,15 +6372,26 @@ } }, "node_modules/libpq": { - "version": "1.8.12", - "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.12.tgz", - "integrity": "sha512-4lUY9BD9suz76mVS0kH4rRgRy620g/c9YZH5GYC3smfIpjtj6KiPuQ4IwQSHSZMMMhMM3tBFrYUrw8mHOOZVeg==", + "version": "1.8.13", + "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.13.tgz", + "integrity": "sha512-t1wpnGVgwRIFSKoe4RFUllAFj953kNMcdXhGvFJwI0r6lJQqgSwTeiIciaCinjOmHk0HnFeWQSMC6Uw2591G4A==", "hasInstallScript": true, + "license": "MIT", + "optional": true, + "peer": 
true, "dependencies": { "bindings": "1.5.0", - "nan": "^2.14.0" + "nan": "2.19.0" } }, + "node_modules/libpq/node_modules/nan": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.19.0.tgz", + "integrity": "sha512-nO1xXxfh/RWNxfd/XPfbIfFk5vgLsAxUR9y5O0cHMJu/AW9U95JLXqthYHjEp+8gQ5p96K9jUp8nbVOxCdRbtw==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -7349,6 +7362,15 @@ "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.5.0.tgz", "integrity": "sha512-r5o/V/ORTA6TmUnyWZR9nCj1klXCO2CEKNRlVuJptZe85QuhFayC7WeMic7ndayT5IRIR0S0xFxFi2ousartlQ==" }, + "node_modules/pg-cursor": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/pg-cursor/-/pg-cursor-2.11.0.tgz", + "integrity": "sha512-TLCOCtu+rqMarzjUi+/Ffc2DV5ZqO/27y5GqnK9Z3w51rWXMwC8FcO96Uf9/ORo5o+qRXEVJxM9Ts3K2K31MLg==", + "license": "MIT", + "peerDependencies": { + "pg": "^8" + } + }, "node_modules/pg-int8": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", @@ -7361,6 +7383,8 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/pg-native/-/pg-native-3.0.1.tgz", "integrity": "sha512-LBVNWkNh0fVx/cienARRP2y22J5OpUsKBe0TpxzAx3arEUUdIs77aLSAHS3scS7SMaqc+OkG40CEu5fN0/cjIw==", + "optional": true, + "peer": true, "dependencies": { "libpq": "^1.8.10", "pg-types": "^1.12.1", @@ -7370,12 +7394,16 @@ "node_modules/pg-native/node_modules/isarray": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==" + "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==", + "optional": true, + "peer": true }, "node_modules/pg-native/node_modules/pg-types": { 
"version": "1.13.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.13.0.tgz", "integrity": "sha512-lfKli0Gkl/+za/+b6lzENajczwZHc7D5kiUCZfgm914jipD2kIOIvEkAhZ8GrW3/TUoP9w8FHjwpPObBye5KQQ==", + "optional": true, + "peer": true, "dependencies": { "pg-int8": "1.0.1", "postgres-array": "~1.0.0", @@ -7388,6 +7416,8 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-1.0.3.tgz", "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==", + "optional": true, + "peer": true, "engines": { "node": ">=0.10.0" } @@ -7396,6 +7426,8 @@ "version": "1.0.31", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.31.tgz", "integrity": "sha512-tco/Dwv1f/sgIgN6CWdj/restacPKNskK6yps1981ivH2ZmLYcs5o5rVzL3qaO/cSkhN8hYOMWs7+glzOLSgRg==", + "optional": true, + "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.1", @@ -7406,7 +7438,9 @@ "node_modules/pg-native/node_modules/string_decoder": { "version": "0.10.31", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==" + "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==", + "optional": true, + "peer": true }, "node_modules/pg-pool": { "version": "3.5.2", @@ -7421,6 +7455,18 @@ "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.5.0.tgz", "integrity": "sha512-muRttij7H8TqRNu/DxrAJQITO4Ac7RmX3Klyr/9mJEOBeIpgnF8f9jAfRz5d3XwQZl5qBjF9gLsUtMPJE0vezQ==" }, + "node_modules/pg-query-stream": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/pg-query-stream/-/pg-query-stream-4.6.0.tgz", + "integrity": "sha512-sg2Hewe6ge6osEY07zGu7Z8djrsQBvyiTy5ZjQffoSatEgnNNVsV3EWDm9Px/8R9oaAL1YnfnP8AXPMmfzujZg==", + "license": "MIT", + "dependencies": { + "pg-cursor": "^2.11.0" + 
}, + "peerDependencies": { + "pg": "^8" + } + }, "node_modules/pg-types": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", @@ -11733,6 +11779,8 @@ "version": "1.5.0", "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "optional": true, + "peer": true, "requires": { "file-uri-to-path": "1.0.0" } @@ -14707,12 +14755,23 @@ } }, "libpq": { - "version": "1.8.12", - "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.12.tgz", - "integrity": "sha512-4lUY9BD9suz76mVS0kH4rRgRy620g/c9YZH5GYC3smfIpjtj6KiPuQ4IwQSHSZMMMhMM3tBFrYUrw8mHOOZVeg==", + "version": "1.8.13", + "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.13.tgz", + "integrity": "sha512-t1wpnGVgwRIFSKoe4RFUllAFj953kNMcdXhGvFJwI0r6lJQqgSwTeiIciaCinjOmHk0HnFeWQSMC6Uw2591G4A==", + "optional": true, + "peer": true, "requires": { "bindings": "1.5.0", - "nan": "^2.14.0" + "nan": "2.19.0" + }, + "dependencies": { + "nan": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.19.0.tgz", + "integrity": "sha512-nO1xXxfh/RWNxfd/XPfbIfFk5vgLsAxUR9y5O0cHMJu/AW9U95JLXqthYHjEp+8gQ5p96K9jUp8nbVOxCdRbtw==", + "optional": true, + "peer": true + } } }, "lines-and-columns": { @@ -15471,6 +15530,12 @@ "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.5.0.tgz", "integrity": "sha512-r5o/V/ORTA6TmUnyWZR9nCj1klXCO2CEKNRlVuJptZe85QuhFayC7WeMic7ndayT5IRIR0S0xFxFi2ousartlQ==" }, + "pg-cursor": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/pg-cursor/-/pg-cursor-2.11.0.tgz", + "integrity": "sha512-TLCOCtu+rqMarzjUi+/Ffc2DV5ZqO/27y5GqnK9Z3w51rWXMwC8FcO96Uf9/ORo5o+qRXEVJxM9Ts3K2K31MLg==", + "requires": {} + }, "pg-int8": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", @@ -15480,6 +15545,8 @@ "version": "3.0.1", "resolved": 
"https://registry.npmjs.org/pg-native/-/pg-native-3.0.1.tgz", "integrity": "sha512-LBVNWkNh0fVx/cienARRP2y22J5OpUsKBe0TpxzAx3arEUUdIs77aLSAHS3scS7SMaqc+OkG40CEu5fN0/cjIw==", + "optional": true, + "peer": true, "requires": { "libpq": "^1.8.10", "pg-types": "^1.12.1", @@ -15489,12 +15556,16 @@ "isarray": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==" + "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==", + "optional": true, + "peer": true }, "pg-types": { "version": "1.13.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.13.0.tgz", "integrity": "sha512-lfKli0Gkl/+za/+b6lzENajczwZHc7D5kiUCZfgm914jipD2kIOIvEkAhZ8GrW3/TUoP9w8FHjwpPObBye5KQQ==", + "optional": true, + "peer": true, "requires": { "pg-int8": "1.0.1", "postgres-array": "~1.0.0", @@ -15506,12 +15577,16 @@ "postgres-array": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-1.0.3.tgz", - "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==" + "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==", + "optional": true, + "peer": true }, "readable-stream": { "version": "1.0.31", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.31.tgz", "integrity": "sha512-tco/Dwv1f/sgIgN6CWdj/restacPKNskK6yps1981ivH2ZmLYcs5o5rVzL3qaO/cSkhN8hYOMWs7+glzOLSgRg==", + "optional": true, + "peer": true, "requires": { "core-util-is": "~1.0.0", "inherits": "~2.0.1", @@ -15522,7 +15597,9 @@ "string_decoder": { "version": "0.10.31", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==" + 
"integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==", + "optional": true, + "peer": true } } }, @@ -15537,6 +15614,14 @@ "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.5.0.tgz", "integrity": "sha512-muRttij7H8TqRNu/DxrAJQITO4Ac7RmX3Klyr/9mJEOBeIpgnF8f9jAfRz5d3XwQZl5qBjF9gLsUtMPJE0vezQ==" }, + "pg-query-stream": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/pg-query-stream/-/pg-query-stream-4.6.0.tgz", + "integrity": "sha512-sg2Hewe6ge6osEY07zGu7Z8djrsQBvyiTy5ZjQffoSatEgnNNVsV3EWDm9Px/8R9oaAL1YnfnP8AXPMmfzujZg==", + "requires": { + "pg-cursor": "^2.11.0" + } + }, "pg-types": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", diff --git a/server/package.json b/server/package.json index c24349856..5390857c2 100644 --- a/server/package.json +++ b/server/package.json @@ -51,7 +51,7 @@ "p3p": "~0.0.2", "pg": "~8.8.0", "pg-connection-string": "~2.5.0", - "pg-native": "~3.0.1", + "pg-query-stream": "^4.6.0", "replacestream": "~4.0.0", "request": "~2.88.2", "request-promise": "~4.2.6", diff --git a/server/src/db/pg-query.ts b/server/src/db/pg-query.ts index 01fdca7af..9e2ef2e1e 100644 --- a/server/src/db/pg-query.ts +++ b/server/src/db/pg-query.ts @@ -1,6 +1,7 @@ import { isFunction, isString, isUndefined } from "underscore"; -import { native as pgnative, Pool } from "pg"; //.native, // native provides ssl (needed for dev laptop to access) http://stackoverflow.com/questions/10279965/authentication-error-when-connecting-to-heroku-postgresql-databa +import { Pool, QueryResult } from "pg"; import { parse as parsePgConnectionString } from "pg-connection-string"; +import QueryStream from "pg-query-stream"; import Config from "../config"; import logger from "../utils/logger"; @@ -53,19 +54,19 @@ const readsPgConnection = Object.assign( // import pgnative // Object is possibly 'null'.ts(2531) // @ts-ignore -const readWritePool = new 
pgnative.Pool(pgConnection); +const readWritePool = new Pool(pgConnection); // (alias) const pgnative: typeof Pg | null // import pgnative // Object is possibly 'null'.ts(2531) // @ts-ignore -const readPool = new pgnative.Pool(readsPgConnection); +const readPool = new Pool(readsPgConnection); // Same syntax as pg.client.query, but uses connection pool // Also takes care of calling 'done'. -function queryImpl(pool: Pool, queryString?: any, ...args: undefined[]) { +function queryImpl(pool: Pool, queryString?: any, ...args: any[]) { // variable arity depending on whether or not query has params (default to []) let params: never[] | undefined; - let callback: ((arg0: any, arg1?: undefined) => void) | undefined; + let callback: ((arg0: any, arg1?: any) => void) | undefined; if (isFunction(args[1])) { params = args[0]; callback = args[1]; @@ -200,6 +201,39 @@ function queryP_metered_readOnly(name: any, queryString: any, params: any) { return queryP_metered_impl(true, ...arguments); } +function stream_queryP_readOnly( + queryString: string, + params: any[], + onRow: (row: any) => void, + onEnd: () => void, + onError: (error: Error) => void +) { + const query = new QueryStream(queryString, params); + + readPool.connect((err, client, done) => { + if (err) { + onError(err); + return; + } + + const stream = client.query(query); + + stream.on("data", (row: QueryResult) => { + onRow(row); + }); + + stream.on("end", () => { + done(); + onEnd(); + }); + + stream.on("error", (error: Error) => { + done(error); + onError(error); + }); + }); +} + export { query, query_readOnly, @@ -208,6 +242,7 @@ export { queryP_metered_readOnly, queryP_readOnly, queryP_readOnly_wRetryIfEmpty, + stream_queryP_readOnly, }; export default { @@ -218,4 +253,5 @@ export default { queryP_metered_readOnly, queryP_readOnly, queryP_readOnly_wRetryIfEmpty, + stream_queryP_readOnly, }; From fc3b1a6756754f1d1923eee099fb0b5b0c323127 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Wed, 11 Sep 2024 14:56:42 
-0700 Subject: [PATCH 02/13] Mostly refactoring. This moves the handle_GET_reportExport route into its own file, which necessitated refactoring some other things (zinvite and pca) out of server.ts as well. Chipping away at the monolith. This also converts the votes.csv report to use the streaming query from Postgres, which is mostly a smoke test. It seems to work, so next I'll convert it to stream the results incrementally to the HTTP response as well. --- server/src/routes/export.ts | 185 +++++++++++ server/src/server.ts | 612 +----------------------------------- server/src/utils/pca.ts | 317 +++++++++++++++++++ server/src/utils/zinvite.ts | 116 +++++++ 4 files changed, 632 insertions(+), 598 deletions(-) create mode 100644 server/src/routes/export.ts create mode 100644 server/src/utils/pca.ts create mode 100644 server/src/utils/zinvite.ts diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts new file mode 100644 index 000000000..9f1da390e --- /dev/null +++ b/server/src/routes/export.ts @@ -0,0 +1,185 @@ +// Copyright (C) 2012-present, The Authors. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +"use strict"; + +import { + queryP_readOnly as pgQueryP_readOnly, + stream_queryP_readOnly as stream_pgQueryP_readOnly, +} from "../db/pg-query"; +import { getZinvite, getZidForRid } from "../utils/zinvite"; +import { getPca } from "../utils/pca"; +import fail from "../utils/fail"; +import logger from "../utils/logger"; + +type Formatters = Record string>; +const sep = "\n"; + +function formatCSVHeaders(colFns: Formatters) { + return Object.keys(colFns).join(","); +} + +function formatCSVRow(row: object, colFns: Formatters) { + const fns = Object.values(colFns); + let csv = ""; + for (let ii = 0; ii < fns.length; ii += 1) { + if (ii > 0) csv += ","; + csv += fns[ii](row); + } + return csv; +} + +function formatCSV(colFns: Formatters, rows: object[]): string { + let csv = formatCSVHeaders(colFns) + sep; + if (rows.length > 0) { + for (const row of rows) { + csv += formatCSVRow(row, colFns); + csv += sep; + } + } + return csv; +} + +async function loadConversationSummary(zid: number, siteUrl: string) { + const [zinvite, convoRows, commentersRow, pca] = await Promise.all([ + getZinvite(zid), + pgQueryP_readOnly( + `SELECT topic, description FROM conversations WHERE zid = $1`, + [zid] + ), + pgQueryP_readOnly( + `SELECT COUNT(DISTINCT pid) FROM comments WHERE zid = $1`, + [zid] + ), + getPca(zid), + ]); + if (!zinvite || !convoRows || !commentersRow || !pca) { + throw new Error("polis_error_data_unknown_report"); + } + + const convo = (convoRows as { topic: string; description: string }[])[0]; + const commenters = (commentersRow as { count: number }[])[0].count; + + type PcaData = { + "in-conv": number[]; + "user-vote-counts": Record; + "group-clusters": Record; + "n-cmts": number; + }; + const data = pca.asPOJO as PcaData; + + const escapeQuotes = (s: string) => s.replace(/"/g, '""'); + return [ + ["topic", `"${escapeQuotes(convo.topic)}"`], + ["url", `${siteUrl}/${zinvite}`], + ["voters", Object.keys(data["user-vote-counts"]).length], + ["voters-in-conv", 
data["in-conv"].length], + ["commenters", commenters], + ["comments", data["n-cmts"]], + ["groups", Object.keys(data["group-clusters"]).length], + ["conversation-description", `"${escapeQuotes(convo.description)}"`], + ].map((row) => row.join(",")); +} + +const loadCommentSummary = (zid: number) => + pgQueryP_readOnly( + `SELECT + created, + tid, + pid, + COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, + COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, + mod, + txt + FROM comments + WHERE zid = $1`, + [zid] + ); + +const formatDatetime = (timestamp: string) => + new Date(parseInt(timestamp)).toString(); + +export async function handle_GET_reportExport( + req: { + p: { rid: string; report_type: string }; + headers: { host: string; "x-forwarded-proto": string }; + }, + res: { + setHeader: (key: string, value: string) => void; + send: (data: string) => void; + write: (data: string) => void; + end: () => void; + } +) { + const { rid, report_type } = req.p; + try { + const zid = await getZidForRid(rid); + if (!zid) { + fail(res, 404, "polis_error_data_unknown_report"); + return; + } + + switch (report_type) { + case "summary.csv": + const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; + res.setHeader("content-type", "text/csv"); + res.send((await loadConversationSummary(zid, siteUrl)).join(sep)); + break; + + case "comments.csv": + const rows = (await loadCommentSummary(zid)) as object[] | undefined; + if (rows) { + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => String(Math.floor(row.created / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, 
+ rows + ) + ); + } else fail(res, 500, "polis_err_data_export"); + break; + + case "votes.csv": + const formatters: Formatters = { + timestamp: (row) => String(Math.floor(row.timestamp / 1000)), + datetime: (row) => formatDatetime(row.timestamp), + "comment-id": (row) => String(row.tid), + "voter-id": (row) => String(row.pid), + vote: (row) => String(row.vote), + }; + res.setHeader("Content-Type", "text/csv"); + res.write(formatCSVHeaders(formatters) + sep); + + stream_pgQueryP_readOnly( + "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", + [zid], + (row) => res.write(formatCSVRow(row, formatters) + sep), + () => res.end(), + (error) => { + // Handle any errors + logger.error("polis_err_report_votes_csv", error); + fail(res, 500, "polis_err_data_export", error); + } + ); + break; + + default: + fail(res, 404, "polis_error_data_unknown_report"); + break; + } + } catch (err) { + const msg = + err instanceof Error && err.message && err.message.startsWith("polis_") + ? 
err.message + : "polis_err_data_export"; + fail(res, 500, msg, err); + } +} diff --git a/server/src/server.ts b/server/src/server.ts index 21cd8edbe..d86319ad9 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -29,7 +29,6 @@ import responseTime from "response-time"; import request from "request-promise"; // includes Request, but adds promise methods import LruCache from "lru-cache"; import timeout from "connect-timeout"; -import zlib from "zlib"; import _ from "underscore"; import pg from "pg"; import { encode } from "html-entities"; @@ -37,10 +36,23 @@ import { encode } from "html-entities"; import { METRICS_IN_RAM, addInRamMetric, MPromise } from "./utils/metered"; import CreateUser from "./auth/create-user"; import Password from "./auth/password"; -import dbPgQuery from "./db/pg-query"; +import dbPgQuery, { + query as pgQuery, + query_readOnly as pgQuery_readOnly, + queryP as pgQueryP, + queryP_metered as pgQueryP_metered, + queryP_metered_readOnly as pgQueryP_metered_readOnly, + queryP_readOnly as pgQueryP_readOnly, + stream_queryP_readOnly as stream_pgQueryP_readOnly, + queryP_readOnly_wRetryIfEmpty as pgQueryP_readOnly_wRetryIfEmpty, +} from "./db/pg-query"; import Config from "./config"; import fail from "./utils/fail"; +import { PcaCacheItem, getPca, fetchAndCacheLatestPcaData } from "./utils/pca"; +import { getZinvite, getZinvites, getZidForRid } from "./utils/zinvite"; + +import { handle_GET_reportExport } from "./routes/export"; import { Body, @@ -69,20 +81,7 @@ import { AWS.config.update({ region: Config.awsRegion }); const devMode = Config.isDevMode; const s3Client = new AWS.S3({ apiVersion: "2006-03-01" }); -// Property 'Client' does not exist on type '{ query: (...args: any[]) => void; query_readOnly: -// (...args: any[]) => void; queryP: (...args: any[]) => Promise; queryP_metered: -// (name: any, queryString: any, params: any) => any; queryP_metered_readOnly: -// (name: any, queryString: any, params: any) => any; queryP_readOnly: -// 
(...args: any[]) => Promise <...>; ...'.ts(2339) -// @ts-ignore const escapeLiteral = pg.Client.prototype.escapeLiteral; -const pgQuery = dbPgQuery.query; -const pgQuery_readOnly = dbPgQuery.query_readOnly; -const pgQueryP = dbPgQuery.queryP; -const pgQueryP_metered = dbPgQuery.queryP_metered; -const pgQueryP_metered_readOnly = dbPgQuery.queryP_metered_readOnly; -const pgQueryP_readOnly = dbPgQuery.queryP_readOnly; -const pgQueryP_readOnly_wRetryIfEmpty = dbPgQuery.queryP_readOnly_wRetryIfEmpty; const doSendVerification = CreateUser.doSendVerification; const generateAndRegisterZinvite = CreateUser.generateAndRegisterZinvite; const generateToken = Password.generateToken; @@ -1298,82 +1297,6 @@ function initializePolisHelpers() { res.status(200).json({}); } - type PcaCacheItem = { - asPOJO: any; - consensus: { agree?: any; disagree?: any }; - repness: { [x: string]: any }; - asJSON: string; - asBufferOfGzippedJson: any; - expiration: number; - }; - let pcaCacheSize = Config.cacheMathResults ? 300 : 1; - let pcaCache = new LruCache({ - max: pcaCacheSize, - }); - - let lastPrefetchedMathTick = -1; - - // this scheme might not last forever. For now, there are only a couple of MB worth of conversation pca data. - function fetchAndCacheLatestPcaData() { - let lastPrefetchPollStartTime = Date.now(); - - function waitTime() { - let timePassed = Date.now() - lastPrefetchPollStartTime; - return Math.max(0, 2500 - timePassed); - } - // cursor.sort([["math_tick", "asc"]]); - pgQueryP_readOnly( - "select * from math_main where caching_tick > ($1) order by caching_tick limit 10;", - [lastPrefetchedMathTick] - ) - // Argument of type '(rows: any[]) => void' is not assignable to parameter of type '(value: unknown) => void | PromiseLike'. - // Types of parameters 'rows' and 'value' are incompatible. 
- // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - .then((rows: any[]) => { - if (!rows || !rows.length) { - // call again - logger.info("mathpoll done"); - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - return; - } - - let results = rows.map( - (row: { data: any; math_tick: any; caching_tick: any }) => { - let item = row.data; - - if (row.math_tick) { - item.math_tick = Number(row.math_tick); - } - if (row.caching_tick) { - item.caching_tick = Number(row.caching_tick); - } - - logger.info("mathpoll updating", { - caching_tick: item.caching_tick, - zid: item.zid, - }); - - // let prev = pcaCache.get(item.zid); - if (item.caching_tick > lastPrefetchedMathTick) { - lastPrefetchedMathTick = item.caching_tick; - } - - processMathObject(item); - - return updatePcaCache(item.zid, item); - } - ); - Promise.all(results).then((a: any) => { - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - }); - }) - .catch((err: any) => { - logger.error("mathpoll error", err); - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - }); - } - // don't start immediately, let other things load first. // setTimeout(fetchAndCacheLatestPcaData, 5000); fetchAndCacheLatestPcaData; // TODO_DELETE @@ -1446,240 +1369,6 @@ function initializePolisHelpers() { } */ - function processMathObject(o: { [x: string]: any }) { - function remapSubgroupStuff(g: { val: any[] }) { - if (_.isArray(g.val)) { - g.val = g.val.map((x: { id: number }) => { - return { id: Number(x.id), val: x }; - }); - } else { - // Argument of type '(id: number) => { id: number; val: any; }' - // is not assignable to parameter of type '(value: string, index: number, array: string[]) => { id: number; val: any; }'. - // Types of parameters 'id' and 'value' are incompatible. 
- // Type 'string' is not assignable to type 'number'.ts(2345) - // @ts-ignore - g.val = _.keys(g.val).map((id: number) => { - return { id: Number(id), val: g.val[id] }; - }); - } - return g; - } - - // Normalize so everything is arrays of objects (group-clusters is already in this format, but needs to have the val: subobject style too). - - if (_.isArray(o["group-clusters"])) { - // NOTE this is different since group-clusters is already an array. - o["group-clusters"] = o["group-clusters"].map((g: { id: any }) => { - return { id: Number(g.id), val: g }; - }); - } - - if (!_.isArray(o["repness"])) { - o["repness"] = _.keys(o["repness"]).map((gid: string | number) => { - return { id: Number(gid), val: o["repness"][gid] }; - }); - } - if (!_.isArray(o["group-votes"])) { - o["group-votes"] = _.keys(o["group-votes"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["group-votes"][gid] }; - } - ); - } - if (!_.isArray(o["subgroup-repness"])) { - o["subgroup-repness"] = _.keys(o["subgroup-repness"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-repness"][gid] }; - } - ); - o["subgroup-repness"].map(remapSubgroupStuff); - } - if (!_.isArray(o["subgroup-votes"])) { - o["subgroup-votes"] = _.keys(o["subgroup-votes"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-votes"][gid] }; - } - ); - o["subgroup-votes"].map(remapSubgroupStuff); - } - if (!_.isArray(o["subgroup-clusters"])) { - o["subgroup-clusters"] = _.keys(o["subgroup-clusters"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-clusters"][gid] }; - } - ); - o["subgroup-clusters"].map(remapSubgroupStuff); - } - - // Edge case where there are two groups and one is huge, split the large group. - // Once we have a better story for h-clust in the participation view, then we can just show the h-clust instead. 
- // var groupVotes = o['group-votes']; - // if (_.keys(groupVotes).length === 2 && o['subgroup-votes'] && o['subgroup-clusters'] && o['subgroup-repness']) { - // var s0 = groupVotes[0].val['n-members']; - // var s1 = groupVotes[1].val['n-members']; - // const scaleRatio = 1.1; - // if (s1 * scaleRatio < s0) { - // o = splitTopLevelGroup(o, groupVotes[0].id); - // } else if (s0 * scaleRatio < s1) { - // o = splitTopLevelGroup(o, groupVotes[1].id); - // } - // } - - // // Gaps in the gids are not what we want to show users, and they make client development difficult. - // // So this guarantees that the gids are contiguous. TODO look into Darwin. - // o = packGids(o); - - // Un-normalize to maintain API consistency. - // This could removed in a future API version. - function toObj(a: string | any[]) { - let obj = {}; - if (!a) { - return obj; - } - for (let i = 0; i < a.length; i++) { - // Element implicitly has an 'any' type - // because expression of type 'any' can't be used to index type '{ } '.ts(7053) - // @ts-ignore - obj[a[i].id] = a[i].val; - // Element implicitly has an 'any' type - // because expression of type 'any' can't be used to index type '{ } '.ts(7053) - // @ts-ignore - obj[a[i].id].id = a[i].id; - } - return obj; - } - function toArray(a: any[]) { - if (!a) { - return []; - } - return a.map((g: { id: any; val: any }) => { - let id = g.id; - g = g.val; - g.id = id; - return g; - }); - } - o["repness"] = toObj(o["repness"]); - o["group-votes"] = toObj(o["group-votes"]); - o["group-clusters"] = toArray(o["group-clusters"]); - - delete o["subgroup-repness"]; - delete o["subgroup-votes"]; - delete o["subgroup-clusters"]; - return o; - } - - function getPca( - zid?: any, - math_tick?: number - ): Promise { - let cached = pcaCache.get(zid); - // Object is of type 'unknown'.ts(2571) - // @ts-ignore - if (cached && cached.expiration < Date.now()) { - cached = undefined; - } - // Object is of type 'unknown'.ts(2571) - // @ts-ignore - let cachedPOJO = cached 
&& cached.asPOJO; - if (cachedPOJO) { - if (cachedPOJO.math_tick <= (math_tick || 0)) { - logger.info("math was cached but not new", { - zid, - cached_math_tick: cachedPOJO.math_tick, - query_math_tick: math_tick, - }); - return Promise.resolve(undefined); - } else { - logger.info("math from cache", { zid, math_tick }); - return Promise.resolve(cached); - } - } - - logger.info("mathpoll cache miss", { zid, math_tick }); - - // NOTE: not caching results from this query for now, think about this later. - // not caching these means that conversations without new votes might not be cached. (closed conversations may be slower to load) - // It's probably not difficult to cache, but keeping things simple for now, and only caching things that come down with the poll. - - let queryStart = Date.now(); - - return pgQueryP_readOnly( - "select * from math_main where zid = ($1) and math_env = ($2);", - [zid, Config.mathEnv] - // Argument of type '(rows: string | any[]) => Promise | null' is not assignable to parameter of type '(value: unknown) => any'. - // Types of parameters 'rows' and 'value' are incompatible. - // Type 'unknown' is not assignable to type 'string | any[]'. 
- // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - ).then((rows: string | any[]) => { - let queryEnd = Date.now(); - let queryDuration = queryEnd - queryStart; - addInRamMetric("pcaGetQuery", queryDuration); - - if (!rows || !rows.length) { - logger.info( - "mathpoll related; after cache miss, unable to find data for", - { - zid, - math_tick, - math_env: Config.mathEnv, - } - ); - return undefined; - } - let item = rows[0].data; - - if (rows[0].math_tick) { - item.math_tick = Number(rows[0].math_tick); - } - - if (item.math_tick <= (math_tick || 0)) { - logger.info("after cache miss, unable to find newer item", { - zid, - math_tick, - }); - return undefined; - } - logger.info("after cache miss, found item, adding to cache", { - zid, - math_tick, - }); - - processMathObject(item); - - return updatePcaCache(zid, item); - }); - } - - function updatePcaCache(zid: any, item: { zid: any }): Promise { - return new Promise(function ( - resolve: (arg0: PcaCacheItem) => void, - reject: (arg0: any) => any - ) { - delete item.zid; // don't leak zid - let asJSON = JSON.stringify(item); - let buf = Buffer.from(asJSON, "utf-8"); - zlib.gzip(buf, function (err: any, jsondGzipdPcaBuffer: any) { - if (err) { - return reject(err); - } - - let o: PcaCacheItem = { - asPOJO: item as any, - asJSON: asJSON, - asBufferOfGzippedJson: jsondGzipdPcaBuffer, - expiration: Date.now() + 3000, - consensus: { agree: undefined, disagree: undefined }, - repness: {}, - }; - // save in LRU cache, but don't update the lastPrefetchedMathTick - pcaCache.set(zid, o); - resolve(o); - }); - }); - } - function redirectIfHasZidButNoConversationId( req: { body: { zid: any; conversation_id: any }; headers?: any }, res: { @@ -1813,22 +1502,6 @@ function initializePolisHelpers() { }); } - function getZidForRid(rid: any) { - return pgQueryP("select zid from reports where rid = ($1);", [rid]).then( - // Argument of type '(row: string | any[]) => any' is not assignable to parameter of 
type '(value: unknown) => any'. - // Types of parameters 'row' and 'value' are incompatible. - // Type 'unknown' is not assignable to type 'string | any[]'. - // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - (row: string | any[]) => { - if (!row || !row.length) { - return null; - } - return row[0].zid; - } - ); - } - function handle_POST_math_update( req: { p: { zid: any; uid?: any; math_update_type: any } }, res: { @@ -2073,173 +1746,6 @@ function initializePolisHelpers() { // return res.end(); } - async function handle_GET_reportExport( - req: { - p: { rid: string; report_type: string }; - headers: { host: string; "x-forwarded-proto": string }; - }, - res: { - send: (data: string) => void; - setHeader: (key: string, value: string) => void; - } - ) { - function formatCSV( - colFns: Record string>, - rows: object[] - ): string { - const fns = Object.values(colFns); - const sep = "\n"; - let csv = Object.keys(colFns).join(",") + sep; - if (rows.length > 0) { - for (const row of rows) { - // we append to a single string here (instead of creating an array of strings and joining - // them) to reduce the amount of garbage created; we may have millions of rows, I wish we - // could stream directly to the response... 
- for (let ii = 0; ii < fns.length; ii += 1) { - if (ii > 0) csv += ","; - csv += fns[ii](row); - } - csv += sep; - } - } - return csv; - } - - async function loadConversationSummary(zid: number) { - const [zinvite, convoRows, commentersRow, pca] = await Promise.all([ - getZinvite(zid), - pgQueryP_readOnly( - `SELECT topic, description FROM conversations WHERE zid = $1`, - [zid] - ), - pgQueryP_readOnly( - `SELECT COUNT(DISTINCT pid) FROM comments WHERE zid = $1`, - [zid] - ), - getPca(zid), - ]); - if (!zinvite || !convoRows || !commentersRow || !pca) { - throw new Error("polis_error_data_unknown_report"); - } - - const convo = (convoRows as { topic: string; description: string }[])[0]; - const commenters = (commentersRow as { count: number }[])[0].count; - - type PcaData = { - "in-conv": number[]; - "user-vote-counts": Record; - "group-clusters": Record; - "n-cmts": number; - }; - const data = (pca.asPOJO as unknown) as PcaData; - const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; - - const escapeQuotes = (s: string) => s.replace(/"/g, '""'); - return [ - ["topic", `"${escapeQuotes(convo.topic)}"`], - ["url", `${siteUrl}/${zinvite}`], - ["voters", Object.keys(data["user-vote-counts"]).length], - ["voters-in-conv", data["in-conv"].length], - ["commenters", commenters], - ["comments", data["n-cmts"]], - ["groups", Object.keys(data["group-clusters"]).length], - ["conversation-description", `"${escapeQuotes(convo.description)}"`], - ].map((row) => row.join(",")); - } - - const loadCommentSummary = (zid: number) => - pgQueryP_readOnly( - `SELECT - created, - tid, - pid, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, - mod, - txt - FROM comments - WHERE zid = $1`, - [zid] - ); - - const loadVotes = (zid: number) => - pgQueryP_readOnly( - `SELECT created as timestamp, tid, pid, vote FROM 
votes WHERE zid = $1 order by tid, pid`, - [zid] - ); - - const formatDatetime = (timestamp: string) => - new Date(parseInt(timestamp)).toString(); - - const { rid, report_type } = req.p; - try { - const zid = await getZidForRid(rid); - if (!zid) { - fail(res, 404, "polis_error_data_unknown_report"); - return; - } - - switch (report_type) { - case "summary.csv": - res.setHeader("content-type", "text/csv"); - res.send((await loadConversationSummary(zid)).join("\n")); - break; - - case "comments.csv": - const rows = (await loadCommentSummary(zid)) as object[] | undefined; - console.log(rows); - if (rows) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), - }, - rows - ) - ); - } else fail(res, 500, "polis_err_data_export"); - break; - - case "votes.csv": - const votes = (await loadVotes(zid)) as object[] | undefined; - if (votes) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.timestamp / 1000)), - datetime: (row) => formatDatetime(row.timestamp), - "comment-id": (row) => String(row.tid), - "voter-id": (row) => String(row.pid), - vote: (row) => String(row.vote), - }, - votes - ) - ); - } else fail(res, 500, "polis_err_data_export"); - break; - - default: - fail(res, 404, "polis_error_data_unknown_report"); - break; - } - } catch (err) { - const msg = - err instanceof Error && err.message && err.message.startsWith("polis_") - ? 
err.message - : "polis_err_data_export"; - fail(res, 500, msg, err); - } - } - function getBidIndexToPidMapping(zid: number, math_tick: number) { math_tick = math_tick || -1; return pgQueryP_readOnly( @@ -2990,96 +2496,6 @@ Feel free to reply to this email if you need help.`; ); } - let zidToConversationIdCache = new LruCache({ - max: 1000, - }); - - function getZinvite(zid: any, dontUseCache?: boolean) { - let cachedConversationId = zidToConversationIdCache.get(zid); - if (!dontUseCache && cachedConversationId) { - return Promise.resolve(cachedConversationId); - } - return pgQueryP_metered( - "getZinvite", - "select * from zinvites where zid = ($1);", - [zid] - ).then(function (rows: { zinvite: any }[]) { - let conversation_id = (rows && rows[0] && rows[0].zinvite) || void 0; - if (conversation_id) { - zidToConversationIdCache.set(zid, conversation_id); - } - return conversation_id; - }); - } - - function getZinvites(zids: any[]) { - if (!zids.length) { - return Promise.resolve(zids); - } - zids = _.map(zids, function (zid: any) { - return Number(zid); // just in case - }); - zids = _.uniq(zids); - - let uncachedZids = zids.filter(function (zid: any) { - return !zidToConversationIdCache.get(zid); - }); - let zidsWithCachedConversationIds = zids - .filter(function (zid: any) { - return !!zidToConversationIdCache.get(zid); - }) - .map(function (zid: any) { - return { - zid: zid, - zinvite: zidToConversationIdCache.get(zid), - }; - }); - - function makeZidToConversationIdMap(arrays: any[]) { - let zid2conversation_id = {}; - arrays.forEach(function (a: any[]) { - a.forEach(function (o: { zid: string | number; zinvite: any }) { - // (property) zid: string | number - // Element implicitly has an 'any' type because expression of type 'string | number' can't be used to index type '{}'. 
- // No index signature with a parameter of type 'string' was found onpe '{}'.ts(7053) - // @ts-ignore - zid2conversation_id[o.zid] = o.zinvite; - }); - }); - return zid2conversation_id; - } - - // 'new' expression, whose target lacks a construct signature, implicitly has an 'any' type.ts(7009) - // @ts-ignore - return new MPromise( - "getZinvites", - function (resolve: (arg0: {}) => void, reject: (arg0: any) => void) { - if (uncachedZids.length === 0) { - resolve(makeZidToConversationIdMap([zidsWithCachedConversationIds])); - return; - } - pgQuery_readOnly( - "select * from zinvites where zid in (" + - uncachedZids.join(",") + - ");", - [], - function (err: any, result: { rows: any }) { - if (err) { - reject(err); - } else { - resolve( - makeZidToConversationIdMap([ - result.rows, - zidsWithCachedConversationIds, - ]) - ); - } - } - ); - } - ); - } - function addConversationId( o: { zid?: any; conversation_id?: any }, dontUseCache: any diff --git a/server/src/utils/pca.ts b/server/src/utils/pca.ts new file mode 100644 index 000000000..ee7548c43 --- /dev/null +++ b/server/src/utils/pca.ts @@ -0,0 +1,317 @@ +// Copyright (C) 2012-present, The Authors. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
+ +"use strict"; + +import zlib from "zlib"; +import _ from "underscore"; +import LruCache from "lru-cache"; +import { queryP_readOnly as pgQueryP_readOnly } from "../db/pg-query"; +import Config from "../config"; +import logger from "./logger"; +import { addInRamMetric } from "./metered"; + +export type PcaCacheItem = { + asPOJO: any; + consensus: { agree?: any; disagree?: any }; + repness: { [x: string]: any }; + asJSON: string; + asBufferOfGzippedJson: any; + expiration: number; +}; +let pcaCacheSize = Config.cacheMathResults ? 300 : 1; +let pcaCache = new LruCache({ + max: pcaCacheSize, +}); + +let lastPrefetchedMathTick = -1; + +// this scheme might not last forever. For now, there are only a couple of MB worth of conversation pca data. +export function fetchAndCacheLatestPcaData() { + let lastPrefetchPollStartTime = Date.now(); + + function waitTime() { + let timePassed = Date.now() - lastPrefetchPollStartTime; + return Math.max(0, 2500 - timePassed); + } + // cursor.sort([["math_tick", "asc"]]); + pgQueryP_readOnly( + "select * from math_main where caching_tick > ($1) order by caching_tick limit 10;", + [lastPrefetchedMathTick] + ) + // Argument of type '(rows: any[]) => void' is not assignable to parameter of type '(value: unknown) => void | PromiseLike'. + // Types of parameters 'rows' and 'value' are incompatible. 
+ // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + .then((rows: any[]) => { + if (!rows || !rows.length) { + // call again + logger.info("mathpoll done"); + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + return; + } + + let results = rows.map( + (row: { data: any; math_tick: any; caching_tick: any }) => { + let item = row.data; + + if (row.math_tick) { + item.math_tick = Number(row.math_tick); + } + if (row.caching_tick) { + item.caching_tick = Number(row.caching_tick); + } + + logger.info("mathpoll updating", { + caching_tick: item.caching_tick, + zid: item.zid, + }); + + // let prev = pcaCache.get(item.zid); + if (item.caching_tick > lastPrefetchedMathTick) { + lastPrefetchedMathTick = item.caching_tick; + } + + processMathObject(item); + + return updatePcaCache(item.zid, item); + } + ); + Promise.all(results).then((a: any) => { + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + }); + }) + .catch((err: any) => { + logger.error("mathpoll error", err); + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + }); +} + +export function getPca( + zid?: any, + math_tick?: number +): Promise { + let cached = pcaCache.get(zid); + // Object is of type 'unknown'.ts(2571) + // @ts-ignore + if (cached && cached.expiration < Date.now()) { + cached = undefined; + } + // Object is of type 'unknown'.ts(2571) + // @ts-ignore + let cachedPOJO = cached && cached.asPOJO; + if (cachedPOJO) { + if (cachedPOJO.math_tick <= (math_tick || 0)) { + logger.info("math was cached but not new", { + zid, + cached_math_tick: cachedPOJO.math_tick, + query_math_tick: math_tick, + }); + return Promise.resolve(undefined); + } else { + logger.info("math from cache", { zid, math_tick }); + return Promise.resolve(cached); + } + } + + logger.info("mathpoll cache miss", { zid, math_tick }); + + // NOTE: not caching results from this query for now, think about this later. + // not caching these means that conversations without new votes might not be cached. 
(closed conversations may be slower to load) + // It's probably not difficult to cache, but keeping things simple for now, and only caching things that come down with the poll. + + let queryStart = Date.now(); + + return pgQueryP_readOnly( + "select * from math_main where zid = ($1) and math_env = ($2);", + [zid, Config.mathEnv] + // Argument of type '(rows: string | any[]) => Promise | null' is not assignable to parameter of type '(value: unknown) => any'. + // Types of parameters 'rows' and 'value' are incompatible. + // Type 'unknown' is not assignable to type 'string | any[]'. + // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + ).then((rows: string | any[]) => { + let queryEnd = Date.now(); + let queryDuration = queryEnd - queryStart; + addInRamMetric("pcaGetQuery", queryDuration); + + if (!rows || !rows.length) { + logger.info( + "mathpoll related; after cache miss, unable to find data for", + { + zid, + math_tick, + math_env: Config.mathEnv, + } + ); + return undefined; + } + let item = rows[0].data; + + if (rows[0].math_tick) { + item.math_tick = Number(rows[0].math_tick); + } + + if (item.math_tick <= (math_tick || 0)) { + logger.info("after cache miss, unable to find newer item", { + zid, + math_tick, + }); + return undefined; + } + logger.info("after cache miss, found item, adding to cache", { + zid, + math_tick, + }); + + processMathObject(item); + + return updatePcaCache(zid, item); + }); +} + +function updatePcaCache(zid: any, item: { zid: any }): Promise { + return new Promise(function ( + resolve: (arg0: PcaCacheItem) => void, + reject: (arg0: any) => any + ) { + delete item.zid; // don't leak zid + let asJSON = JSON.stringify(item); + let buf = Buffer.from(asJSON, "utf-8"); + zlib.gzip(buf, function (err: any, jsondGzipdPcaBuffer: any) { + if (err) { + return reject(err); + } + + let o = { + asPOJO: item, + asJSON: asJSON, + asBufferOfGzippedJson: jsondGzipdPcaBuffer, + expiration: Date.now() + 3000, + } as PcaCacheItem; 
+ // save in LRU cache, but don't update the lastPrefetchedMathTick + pcaCache.set(zid, o); + resolve(o); + }); + }); +} + +function processMathObject(o: { [x: string]: any }) { + function remapSubgroupStuff(g: { val: any[] }) { + if (_.isArray(g.val)) { + g.val = g.val.map((x: { id: number }) => { + return { id: Number(x.id), val: x }; + }); + } else { + // Argument of type '(id: number) => { id: number; val: any; }' + // is not assignable to parameter of type '(value: string, index: number, array: string[]) => { id: number; val: any; }'. + // Types of parameters 'id' and 'value' are incompatible. + // Type 'string' is not assignable to type 'number'.ts(2345) + // @ts-ignore + g.val = _.keys(g.val).map((id: number) => { + return { id: Number(id), val: g.val[id] }; + }); + } + return g; + } + + // Normalize so everything is arrays of objects (group-clusters is already in this format, but needs to have the val: subobject style too). + + if (_.isArray(o["group-clusters"])) { + // NOTE this is different since group-clusters is already an array. 
+ o["group-clusters"] = o["group-clusters"].map((g: { id: any }) => { + return { id: Number(g.id), val: g }; + }); + } + + if (!_.isArray(o["repness"])) { + o["repness"] = _.keys(o["repness"]).map((gid: string | number) => { + return { id: Number(gid), val: o["repness"][gid] }; + }); + } + if (!_.isArray(o["group-votes"])) { + o["group-votes"] = _.keys(o["group-votes"]).map((gid: string | number) => { + return { id: Number(gid), val: o["group-votes"][gid] }; + }); + } + if (!_.isArray(o["subgroup-repness"])) { + o["subgroup-repness"] = _.keys(o["subgroup-repness"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-repness"][gid] }; + } + ); + o["subgroup-repness"].map(remapSubgroupStuff); + } + if (!_.isArray(o["subgroup-votes"])) { + o["subgroup-votes"] = _.keys(o["subgroup-votes"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-votes"][gid] }; + } + ); + o["subgroup-votes"].map(remapSubgroupStuff); + } + if (!_.isArray(o["subgroup-clusters"])) { + o["subgroup-clusters"] = _.keys(o["subgroup-clusters"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-clusters"][gid] }; + } + ); + o["subgroup-clusters"].map(remapSubgroupStuff); + } + + // Edge case where there are two groups and one is huge, split the large group. + // Once we have a better story for h-clust in the participation view, then we can just show the h-clust instead. 
+ // var groupVotes = o['group-votes']; + // if (_.keys(groupVotes).length === 2 && o['subgroup-votes'] && o['subgroup-clusters'] && o['subgroup-repness']) { + // var s0 = groupVotes[0].val['n-members']; + // var s1 = groupVotes[1].val['n-members']; + // const scaleRatio = 1.1; + // if (s1 * scaleRatio < s0) { + // o = splitTopLevelGroup(o, groupVotes[0].id); + // } else if (s0 * scaleRatio < s1) { + // o = splitTopLevelGroup(o, groupVotes[1].id); + // } + // } + + // // Gaps in the gids are not what we want to show users, and they make client development difficult. + // // So this guarantees that the gids are contiguous. TODO look into Darwin. + // o = packGids(o); + + // Un-normalize to maintain API consistency. + // This could be removed in a future API version. + function toObj(a: string | any[]) { + let obj = {}; + if (!a) { + return obj; + } + for (let i = 0; i < a.length; i++) { + // Element implicitly has an 'any' type + // because expression of type 'any' can't be used to index type '{ } '.ts(7053) + // @ts-ignore + obj[a[i].id] = a[i].val; + // Element implicitly has an 'any' type + // because expression of type 'any' can't be used to index type '{ } '.ts(7053) + // @ts-ignore + obj[a[i].id].id = a[i].id; + } + return obj; + } + function toArray(a: any[]) { + if (!a) { + return []; + } + return a.map((g: { id: any; val: any }) => { + let id = g.id; + g = g.val; + g.id = id; + return g; + }); + } + o["repness"] = toObj(o["repness"]); + o["group-votes"] = toObj(o["group-votes"]); + o["group-clusters"] = toArray(o["group-clusters"]); + + delete o["subgroup-repness"]; + delete o["subgroup-votes"]; + delete o["subgroup-clusters"]; + return o; +} diff --git a/server/src/utils/zinvite.ts b/server/src/utils/zinvite.ts new file mode 100644 index 000000000..8985d0515 --- /dev/null +++ b/server/src/utils/zinvite.ts @@ -0,0 +1,116 @@ +// Copyright (C) 2012-present, The Authors. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. + +"use strict"; + +import LruCache from "lru-cache"; +import _ from "underscore"; +import { + queryP as pgQueryP, + query_readOnly as pgQuery_readOnly, + queryP_metered as pgQueryP_metered, +} from "../db/pg-query"; +import { MPromise } from "./metered"; + +let zidToConversationIdCache = new LruCache({ + max: 1000, +}); + +export function getZinvite(zid: any, dontUseCache?: boolean) { + let cachedConversationId = zidToConversationIdCache.get(zid); + if (!dontUseCache && cachedConversationId) { + return Promise.resolve(cachedConversationId); + } + return pgQueryP_metered( + "getZinvite", + "select * from zinvites where zid = ($1);", + [zid] + ).then(function (rows: { zinvite: any }[]) { + let conversation_id = (rows && rows[0] && rows[0].zinvite) || void 0; + if (conversation_id) { + zidToConversationIdCache.set(zid, conversation_id); + } + return conversation_id; + }); +} + +export function getZinvites(zids: any[]) { + if (!zids.length) { + return Promise.resolve(zids); + } + zids = _.map(zids, function (zid: any) { + return Number(zid); // just in case + }); + zids = _.uniq(zids); + + let uncachedZids = zids.filter(function (zid: any) { + return !zidToConversationIdCache.get(zid); + }); + let zidsWithCachedConversationIds = zids + .filter(function (zid: any) { + return !!zidToConversationIdCache.get(zid); + }) + .map(function (zid: any) { + return { + zid: zid, + zinvite: zidToConversationIdCache.get(zid), + }; + }); + + function 
makeZidToConversationIdMap(arrays: any[]) { + let zid2conversation_id = {}; + arrays.forEach(function (a: any[]) { + a.forEach(function (o: { zid: string | number; zinvite: any }) { + // (property) zid: string | number + // Element implicitly has an 'any' type because expression of type 'string | number' can't be used to index type '{}'. + // No index signature with a parameter of type 'string' was found on type '{}'.ts(7053) + // @ts-ignore + zid2conversation_id[o.zid] = o.zinvite; + }); + }); + return zid2conversation_id; + } + + // 'new' expression, whose target lacks a construct signature, implicitly has an 'any' type.ts(7009) + // @ts-ignore + return new MPromise( + "getZinvites", + function (resolve: (arg0: {}) => void, reject: (arg0: any) => void) { + if (uncachedZids.length === 0) { + resolve(makeZidToConversationIdMap([zidsWithCachedConversationIds])); + return; + } + pgQuery_readOnly( + "select * from zinvites where zid in (" + uncachedZids.join(",") + ");", + [], + function (err: any, result: { rows: any }) { + if (err) { + reject(err); + } else { + resolve( + makeZidToConversationIdMap([ + result.rows, + zidsWithCachedConversationIds, + ]) + ); + } + } + ); + } + ); +} + +export function getZidForRid(rid: any) { + return pgQueryP("select zid from reports where rid = ($1);", [rid]).then( + // Argument of type '(row: string | any[]) => any' is not assignable to parameter of type '(value: unknown) => any'. + // Types of parameters 'row' and 'value' are incompatible. + // Type 'unknown' is not assignable to type 'string | any[]'. + // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + (row: string | any[]) => { + if (!row || !row.length) { + return null; + } + return row[0].zid; + } + ); +} From cf0faa4595792efedca667d62935ef8c5ea7c79a Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Sat, 14 Sep 2024 12:14:23 -0700 Subject: [PATCH 03/13] Split each report into separate function. 
--- server/src/routes/export.ts | 123 +++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 52 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index 9f1da390e..b0ad0149e 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -80,8 +80,28 @@ async function loadConversationSummary(zid: number, siteUrl: string) { ].map((row) => row.join(",")); } -const loadCommentSummary = (zid: number) => - pgQueryP_readOnly( +const formatDatetime = (timestamp: string) => + new Date(parseInt(timestamp)).toString(); + +type Response = { + setHeader: (key: string, value: string) => void; + send: (data: string) => void; + write: (data: string) => void; + end: () => void; +}; + +async function sendConversationSummary( + zid: number, + siteUrl: string, + res: Response +) { + const rows = await loadConversationSummary(zid, siteUrl); + res.setHeader("content-type", "text/csv"); + res.send(rows.join(sep)); +} + +async function sendCommentSummary(zid: number, res: Response) { + const rows = (await pgQueryP_readOnly( `SELECT created, tid, @@ -93,22 +113,60 @@ const loadCommentSummary = (zid: number) => FROM comments WHERE zid = $1`, [zid] + )) as object[] | undefined; + if (!rows) { + fail(res, 500, "polis_err_data_export"); + return; + } + + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => String(Math.floor(row.created / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, + rows + ) ); +} -const formatDatetime = (timestamp: string) => - new Date(parseInt(timestamp)).toString(); +async function sendVotesSummary(zid: number, res: Response) { + const formatters: Formatters = { + timestamp: (row) => 
String(Math.floor(row.timestamp / 1000)), + datetime: (row) => formatDatetime(row.timestamp), + "comment-id": (row) => String(row.tid), + "voter-id": (row) => String(row.pid), + vote: (row) => String(row.vote), + }; + res.setHeader("Content-Type", "text/csv"); + res.write(formatCSVHeaders(formatters) + sep); + + stream_pgQueryP_readOnly( + "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", + [zid], + (row) => res.write(formatCSVRow(row, formatters) + sep), + () => res.end(), + (error) => { + // Handle any errors + logger.error("polis_err_report_votes_csv", error); + fail(res, 500, "polis_err_data_export", error); + } + ); +} export async function handle_GET_reportExport( req: { p: { rid: string; report_type: string }; headers: { host: string; "x-forwarded-proto": string }; }, - res: { - setHeader: (key: string, value: string) => void; - send: (data: string) => void; - write: (data: string) => void; - end: () => void; - } + res: Response ) { const { rid, report_type } = req.p; try { @@ -121,54 +179,15 @@ export async function handle_GET_reportExport( switch (report_type) { case "summary.csv": const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; - res.setHeader("content-type", "text/csv"); - res.send((await loadConversationSummary(zid, siteUrl)).join(sep)); + await sendConversationSummary(zid, siteUrl, res); break; case "comments.csv": - const rows = (await loadCommentSummary(zid)) as object[] | undefined; - if (rows) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), - }, - rows - ) - ); - } else fail(res, 500, 
"polis_err_data_export"); + await sendCommentSummary(zid, res); break; case "votes.csv": - const formatters: Formatters = { - timestamp: (row) => String(Math.floor(row.timestamp / 1000)), - datetime: (row) => formatDatetime(row.timestamp), - "comment-id": (row) => String(row.tid), - "voter-id": (row) => String(row.pid), - vote: (row) => String(row.vote), - }; - res.setHeader("Content-Type", "text/csv"); - res.write(formatCSVHeaders(formatters) + sep); - - stream_pgQueryP_readOnly( - "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", - [zid], - (row) => res.write(formatCSVRow(row, formatters) + sep), - () => res.end(), - (error) => { - // Handle any errors - logger.error("polis_err_report_votes_csv", error); - fail(res, 500, "polis_err_data_export", error); - } - ); + await sendVotesSummary(zid, res); break; default: From 0ac2891aabee190c2b6644bd94fb2804a7f70c69 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Sat, 14 Sep 2024 12:46:07 -0700 Subject: [PATCH 04/13] Count up comment votes in single pass over votes table. There was actually a bug in the old SQL that aggregated votes from _all_ conversations instead of just the conversation in question, which is why it took 30 seconds to run. With that bug fixed, even the super slow "do a full subquery for each comment row" was actually quite fast. But this is way cheaper/faster. 
--- server/src/routes/export.ts | 113 ++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index b0ad0149e..51f5818b6 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -11,14 +11,14 @@ import { getPca } from "../utils/pca"; import fail from "../utils/fail"; import logger from "../utils/logger"; -type Formatters = Record string>; +type Formatters = Record string>; const sep = "\n"; -function formatCSVHeaders(colFns: Formatters) { +function formatCSVHeaders(colFns: Formatters) { return Object.keys(colFns).join(","); } -function formatCSVRow(row: object, colFns: Formatters) { +function formatCSVRow(row: T, colFns: Formatters) { const fns = Object.values(colFns); let csv = ""; for (let ii = 0; ii < fns.length; ii += 1) { @@ -28,7 +28,7 @@ function formatCSVRow(row: object, colFns: Formatters) { return csv; } -function formatCSV(colFns: Formatters, rows: object[]): string { +function formatCSV(colFns: Formatters, rows: T[]): string { let csv = formatCSVHeaders(colFns) + sep; if (rows.length > 0) { for (const row of rows) { @@ -100,45 +100,84 @@ async function sendConversationSummary( res.send(rows.join(sep)); } +type CommentRow = { + tid: number; + pid: number; + created: string; + txt: string; + mod: number; + velocity: number; + active: boolean; + agrees: number; + disagrees: number; + pass: number; +}; + async function sendCommentSummary(zid: number, res: Response) { - const rows = (await pgQueryP_readOnly( - `SELECT - created, - tid, - pid, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, - mod, - txt - FROM comments - WHERE zid = $1`, - [zid] - )) as object[] | undefined; - if (!rows) { - fail(res, 500, "polis_err_data_export"); - return; - } + const comments = new Map(); - 
res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), + try { + // First query: Load comments metadata + const commentRows = (await pgQueryP_readOnly( + "SELECT tid, pid, created, txt, mod, velocity, active FROM comments WHERE zid = ($1)", + [zid] + )) as CommentRow[]; + for (const comment of commentRows) { + comment.agrees = 0; + comment.disagrees = 0; + comment.pass = 0; + comments.set(comment.tid, comment); + } + + // Second query: Count votes in a single pass + stream_pgQueryP_readOnly( + "SELECT tid, vote FROM votes WHERE zid = ($1) ORDER BY tid", + [zid], + (row) => { + const comment = comments.get(row.tid); + if (comment) { + if (row.vote === 1) comment.agrees += 1; + else if (row.vote === -1) comment.disagrees += 1; + else if (row.vote === 0) comment.pass += 1; + } else { + logger.warn(`Comment row not found for [zid=${zid}, tid=${row.tid}]`); + } }, - rows - ) - ); + () => { + commentRows.sort((a, b) => { + return b.velocity - a.velocity; + }); + + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => + String(Math.floor(parseInt(row.created) / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, + commentRows + ) + ); + }, + (error) => { + logger.error("polis_err_report_comments", error); + } + ); + } catch (err) { + logger.error("polis_err_report_comments", err); + fail(res, 500, 
"polis_err_data_export", err); + } } async function sendVotesSummary(zid: number, res: Response) { - const formatters: Formatters = { + const formatters: Formatters = { timestamp: (row) => String(Math.floor(row.timestamp / 1000)), datetime: (row) => formatDatetime(row.timestamp), "comment-id": (row) => String(row.tid), From 9063469ef4a0f688522d7848e7baf29f9adc31c0 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Thu, 19 Sep 2024 15:15:34 -0700 Subject: [PATCH 05/13] Add participant-votes.csv export. --- server/src/routes/export.ts | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index 51f5818b6..cf4737aae 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -200,6 +200,102 @@ async function sendVotesSummary(zid: number, res: Response) { ); } +async function sendParticipantVotesSummary(zid: number, res: Response) { + // Load up the comment ids + const commentRows = (await pgQueryP_readOnly( + "SELECT tid, pid FROM comments WHERE zid = ($1) ORDER BY tid ASC, created ASC", // TODO: filter only active comments? 
+ [zid] + )) as { tid: number; pid: number }[]; + const commentIds = commentRows.map((row) => row.tid); + const participantCommentCounts = new Map(); + for (const row of commentRows) { + const count = participantCommentCounts.get(row.pid) || 0; + participantCommentCounts.set(row.pid, count + 1); + } + + const pca = await getPca(zid); + const groupClusters: { id: number; members: number[] }[] | undefined = + pca?.asPOJO["group-clusters"]; + function getGroupId(pid: number) { + if (groupClusters) { + for (const group of groupClusters) { + if (group.members.includes(pid)) { + return group.id; + } + } + } + return undefined; + } + + res.setHeader("content-type", "text/csv"); + res.write( + [ + "participant", + "group-id", + "n-comments", + "n-votes", + "n-agree", + "n-disagree", + ...commentIds, + ].join(",") + sep + ); + + // Query the votes in participant order so that we can summarize them in a streaming pass + let currentParticipantId = -1; + const currentParticipantVotes = new Map(); + function sendCurrentParticipantRow() { + let agrees = 0; + let disagrees = 0; + for (const vote of currentParticipantVotes.values()) { + if (vote === 1) agrees += 1; + else if (vote === -1) disagrees += 1; + } + const values = [ + currentParticipantId, + getGroupId(currentParticipantId), + participantCommentCounts.get(currentParticipantId) || 0, + currentParticipantVotes.size, + agrees, + disagrees, + ...commentIds.map((tid) => currentParticipantVotes.get(tid)), + ]; + res.write( + values + .map((value) => (value === undefined ? 
"" : String(value))) + .join(",") + sep + ); + } + + stream_pgQueryP_readOnly( + "SELECT pid, tid, vote FROM votes WHERE zid = ($1) ORDER BY pid", + [zid], + (row) => { + const pid: number = row.pid; + if (pid != currentParticipantId) { + if (currentParticipantId != -1) { + sendCurrentParticipantRow(); + } + currentParticipantId = pid; + currentParticipantVotes.clear(); + } + + const tid: number = row.tid; + const vote: number = row.vote; + currentParticipantVotes.set(tid, vote); + }, + () => { + if (currentParticipantId != -1) { + sendCurrentParticipantRow(); + } + res.end(); + }, + (error) => { + logger.error("polis_err_report_participant_votes", error); + fail(res, 500, "polis_err_data_export", error); + } + ); +} + export async function handle_GET_reportExport( req: { p: { rid: string; report_type: string }; @@ -229,6 +325,10 @@ export async function handle_GET_reportExport( await sendVotesSummary(zid, res); break; + case "participant-votes.csv": + await sendParticipantVotesSummary(zid, res); + break; + default: fail(res, 404, "polis_error_data_unknown_report"); break; From 42adfac82cd8fdd3d32b2d1af94667f94264f151 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Wed, 11 Sep 2024 14:47:45 -0700 Subject: [PATCH 06/13] Switch to non-native Postgres client. And add a "streaming" API for making database queries, which streams the results from the database to Node as they are generated by Postgres. This allows Node to process the rows one by one (and garbage collect in between), which is much easier on the VM when we need to do big queries that summarize data (or just format it and incrementally spit it out an HTTP response). 
--- server/package-lock.json | 113 +++++++++++++++++++++++++++++++++----- server/package.json | 2 +- server/src/db/pg-query.ts | 46 ++++++++++++++-- 3 files changed, 141 insertions(+), 20 deletions(-) diff --git a/server/package-lock.json b/server/package-lock.json index 92428cdfb..a4cef6ccb 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -35,7 +35,7 @@ "p3p": "~0.0.2", "pg": "~8.8.0", "pg-connection-string": "~2.5.0", - "pg-native": "~3.0.1", + "pg-query-stream": "^4.6.0", "replacestream": "~4.0.0", "request": "~2.88.2", "request-promise": "~4.2.6", @@ -2513,6 +2513,8 @@ "version": "1.5.0", "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "optional": true, + "peer": true, "dependencies": { "file-uri-to-path": "1.0.0" } @@ -6370,15 +6372,26 @@ } }, "node_modules/libpq": { - "version": "1.8.12", - "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.12.tgz", - "integrity": "sha512-4lUY9BD9suz76mVS0kH4rRgRy620g/c9YZH5GYC3smfIpjtj6KiPuQ4IwQSHSZMMMhMM3tBFrYUrw8mHOOZVeg==", + "version": "1.8.13", + "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.13.tgz", + "integrity": "sha512-t1wpnGVgwRIFSKoe4RFUllAFj953kNMcdXhGvFJwI0r6lJQqgSwTeiIciaCinjOmHk0HnFeWQSMC6Uw2591G4A==", "hasInstallScript": true, + "license": "MIT", + "optional": true, + "peer": true, "dependencies": { "bindings": "1.5.0", - "nan": "^2.14.0" + "nan": "2.19.0" } }, + "node_modules/libpq/node_modules/nan": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.19.0.tgz", + "integrity": "sha512-nO1xXxfh/RWNxfd/XPfbIfFk5vgLsAxUR9y5O0cHMJu/AW9U95JLXqthYHjEp+8gQ5p96K9jUp8nbVOxCdRbtw==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -7349,6 +7362,15 @@ 
"resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.5.0.tgz", "integrity": "sha512-r5o/V/ORTA6TmUnyWZR9nCj1klXCO2CEKNRlVuJptZe85QuhFayC7WeMic7ndayT5IRIR0S0xFxFi2ousartlQ==" }, + "node_modules/pg-cursor": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/pg-cursor/-/pg-cursor-2.11.0.tgz", + "integrity": "sha512-TLCOCtu+rqMarzjUi+/Ffc2DV5ZqO/27y5GqnK9Z3w51rWXMwC8FcO96Uf9/ORo5o+qRXEVJxM9Ts3K2K31MLg==", + "license": "MIT", + "peerDependencies": { + "pg": "^8" + } + }, "node_modules/pg-int8": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", @@ -7361,6 +7383,8 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/pg-native/-/pg-native-3.0.1.tgz", "integrity": "sha512-LBVNWkNh0fVx/cienARRP2y22J5OpUsKBe0TpxzAx3arEUUdIs77aLSAHS3scS7SMaqc+OkG40CEu5fN0/cjIw==", + "optional": true, + "peer": true, "dependencies": { "libpq": "^1.8.10", "pg-types": "^1.12.1", @@ -7370,12 +7394,16 @@ "node_modules/pg-native/node_modules/isarray": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==" + "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==", + "optional": true, + "peer": true }, "node_modules/pg-native/node_modules/pg-types": { "version": "1.13.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.13.0.tgz", "integrity": "sha512-lfKli0Gkl/+za/+b6lzENajczwZHc7D5kiUCZfgm914jipD2kIOIvEkAhZ8GrW3/TUoP9w8FHjwpPObBye5KQQ==", + "optional": true, + "peer": true, "dependencies": { "pg-int8": "1.0.1", "postgres-array": "~1.0.0", @@ -7388,6 +7416,8 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-1.0.3.tgz", "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==", + "optional": 
true, + "peer": true, "engines": { "node": ">=0.10.0" } @@ -7396,6 +7426,8 @@ "version": "1.0.31", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.31.tgz", "integrity": "sha512-tco/Dwv1f/sgIgN6CWdj/restacPKNskK6yps1981ivH2ZmLYcs5o5rVzL3qaO/cSkhN8hYOMWs7+glzOLSgRg==", + "optional": true, + "peer": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.1", @@ -7406,7 +7438,9 @@ "node_modules/pg-native/node_modules/string_decoder": { "version": "0.10.31", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==" + "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==", + "optional": true, + "peer": true }, "node_modules/pg-pool": { "version": "3.5.2", @@ -7421,6 +7455,18 @@ "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.5.0.tgz", "integrity": "sha512-muRttij7H8TqRNu/DxrAJQITO4Ac7RmX3Klyr/9mJEOBeIpgnF8f9jAfRz5d3XwQZl5qBjF9gLsUtMPJE0vezQ==" }, + "node_modules/pg-query-stream": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/pg-query-stream/-/pg-query-stream-4.6.0.tgz", + "integrity": "sha512-sg2Hewe6ge6osEY07zGu7Z8djrsQBvyiTy5ZjQffoSatEgnNNVsV3EWDm9Px/8R9oaAL1YnfnP8AXPMmfzujZg==", + "license": "MIT", + "dependencies": { + "pg-cursor": "^2.11.0" + }, + "peerDependencies": { + "pg": "^8" + } + }, "node_modules/pg-types": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", @@ -11733,6 +11779,8 @@ "version": "1.5.0", "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "optional": true, + "peer": true, "requires": { "file-uri-to-path": "1.0.0" } @@ -14707,12 +14755,23 @@ } }, "libpq": { - "version": "1.8.12", - "resolved": 
"https://registry.npmjs.org/libpq/-/libpq-1.8.12.tgz", - "integrity": "sha512-4lUY9BD9suz76mVS0kH4rRgRy620g/c9YZH5GYC3smfIpjtj6KiPuQ4IwQSHSZMMMhMM3tBFrYUrw8mHOOZVeg==", + "version": "1.8.13", + "resolved": "https://registry.npmjs.org/libpq/-/libpq-1.8.13.tgz", + "integrity": "sha512-t1wpnGVgwRIFSKoe4RFUllAFj953kNMcdXhGvFJwI0r6lJQqgSwTeiIciaCinjOmHk0HnFeWQSMC6Uw2591G4A==", + "optional": true, + "peer": true, "requires": { "bindings": "1.5.0", - "nan": "^2.14.0" + "nan": "2.19.0" + }, + "dependencies": { + "nan": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.19.0.tgz", + "integrity": "sha512-nO1xXxfh/RWNxfd/XPfbIfFk5vgLsAxUR9y5O0cHMJu/AW9U95JLXqthYHjEp+8gQ5p96K9jUp8nbVOxCdRbtw==", + "optional": true, + "peer": true + } } }, "lines-and-columns": { @@ -15471,6 +15530,12 @@ "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.5.0.tgz", "integrity": "sha512-r5o/V/ORTA6TmUnyWZR9nCj1klXCO2CEKNRlVuJptZe85QuhFayC7WeMic7ndayT5IRIR0S0xFxFi2ousartlQ==" }, + "pg-cursor": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/pg-cursor/-/pg-cursor-2.11.0.tgz", + "integrity": "sha512-TLCOCtu+rqMarzjUi+/Ffc2DV5ZqO/27y5GqnK9Z3w51rWXMwC8FcO96Uf9/ORo5o+qRXEVJxM9Ts3K2K31MLg==", + "requires": {} + }, "pg-int8": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", @@ -15480,6 +15545,8 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/pg-native/-/pg-native-3.0.1.tgz", "integrity": "sha512-LBVNWkNh0fVx/cienARRP2y22J5OpUsKBe0TpxzAx3arEUUdIs77aLSAHS3scS7SMaqc+OkG40CEu5fN0/cjIw==", + "optional": true, + "peer": true, "requires": { "libpq": "^1.8.10", "pg-types": "^1.12.1", @@ -15489,12 +15556,16 @@ "isarray": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==" + "integrity": 
"sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==", + "optional": true, + "peer": true }, "pg-types": { "version": "1.13.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-1.13.0.tgz", "integrity": "sha512-lfKli0Gkl/+za/+b6lzENajczwZHc7D5kiUCZfgm914jipD2kIOIvEkAhZ8GrW3/TUoP9w8FHjwpPObBye5KQQ==", + "optional": true, + "peer": true, "requires": { "pg-int8": "1.0.1", "postgres-array": "~1.0.0", @@ -15506,12 +15577,16 @@ "postgres-array": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-1.0.3.tgz", - "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==" + "integrity": "sha512-5wClXrAP0+78mcsNX3/ithQ5exKvCyK5lr5NEEEeGwwM6NJdQgzIJBVxLvRW+huFpX92F2QnZ5CcokH0VhK2qQ==", + "optional": true, + "peer": true }, "readable-stream": { "version": "1.0.31", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.31.tgz", "integrity": "sha512-tco/Dwv1f/sgIgN6CWdj/restacPKNskK6yps1981ivH2ZmLYcs5o5rVzL3qaO/cSkhN8hYOMWs7+glzOLSgRg==", + "optional": true, + "peer": true, "requires": { "core-util-is": "~1.0.0", "inherits": "~2.0.1", @@ -15522,7 +15597,9 @@ "string_decoder": { "version": "0.10.31", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==" + "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==", + "optional": true, + "peer": true } } }, @@ -15537,6 +15614,14 @@ "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.5.0.tgz", "integrity": "sha512-muRttij7H8TqRNu/DxrAJQITO4Ac7RmX3Klyr/9mJEOBeIpgnF8f9jAfRz5d3XwQZl5qBjF9gLsUtMPJE0vezQ==" }, + "pg-query-stream": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/pg-query-stream/-/pg-query-stream-4.6.0.tgz", + "integrity": 
"sha512-sg2Hewe6ge6osEY07zGu7Z8djrsQBvyiTy5ZjQffoSatEgnNNVsV3EWDm9Px/8R9oaAL1YnfnP8AXPMmfzujZg==", + "requires": { + "pg-cursor": "^2.11.0" + } + }, "pg-types": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", diff --git a/server/package.json b/server/package.json index c24349856..5390857c2 100644 --- a/server/package.json +++ b/server/package.json @@ -51,7 +51,7 @@ "p3p": "~0.0.2", "pg": "~8.8.0", "pg-connection-string": "~2.5.0", - "pg-native": "~3.0.1", + "pg-query-stream": "^4.6.0", "replacestream": "~4.0.0", "request": "~2.88.2", "request-promise": "~4.2.6", diff --git a/server/src/db/pg-query.ts b/server/src/db/pg-query.ts index 01fdca7af..9e2ef2e1e 100644 --- a/server/src/db/pg-query.ts +++ b/server/src/db/pg-query.ts @@ -1,6 +1,7 @@ import { isFunction, isString, isUndefined } from "underscore"; -import { native as pgnative, Pool } from "pg"; //.native, // native provides ssl (needed for dev laptop to access) http://stackoverflow.com/questions/10279965/authentication-error-when-connecting-to-heroku-postgresql-databa +import { Pool, QueryResult } from "pg"; import { parse as parsePgConnectionString } from "pg-connection-string"; +import QueryStream from "pg-query-stream"; import Config from "../config"; import logger from "../utils/logger"; @@ -53,19 +54,19 @@ const readsPgConnection = Object.assign( // import pgnative // Object is possibly 'null'.ts(2531) // @ts-ignore -const readWritePool = new pgnative.Pool(pgConnection); +const readWritePool = new Pool(pgConnection); // (alias) const pgnative: typeof Pg | null // import pgnative // Object is possibly 'null'.ts(2531) // @ts-ignore -const readPool = new pgnative.Pool(readsPgConnection); +const readPool = new Pool(readsPgConnection); // Same syntax as pg.client.query, but uses connection pool // Also takes care of calling 'done'. 
-function queryImpl(pool: Pool, queryString?: any, ...args: undefined[]) { +function queryImpl(pool: Pool, queryString?: any, ...args: any[]) { // variable arity depending on whether or not query has params (default to []) let params: never[] | undefined; - let callback: ((arg0: any, arg1?: undefined) => void) | undefined; + let callback: ((arg0: any, arg1?: any) => void) | undefined; if (isFunction(args[1])) { params = args[0]; callback = args[1]; @@ -200,6 +201,39 @@ function queryP_metered_readOnly(name: any, queryString: any, params: any) { return queryP_metered_impl(true, ...arguments); } +function stream_queryP_readOnly( + queryString: string, + params: any[], + onRow: (row: any) => void, + onEnd: () => void, + onError: (error: Error) => void +) { + const query = new QueryStream(queryString, params); + + readPool.connect((err, client, done) => { + if (err) { + onError(err); + return; + } + + const stream = client.query(query); + + stream.on("data", (row: QueryResult) => { + onRow(row); + }); + + stream.on("end", () => { + done(); + onEnd(); + }); + + stream.on("error", (error: Error) => { + done(error); + onError(error); + }); + }); +} + export { query, query_readOnly, @@ -208,6 +242,7 @@ export { queryP_metered_readOnly, queryP_readOnly, queryP_readOnly_wRetryIfEmpty, + stream_queryP_readOnly, }; export default { @@ -218,4 +253,5 @@ export default { queryP_metered_readOnly, queryP_readOnly, queryP_readOnly_wRetryIfEmpty, + stream_queryP_readOnly, }; From c3535c9561c75f76a8f8652ce727519d60e8c8fc Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Wed, 11 Sep 2024 14:56:42 -0700 Subject: [PATCH 07/13] Mostly refactoring. This moves the handle_GET_reportExport route into its own file, which necessitated refactoring some other things (zinvite and pca) out of server.ts as well. Chipping away at the monolith. This also converts the votes.csv report to use the streaming query from Postgres, which is mostly a smoke test. 
It seems to work, so next I'll convert it to stream the results incrementally to the HTTP response as well. --- server/src/routes/export.ts | 185 +++++++++++ server/src/server.ts | 612 +----------------------------------- server/src/utils/pca.ts | 317 +++++++++++++++++++ server/src/utils/zinvite.ts | 116 +++++++ 4 files changed, 632 insertions(+), 598 deletions(-) create mode 100644 server/src/routes/export.ts create mode 100644 server/src/utils/pca.ts create mode 100644 server/src/utils/zinvite.ts diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts new file mode 100644 index 000000000..9f1da390e --- /dev/null +++ b/server/src/routes/export.ts @@ -0,0 +1,185 @@ +// Copyright (C) 2012-present, The Authors. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
+ +"use strict"; + +import { + queryP_readOnly as pgQueryP_readOnly, + stream_queryP_readOnly as stream_pgQueryP_readOnly, +} from "../db/pg-query"; +import { getZinvite, getZidForRid } from "../utils/zinvite"; +import { getPca } from "../utils/pca"; +import fail from "../utils/fail"; +import logger from "../utils/logger"; + +type Formatters = Record string>; +const sep = "\n"; + +function formatCSVHeaders(colFns: Formatters) { + return Object.keys(colFns).join(","); +} + +function formatCSVRow(row: object, colFns: Formatters) { + const fns = Object.values(colFns); + let csv = ""; + for (let ii = 0; ii < fns.length; ii += 1) { + if (ii > 0) csv += ","; + csv += fns[ii](row); + } + return csv; +} + +function formatCSV(colFns: Formatters, rows: object[]): string { + let csv = formatCSVHeaders(colFns) + sep; + if (rows.length > 0) { + for (const row of rows) { + csv += formatCSVRow(row, colFns); + csv += sep; + } + } + return csv; +} + +async function loadConversationSummary(zid: number, siteUrl: string) { + const [zinvite, convoRows, commentersRow, pca] = await Promise.all([ + getZinvite(zid), + pgQueryP_readOnly( + `SELECT topic, description FROM conversations WHERE zid = $1`, + [zid] + ), + pgQueryP_readOnly( + `SELECT COUNT(DISTINCT pid) FROM comments WHERE zid = $1`, + [zid] + ), + getPca(zid), + ]); + if (!zinvite || !convoRows || !commentersRow || !pca) { + throw new Error("polis_error_data_unknown_report"); + } + + const convo = (convoRows as { topic: string; description: string }[])[0]; + const commenters = (commentersRow as { count: number }[])[0].count; + + type PcaData = { + "in-conv": number[]; + "user-vote-counts": Record; + "group-clusters": Record; + "n-cmts": number; + }; + const data = pca.asPOJO as PcaData; + + const escapeQuotes = (s: string) => s.replace(/"/g, '""'); + return [ + ["topic", `"${escapeQuotes(convo.topic)}"`], + ["url", `${siteUrl}/${zinvite}`], + ["voters", Object.keys(data["user-vote-counts"]).length], + ["voters-in-conv", 
data["in-conv"].length], + ["commenters", commenters], + ["comments", data["n-cmts"]], + ["groups", Object.keys(data["group-clusters"]).length], + ["conversation-description", `"${escapeQuotes(convo.description)}"`], + ].map((row) => row.join(",")); +} + +const loadCommentSummary = (zid: number) => + pgQueryP_readOnly( + `SELECT + created, + tid, + pid, + COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, + COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, + mod, + txt + FROM comments + WHERE zid = $1`, + [zid] + ); + +const formatDatetime = (timestamp: string) => + new Date(parseInt(timestamp)).toString(); + +export async function handle_GET_reportExport( + req: { + p: { rid: string; report_type: string }; + headers: { host: string; "x-forwarded-proto": string }; + }, + res: { + setHeader: (key: string, value: string) => void; + send: (data: string) => void; + write: (data: string) => void; + end: () => void; + } +) { + const { rid, report_type } = req.p; + try { + const zid = await getZidForRid(rid); + if (!zid) { + fail(res, 404, "polis_error_data_unknown_report"); + return; + } + + switch (report_type) { + case "summary.csv": + const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; + res.setHeader("content-type", "text/csv"); + res.send((await loadConversationSummary(zid, siteUrl)).join(sep)); + break; + + case "comments.csv": + const rows = (await loadCommentSummary(zid)) as object[] | undefined; + if (rows) { + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => String(Math.floor(row.created / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, 
+ rows + ) + ); + } else fail(res, 500, "polis_err_data_export"); + break; + + case "votes.csv": + const formatters: Formatters = { + timestamp: (row) => String(Math.floor(row.timestamp / 1000)), + datetime: (row) => formatDatetime(row.timestamp), + "comment-id": (row) => String(row.tid), + "voter-id": (row) => String(row.pid), + vote: (row) => String(row.vote), + }; + res.setHeader("Content-Type", "text/csv"); + res.write(formatCSVHeaders(formatters) + sep); + + stream_pgQueryP_readOnly( + "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", + [zid], + (row) => res.write(formatCSVRow(row, formatters) + sep), + () => res.end(), + (error) => { + // Handle any errors + logger.error("polis_err_report_votes_csv", error); + fail(res, 500, "polis_err_data_export", error); + } + ); + break; + + default: + fail(res, 404, "polis_error_data_unknown_report"); + break; + } + } catch (err) { + const msg = + err instanceof Error && err.message && err.message.startsWith("polis_") + ? 
err.message + : "polis_err_data_export"; + fail(res, 500, msg, err); + } +} diff --git a/server/src/server.ts b/server/src/server.ts index 21cd8edbe..d86319ad9 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -29,7 +29,6 @@ import responseTime from "response-time"; import request from "request-promise"; // includes Request, but adds promise methods import LruCache from "lru-cache"; import timeout from "connect-timeout"; -import zlib from "zlib"; import _ from "underscore"; import pg from "pg"; import { encode } from "html-entities"; @@ -37,10 +36,23 @@ import { encode } from "html-entities"; import { METRICS_IN_RAM, addInRamMetric, MPromise } from "./utils/metered"; import CreateUser from "./auth/create-user"; import Password from "./auth/password"; -import dbPgQuery from "./db/pg-query"; +import dbPgQuery, { + query as pgQuery, + query_readOnly as pgQuery_readOnly, + queryP as pgQueryP, + queryP_metered as pgQueryP_metered, + queryP_metered_readOnly as pgQueryP_metered_readOnly, + queryP_readOnly as pgQueryP_readOnly, + stream_queryP_readOnly as stream_pgQueryP_readOnly, + queryP_readOnly_wRetryIfEmpty as pgQueryP_readOnly_wRetryIfEmpty, +} from "./db/pg-query"; import Config from "./config"; import fail from "./utils/fail"; +import { PcaCacheItem, getPca, fetchAndCacheLatestPcaData } from "./utils/pca"; +import { getZinvite, getZinvites, getZidForRid } from "./utils/zinvite"; + +import { handle_GET_reportExport } from "./routes/export"; import { Body, @@ -69,20 +81,7 @@ import { AWS.config.update({ region: Config.awsRegion }); const devMode = Config.isDevMode; const s3Client = new AWS.S3({ apiVersion: "2006-03-01" }); -// Property 'Client' does not exist on type '{ query: (...args: any[]) => void; query_readOnly: -// (...args: any[]) => void; queryP: (...args: any[]) => Promise; queryP_metered: -// (name: any, queryString: any, params: any) => any; queryP_metered_readOnly: -// (name: any, queryString: any, params: any) => any; queryP_readOnly: -// 
(...args: any[]) => Promise <...>; ...'.ts(2339) -// @ts-ignore const escapeLiteral = pg.Client.prototype.escapeLiteral; -const pgQuery = dbPgQuery.query; -const pgQuery_readOnly = dbPgQuery.query_readOnly; -const pgQueryP = dbPgQuery.queryP; -const pgQueryP_metered = dbPgQuery.queryP_metered; -const pgQueryP_metered_readOnly = dbPgQuery.queryP_metered_readOnly; -const pgQueryP_readOnly = dbPgQuery.queryP_readOnly; -const pgQueryP_readOnly_wRetryIfEmpty = dbPgQuery.queryP_readOnly_wRetryIfEmpty; const doSendVerification = CreateUser.doSendVerification; const generateAndRegisterZinvite = CreateUser.generateAndRegisterZinvite; const generateToken = Password.generateToken; @@ -1298,82 +1297,6 @@ function initializePolisHelpers() { res.status(200).json({}); } - type PcaCacheItem = { - asPOJO: any; - consensus: { agree?: any; disagree?: any }; - repness: { [x: string]: any }; - asJSON: string; - asBufferOfGzippedJson: any; - expiration: number; - }; - let pcaCacheSize = Config.cacheMathResults ? 300 : 1; - let pcaCache = new LruCache({ - max: pcaCacheSize, - }); - - let lastPrefetchedMathTick = -1; - - // this scheme might not last forever. For now, there are only a couple of MB worth of conversation pca data. - function fetchAndCacheLatestPcaData() { - let lastPrefetchPollStartTime = Date.now(); - - function waitTime() { - let timePassed = Date.now() - lastPrefetchPollStartTime; - return Math.max(0, 2500 - timePassed); - } - // cursor.sort([["math_tick", "asc"]]); - pgQueryP_readOnly( - "select * from math_main where caching_tick > ($1) order by caching_tick limit 10;", - [lastPrefetchedMathTick] - ) - // Argument of type '(rows: any[]) => void' is not assignable to parameter of type '(value: unknown) => void | PromiseLike'. - // Types of parameters 'rows' and 'value' are incompatible. 
- // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - .then((rows: any[]) => { - if (!rows || !rows.length) { - // call again - logger.info("mathpoll done"); - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - return; - } - - let results = rows.map( - (row: { data: any; math_tick: any; caching_tick: any }) => { - let item = row.data; - - if (row.math_tick) { - item.math_tick = Number(row.math_tick); - } - if (row.caching_tick) { - item.caching_tick = Number(row.caching_tick); - } - - logger.info("mathpoll updating", { - caching_tick: item.caching_tick, - zid: item.zid, - }); - - // let prev = pcaCache.get(item.zid); - if (item.caching_tick > lastPrefetchedMathTick) { - lastPrefetchedMathTick = item.caching_tick; - } - - processMathObject(item); - - return updatePcaCache(item.zid, item); - } - ); - Promise.all(results).then((a: any) => { - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - }); - }) - .catch((err: any) => { - logger.error("mathpoll error", err); - setTimeout(fetchAndCacheLatestPcaData, waitTime()); - }); - } - // don't start immediately, let other things load first. // setTimeout(fetchAndCacheLatestPcaData, 5000); fetchAndCacheLatestPcaData; // TODO_DELETE @@ -1446,240 +1369,6 @@ function initializePolisHelpers() { } */ - function processMathObject(o: { [x: string]: any }) { - function remapSubgroupStuff(g: { val: any[] }) { - if (_.isArray(g.val)) { - g.val = g.val.map((x: { id: number }) => { - return { id: Number(x.id), val: x }; - }); - } else { - // Argument of type '(id: number) => { id: number; val: any; }' - // is not assignable to parameter of type '(value: string, index: number, array: string[]) => { id: number; val: any; }'. - // Types of parameters 'id' and 'value' are incompatible. 
- // Type 'string' is not assignable to type 'number'.ts(2345) - // @ts-ignore - g.val = _.keys(g.val).map((id: number) => { - return { id: Number(id), val: g.val[id] }; - }); - } - return g; - } - - // Normalize so everything is arrays of objects (group-clusters is already in this format, but needs to have the val: subobject style too). - - if (_.isArray(o["group-clusters"])) { - // NOTE this is different since group-clusters is already an array. - o["group-clusters"] = o["group-clusters"].map((g: { id: any }) => { - return { id: Number(g.id), val: g }; - }); - } - - if (!_.isArray(o["repness"])) { - o["repness"] = _.keys(o["repness"]).map((gid: string | number) => { - return { id: Number(gid), val: o["repness"][gid] }; - }); - } - if (!_.isArray(o["group-votes"])) { - o["group-votes"] = _.keys(o["group-votes"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["group-votes"][gid] }; - } - ); - } - if (!_.isArray(o["subgroup-repness"])) { - o["subgroup-repness"] = _.keys(o["subgroup-repness"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-repness"][gid] }; - } - ); - o["subgroup-repness"].map(remapSubgroupStuff); - } - if (!_.isArray(o["subgroup-votes"])) { - o["subgroup-votes"] = _.keys(o["subgroup-votes"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-votes"][gid] }; - } - ); - o["subgroup-votes"].map(remapSubgroupStuff); - } - if (!_.isArray(o["subgroup-clusters"])) { - o["subgroup-clusters"] = _.keys(o["subgroup-clusters"]).map( - (gid: string | number) => { - return { id: Number(gid), val: o["subgroup-clusters"][gid] }; - } - ); - o["subgroup-clusters"].map(remapSubgroupStuff); - } - - // Edge case where there are two groups and one is huge, split the large group. - // Once we have a better story for h-clust in the participation view, then we can just show the h-clust instead. 
- // var groupVotes = o['group-votes']; - // if (_.keys(groupVotes).length === 2 && o['subgroup-votes'] && o['subgroup-clusters'] && o['subgroup-repness']) { - // var s0 = groupVotes[0].val['n-members']; - // var s1 = groupVotes[1].val['n-members']; - // const scaleRatio = 1.1; - // if (s1 * scaleRatio < s0) { - // o = splitTopLevelGroup(o, groupVotes[0].id); - // } else if (s0 * scaleRatio < s1) { - // o = splitTopLevelGroup(o, groupVotes[1].id); - // } - // } - - // // Gaps in the gids are not what we want to show users, and they make client development difficult. - // // So this guarantees that the gids are contiguous. TODO look into Darwin. - // o = packGids(o); - - // Un-normalize to maintain API consistency. - // This could removed in a future API version. - function toObj(a: string | any[]) { - let obj = {}; - if (!a) { - return obj; - } - for (let i = 0; i < a.length; i++) { - // Element implicitly has an 'any' type - // because expression of type 'any' can't be used to index type '{ } '.ts(7053) - // @ts-ignore - obj[a[i].id] = a[i].val; - // Element implicitly has an 'any' type - // because expression of type 'any' can't be used to index type '{ } '.ts(7053) - // @ts-ignore - obj[a[i].id].id = a[i].id; - } - return obj; - } - function toArray(a: any[]) { - if (!a) { - return []; - } - return a.map((g: { id: any; val: any }) => { - let id = g.id; - g = g.val; - g.id = id; - return g; - }); - } - o["repness"] = toObj(o["repness"]); - o["group-votes"] = toObj(o["group-votes"]); - o["group-clusters"] = toArray(o["group-clusters"]); - - delete o["subgroup-repness"]; - delete o["subgroup-votes"]; - delete o["subgroup-clusters"]; - return o; - } - - function getPca( - zid?: any, - math_tick?: number - ): Promise { - let cached = pcaCache.get(zid); - // Object is of type 'unknown'.ts(2571) - // @ts-ignore - if (cached && cached.expiration < Date.now()) { - cached = undefined; - } - // Object is of type 'unknown'.ts(2571) - // @ts-ignore - let cachedPOJO = cached 
&& cached.asPOJO; - if (cachedPOJO) { - if (cachedPOJO.math_tick <= (math_tick || 0)) { - logger.info("math was cached but not new", { - zid, - cached_math_tick: cachedPOJO.math_tick, - query_math_tick: math_tick, - }); - return Promise.resolve(undefined); - } else { - logger.info("math from cache", { zid, math_tick }); - return Promise.resolve(cached); - } - } - - logger.info("mathpoll cache miss", { zid, math_tick }); - - // NOTE: not caching results from this query for now, think about this later. - // not caching these means that conversations without new votes might not be cached. (closed conversations may be slower to load) - // It's probably not difficult to cache, but keeping things simple for now, and only caching things that come down with the poll. - - let queryStart = Date.now(); - - return pgQueryP_readOnly( - "select * from math_main where zid = ($1) and math_env = ($2);", - [zid, Config.mathEnv] - // Argument of type '(rows: string | any[]) => Promise | null' is not assignable to parameter of type '(value: unknown) => any'. - // Types of parameters 'rows' and 'value' are incompatible. - // Type 'unknown' is not assignable to type 'string | any[]'. 
- // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - ).then((rows: string | any[]) => { - let queryEnd = Date.now(); - let queryDuration = queryEnd - queryStart; - addInRamMetric("pcaGetQuery", queryDuration); - - if (!rows || !rows.length) { - logger.info( - "mathpoll related; after cache miss, unable to find data for", - { - zid, - math_tick, - math_env: Config.mathEnv, - } - ); - return undefined; - } - let item = rows[0].data; - - if (rows[0].math_tick) { - item.math_tick = Number(rows[0].math_tick); - } - - if (item.math_tick <= (math_tick || 0)) { - logger.info("after cache miss, unable to find newer item", { - zid, - math_tick, - }); - return undefined; - } - logger.info("after cache miss, found item, adding to cache", { - zid, - math_tick, - }); - - processMathObject(item); - - return updatePcaCache(zid, item); - }); - } - - function updatePcaCache(zid: any, item: { zid: any }): Promise { - return new Promise(function ( - resolve: (arg0: PcaCacheItem) => void, - reject: (arg0: any) => any - ) { - delete item.zid; // don't leak zid - let asJSON = JSON.stringify(item); - let buf = Buffer.from(asJSON, "utf-8"); - zlib.gzip(buf, function (err: any, jsondGzipdPcaBuffer: any) { - if (err) { - return reject(err); - } - - let o: PcaCacheItem = { - asPOJO: item as any, - asJSON: asJSON, - asBufferOfGzippedJson: jsondGzipdPcaBuffer, - expiration: Date.now() + 3000, - consensus: { agree: undefined, disagree: undefined }, - repness: {}, - }; - // save in LRU cache, but don't update the lastPrefetchedMathTick - pcaCache.set(zid, o); - resolve(o); - }); - }); - } - function redirectIfHasZidButNoConversationId( req: { body: { zid: any; conversation_id: any }; headers?: any }, res: { @@ -1813,22 +1502,6 @@ function initializePolisHelpers() { }); } - function getZidForRid(rid: any) { - return pgQueryP("select zid from reports where rid = ($1);", [rid]).then( - // Argument of type '(row: string | any[]) => any' is not assignable to parameter of 
type '(value: unknown) => any'. - // Types of parameters 'row' and 'value' are incompatible. - // Type 'unknown' is not assignable to type 'string | any[]'. - // Type 'unknown' is not assignable to type 'any[]'.ts(2345) - // @ts-ignore - (row: string | any[]) => { - if (!row || !row.length) { - return null; - } - return row[0].zid; - } - ); - } - function handle_POST_math_update( req: { p: { zid: any; uid?: any; math_update_type: any } }, res: { @@ -2073,173 +1746,6 @@ function initializePolisHelpers() { // return res.end(); } - async function handle_GET_reportExport( - req: { - p: { rid: string; report_type: string }; - headers: { host: string; "x-forwarded-proto": string }; - }, - res: { - send: (data: string) => void; - setHeader: (key: string, value: string) => void; - } - ) { - function formatCSV( - colFns: Record string>, - rows: object[] - ): string { - const fns = Object.values(colFns); - const sep = "\n"; - let csv = Object.keys(colFns).join(",") + sep; - if (rows.length > 0) { - for (const row of rows) { - // we append to a single string here (instead of creating an array of strings and joining - // them) to reduce the amount of garbage created; we may have millions of rows, I wish we - // could stream directly to the response... 
- for (let ii = 0; ii < fns.length; ii += 1) { - if (ii > 0) csv += ","; - csv += fns[ii](row); - } - csv += sep; - } - } - return csv; - } - - async function loadConversationSummary(zid: number) { - const [zinvite, convoRows, commentersRow, pca] = await Promise.all([ - getZinvite(zid), - pgQueryP_readOnly( - `SELECT topic, description FROM conversations WHERE zid = $1`, - [zid] - ), - pgQueryP_readOnly( - `SELECT COUNT(DISTINCT pid) FROM comments WHERE zid = $1`, - [zid] - ), - getPca(zid), - ]); - if (!zinvite || !convoRows || !commentersRow || !pca) { - throw new Error("polis_error_data_unknown_report"); - } - - const convo = (convoRows as { topic: string; description: string }[])[0]; - const commenters = (commentersRow as { count: number }[])[0].count; - - type PcaData = { - "in-conv": number[]; - "user-vote-counts": Record; - "group-clusters": Record; - "n-cmts": number; - }; - const data = (pca.asPOJO as unknown) as PcaData; - const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; - - const escapeQuotes = (s: string) => s.replace(/"/g, '""'); - return [ - ["topic", `"${escapeQuotes(convo.topic)}"`], - ["url", `${siteUrl}/${zinvite}`], - ["voters", Object.keys(data["user-vote-counts"]).length], - ["voters-in-conv", data["in-conv"].length], - ["commenters", commenters], - ["comments", data["n-cmts"]], - ["groups", Object.keys(data["group-clusters"]).length], - ["conversation-description", `"${escapeQuotes(convo.description)}"`], - ].map((row) => row.join(",")); - } - - const loadCommentSummary = (zid: number) => - pgQueryP_readOnly( - `SELECT - created, - tid, - pid, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, - mod, - txt - FROM comments - WHERE zid = $1`, - [zid] - ); - - const loadVotes = (zid: number) => - pgQueryP_readOnly( - `SELECT created as timestamp, tid, pid, vote FROM 
votes WHERE zid = $1 order by tid, pid`, - [zid] - ); - - const formatDatetime = (timestamp: string) => - new Date(parseInt(timestamp)).toString(); - - const { rid, report_type } = req.p; - try { - const zid = await getZidForRid(rid); - if (!zid) { - fail(res, 404, "polis_error_data_unknown_report"); - return; - } - - switch (report_type) { - case "summary.csv": - res.setHeader("content-type", "text/csv"); - res.send((await loadConversationSummary(zid)).join("\n")); - break; - - case "comments.csv": - const rows = (await loadCommentSummary(zid)) as object[] | undefined; - console.log(rows); - if (rows) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), - }, - rows - ) - ); - } else fail(res, 500, "polis_err_data_export"); - break; - - case "votes.csv": - const votes = (await loadVotes(zid)) as object[] | undefined; - if (votes) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.timestamp / 1000)), - datetime: (row) => formatDatetime(row.timestamp), - "comment-id": (row) => String(row.tid), - "voter-id": (row) => String(row.pid), - vote: (row) => String(row.vote), - }, - votes - ) - ); - } else fail(res, 500, "polis_err_data_export"); - break; - - default: - fail(res, 404, "polis_error_data_unknown_report"); - break; - } - } catch (err) { - const msg = - err instanceof Error && err.message && err.message.startsWith("polis_") - ? 
err.message - : "polis_err_data_export"; - fail(res, 500, msg, err); - } - } - function getBidIndexToPidMapping(zid: number, math_tick: number) { math_tick = math_tick || -1; return pgQueryP_readOnly( @@ -2990,96 +2496,6 @@ Feel free to reply to this email if you need help.`; ); } - let zidToConversationIdCache = new LruCache({ - max: 1000, - }); - - function getZinvite(zid: any, dontUseCache?: boolean) { - let cachedConversationId = zidToConversationIdCache.get(zid); - if (!dontUseCache && cachedConversationId) { - return Promise.resolve(cachedConversationId); - } - return pgQueryP_metered( - "getZinvite", - "select * from zinvites where zid = ($1);", - [zid] - ).then(function (rows: { zinvite: any }[]) { - let conversation_id = (rows && rows[0] && rows[0].zinvite) || void 0; - if (conversation_id) { - zidToConversationIdCache.set(zid, conversation_id); - } - return conversation_id; - }); - } - - function getZinvites(zids: any[]) { - if (!zids.length) { - return Promise.resolve(zids); - } - zids = _.map(zids, function (zid: any) { - return Number(zid); // just in case - }); - zids = _.uniq(zids); - - let uncachedZids = zids.filter(function (zid: any) { - return !zidToConversationIdCache.get(zid); - }); - let zidsWithCachedConversationIds = zids - .filter(function (zid: any) { - return !!zidToConversationIdCache.get(zid); - }) - .map(function (zid: any) { - return { - zid: zid, - zinvite: zidToConversationIdCache.get(zid), - }; - }); - - function makeZidToConversationIdMap(arrays: any[]) { - let zid2conversation_id = {}; - arrays.forEach(function (a: any[]) { - a.forEach(function (o: { zid: string | number; zinvite: any }) { - // (property) zid: string | number - // Element implicitly has an 'any' type because expression of type 'string | number' can't be used to index type '{}'. 
- // No index signature with a parameter of type 'string' was found onpe '{}'.ts(7053) - // @ts-ignore - zid2conversation_id[o.zid] = o.zinvite; - }); - }); - return zid2conversation_id; - } - - // 'new' expression, whose target lacks a construct signature, implicitly has an 'any' type.ts(7009) - // @ts-ignore - return new MPromise( - "getZinvites", - function (resolve: (arg0: {}) => void, reject: (arg0: any) => void) { - if (uncachedZids.length === 0) { - resolve(makeZidToConversationIdMap([zidsWithCachedConversationIds])); - return; - } - pgQuery_readOnly( - "select * from zinvites where zid in (" + - uncachedZids.join(",") + - ");", - [], - function (err: any, result: { rows: any }) { - if (err) { - reject(err); - } else { - resolve( - makeZidToConversationIdMap([ - result.rows, - zidsWithCachedConversationIds, - ]) - ); - } - } - ); - } - ); - } - function addConversationId( o: { zid?: any; conversation_id?: any }, dontUseCache: any diff --git a/server/src/utils/pca.ts b/server/src/utils/pca.ts new file mode 100644 index 000000000..ee7548c43 --- /dev/null +++ b/server/src/utils/pca.ts @@ -0,0 +1,317 @@ +// Copyright (C) 2012-present, The Authors. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
+ +"use strict"; + +import zlib from "zlib"; +import _ from "underscore"; +import LruCache from "lru-cache"; +import { queryP_readOnly as pgQueryP_readOnly } from "../db/pg-query"; +import Config from "../config"; +import logger from "./logger"; +import { addInRamMetric } from "./metered"; + +export type PcaCacheItem = { + asPOJO: any; + consensus: { agree?: any; disagree?: any }; + repness: { [x: string]: any }; + asJSON: string; + asBufferOfGzippedJson: any; + expiration: number; +}; +let pcaCacheSize = Config.cacheMathResults ? 300 : 1; +let pcaCache = new LruCache({ + max: pcaCacheSize, +}); + +let lastPrefetchedMathTick = -1; + +// this scheme might not last forever. For now, there are only a couple of MB worth of conversation pca data. +export function fetchAndCacheLatestPcaData() { + let lastPrefetchPollStartTime = Date.now(); + + function waitTime() { + let timePassed = Date.now() - lastPrefetchPollStartTime; + return Math.max(0, 2500 - timePassed); + } + // cursor.sort([["math_tick", "asc"]]); + pgQueryP_readOnly( + "select * from math_main where caching_tick > ($1) order by caching_tick limit 10;", + [lastPrefetchedMathTick] + ) + // Argument of type '(rows: any[]) => void' is not assignable to parameter of type '(value: unknown) => void | PromiseLike'. + // Types of parameters 'rows' and 'value' are incompatible. 
+ // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + .then((rows: any[]) => { + if (!rows || !rows.length) { + // call again + logger.info("mathpoll done"); + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + return; + } + + let results = rows.map( + (row: { data: any; math_tick: any; caching_tick: any }) => { + let item = row.data; + + if (row.math_tick) { + item.math_tick = Number(row.math_tick); + } + if (row.caching_tick) { + item.caching_tick = Number(row.caching_tick); + } + + logger.info("mathpoll updating", { + caching_tick: item.caching_tick, + zid: item.zid, + }); + + // let prev = pcaCache.get(item.zid); + if (item.caching_tick > lastPrefetchedMathTick) { + lastPrefetchedMathTick = item.caching_tick; + } + + processMathObject(item); + + return updatePcaCache(item.zid, item); + } + ); + Promise.all(results).then((a: any) => { + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + }); + }) + .catch((err: any) => { + logger.error("mathpoll error", err); + setTimeout(fetchAndCacheLatestPcaData, waitTime()); + }); +} + +export function getPca( + zid?: any, + math_tick?: number +): Promise { + let cached = pcaCache.get(zid); + // Object is of type 'unknown'.ts(2571) + // @ts-ignore + if (cached && cached.expiration < Date.now()) { + cached = undefined; + } + // Object is of type 'unknown'.ts(2571) + // @ts-ignore + let cachedPOJO = cached && cached.asPOJO; + if (cachedPOJO) { + if (cachedPOJO.math_tick <= (math_tick || 0)) { + logger.info("math was cached but not new", { + zid, + cached_math_tick: cachedPOJO.math_tick, + query_math_tick: math_tick, + }); + return Promise.resolve(undefined); + } else { + logger.info("math from cache", { zid, math_tick }); + return Promise.resolve(cached); + } + } + + logger.info("mathpoll cache miss", { zid, math_tick }); + + // NOTE: not caching results from this query for now, think about this later. + // not caching these means that conversations without new votes might not be cached. 
(closed conversations may be slower to load) + // It's probably not difficult to cache, but keeping things simple for now, and only caching things that come down with the poll. + + let queryStart = Date.now(); + + return pgQueryP_readOnly( + "select * from math_main where zid = ($1) and math_env = ($2);", + [zid, Config.mathEnv] + // Argument of type '(rows: string | any[]) => Promise | null' is not assignable to parameter of type '(value: unknown) => any'. + // Types of parameters 'rows' and 'value' are incompatible. + // Type 'unknown' is not assignable to type 'string | any[]'. + // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + ).then((rows: string | any[]) => { + let queryEnd = Date.now(); + let queryDuration = queryEnd - queryStart; + addInRamMetric("pcaGetQuery", queryDuration); + + if (!rows || !rows.length) { + logger.info( + "mathpoll related; after cache miss, unable to find data for", + { + zid, + math_tick, + math_env: Config.mathEnv, + } + ); + return undefined; + } + let item = rows[0].data; + + if (rows[0].math_tick) { + item.math_tick = Number(rows[0].math_tick); + } + + if (item.math_tick <= (math_tick || 0)) { + logger.info("after cache miss, unable to find newer item", { + zid, + math_tick, + }); + return undefined; + } + logger.info("after cache miss, found item, adding to cache", { + zid, + math_tick, + }); + + processMathObject(item); + + return updatePcaCache(zid, item); + }); +} + +function updatePcaCache(zid: any, item: { zid: any }): Promise { + return new Promise(function ( + resolve: (arg0: PcaCacheItem) => void, + reject: (arg0: any) => any + ) { + delete item.zid; // don't leak zid + let asJSON = JSON.stringify(item); + let buf = Buffer.from(asJSON, "utf-8"); + zlib.gzip(buf, function (err: any, jsondGzipdPcaBuffer: any) { + if (err) { + return reject(err); + } + + let o = { + asPOJO: item, + asJSON: asJSON, + asBufferOfGzippedJson: jsondGzipdPcaBuffer, + expiration: Date.now() + 3000, + } as PcaCacheItem; 
+ // save in LRU cache, but don't update the lastPrefetchedMathTick + pcaCache.set(zid, o); + resolve(o); + }); + }); +} + +function processMathObject(o: { [x: string]: any }) { + function remapSubgroupStuff(g: { val: any[] }) { + if (_.isArray(g.val)) { + g.val = g.val.map((x: { id: number }) => { + return { id: Number(x.id), val: x }; + }); + } else { + // Argument of type '(id: number) => { id: number; val: any; }' + // is not assignable to parameter of type '(value: string, index: number, array: string[]) => { id: number; val: any; }'. + // Types of parameters 'id' and 'value' are incompatible. + // Type 'string' is not assignable to type 'number'.ts(2345) + // @ts-ignore + g.val = _.keys(g.val).map((id: number) => { + return { id: Number(id), val: g.val[id] }; + }); + } + return g; + } + + // Normalize so everything is arrays of objects (group-clusters is already in this format, but needs to have the val: subobject style too). + + if (_.isArray(o["group-clusters"])) { + // NOTE this is different since group-clusters is already an array. 
+ o["group-clusters"] = o["group-clusters"].map((g: { id: any }) => { + return { id: Number(g.id), val: g }; + }); + } + + if (!_.isArray(o["repness"])) { + o["repness"] = _.keys(o["repness"]).map((gid: string | number) => { + return { id: Number(gid), val: o["repness"][gid] }; + }); + } + if (!_.isArray(o["group-votes"])) { + o["group-votes"] = _.keys(o["group-votes"]).map((gid: string | number) => { + return { id: Number(gid), val: o["group-votes"][gid] }; + }); + } + if (!_.isArray(o["subgroup-repness"])) { + o["subgroup-repness"] = _.keys(o["subgroup-repness"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-repness"][gid] }; + } + ); + o["subgroup-repness"].map(remapSubgroupStuff); + } + if (!_.isArray(o["subgroup-votes"])) { + o["subgroup-votes"] = _.keys(o["subgroup-votes"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-votes"][gid] }; + } + ); + o["subgroup-votes"].map(remapSubgroupStuff); + } + if (!_.isArray(o["subgroup-clusters"])) { + o["subgroup-clusters"] = _.keys(o["subgroup-clusters"]).map( + (gid: string | number) => { + return { id: Number(gid), val: o["subgroup-clusters"][gid] }; + } + ); + o["subgroup-clusters"].map(remapSubgroupStuff); + } + + // Edge case where there are two groups and one is huge, split the large group. + // Once we have a better story for h-clust in the participation view, then we can just show the h-clust instead. 
+ // var groupVotes = o['group-votes']; + // if (_.keys(groupVotes).length === 2 && o['subgroup-votes'] && o['subgroup-clusters'] && o['subgroup-repness']) { + // var s0 = groupVotes[0].val['n-members']; + // var s1 = groupVotes[1].val['n-members']; + // const scaleRatio = 1.1; + // if (s1 * scaleRatio < s0) { + // o = splitTopLevelGroup(o, groupVotes[0].id); + // } else if (s0 * scaleRatio < s1) { + // o = splitTopLevelGroup(o, groupVotes[1].id); + // } + // } + + // // Gaps in the gids are not what we want to show users, and they make client development difficult. + // // So this guarantees that the gids are contiguous. TODO look into Darwin. + // o = packGids(o); + + // Un-normalize to maintain API consistency. + // This could removed in a future API version. + function toObj(a: string | any[]) { + let obj = {}; + if (!a) { + return obj; + } + for (let i = 0; i < a.length; i++) { + // Element implicitly has an 'any' type + // because expression of type 'any' can't be used to index type '{ } '.ts(7053) + // @ts-ignore + obj[a[i].id] = a[i].val; + // Element implicitly has an 'any' type + // because expression of type 'any' can't be used to index type '{ } '.ts(7053) + // @ts-ignore + obj[a[i].id].id = a[i].id; + } + return obj; + } + function toArray(a: any[]) { + if (!a) { + return []; + } + return a.map((g: { id: any; val: any }) => { + let id = g.id; + g = g.val; + g.id = id; + return g; + }); + } + o["repness"] = toObj(o["repness"]); + o["group-votes"] = toObj(o["group-votes"]); + o["group-clusters"] = toArray(o["group-clusters"]); + + delete o["subgroup-repness"]; + delete o["subgroup-votes"]; + delete o["subgroup-clusters"]; + return o; +} diff --git a/server/src/utils/zinvite.ts b/server/src/utils/zinvite.ts new file mode 100644 index 000000000..8985d0515 --- /dev/null +++ b/server/src/utils/zinvite.ts @@ -0,0 +1,116 @@ +// Copyright (C) 2012-present, The Authors. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License, version 3, as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . + +"use strict"; + +import LruCache from "lru-cache"; +import _ from "underscore"; +import { + queryP as pgQueryP, + query_readOnly as pgQuery_readOnly, + queryP_metered as pgQueryP_metered, +} from "../db/pg-query"; +import { MPromise } from "./metered"; + +let zidToConversationIdCache = new LruCache({ + max: 1000, +}); + +export function getZinvite(zid: any, dontUseCache?: boolean) { + let cachedConversationId = zidToConversationIdCache.get(zid); + if (!dontUseCache && cachedConversationId) { + return Promise.resolve(cachedConversationId); + } + return pgQueryP_metered( + "getZinvite", + "select * from zinvites where zid = ($1);", + [zid] + ).then(function (rows: { zinvite: any }[]) { + let conversation_id = (rows && rows[0] && rows[0].zinvite) || void 0; + if (conversation_id) { + zidToConversationIdCache.set(zid, conversation_id); + } + return conversation_id; + }); +} + +export function getZinvites(zids: any[]) { + if (!zids.length) { + return Promise.resolve(zids); + } + zids = _.map(zids, function (zid: any) { + return Number(zid); // just in case + }); + zids = _.uniq(zids); + + let uncachedZids = zids.filter(function (zid: any) { + return !zidToConversationIdCache.get(zid); + }); + let zidsWithCachedConversationIds = zids + .filter(function (zid: any) { + return !!zidToConversationIdCache.get(zid); + }) + .map(function (zid: any) { + return { + zid: zid, + zinvite: zidToConversationIdCache.get(zid), + }; + }); + + function 
makeZidToConversationIdMap(arrays: any[]) { + let zid2conversation_id = {}; + arrays.forEach(function (a: any[]) { + a.forEach(function (o: { zid: string | number; zinvite: any }) { + // (property) zid: string | number + // Element implicitly has an 'any' type because expression of type 'string | number' can't be used to index type '{}'. + // No index signature with a parameter of type 'string' was found onpe '{}'.ts(7053) + // @ts-ignore + zid2conversation_id[o.zid] = o.zinvite; + }); + }); + return zid2conversation_id; + } + + // 'new' expression, whose target lacks a construct signature, implicitly has an 'any' type.ts(7009) + // @ts-ignore + return new MPromise( + "getZinvites", + function (resolve: (arg0: {}) => void, reject: (arg0: any) => void) { + if (uncachedZids.length === 0) { + resolve(makeZidToConversationIdMap([zidsWithCachedConversationIds])); + return; + } + pgQuery_readOnly( + "select * from zinvites where zid in (" + uncachedZids.join(",") + ");", + [], + function (err: any, result: { rows: any }) { + if (err) { + reject(err); + } else { + resolve( + makeZidToConversationIdMap([ + result.rows, + zidsWithCachedConversationIds, + ]) + ); + } + } + ); + } + ); +} + +export function getZidForRid(rid: any) { + return pgQueryP("select zid from reports where rid = ($1);", [rid]).then( + // Argument of type '(row: string | any[]) => any' is not assignable to parameter of type '(value: unknown) => any'. + // Types of parameters 'row' and 'value' are incompatible. + // Type 'unknown' is not assignable to type 'string | any[]'. + // Type 'unknown' is not assignable to type 'any[]'.ts(2345) + // @ts-ignore + (row: string | any[]) => { + if (!row || !row.length) { + return null; + } + return row[0].zid; + } + ); +} From 0c8160372f27363deb558cda962050776a4a5728 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Sat, 14 Sep 2024 12:14:23 -0700 Subject: [PATCH 08/13] Split each report into separate function. 
--- server/src/routes/export.ts | 123 +++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 52 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index 9f1da390e..b0ad0149e 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -80,8 +80,28 @@ async function loadConversationSummary(zid: number, siteUrl: string) { ].map((row) => row.join(",")); } -const loadCommentSummary = (zid: number) => - pgQueryP_readOnly( +const formatDatetime = (timestamp: string) => + new Date(parseInt(timestamp)).toString(); + +type Response = { + setHeader: (key: string, value: string) => void; + send: (data: string) => void; + write: (data: string) => void; + end: () => void; +}; + +async function sendConversationSummary( + zid: number, + siteUrl: string, + res: Response +) { + const rows = await loadConversationSummary(zid, siteUrl); + res.setHeader("content-type", "text/csv"); + res.send(rows.join(sep)); +} + +async function sendCommentSummary(zid: number, res: Response) { + const rows = (await pgQueryP_readOnly( `SELECT created, tid, @@ -93,22 +113,60 @@ const loadCommentSummary = (zid: number) => FROM comments WHERE zid = $1`, [zid] + )) as object[] | undefined; + if (!rows) { + fail(res, 500, "polis_err_data_export"); + return; + } + + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => String(Math.floor(row.created / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, + rows + ) ); +} -const formatDatetime = (timestamp: string) => - new Date(parseInt(timestamp)).toString(); +async function sendVotesSummary(zid: number, res: Response) { + const formatters: Formatters = { + timestamp: (row) => 
String(Math.floor(row.timestamp / 1000)), + datetime: (row) => formatDatetime(row.timestamp), + "comment-id": (row) => String(row.tid), + "voter-id": (row) => String(row.pid), + vote: (row) => String(row.vote), + }; + res.setHeader("Content-Type", "text/csv"); + res.write(formatCSVHeaders(formatters) + sep); + + stream_pgQueryP_readOnly( + "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", + [zid], + (row) => res.write(formatCSVRow(row, formatters) + sep), + () => res.end(), + (error) => { + // Handle any errors + logger.error("polis_err_report_votes_csv", error); + fail(res, 500, "polis_err_data_export", error); + } + ); +} export async function handle_GET_reportExport( req: { p: { rid: string; report_type: string }; headers: { host: string; "x-forwarded-proto": string }; }, - res: { - setHeader: (key: string, value: string) => void; - send: (data: string) => void; - write: (data: string) => void; - end: () => void; - } + res: Response ) { const { rid, report_type } = req.p; try { @@ -121,54 +179,15 @@ export async function handle_GET_reportExport( switch (report_type) { case "summary.csv": const siteUrl = `${req.headers["x-forwarded-proto"]}://${req.headers.host}`; - res.setHeader("content-type", "text/csv"); - res.send((await loadConversationSummary(zid, siteUrl)).join(sep)); + await sendConversationSummary(zid, siteUrl, res); break; case "comments.csv": - const rows = (await loadCommentSummary(zid)) as object[] | undefined; - if (rows) { - res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), - }, - rows - ) - ); - } else fail(res, 500, 
"polis_err_data_export"); + await sendCommentSummary(zid, res); break; case "votes.csv": - const formatters: Formatters = { - timestamp: (row) => String(Math.floor(row.timestamp / 1000)), - datetime: (row) => formatDatetime(row.timestamp), - "comment-id": (row) => String(row.tid), - "voter-id": (row) => String(row.pid), - vote: (row) => String(row.vote), - }; - res.setHeader("Content-Type", "text/csv"); - res.write(formatCSVHeaders(formatters) + sep); - - stream_pgQueryP_readOnly( - "SELECT created as timestamp, tid, pid, vote FROM votes WHERE zid = $1 ORDER BY tid, pid", - [zid], - (row) => res.write(formatCSVRow(row, formatters) + sep), - () => res.end(), - (error) => { - // Handle any errors - logger.error("polis_err_report_votes_csv", error); - fail(res, 500, "polis_err_data_export", error); - } - ); + await sendVotesSummary(zid, res); break; default: From a73ca2c1a17f35fb5cdf6f1114a5d9214de860d5 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Sat, 14 Sep 2024 12:46:07 -0700 Subject: [PATCH 09/13] Count up comment votes in single pass over votes table. There was actually a bug in the old SQL that aggregated votes from _all_ conversations instead of just the conversation in question, which is why it took 30 seconds to run. With that bug fixed, even the super slow "do a full subquery for each comment row" was actually quite fast. But this is way cheaper/faster. 
--- server/src/routes/export.ts | 113 ++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index b0ad0149e..51f5818b6 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -11,14 +11,14 @@ import { getPca } from "../utils/pca"; import fail from "../utils/fail"; import logger from "../utils/logger"; -type Formatters = Record string>; +type Formatters = Record string>; const sep = "\n"; -function formatCSVHeaders(colFns: Formatters) { +function formatCSVHeaders(colFns: Formatters) { return Object.keys(colFns).join(","); } -function formatCSVRow(row: object, colFns: Formatters) { +function formatCSVRow(row: T, colFns: Formatters) { const fns = Object.values(colFns); let csv = ""; for (let ii = 0; ii < fns.length; ii += 1) { @@ -28,7 +28,7 @@ function formatCSVRow(row: object, colFns: Formatters) { return csv; } -function formatCSV(colFns: Formatters, rows: object[]): string { +function formatCSV(colFns: Formatters, rows: T[]): string { let csv = formatCSVHeaders(colFns) + sep; if (rows.length > 0) { for (const row of rows) { @@ -100,45 +100,84 @@ async function sendConversationSummary( res.send(rows.join(sep)); } +type CommentRow = { + tid: number; + pid: number; + created: string; + txt: string; + mod: number; + velocity: number; + active: boolean; + agrees: number; + disagrees: number; + pass: number; +}; + async function sendCommentSummary(zid: number, res: Response) { - const rows = (await pgQueryP_readOnly( - `SELECT - created, - tid, - pid, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = 1), 0) as agrees, - COALESCE((SELECT count(*) FROM votes WHERE votes.tid = comments.tid AND vote = -1), 0) as disagrees, - mod, - txt - FROM comments - WHERE zid = $1`, - [zid] - )) as object[] | undefined; - if (!rows) { - fail(res, 500, "polis_err_data_export"); - return; - } + const comments = new Map(); - 
res.setHeader("content-type", "text/csv"); - res.send( - formatCSV( - { - timestamp: (row) => String(Math.floor(row.created / 1000)), - datetime: (row) => formatDatetime(row.created), - "comment-id": (row) => String(row.tid), - "author-id": (row) => String(row.pid), - agrees: (row) => String(row.agrees), - disagrees: (row) => String(row.disagrees), - moderated: (row) => String(row.mod), - "comment-body": (row) => String(row.txt), + try { + // First query: Load comments metadata + const commentRows = (await pgQueryP_readOnly( + "SELECT tid, pid, created, txt, mod, velocity, active FROM comments WHERE zid = ($1)", + [zid] + )) as CommentRow[]; + for (const comment of commentRows) { + comment.agrees = 0; + comment.disagrees = 0; + comment.pass = 0; + comments.set(comment.tid, comment); + } + + // Second query: Count votes in a single pass + stream_pgQueryP_readOnly( + "SELECT tid, vote FROM votes WHERE zid = ($1) ORDER BY tid", + [zid], + (row) => { + const comment = comments.get(row.tid); + if (comment) { + if (row.vote === 1) comment.agrees += 1; + else if (row.vote === -1) comment.disagrees += 1; + else if (row.vote === 0) comment.pass += 1; + } else { + logger.warn(`Comment row not found for [zid=${zid}, tid=${row.tid}]`); + } }, - rows - ) - ); + () => { + commentRows.sort((a, b) => { + return b.velocity - a.velocity; + }); + + res.setHeader("content-type", "text/csv"); + res.send( + formatCSV( + { + timestamp: (row) => + String(Math.floor(parseInt(row.created) / 1000)), + datetime: (row) => formatDatetime(row.created), + "comment-id": (row) => String(row.tid), + "author-id": (row) => String(row.pid), + agrees: (row) => String(row.agrees), + disagrees: (row) => String(row.disagrees), + moderated: (row) => String(row.mod), + "comment-body": (row) => String(row.txt), + }, + commentRows + ) + ); + }, + (error) => { + logger.error("polis_err_report_comments", error); + } + ); + } catch (err) { + logger.error("polis_err_report_comments", err); + fail(res, 500, 
"polis_err_data_export", err); + } } async function sendVotesSummary(zid: number, res: Response) { - const formatters: Formatters = { + const formatters: Formatters = { timestamp: (row) => String(Math.floor(row.timestamp / 1000)), datetime: (row) => formatDatetime(row.timestamp), "comment-id": (row) => String(row.tid), From 9563246af986dbbe05f4f1b75d206b1239a600a8 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Thu, 19 Sep 2024 15:15:34 -0700 Subject: [PATCH 10/13] Add participant-votes.csv export. --- server/src/routes/export.ts | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index 51f5818b6..cf4737aae 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -200,6 +200,102 @@ async function sendVotesSummary(zid: number, res: Response) { ); } +async function sendParticipantVotesSummary(zid: number, res: Response) { + // Load up the comment ids + const commentRows = (await pgQueryP_readOnly( + "SELECT tid, pid FROM comments WHERE zid = ($1) ORDER BY tid ASC, created ASC", // TODO: filter only active comments? 
+ [zid] + )) as { tid: number; pid: number }[]; + const commentIds = commentRows.map((row) => row.tid); + const participantCommentCounts = new Map(); + for (const row of commentRows) { + const count = participantCommentCounts.get(row.pid) || 0; + participantCommentCounts.set(row.pid, count + 1); + } + + const pca = await getPca(zid); + const groupClusters: { id: number; members: number[] }[] | undefined = + pca?.asPOJO["group-clusters"]; + function getGroupId(pid: number) { + if (groupClusters) { + for (const group of groupClusters) { + if (group.members.includes(pid)) { + return group.id; + } + } + } + return undefined; + } + + res.setHeader("content-type", "text/csv"); + res.write( + [ + "participant", + "group-id", + "n-comments", + "n-votes", + "n-agree", + "n-disagree", + ...commentIds, + ].join(",") + sep + ); + + // Query the votes in participant order so that we can summarize them in a streaming pass + let currentParticipantId = -1; + const currentParticipantVotes = new Map(); + function sendCurrentParticipantRow() { + let agrees = 0; + let disagrees = 0; + for (const vote of currentParticipantVotes.values()) { + if (vote === 1) agrees += 1; + else if (vote === -1) disagrees += 1; + } + const values = [ + currentParticipantId, + getGroupId(currentParticipantId), + participantCommentCounts.get(currentParticipantId) || 0, + currentParticipantVotes.size, + agrees, + disagrees, + ...commentIds.map((tid) => currentParticipantVotes.get(tid)), + ]; + res.write( + values + .map((value) => (value === undefined ? 
"" : String(value))) + .join(",") + sep + ); + } + + stream_pgQueryP_readOnly( + "SELECT pid, tid, vote FROM votes WHERE zid = ($1) ORDER BY pid", + [zid], + (row) => { + const pid: number = row.pid; + if (pid != currentParticipantId) { + if (currentParticipantId != -1) { + sendCurrentParticipantRow(); + } + currentParticipantId = pid; + currentParticipantVotes.clear(); + } + + const tid: number = row.tid; + const vote: number = row.vote; + currentParticipantVotes.set(tid, vote); + }, + () => { + if (currentParticipantId != -1) { + sendCurrentParticipantRow(); + } + res.end(); + }, + (error) => { + logger.error("polis_err_report_participant_votes", error); + fail(res, 500, "polis_err_data_export", error); + } + ); +} + export async function handle_GET_reportExport( req: { p: { rid: string; report_type: string }; @@ -229,6 +325,10 @@ export async function handle_GET_reportExport( await sendVotesSummary(zid, res); break; + case "participant-votes.csv": + await sendParticipantVotesSummary(zid, res); + break; + default: fail(res, 404, "polis_error_data_unknown_report"); break; From 9b6871086c30337081f88aa911967211fc03c098 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Fri, 20 Sep 2024 11:26:21 -0700 Subject: [PATCH 11/13] Flip vote polarity. In the raw votes table, -1 means agree and 1 means disagree, so we need to count things correctly. And when exporting votes in participant votes, we flip the sign so that 1 means agree and -1 means disagree. 
--- server/src/routes/export.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index cf4737aae..b43dfbfa2 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -136,8 +136,9 @@ async function sendCommentSummary(zid: number, res: Response) { (row) => { const comment = comments.get(row.tid); if (comment) { - if (row.vote === 1) comment.agrees += 1; - else if (row.vote === -1) comment.disagrees += 1; + // note that -1 means agree and 1 means disagree + if (row.vote === -1) comment.agrees += 1; + else if (row.vote === 1) comment.disagrees += 1; else if (row.vote === 0) comment.pass += 1; } else { logger.warn(`Comment row not found for [zid=${zid}, tid=${row.tid}]`); @@ -182,7 +183,7 @@ async function sendVotesSummary(zid: number, res: Response) { datetime: (row) => formatDatetime(row.timestamp), "comment-id": (row) => String(row.tid), "voter-id": (row) => String(row.pid), - vote: (row) => String(row.vote), + vote: (row) => String(-row.vote), // have to flip -1 to 1 and vice versa }; res.setHeader("Content-Type", "text/csv"); res.write(formatCSVHeaders(formatters) + sep); @@ -278,10 +279,8 @@ async function sendParticipantVotesSummary(zid: number, res: Response) { currentParticipantId = pid; currentParticipantVotes.clear(); } - - const tid: number = row.tid; - const vote: number = row.vote; - currentParticipantVotes.set(tid, vote); + // have to flip vote from -1 to 1 and vice versa + currentParticipantVotes.set(row.tid, -row.vote); }, () => { if (currentParticipantId != -1) { From 8e627ddd7c20b1a38eea5f5d5ca31475b564d8c6 Mon Sep 17 00:00:00 2001 From: Michael Bayne Date: Tue, 1 Oct 2024 13:51:24 -0700 Subject: [PATCH 12/13] Properly escape comment text. 
--- server/src/routes/export.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/routes/export.ts b/server/src/routes/export.ts index b43dfbfa2..df4cadf42 100644 --- a/server/src/routes/export.ts +++ b/server/src/routes/export.ts @@ -14,6 +14,8 @@ import logger from "../utils/logger"; type Formatters = Record<string, (row: any) => string>; const sep = "\n"; +const formatEscapedText = (s: string) => `"${s.replace(/"/g, '""')}"`; + function formatCSVHeaders(colFns: Formatters) { return Object.keys(colFns).join(","); } @@ -67,16 +69,15 @@ async function loadConversationSummary(zid: number, siteUrl: string) { }; const data = pca.asPOJO as PcaData; - const escapeQuotes = (s: string) => s.replace(/"/g, '""'); return [ - ["topic", `"${escapeQuotes(convo.topic)}"`], + ["topic", formatEscapedText(convo.topic)], ["url", `${siteUrl}/${zinvite}`], ["voters", Object.keys(data["user-vote-counts"]).length], ["voters-in-conv", data["in-conv"].length], ["commenters", commenters], ["comments", data["n-cmts"]], ["groups", Object.keys(data["group-clusters"]).length], - ["conversation-description", `"${escapeQuotes(convo.description)}"`], + ["conversation-description", formatEscapedText(convo.description)], ].map((row) => row.join(",")); } @@ -161,7 +162,7 @@ async function sendCommentSummary(zid: number, res: Response) { agrees: (row) => String(row.agrees), disagrees: (row) => String(row.disagrees), moderated: (row) => String(row.mod), - "comment-body": (row) => formatEscapedText(row.txt), + "comment-body": (row) => formatEscapedText(row.txt), }, commentRows ) From 297471e918d15c3c1565698dd69629ce09ff3c0d Mon Sep 17 00:00:00 2001 From: Colin Megill Date: Tue, 22 Oct 2024 12:43:25 -0700 Subject: [PATCH 13/13] add votes matrix, show data license preprod, logging. 
--- client-report/src/components/overview.js | 67 ++++++++++++++---------- server/src/utils/parameter.ts | 7 ++- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/client-report/src/components/overview.js b/client-report/src/components/overview.js index 470448864..f61b5778f 100644 --- a/client-report/src/components/overview.js +++ b/client-report/src/components/overview.js @@ -27,6 +27,9 @@ const Number = ({ number, label }) => ( const pathname = window.location.pathname; // "/report/2arcefpshi" const report_id = pathname.split("/")[2]; +const doShowDataLicenseTerms = ["pol.is", "preprod.pol.is", "localhost"].includes( + window.location.hostname +); const getCurrentTimestamp = () => { const now = new Date(); @@ -147,6 +150,16 @@ const Overview = ({ {` (as event log)`}

+

+ {`---Votes matrix: `} + + {getDownloadFilename("participant-votes", conversation)} + + {` (as comments x participants matrix)`} +

Public API endpoints (read only, Jupyter notebook friendly) @@ -160,36 +173,36 @@ const Overview = ({

{`$ curl http://${window.location.hostname}/api/v3/reportExport/${report_id}/votes.csv`}

+

+ {`$ curl http://${window.location.hostname}/api/v3/reportExport/${report_id}/participant-votes.csv`} +

- {window.location.hostname === "pol.is" || - (window.location.hostname === "localhost" && ( -
-

- Attribution of Polis Data -

- -

- All Polis data is licensed under a Creative Commons Attribution 4.0 International - license: https://creativecommons.org/licenses/by/4.0/ -

-

- --------------- BEGIN STATEMENT --------------- -

-

{`Data was gathered using the Polis software (see: compdemocracy.org/polis and github.com/compdemocracy/polis) and is sub-licensed + {doShowDataLicenseTerms && ( +

+

+ Attribution of Polis Data +

+ +

+ All Polis data is licensed under a Creative Commons Attribution 4.0 International + license: https://creativecommons.org/licenses/by/4.0/ +

+

+ --------------- BEGIN STATEMENT --------------- +

+

{`Data was gathered using the Polis software (see: compdemocracy.org/polis and github.com/compdemocracy/polis) and is sub-licensed under CC BY 4.0 with Attribution to The Computational Democracy Project. The data and more information about how the data was collected can be found at the following link: ${window.location.href}`}

-

- --------------- END STATEMENT--------------- -

-

- For further information on best practices for Attribution of CC 4.0 licensed content - Please see: - https://wiki.creativecommons.org/wiki/Best_practices_for_attribution#Title.2C_Author.2C_Source.2C_License -

-
- ))} +

--------------- END STATEMENT---------------

+

+ For further information on best practices for Attribution of CC 4.0 licensed content + Please see: + https://wiki.creativecommons.org/wiki/Best_practices_for_attribution#Title.2C_Author.2C_Source.2C_License +

+
+ )} ); diff --git a/server/src/utils/parameter.ts b/server/src/utils/parameter.ts index 3fb1c7749..43ae41653 100644 --- a/server/src/utils/parameter.ts +++ b/server/src/utils/parameter.ts @@ -180,7 +180,7 @@ function buildCallback(config: { next(); }, function (err: any) { - let s = "polis_err_param_parse_failed_" + name; + let s = `polis_err_param_parse_failed_${name} (val='${val}', error=${err})`; logger.error(s, err); res.status(400); next(s); @@ -362,7 +362,10 @@ function getRidFromReportId(report_id: string) { "select rid from reports where report_id = ($1);", [report_id], function (err: any, results: { rows: string | any[] }) { - logger.error("polis_err_fetching_rid_for_report_id " + report_id, err); + logger.error( + "polis_err_fetching_rid_for_report_id " + report_id, + err + ); if (err) { return reject(err); } else if (!results || !results.rows || !results.rows.length) {