From eda62e280b08d616d8b4bf0b03d36899b5f35199 Mon Sep 17 00:00:00 2001 From: Kathleen Tuite Date: Thu, 12 Dec 2024 16:21:31 -0800 Subject: [PATCH] Make dataset usage metric faster by pulling properties query out --- lib/model/query/analytics.js | 24 ++++++++++++++++----- test/integration/other/analytics-queries.js | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lib/model/query/analytics.js b/lib/model/query/analytics.js index c6a2aced6..baf4fdc63 100644 --- a/lib/model/query/analytics.js +++ b/lib/model/query/analytics.js @@ -419,7 +419,7 @@ group by f."projectId"`); // Datasets const getDatasets = () => ({ all }) => all(sql` SELECT - ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties, COUNT(DISTINCT e.id) num_entities_total, + ds.id, ds."projectId", COUNT(DISTINCT e.id) num_entities_total, COUNT(DISTINCT CASE WHEN e."createdAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_recent, COUNT(DISTINCT CASE WHEN e."updatedAt" IS NOT NULL THEN e.id END) num_entities_updated_total, COUNT(DISTINCT CASE WHEN e."updatedAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_updated_recent, @@ -436,7 +436,6 @@ SELECT MAX(COALESCE(conflict_stats.conflicts, 0)) num_entity_conflicts, MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved FROM datasets ds - LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL LEFT JOIN entities e ON e."datasetId" = ds.id LEFT JOIN (dataset_form_defs dfd JOIN form_defs fd ON fd.id = dfd."formDefId" @@ -498,6 +497,15 @@ WHERE audits.action = 'entity.bulk.create' GROUP BY ds.id, ds."projectId" `); +const getDatasetProperties = () => ({ all }) => all(sql` +SELECT + ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties +FROM datasets ds + LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL +WHERE ds."publishedAt" IS NOT NULL +GROUP BY ds.id, ds."projectId"; +`); + // Offline entities @@ -615,11 +623,12 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([ Analytics.countSubmissionsByUserType, Analytics.getProjectsWithDescriptions, Analytics.getDatasets, - Analytics.getDatasetEvents + Analytics.getDatasetEvents, + Analytics.getDatasetProperties ]).then(([ userRoles, appUsers, deviceIds, pubLinks, forms, formGeoRepeats, formsEncrypt, formStates, reusedIds, subs, subStates, subEdited, subComments, subUsers, - projWithDesc, datasets, datasetEvents ]) => { + projWithDesc, datasets, datasetEvents, datasetProperties ]) => { const projects = {}; // users @@ -732,9 +741,13 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([ const eventsRow = datasetEvents.find(d => (d.projectId === row.projectId && d.id === row.id)) || { num_bulk_create_events_total: 0, num_bulk_create_events_recent: 0, biggest_bulk_upload: 0 }; + // Properties row + const propertiesRow = datasetProperties.find(d => (d.projectId === row.projectId && d.id === row.id)) || + { num_properties: 0 }; + project.datasets.push({ id: row.id, - num_properties: row.num_properties, + num_properties: propertiesRow.num_properties, num_creation_forms: row.num_creation_forms, num_followup_forms: row.num_followup_forms, num_entities: { total: row.num_entities_total, recent: row.num_entities_recent }, @@ -898,6 +911,7 @@ module.exports = { getLatestAudit, getDatasets, getDatasetEvents, + getDatasetProperties, countOfflineBranches, countInterruptedBranches, countSubmissionBacklogEvents, diff --git a/test/integration/other/analytics-queries.js b/test/integration/other/analytics-queries.js index 72ad7a4a1..82e8e5e5d 100644 --- a/test/integration/other/analytics-queries.js +++ b/test/integration/other/analytics-queries.js @@ -1022,7 +1022,7 @@ describe('analytics task queries', function () { .replace(/simpleEntity/g, 'simpleEntity2') .replace(/age/g, 'gender'), 1); - const datasets = await container.Analytics.getDatasets(); + const datasets = await container.Analytics.getDatasetProperties(); datasets[0].num_properties.should.be.equal(3); }));