feat: add top earnings participant endpoint #170

PatrickNercessian · 2024-07-02T14:50:57Z

observer/test/observer.test.js

PatrickNercessian · 2024-07-02T14:52:30Z

stats/lib/platform-stats-fetchers.js

+export const fetchTopEarningParticipants = async (pgPool, filter) => {
+  const { rows } = await pgPool.query(`
+    WITH latest_scheduled_rewards AS (
+      SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards
+      FROM daily_scheduled_rewards
+      ORDER BY participant_address, day DESC
+    )
+    SELECT 
+      COALESCE(drt.to_address, lsr.participant_address) as participant_address, 
+      COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards
+    FROM daily_reward_transfers drt
+    FULL OUTER JOIN latest_scheduled_rewards lsr
+      ON drt.to_address = lsr.participant_address
+    WHERE drt.day >= $1 AND drt.day <= $2 OR drt.day IS NULL
+    GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards
+    ORDER BY total_rewards DESC
+  `, [filter.from, filter.to])
+  return rows
+}


Another set of eyes here would be nice for this complex query.

And also if we can run it on production data to see latency expectations.

spark_stats=# EXPLAIN WITH latest_scheduled_rewards AS ( SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards FROM daily_scheduled_rewards ORDER BY participant_address, day DESC ) SELECT COALESCE(drt.to_address, lsr.participant_address) as participant_address, COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards FROM daily_reward_transfers drt FULL OUTER JOIN latest_scheduled_rewards lsr ON drt.to_address = lsr.participant_address WHERE drt.day >= '2024-06-26' AND drt.day <= '2024-07-03' OR drt.day IS NULL GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards ORDER BY total_rewards DESC; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- Sort (cost=14296.01..14304.06 rows=3220 width=75) Sort Key: ((COALESCE(sum(drt.amount), '0'::numeric) + COALESCE(daily_scheduled_rewards.scheduled_rewards, '0'::numeric))) DESC -> HashAggregate (cost=14060.10..14108.40 rows=3220 width=75) Group Key: COALESCE(drt.to_address, daily_scheduled_rewards.participant_address), daily_scheduled_rewards.scheduled_rewards -> Hash Full Join (cost=13345.69..14035.95 rows=3220 width=56) Hash Cond: (daily_scheduled_rewards.participant_address = drt.to_address) Filter: (((drt.day >= '2024-06-26'::date) AND (drt.day <= '2024-07-03'::date)) OR (drt.day IS NULL)) -> Unique (cost=13295.05..13816.97 rows=8980 width=58) -> Sort (cost=13295.05..13556.01 rows=104383 width=58) Sort Key: daily_scheduled_rewards.participant_address, daily_scheduled_rewards.day DESC -> Seq Scan on daily_scheduled_rewards (cost=0.00..2293.83 rows=104383 width=58) -> Hash (cost=31.95..31.95 rows=1495 width=60) -> Seq Scan on daily_reward_transfers drt (cost=0.00..31.95 rows=1495 width=60) (13 rows)

For the last 7 days, and returning 11k rows, this query took

Time: 309.534 ms

which is ok for me

We can shave off ~40ms from the query by adding an index ON daily_scheduled_rewards (participant_address, day), but I guess the improvement is small.

I agree that the query duration in the 300-400ms range is acceptable.

EXPLAIN ANALYZE WITH latest_scheduled_rewards AS ( SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards FROM daily_scheduled_rewards ORDER BY participant_address, day DESC ) SELECT COALESCE(drt.to_address, lsr.participant_address) as participant_address, COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards FROM daily_reward_transfers drt FULL OUTER JOIN latest_scheduled_rewards lsr ON drt.to_address = lsr.participant_address WHERE drt.day >= '2024-06-26' AND drt.day <= '2024-07-03' OR drt.day IS NULL GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards ORDER BY total_rewards DESC; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort (cost=14693.79..14701.93 rows=3256 width=75) (actual time=380.119..381.786 rows=11118 loops=1) Sort Key: ((COALESCE(sum(drt.amount), '0'::numeric) + COALESCE(daily_scheduled_rewards.scheduled_rewards, '0'::numeric))) DESC Sort Method: quicksort Memory: 1948kB -> HashAggregate (cost=14454.98..14503.82 rows=3256 width=75) (actual time=370.194..376.246 rows=11118 loops=1) Group Key: COALESCE(drt.to_address, daily_scheduled_rewards.participant_address), daily_scheduled_rewards.scheduled_rewards Batches: 5 Memory Usage: 4273kB Disk Usage: 248kB -> Hash Full Join (cost=13724.09..14430.56 rows=3256 width=56) (actual time=309.549..364.272 rows=11118 loops=1) Hash Cond: (daily_scheduled_rewards.participant_address = drt.to_address) Filter: (((drt.day >= '2024-06-26'::date) AND (drt.day <= '2024-07-03'::date)) OR (drt.day IS NULL)) Rows Removed by Filter: 959 -> Unique (cost=13673.45..14209.91 rows=9082 width=58) (actual time=308.787..359.491 rows=11644 loops=1) -> Sort (cost=13673.45..13941.68 rows=107292 width=58) (actual time=308.783..348.916 rows=107429 loops=1) Sort Key: daily_scheduled_rewards.participant_address, daily_scheduled_rewards.day DESC Sort Method: external merge Disk: 7368kB -> Seq Scan on daily_scheduled_rewards (cost=0.00..2344.92 rows=107292 width=58) (actual time=0.012..23.680 rows=107429 loops=1) -> Hash (cost=31.95..31.95 rows=1495 width=60) (actual time=0.747..0.749 rows=1495 loops=1) Buckets: 2048 Batches: 1 Memory Usage: 151kB -> Seq Scan on daily_reward_transfers drt (cost=0.00..31.95 rows=1495 width=60) (actual time=0.040..0.383 rows=1495 loops=1) Planning Time: 0.377 ms Execution Time: 384.109 ms (20 rows)

juliangruber · 2024-07-03T10:06:23Z

stats/lib/platform-routes.js

@@ -32,6 +33,8 @@ export const handlePlatformRoutes = async (req, res, pgPools) => {
    await respond(pgPools.evaluate, fetchDailyStationAcceptedMeasurementCount)
  } else if (req.method === 'GET' && url === '/transfers/daily') {
    await respond(pgPools.stats, fetchDailyRewardTransfers)
+  } else if (req.method === 'GET' && url === '/participants/top-earnings') {


Suggested change

} else if (req.method === 'GET' && url === '/participants/top-earnings') {

} else if (req.method === 'GET' && url === '/participants/top-earning') {

This route is about the top earning participants, and not the top earnings among participants (subtle but I think important)

please add this route to the README as well

Sure, addressed in 633f194

stats/lib/platform-stats-fetchers.js

juliangruber · 2024-07-03T10:12:09Z

stats/lib/platform-stats-fetchers.js

+export const fetchTopEarningParticipants = async (pgPool, filter) => {
+  const { rows } = await pgPool.query(`
+    WITH latest_scheduled_rewards AS (
+      SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards
+      FROM daily_scheduled_rewards
+      ORDER BY participant_address, day DESC
+    )
+    SELECT 
+      COALESCE(drt.to_address, lsr.participant_address) as participant_address, 
+      COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards
+    FROM daily_reward_transfers drt
+    FULL OUTER JOIN latest_scheduled_rewards lsr
+      ON drt.to_address = lsr.participant_address
+    WHERE drt.day >= $1 AND drt.day <= $2 OR drt.day IS NULL
+    GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards
+    ORDER BY total_rewards DESC
+  `, [filter.from, filter.to])
+  return rows
+}


spark_stats=# EXPLAIN WITH latest_scheduled_rewards AS ( SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards FROM daily_scheduled_rewards ORDER BY participant_address, day DESC ) SELECT COALESCE(drt.to_address, lsr.participant_address) as participant_address, COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards FROM daily_reward_transfers drt FULL OUTER JOIN latest_scheduled_rewards lsr ON drt.to_address = lsr.participant_address WHERE drt.day >= '2024-06-26' AND drt.day <= '2024-07-03' OR drt.day IS NULL GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards ORDER BY total_rewards DESC; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- Sort (cost=14296.01..14304.06 rows=3220 width=75) Sort Key: ((COALESCE(sum(drt.amount), '0'::numeric) + COALESCE(daily_scheduled_rewards.scheduled_rewards, '0'::numeric))) DESC -> HashAggregate (cost=14060.10..14108.40 rows=3220 width=75) Group Key: COALESCE(drt.to_address, daily_scheduled_rewards.participant_address), daily_scheduled_rewards.scheduled_rewards -> Hash Full Join (cost=13345.69..14035.95 rows=3220 width=56) Hash Cond: (daily_scheduled_rewards.participant_address = drt.to_address) Filter: (((drt.day >= '2024-06-26'::date) AND (drt.day <= '2024-07-03'::date)) OR (drt.day IS NULL)) -> Unique (cost=13295.05..13816.97 rows=8980 width=58) -> Sort (cost=13295.05..13556.01 rows=104383 width=58) Sort Key: daily_scheduled_rewards.participant_address, daily_scheduled_rewards.day DESC -> Seq Scan on daily_scheduled_rewards (cost=0.00..2293.83 rows=104383 width=58) -> Hash (cost=31.95..31.95 rows=1495 width=60) -> Seq Scan on daily_reward_transfers drt (cost=0.00..31.95 rows=1495 width=60) (13 rows)

juliangruber · 2024-07-03T10:13:45Z

stats/lib/platform-stats-fetchers.js

+export const fetchTopEarningParticipants = async (pgPool, filter) => {
+  const { rows } = await pgPool.query(`
+    WITH latest_scheduled_rewards AS (
+      SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards
+      FROM daily_scheduled_rewards
+      ORDER BY participant_address, day DESC
+    )
+    SELECT 
+      COALESCE(drt.to_address, lsr.participant_address) as participant_address, 
+      COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards
+    FROM daily_reward_transfers drt
+    FULL OUTER JOIN latest_scheduled_rewards lsr
+      ON drt.to_address = lsr.participant_address
+    WHERE drt.day >= $1 AND drt.day <= $2 OR drt.day IS NULL
+    GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards
+    ORDER BY total_rewards DESC
+  `, [filter.from, filter.to])
+  return rows
+}


For the last 7 days, and returning 11k rows, this query took

Time: 309.534 ms

which is ok for me

bajtos · 2024-07-03T11:53:24Z

stats/lib/platform-stats-fetchers.js

+    FROM daily_reward_transfers drt
+    FULL OUTER JOIN latest_scheduled_rewards lsr
+      ON drt.to_address = lsr.participant_address
+    WHERE drt.day >= $1 AND drt.day <= $2 OR drt.day IS NULL


I never remember the precedence of AND & OR operators. Could you please add parenthesis to make it explicit?

For example:

WHERE (drt.day >= $1 AND drt.day <= $2) OR drt.day IS NULL

Sure! 633f194

stats/lib/platform-stats-fetchers.js

bajtos · 2024-07-03T12:15:43Z

stats/lib/platform-stats-fetchers.js

+export const fetchTopEarningParticipants = async (pgPool, filter) => {
+  const { rows } = await pgPool.query(`
+    WITH latest_scheduled_rewards AS (
+      SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards
+      FROM daily_scheduled_rewards
+      ORDER BY participant_address, day DESC
+    )
+    SELECT 
+      COALESCE(drt.to_address, lsr.participant_address) as participant_address, 
+      COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards
+    FROM daily_reward_transfers drt
+    FULL OUTER JOIN latest_scheduled_rewards lsr
+      ON drt.to_address = lsr.participant_address
+    WHERE drt.day >= $1 AND drt.day <= $2 OR drt.day IS NULL
+    GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards
+    ORDER BY total_rewards DESC
+  `, [filter.from, filter.to])
+  return rows
+}


We can shave off ~40ms from the query by adding an index ON daily_scheduled_rewards (participant_address, day), but I guess the improvement is small.

I agree that the query duration in the 300-400ms range is acceptable.

EXPLAIN ANALYZE WITH latest_scheduled_rewards AS ( SELECT DISTINCT ON (participant_address) participant_address, scheduled_rewards FROM daily_scheduled_rewards ORDER BY participant_address, day DESC ) SELECT COALESCE(drt.to_address, lsr.participant_address) as participant_address, COALESCE(SUM(drt.amount), 0) + COALESCE(lsr.scheduled_rewards, 0) as total_rewards FROM daily_reward_transfers drt FULL OUTER JOIN latest_scheduled_rewards lsr ON drt.to_address = lsr.participant_address WHERE drt.day >= '2024-06-26' AND drt.day <= '2024-07-03' OR drt.day IS NULL GROUP BY COALESCE(drt.to_address, lsr.participant_address), lsr.scheduled_rewards ORDER BY total_rewards DESC; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort (cost=14693.79..14701.93 rows=3256 width=75) (actual time=380.119..381.786 rows=11118 loops=1) Sort Key: ((COALESCE(sum(drt.amount), '0'::numeric) + COALESCE(daily_scheduled_rewards.scheduled_rewards, '0'::numeric))) DESC Sort Method: quicksort Memory: 1948kB -> HashAggregate (cost=14454.98..14503.82 rows=3256 width=75) (actual time=370.194..376.246 rows=11118 loops=1) Group Key: COALESCE(drt.to_address, daily_scheduled_rewards.participant_address), daily_scheduled_rewards.scheduled_rewards Batches: 5 Memory Usage: 4273kB Disk Usage: 248kB -> Hash Full Join (cost=13724.09..14430.56 rows=3256 width=56) (actual time=309.549..364.272 rows=11118 loops=1) Hash Cond: (daily_scheduled_rewards.participant_address = drt.to_address) Filter: (((drt.day >= '2024-06-26'::date) AND (drt.day <= '2024-07-03'::date)) OR (drt.day IS NULL)) Rows Removed by Filter: 959 -> Unique (cost=13673.45..14209.91 rows=9082 width=58) (actual time=308.787..359.491 rows=11644 loops=1) -> Sort (cost=13673.45..13941.68 rows=107292 width=58) (actual time=308.783..348.916 rows=107429 loops=1) Sort Key: daily_scheduled_rewards.participant_address, daily_scheduled_rewards.day DESC Sort Method: external merge Disk: 7368kB -> Seq Scan on daily_scheduled_rewards (cost=0.00..2344.92 rows=107292 width=58) (actual time=0.012..23.680 rows=107429 loops=1) -> Hash (cost=31.95..31.95 rows=1495 width=60) (actual time=0.747..0.749 rows=1495 loops=1) Buckets: 2048 Batches: 1 Memory Usage: 151kB -> Seq Scan on daily_reward_transfers drt (cost=0.00..31.95 rows=1495 width=60) (actual time=0.040..0.383 rows=1495 loops=1) Planning Time: 0.377 ms Execution Time: 384.109 ms (20 rows)

Signed-off-by: Miroslav Bajtoš <[email protected]>

feat: add top earnings participant endpoint

7d1e1b9

PatrickNercessian marked this pull request as ready for review July 2, 2024 14:51

PatrickNercessian commented Jul 2, 2024

View reviewed changes

juliangruber requested review from juliangruber and bajtos July 3, 2024 10:05

juliangruber requested changes Jul 3, 2024

View reviewed changes

bajtos requested changes Jul 3, 2024

View reviewed changes

PatrickNercessian and others added 2 commits July 3, 2024 11:10

constrant range query to today, endpoint path change

633f194

Merge branch 'main' into feat-add-top-earnings-endpoints

3e5cd6b

juliangruber approved these changes Jul 3, 2024

View reviewed changes

bajtos approved these changes Jul 4, 2024

View reviewed changes

bajtos added 2 commits July 4, 2024 09:36

Merge branch 'main' into feat-add-top-earnings-endpoints

384ddc1

fixup! document why filter.to must be today

9332282

Signed-off-by: Miroslav Bajtoš <[email protected]>

bajtos approved these changes Jul 4, 2024

View reviewed changes

bajtos enabled auto-merge (squash) July 4, 2024 07:41

bajtos merged commit cc5336d into filecoin-station:main Jul 4, 2024
8 of 9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: add top earnings participant endpoint #170

feat: add top earnings participant endpoint #170

PatrickNercessian commented Jul 2, 2024

PatrickNercessian Jul 2, 2024

juliangruber Jul 3, 2024

juliangruber Jul 3, 2024

bajtos Jul 3, 2024

juliangruber Jul 3, 2024

juliangruber Jul 3, 2024

PatrickNercessian Jul 3, 2024

juliangruber Jul 3, 2024

juliangruber Jul 3, 2024

bajtos Jul 3, 2024

PatrickNercessian Jul 3, 2024

bajtos Jul 3, 2024

	} else if (req.method === 'GET' && url === '/participants/top-earnings') {
	} else if (req.method === 'GET' && url === '/participants/top-earning') {

feat: add top earnings participant endpoint #170

feat: add top earnings participant endpoint #170

Conversation

PatrickNercessian commented Jul 2, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment