Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Build Enriched Traces & Transactions + Aggregation Tables #1161

Draft
wants to merge 47 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
c4353f0
draft
MSilb7 Dec 12, 2024
5ff10a9
adds
MSilb7 Dec 12, 2024
a2eade6
fix func names
MSilb7 Dec 12, 2024
ee2d62d
init push
MSilb7 Dec 12, 2024
ebeb470
push example aggregation
MSilb7 Dec 12, 2024
cc12c72
mod
MSilb7 Dec 13, 2024
443e6c4
mods
MSilb7 Dec 13, 2024
b33ee03
juice it up
MSilb7 Dec 13, 2024
dd03a2a
resolve sources
MSilb7 Dec 13, 2024
ccc8ed9
migrate to existing transaction_fees and rename
MSilb7 Dec 13, 2024
563ad68
update names
MSilb7 Dec 13, 2024
365ee57
checkpoint
MSilb7 Dec 13, 2024
ae2f95c
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 13, 2024
6503ab7
check
MSilb7 Dec 13, 2024
e4a2c77
check
MSilb7 Dec 13, 2024
a785060
undo
MSilb7 Dec 13, 2024
7905ab0
fix eet
MSilb7 Dec 13, 2024
6592682
simplify views
MSilb7 Dec 13, 2024
1617cba
mod nb
MSilb7 Dec 13, 2024
09cfe79
push latest - through refined transactions
MSilb7 Dec 13, 2024
012bed3
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 13, 2024
379100c
clean
MSilb7 Dec 13, 2024
9d09853
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 13, 2024
a185a85
push updates to models
MSilb7 Dec 13, 2024
0fc755f
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 13, 2024
e705cac
checkpoint, base models DONE
MSilb7 Dec 13, 2024
28d7b5f
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 14, 2024
61cab69
separate l1 fee scalar versions
MSilb7 Dec 14, 2024
ae7b7db
fixes and create aggregate trace models
MSilb7 Dec 14, 2024
dd00b26
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 14, 2024
b11df41
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 15, 2024
7112e82
split txs and traces - push tx aggregates
MSilb7 Dec 15, 2024
8164f9a
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 15, 2024
6e03596
Merge branch 'main' into traces-txs-models-v0
MSilb7 Dec 16, 2024
45ed343
rename traces & include all types
MSilb7 Dec 16, 2024
9f147e2
push dev notebook
MSilb7 Dec 16, 2024
afeb39d
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 16, 2024
f6da4d4
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 16, 2024
0633750
Undo changes to example_daily_data.ipynb
lithium323 Dec 16, 2024
e2e57ae
Apply sqlfluff to refined_transactions_fees.sql.j2
lithium323 Dec 16, 2024
d634da1
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 16, 2024
70b34f5
Make diff wih main smaller
lithium323 Dec 16, 2024
2190036
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 17, 2024
a49845e
Make diff wih main smaller
lithium323 Dec 17, 2024
0489465
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 17, 2024
9226dde
Format sql
lithium323 Dec 18, 2024
cbea965
Merge branch 'main' into traces-txs-models-v0
lithium323 Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import duckdb

from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery
from op_analytics.datapipeline.models.compute.registry import register_model
from op_analytics.datapipeline.models.compute.types import NamedRelations


@register_model(
input_datasets=["intermediate/enriched_transactions_v1"],
expected_outputs=["daily_transactions_by_to_v1"],
# TODO: Uncomment if we do this as a view (or some element as a view)
# auxiliary_views=[
# TemplatedSQLQuery(
# template_name="daily_transactions_by_to",
# context={},
# ),
# ],
)
def daily_transactions_by_to(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations:
return {
"daily_transactions_by_to_v1": duckdb_client.view(
"""
TODO: AGGREGATION CODE
"""
),
}
21 changes: 21 additions & 0 deletions src/op_analytics/datapipeline/models/code/enriched_trace_calls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import duckdb

from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery
from op_analytics.datapipeline.models.compute.registry import register_model
from op_analytics.datapipeline.models.compute.types import NamedRelations


@register_model(
input_datasets=["ingestion/traces_v1", "enriched_transactions_v1"],
expected_outputs=["enriched_trace_calls_v1"],
auxiliary_views=[
TemplatedSQLQuery(
template_name="enriched_trace_calls",
context={},
),
],
)
def enriched_trace_calls(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations:
return {
"enriched_trace_calls_v1": duckdb_client.view("enriched_trace_calls"),
}
21 changes: 21 additions & 0 deletions src/op_analytics/datapipeline/models/code/enriched_transactions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import duckdb

from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery
from op_analytics.datapipeline.models.compute.registry import register_model
from op_analytics.datapipeline.models.compute.types import NamedRelations


@register_model(
input_datasets=["ingestion/transactions_v1", "ingestion/blocks_v1", "event_emitting_transactions_list_v1"],
expected_outputs=["enriched_transactions_v1"],
auxiliary_views=[
TemplatedSQLQuery(
template_name="enriched_transactions",
context={},
),
],
)
def enriched_transactions(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations:
return {
"enriched_transactions_v1": duckdb_client.view("enriched_transactions"),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import duckdb

from op_analytics.datapipeline.models.compute.querybuilder import TemplatedSQLQuery
from op_analytics.datapipeline.models.compute.registry import register_model
from op_analytics.datapipeline.models.compute.types import NamedRelations


@register_model(
input_datasets=["ingestion/logs_v1"],
expected_outputs=["event_emitting_transactions_list_v1"],
auxiliary_views=[
TemplatedSQLQuery(
template_name="event_emitting_transactions_list",
context={},
),
],
)
def event_emitting_transactions_list(duckdb_client: duckdb.DuckDBPyConnection) -> NamedRelations:
MSilb7 marked this conversation as resolved.
Show resolved Hide resolved
return {
"event_emitting_transactions_list_v1": duckdb_client.view("event_emitting_transactions_list"),
}
46 changes: 45 additions & 1 deletion src/op_analytics/datapipeline/models/compute/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ def create_duckdb_macros(duckdb_client: duckdb.DuckDBPyConnection):

CREATE OR REPLACE MACRO wei_to_gwei(a)
AS a::DECIMAL(28, 0) * 0.000000001::DECIMAL(10, 10);

CREATE OR REPLACE MACRO gwei_to_eth(a)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modified decimal precision here, it wasn't allowing for a to be a decimal before

AS wei_to_gwei(a);

CREATE OR REPLACE MACRO safe_div(a, b) AS
IF(b = 0, NULL, a / b);
Expand All @@ -27,14 +30,55 @@ def create_duckdb_macros(duckdb_client: duckdb.DuckDBPyConnection):
-- Truncate a timestamp to hour.
CREATE OR REPLACE MACRO epoch_to_hour(a) AS
date_trunc('hour', make_timestamp(a * 1000000::BIGINT));

-- Truncate a timestamp to day.
CREATE OR REPLACE MACRO epoch_to_day(a) AS
date_trunc('day', make_timestamp(a * 1000000::BIGINT));

-- Division by 16 for DECIMAL types.
CREATE OR REPLACE MACRO div16(a)
AS a * 0.0625::DECIMAL(5, 5);

--Get the length in bytes for binary data that is encoded as a hex string
CREATE OR REPLACE MACRO hexstr_bytelen(x)
AS (length(x) - 2) / 2
AS (length(x) - 2) / 2;

--Count non-zero bytes for binary data that is encoded as a hex string. We don't use hexstr_bytelen because we need to substring the input data.
CREATE OR REPLACE MACRO hexstr_nonzero_bytes(x)
AS length( REPLACE(TO_HEX(FROM_HEX(SUBSTR(x, 3))), '00', '') ) / 2;

--Count non-zero bytes for binary data that is encoded as a hex string
CREATE OR REPLACE MACRO hexstr_zero_bytes(x)
AS hexstr_bytelen(x) - hexstr_nonzero_bytes(x);

--Calculate calldata gas used for binary data that is encoded as a hex string (can be updated by an EIP)
CREATE OR REPLACE MACRO hexstr_calldata_gas(x)
AS 16*hexstr_nonzero_bytes(x) + 4*hexstr_zero_bytes(x);

--Get the method id for input data. This is the first 4 bytes, or first 10 string characters for binary data that is encoded as a hex string.
CREATE OR REPLACE MACRO hexstr_method_id(x)
AS substring(x,1,10);

-- TODO: Is there some way that we could or should pass in the alias for each table?
-- Calculate total gas fee in native units, given transactions t and blocks b
CREATE OR REPLACE MACRO total_gas_fee_native()
AS CASE WHEN t.gas_price = 0 THEN 0 ELSE wei_to_eth(t.receipt_l1_fee + t.gas_price * t.receipt_gas_used) END;

-- Calculate l2 base fee gas fee in native units, given transactions t and blocks b
CREATE OR REPLACE MACRO l2_base_gas_fee_native()
AS CASE WHEN t.gas_price = 0 THEN 0 ELSE wei_to_eth(t.receipt_l1_fee + t.gas_price * t.receipt_gas_used) END;

-- Calculate l2 priority fee gas fee in native units, given transactions t and blocks b
CREATE OR REPLACE MACRO l2_priority_gas_fee_native()
AS CASE WHEN t.gas_price = 0 THEN 0 ELSE wei_to_eth(t.receipt_l1_fee + t.gas_price * t.receipt_gas_used) END;

-- Calculate l1 data fee gas fee in native units, given transactions t and blocks b
CREATE OR REPLACE MACRO l2_gas_fee_native()
AS wei_to_eth(t.gas_price * t.receipt_gas_used

-- Calculate l1 data fee gas fee in native units, given transactions t and blocks b
CREATE OR REPLACE MACRO l1_gas_fee_native()
AS CASE WHEN t.gas_price = 0 THEN 0 ELSE wei_to_eth(t.receipt_l1_fee) END
""")


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
WITH

pb AS (
SELECT
number,
base_fee_per_gas
FROM
enriched_transasctions_v1
),

-- Select the columns that we want from transactions joined to blocks.
-- Include some minor transformations that are needed for further calculations.
base_level_trace_calls AS (
SELECT
-- Raw Trace fields
r.*,
-- Computed Trace Fields
hexstr_method_id(r.input) AS trc_method_id,
-- Find the next level up in the trace tree, so that we can subtract out subtract gas.
CASE WHEN trace_address IS NULL OR trace_address = '' THEN '-1' --if trace address is null, we don't want to go uplevel
ELSE
CASE
WHEN LENGTH(trace_address) - LENGTH(REPLACE(trace_address, ',', '')) = 0 THEN trace_address
ELSE SUBSTR(trace_address, 1, LENGTH(trace_address) - LENGTH(SPLIT(trace_address, ',')[ORDINAL(ARRAY_LENGTH(SPLIT(trace_address, ',')))]) - 1)
END
END AS trace_address_uplevel,
-- Raw Transaction Fields
t.receipt_gas_used AS tx_gas_used,
t.receipt_l1_gas_used AS tx_l1_gas_used,
t.from_address AS tx_from_address,
t.to_address AS tx_to_address,
t.receipt_gas_used AS tx_gas_used,
t.derived_l1_gas_used AS tx_l1_gas_used
-- Computed Transaction Fee Fields
total_gas_fee_native AS tx_total_gas_fee_native,
l1_fee_native AS tx_l1_fee_native,
l2_fee_native AS tx_l2_fee_native,
l2_priority_fee_native AS tx_l2_priority_fee_native,
l2_base_fee_native AS tx_l2_base_fee_native,
-- Computed Transaction Gas Price Fields
l2_gas_price_gwei,
l2_base_gas_price_gwei,
l2_priority_gas_price_gwei,
l1_base_gas_price_gwei,
l1_blob_base_gas_price_gwei,

t.method_id AS tx_method_id,
hexstr_method_id(r.input) AS trc_method_id,

-- Event Columns
log_count_total_events,
log_count_approval_events,
log_count_wrapping_events,
log_count_transfer_events,

-- Convenience columns
t.block_hour,
t.success AS tx_success,
t.receipt_status = 1 AS trc_success,

FROM ingestion_traces_v1 r
INNER JOIN enriched_transasctions_v1 t
ON r.block_number = t.block_number
AND r.transaction_hash = t.hash

WHERE r.trace_type = 'call'
)

--get amount of calls per transcation, to use for amortizing
, get_traces_per_tansaction AS (
SELECT block_number, block_timestamp, transaction_hash
, COUNT(*) AS count_traces_in_transaction
FROM base_level_traces
GROUP BY 1,2,3
)

--get subtrace gas used, for isolating each call
, get_subtraces_gas_per_trace AS (
SELECT block_number, block_timestamp, transaction_hash, trace_address_uplevel
, SUM(trc_gas_used) AS gas_used_in_subtraces, COUNT(*) AS count_subtraces
FROM base_level_traces
GROUP BY 1,2,3,4
)


SELECT
bl.*
--
, st.gas_used_in_subtraces AS gas_used_in_subtraces
, tpt.count_traces_in_transaction AS count_traces_in_transaction
--
, bl.trc_gas_used - st.gas_used_in_subtraces AS trc_gas_used_minus_subtraces

-- the subtraces will never add up to part of whole, but leave as is
, (bl.trc_gas_used - st.gas_used_in_subtraces) * gwei_to_eth(bl.tx_gas_price_gwei) AS trc_l2_fee_native_minus_subtraces
, (bl.trc_gas_used - st.gas_used_in_subtraces) * bl.tx_base_gas_price_gwei/1e9 AS trc_base_fee_native_minus_subtraces
, (bl.trc_gas_used - st.gas_used_in_subtraces) * bl.tx_priority_gas_price_gwei/1e9 AS trc_priority_fee_native_minus_subtraces
--
, (bl.tx_gas_used) / tpt.count_traces AS tx_gas_used_per_call_amortized
, (bl.tx_l1_gas_used) / tpt.count_traces AS tx_l1_gas_used_per_call_amortized

, bl.tx_gas_fee_native / tpt.count_traces AS tx_gas_fee_native_per_call_amortized
, bl.tx_l2_fee_native / tpt.count_traces AS tx_l2_fee_native_per_call_amortized
, bl.tx_l1_fee_native / tpt.count_traces AS tx_l1_fee_native_per_call_amortized
, bl.tx_base_fee_native / tpt.count_traces AS tx_base_fee_native_per_call_amortized
, bl.tx_priority_fee_native / tpt.count_traces AS tx_priority_fee_native_per_call_amortized


FROM base_level_trace_calls bl
INNER JOIN get_traces_per_tansaction tpt -- join on txn level
ON bl.block_number = tpt.block_number
AND bl.transaction_hash = tpt.transaction_hash
LEFT JOIN get_subtraces_gas_per_trace st -- join on trace level 1 level down
ON bl.block_number = st.block_number
AND bl.transaction_hash = st.transaction_hash
AND bl.trace_address = st.trace_address_uplevel
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
WITH

pb AS (
SELECT
number,
base_fee_per_gas
FROM
ingestion_blocks_v1
),

-- Select the columns that we want from transactions joined to blocks.
-- Include some minor transformations that are needed for further calculations.
base_level_transactions AS (
SELECT
-- Raw Transaction fields
t.*,
-- Fees
total_gas_fee_native() AS total_gas_fee_native
l1_gas_fee_native() END AS l1_fee_native,
l2_gas_fee_native() AS l2_fee_native,
l2_priority_gas_fee_native() AS l2_priority_fee_native,
l2_base_gas_fee_native() AS l2_base_fee_native,
-- TODO: Separate L1 Fee in to Blobs vs L1 Fee Contribution - TBD if we do this here or in the next step
-- Gas Prices
wei_to_gwei(b.gas_price) AS l2_gas_price_gwei,
wei_to_gwei(b.base_fee_per_gas) AS l2_base_gas_price_gwei,
wei_to_gwei(t.gas_price - b.base_fee_per_gas) AS l2_priority_gas_price_gwei,

wei_to_gwei(t.receipt_l1_gas_price) AS l1_base_gas_price_gwei,
wei_to_gwei(t.receipt_l1_blob_base_fee) AS l1_blob_base_gas_price_gwei,
-- L1 Fee BASE and BLOB scalars
coalesce(16 * micro(t.receipt_l1_base_fee_scalar), t.receipt_l1_fee_scalar::DECIMAL(12, 6))
AS l1_base_scalar,
coalesce(micro(t.receipt_l1_blob_base_fee_scalar), 0) AS l1_blob_scalar,

-- Transaction Attributes
hexstr_bytelen(t.input) AS input_byte_length,
hexstr_bytelen(t.input) AS input_calldata_gas,
hexstr_method_id(t.input) AS method_id,
transaction_type = 126 AS is_system_transaction,
( -- Attributes Deposited Spec https://specs.optimism.io/protocol/deposits.html#l1-attributes-deposited-transaction
transaction_type = 126
AND from_address = lower('0xDeaDDEaDDeAdDeAdDEAdDEaddeAddEAdDEAd0001')
AND to_address = lower('0x4200000000000000000000000000000000000015')
) AS is_attributes_deposited_transaction,

-- Convenience columns
epoch_to_hour(t.block_timestamp) AS block_hour,
t.receipt_status = 1 AS success

FROM ingestion_transactions_v1 AS t
INNER JOIN pb AS b ON t.block_number = b.number
WHERE 1=1
-- Optional address filter for faster results when developing.
-- AND from_address LIKE '0x00%'
),




SELECT
bl.*
-- Add fallback l1 gas estimate based on observed fee and weighted gas and blob prices.
, coalesce(
receipt_l1_gas_used,
round(16 * receipt_l1_fee / (
(l1_base_scalar * receipt_l1_gas_price)
+ (l1_blob_scalar * receipt_l1_blob_base_fee)
))::INT64
) AS derived_l1_gas_used
-- Add event emitting transaction fields
, COALESCE(eet.count_total_events,0) AS log_count_total_events
, COALESCE(eet.count_approval_events,0) AS log_count_approval_events
, COALESCE(eet.count_wrapping_events,0) AS log_count_wrapping_events
, COALESCE(eet.count_transfer_events,0) AS log_count_transfer_events

FROM base_level_transactions bl
LEFT JOIN event_emitting_transactions eet
ON eet.block_number = bl.block_number
AND eet.transaction_hash = bl.transaction_hash
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
WITH topic0_filters AS ( -- list of events that we want to indicate for possible future filtering.
SELECT distinct lower(topic) as topic, description, category
FROM (values
('0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925', 'ERC20 Approval', 'Approval')
,('0x17307eab39ab6107e8899845ad3d59bd9653f200f220920489ca2b5937696c31', 'ERC721/ERC1155 Approval', 'Approval')

,('0xe1fffcc4923d04b559f4d29a8bfc6cda04eb5b0d3c460751c2402c5c5cc9109c', 'WETH Wrap', 'Wrapping')
,('0x7fcf532c15f0a6db0bd6d0e038bea71d30d808c7d98cb3bf7268a95bf5081b65', 'WETH Unwrap', 'Wrapping')

,('0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', 'ERC20/ERC721 Transfer', 'Transfer')
,('0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', 'ERC1155 Transfer Single', 'Transfer')
,('0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb', 'ERC1155 Transfer Batch', 'Transfer')
,('0xe6497e3ee548a3372136af2fcb0696db31fc6cf20260707645068bd3fe97f3c4', 'Polygon Log Transfer', 'Transfer')
,('0x4dfe1bbbcf077ddc3e01291eea2d5c70c2b422b415d95645b9adcfd678cb1d63', 'Polygon Log Fee Transfer', 'Transfer')
) a (topic, description)
)

-- Select the columns for joining back to transactions and sum event types
-- Calculate types of events emitted for downstream "qualified transaction" filtering

SELECT
l.dt
,epoch_to_hour(l.block_timestamp) AS block_hour
,l.block_timestamp
,l.network, l.chain, l.chain_id
,l.block_number
,l.transaction_hash

,COUNT(*) AS count_total_events
,SUM(CASE WHEN f.category = 'Approval' THEN 1 ELSE 0 END) AS count_approval_events
,SUM(CASE WHEN f.category = 'Wrapping' THEN 1 ELSE 0 END) AS count_wrapping_events
,SUM(CASE WHEN f.category = 'Transfer' THEN 1 ELSE 0 END) AS count_transfer_events

FROM ingestion_logs_v1 AS l

LEFT JOIN topic0_filters AS f
ON l.topic0 = f.topic

GROUP BY 1,2,3,4,5,6,7,8


Loading