From 59331bdb157b461534e4d51140fc5f193ba8de05 Mon Sep 17 00:00:00 2001 From: wlmyng <127570466+wlmyng@users.noreply.github.com> Date: Mon, 8 Jan 2024 17:02:35 -0500 Subject: [PATCH] [gql][2/n] output nodes estimation (#15497) ## Description Implements output nodes estimation. A node adds a cost of 1, unless it is a connection field, where it would then have a cost of default_page_size, or min(first, last) if so provided. This is multiplicative - at the next child connection, that child's estimated output will be multiplied with its parent's to yield the cumulative result. A connection's pageInfo field does not multiplicatively increase the current count, as there is only one node for the entire connection. ## Test Plan 1. check that we get this back in the headers when showUsage is provided 2. output_node_estimation.move --- If your changes are not user-facing and not a breaking change, you can skip the following section. Otherwise, please indicate what changed, and then add to the Release Notes section as highlighted during the release process. 
### Type of Change (Check all that apply) - [ ] protocol change - [ ] user-visible impact - [ ] breaking change for a client SDKs - [ ] breaking change for FNs (FN binary must upgrade) - [ ] breaking change for validators or node operators (must upgrade binaries) - [ ] breaking change for on-chain data layout - [ ] necessitate either a data wipe or data migration ### Release notes --- Cargo.lock | 1 + Cargo.toml | 1 + .../tests/call/simple.exp | 5 +- .../tests/limits/output_node_estimation.exp | 368 ++++++++++++++++++ .../tests/limits/output_node_estimation.move | 272 +++++++++++++ crates/sui-graphql-rpc/Cargo.toml | 1 + .../schema/current_progress_schema.graphql | 13 +- crates/sui-graphql-rpc/src/config.rs | 13 +- .../src/extensions/query_limits_checker.rs | 176 +++++++-- crates/sui-graphql-rpc/src/metrics.rs | 24 +- crates/sui-graphql-rpc/src/server/builder.rs | 12 +- crates/sui-graphql-rpc/tests/e2e_tests.rs | 3 +- .../tests/examples_validation_tests.rs | 22 +- .../snapshot_tests__schema_sdl_export.snap | 13 +- 14 files changed, 856 insertions(+), 68 deletions(-) create mode 100644 crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.exp create mode 100644 crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.move diff --git a/Cargo.lock b/Cargo.lock index 240c920cb05e4..bb222eb8cb0af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11865,6 +11865,7 @@ dependencies = [ "anyhow", "async-graphql", "async-graphql-axum", + "async-graphql-value", "async-trait", "axum", "bcs", diff --git a/Cargo.toml b/Cargo.toml index 8346e210c47c9..3a11d17b3dd12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,6 +235,7 @@ arc-swap = { version = "1.5.1", features = ["serde"] } assert_cmd = "2.0.6" async-graphql = "6.0.7" async-graphql-axum = "6.0.7" +async-graphql-value = "6.0.7" async-recursion = "1.0.4" async-trait = "0.1.61" atomic_float = "0.1" diff --git a/crates/sui-graphql-e2e-tests/tests/call/simple.exp b/crates/sui-graphql-e2e-tests/tests/call/simple.exp 
index fa02f9868b149..1bd7233983a6f 100644 --- a/crates/sui-graphql-e2e-tests/tests/call/simple.exp +++ b/crates/sui-graphql-e2e-tests/tests/call/simple.exp @@ -75,7 +75,7 @@ Response: { task 15 'run-graphql'. lines 68-74: Headers: { "content-type": "application/json", - "content-length": "136", + "content-length": "157", "x-sui-rpc-version": "0.1.0", } Service version: 0.1.0 @@ -87,7 +87,8 @@ Response: { }, "extensions": { "usage": { - "nodes": 2, + "inputNodes": 2, + "outputNodes": 2, "depth": 2, "variables": 0, "fragments": 0, diff --git a/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.exp b/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.exp new file mode 100644 index 0000000000000..fb64db9612038 --- /dev/null +++ b/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.exp @@ -0,0 +1,368 @@ +processed 14 tasks + +task 1 'run-graphql'. lines 6-14: +Response: { + "data": { + "transactionBlockConnection": { + "pageInfo": { + "hasPreviousPage": false + } + } + }, + "extensions": { + "usage": { + "inputNodes": 3, + "outputNodes": 3, + "depth": 3, + "variables": 0, + "fragments": 0, + "queryPayload": 141 + } + } +} + +task 2 'run-graphql'. lines 16-26: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "node": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 4, + "outputNodes": 80, + "depth": 4, + "variables": 0, + "fragments": 0, + "queryPayload": 172 + } + } +} + +task 3 'run-graphql'. lines 28-42: +Response: { + "data": { + "checkpoints": { + "nodes": [ + { + "transactionBlockConnection": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 6, + "outputNodes": 1640, + "depth": 6, + "variables": 0, + "fragments": 0, + "queryPayload": 215 + } + } +} + +task 4 'run-graphql'. 
lines 44-65: +Response: { + "data": { + "checkpoints": { + "nodes": [ + { + "notOne": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + }, + "isOne": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 10, + "outputNodes": 1720, + "depth": 6, + "variables": 0, + "fragments": 0, + "queryPayload": 324 + } + } +} + +task 5 'run-graphql'. lines 67-88: +Response: { + "data": { + "checkpoints": { + "nodes": [ + { + "notZero": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + }, + "isZero": { + "edges": [] + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 10, + "outputNodes": 1640, + "depth": 6, + "variables": 0, + "fragments": 0, + "queryPayload": 326 + } + } +} + +task 6 'run-graphql'. lines 90-100: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 4, + "outputNodes": 4, + "depth": 4, + "variables": 0, + "fragments": 0, + "queryPayload": 154 + } + } +} + +task 7 'run-graphql'. lines 102-112: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv" + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 4, + "outputNodes": 4, + "depth": 4, + "variables": 0, + "fragments": 0, + "queryPayload": 152 + } + } +} + +task 8 'run-graphql'. 
lines 114-142: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv", + "first": null, + "last": null + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 14, + "outputNodes": 3320, + "depth": 8, + "variables": 0, + "fragments": 0, + "queryPayload": 510 + } + } +} + +task 9 'run-graphql'. lines 144-170: +Response: { + "data": { + "transactionBlockConnection": { + "nodes": [ + { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv", + "first": null, + "last": null + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 13, + "outputNodes": 3300, + "depth": 7, + "variables": 0, + "fragments": 0, + "queryPayload": 542 + } + } +} + +task 10 'run-graphql'. lines 172-221: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "txns": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv", + "a": null, + "b": null + } + } + ] + }, + "eventConnection": { + "edges": [] + } + }, + "extensions": { + "usage": { + "inputNodes": 24, + "outputNodes": 86340, + "depth": 11, + "variables": 0, + "fragments": 0, + "queryPayload": 1431 + } + } +} + +task 11 'run-graphql'. lines 223-248: +Response: { + "data": { + "transactionBlockConnection": { + "edges": [ + { + "node": { + "digest": "FbaziGbTUgv1hCfsnzivvm4e7BuWvYjCibecqkLEohLv", + "a": null + } + } + ] + } + }, + "extensions": { + "usage": { + "inputNodes": 12, + "outputNodes": 33300, + "depth": 11, + "variables": 0, + "fragments": 0, + "queryPayload": 722 + } + } +} + +task 12 'run-graphql'. 
lines 250-260: +Response: { + "data": null, + "extensions": { + "usage": { + "inputNodes": 4, + "outputNodes": 80, + "depth": 4, + "variables": 0, + "fragments": 0, + "queryPayload": 163 + } + }, + "errors": [ + { + "message": "'first' and 'last' must not be used together", + "locations": [ + { + "line": 3, + "column": 3 + } + ], + "path": [ + "transactionBlockConnection" + ], + "extensions": { + "code": "BAD_USER_INPUT" + } + } + ] +} + +task 13 'run-graphql'. lines 262-272: +Response: { + "data": null, + "extensions": { + "usage": { + "inputNodes": 4, + "outputNodes": 80, + "depth": 4, + "variables": 0, + "fragments": 0, + "queryPayload": 156 + } + }, + "errors": [ + { + "message": "Invalid value for argument \"first\", expected type \"Int\"", + "locations": [ + { + "line": 3, + "column": 30 + } + ] + } + ] +} diff --git a/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.move b/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.move new file mode 100644 index 0000000000000..b1eb7e36b08f3 --- /dev/null +++ b/crates/sui-graphql-e2e-tests/tests/limits/output_node_estimation.move @@ -0,0 +1,272 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +//# init --addresses A=0x42 --simulator + +//# run-graphql --show-usage +# pageInfo does not inherit connection's weights +{ + transactionBlockConnection(first: 20) { + pageInfo { + hasPreviousPage + } + } +} + +//# run-graphql --show-usage +# if connection does not have 'first' or 'last' set, use default_page_size (20) +{ + transactionBlockConnection { + edges { + node { + digest + } + } + } +} + +//# run-graphql --show-usage +# build on previous example with nested connection +{ + checkpoints { + nodes { + transactionBlockConnection { + edges { + txns: node { + digest + } + } + } + } + } +} + +//# run-graphql --show-usage +# handles 1 +{ + checkpoints { + nodes { + notOne: transactionBlockConnection { + edges { + txns: node { + digest + } + } + } + isOne: transactionBlockConnection(first: 1) { + edges { + txns: node { + digest + } + } + } + } + } +} + +//# run-graphql --show-usage +# handles 0 +{ + checkpoints { + nodes { + notZero: transactionBlockConnection { + edges { + txns: node { + digest + } + } + } + isZero: transactionBlockConnection(first: 0) { + edges { + txns: node { + digest + } + } + } + } + } +} + +//# run-graphql --show-usage +# if connection does have 'first' set, use it +{ + transactionBlockConnection(first: 1) { + edges { + txns: node { + digest + } + } + } +} + +//# run-graphql --show-usage +# if connection does have 'last' set, use it +{ + transactionBlockConnection(last: 1) { + edges { + txns: node { + digest + } + } + } +} + +//# run-graphql --show-usage +# first and last should behave the same +{ + transactionBlockConnection { + edges { + txns: node { + digest + first: expiration { + checkpoints(first: 20) { + edges { + node { + sequenceNumber + } + } + } + } + last: expiration { + checkpoints(last: 20) { + edges { + node { + sequenceNumber + } + } + } + } + } + } + } +} + +//# run-graphql --show-usage +# edges incur additional cost over nodes +{ + transactionBlockConnection { + nodes { + digest + 
first: expiration { # 80 cumulative + checkpoints(first: 20) { + edges { + node { + sequenceNumber + } + } + } + } # 1680 cumulative + last: expiration { # 20 + 1680 = 1700 cumulative + checkpoints(last: 20) { + edges { + node { + sequenceNumber + } + } + } # another 1600, 3300 cumulative + } + } + } +} + +//# run-graphql --show-usage +# example lifted from complex query at +# https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#node-limit +# our costing will be different since we consider all nodes +{ + transactionBlockConnection(first: 50) { # 50, 50 + edges { # 50, 100 + txns: node { # 50, 150 + digest # 50, 200 + a: expiration { # 50, 250 + checkpoints(last: 20) { # 50 * 20 = 1000, 1250 + edges { # 1000, 2250 + node { # 1000, 3250 + transactionBlockConnection(first: 10) { # 50 * 20 * 10 = 10000, 13250 + edges { # 10000, 23250 + node { # 10000, 33250 + digest # 10000, 43250 + } + } + } + } + } + } + } + b: expiration { # 50, 43300 + checkpoints(first: 20) { # 50 * 20 = 1000, 44300 + edges { # 1000, 45300 + node { # 1000, 46300 + transactionBlockConnection(last: 10) { # 50 * 20 * 10 = 10000, 56300 + edges { # 10000, 66300 + node { # 10000, 76300 + digest # 10000, 86300 + } + } + } + } + } + } + } + } + } + } + eventConnection(last: 10) { # 10 + edges { + node { + timestamp + } + } + } # 40, 86340 +} + +//# run-graphql --show-usage +# Null value for variable passed to limit will use default_page_size +query NullVariableForLimit($howMany: Int) { + transactionBlockConnection(last: $howMany) { # 20, 20 + edges { # 20, 40 + node { # 20, 60 + digest # 20, 80 + a: expiration { # 20, 100 + checkpoints { # 20 * 20, 500 + edges { # 400, 900 + node { # 400, 1300 + transactionBlockConnection(first: $howMany) { # 20 * 20 * 20 = 8000, 9300 + edges { # 8000, 17300 + node { # 8000, 25300 + digest # 8000, 33300 + } + } + } + } + } + } + } + } + } + } +} + +//# run-graphql --show-usage +# error state - can't use first and last together +{ + 
transactionBlockConnection(first: 20, last: 30) { + edges { + node { + digest + } + } + } +} + +//# run-graphql --show-usage +# error state - exceed max integer +{ + transactionBlockConnection(first: 36893488147419103000) { + edges { + node { + digest + } + } + } +} diff --git a/crates/sui-graphql-rpc/Cargo.toml b/crates/sui-graphql-rpc/Cargo.toml index 08004d06386f1..7423c884d3527 100644 --- a/crates/sui-graphql-rpc/Cargo.toml +++ b/crates/sui-graphql-rpc/Cargo.toml @@ -11,6 +11,7 @@ edition = "2021" anyhow.workspace = true async-graphql = {workspace = true, features = ["dataloader", "apollo_tracing", "tracing", "opentelemetry"] } async-graphql-axum.workspace = true +async-graphql-value.workspace = true async-trait.workspace = true axum.workspace = true chrono.workspace = true diff --git a/crates/sui-graphql-rpc/schema/current_progress_schema.graphql b/crates/sui-graphql-rpc/schema/current_progress_schema.graphql index 14440c6cff227..b4df8ff3d4ef8 100644 --- a/crates/sui-graphql-rpc/schema/current_progress_schema.graphql +++ b/crates/sui-graphql-rpc/schema/current_progress_schema.graphql @@ -2094,13 +2094,14 @@ type ServiceConfig { """ The maximum number of output nodes in a GraphQL response. - If a node is a connection, it is counted as the specified 'first' or 'last' number of items, - or the default_page_size as set by the server. Non-connection nodes and fields are not included - in this count. + Non-connection nodes have a count of 1, while connection nodes are counted as + the specified 'first' or 'last' number of items, or the default_page_size + as set by the server if those arguments are not set. - The count of output nodes is multiplicative. For example, if the current node is a connection - with first: 10 and has a field to a connection with last: 20, the total estimated output nodes - would be 10 * 20 = 200. + Counts accumulate multiplicatively down the query tree. 
For example, if a query starts + with a connection of first: 10 and has a field to a connection with last: 20, the count + at the second level would be 200 nodes. This is then summed to the count of 10 nodes + at the first level, for a total of 210 nodes. """ maxOutputNodes: Int! """ diff --git a/crates/sui-graphql-rpc/src/config.rs b/crates/sui-graphql-rpc/src/config.rs index ace9fb31a40ac..be05018ba7a5a 100644 --- a/crates/sui-graphql-rpc/src/config.rs +++ b/crates/sui-graphql-rpc/src/config.rs @@ -219,13 +219,14 @@ impl ServiceConfig { /// The maximum number of output nodes in a GraphQL response. /// - /// If a node is a connection, it is counted as the specified 'first' or 'last' number of items, - /// or the default_page_size as set by the server. Non-connection nodes and fields are not included - /// in this count. + /// Non-connection nodes have a count of 1, while connection nodes are counted as + /// the specified 'first' or 'last' number of items, or the default_page_size + /// as set by the server if those arguments are not set. /// - /// The count of output nodes is multiplicative. For example, if the current node is a connection - /// with first: 10 and has a field to a connection with last: 20, the total estimated output nodes - /// would be 10 * 20 = 200. + /// Counts accumulate multiplicatively down the query tree. For example, if a query starts + /// with a connection of first: 10 and has a field to a connection with last: 20, the count + /// at the second level would be 200 nodes. This is then summed to the count of 10 nodes + /// at the first level, for a total of 210 nodes. 
pub async fn max_output_nodes(&self) -> u64 { self.limits.max_output_nodes } diff --git a/crates/sui-graphql-rpc/src/extensions/query_limits_checker.rs b/crates/sui-graphql-rpc/src/extensions/query_limits_checker.rs index 5c4ef94f721b6..ad6d93baeb8e6 100644 --- a/crates/sui-graphql-rpc/src/extensions/query_limits_checker.rs +++ b/crates/sui-graphql-rpc/src/extensions/query_limits_checker.rs @@ -12,6 +12,7 @@ use async_graphql::extensions::NextParseQuery; use async_graphql::extensions::NextRequest; use async_graphql::parser::types::Directive; use async_graphql::parser::types::ExecutableDocument; +use async_graphql::parser::types::Field; use async_graphql::parser::types::FragmentDefinition; use async_graphql::parser::types::Selection; use async_graphql::parser::types::SelectionSet; @@ -21,11 +22,13 @@ use async_graphql::Pos; use async_graphql::Positioned; use async_graphql::Response; use async_graphql::ServerResult; +use async_graphql::Value; use async_graphql::Variables; use async_graphql::{ extensions::{Extension, ExtensionContext, ExtensionFactory}, ServerError, }; +use async_graphql_value::Value as GqlValue; use axum::headers; use axum::http::HeaderName; use axum::http::HeaderValue; @@ -42,7 +45,8 @@ pub(crate) struct ShowUsage; #[derive(Clone, Debug, Default)] struct ValidationRes { - num_nodes: u32, + input_nodes: u32, + output_nodes: u64, depth: u32, num_variables: u32, num_fragments: u32, @@ -54,6 +58,8 @@ pub(crate) struct QueryLimitsChecker { validation_result: Mutex>, } +pub(crate) const CONNECTION_FIELDS: [&str; 2] = ["edges", "nodes"]; + impl headers::Header for ShowUsage { fn name() -> &'static HeaderName { &LIMITS_HEADER @@ -81,7 +87,8 @@ impl ExtensionFactory for QueryLimitsChecker { #[derive(Debug)] struct ComponentCost { - pub num_nodes: u32, + pub input_nodes: u32, + pub output_nodes: u64, pub depth: u32, } @@ -90,7 +97,8 @@ impl std::ops::Add for ComponentCost { fn add(self, rhs: Self) -> Self::Output { Self { - num_nodes: self.num_nodes + 
rhs.num_nodes, + input_nodes: self.input_nodes + rhs.input_nodes, + output_nodes: self.output_nodes + rhs.output_nodes, depth: self.depth + rhs.depth, } } @@ -105,7 +113,8 @@ impl Extension for QueryLimitsChecker { resp.extension( "usage", value! ({ - "nodes": validation_result.num_nodes, + "inputNodes": validation_result.input_nodes, + "outputNodes": validation_result.output_nodes, "depth": validation_result.depth, "variables": validation_result.num_variables, "fragments": validation_result.num_fragments, @@ -147,10 +156,12 @@ impl Extension for QueryLimitsChecker { let mut running_costs = ComponentCost { depth: 0, - num_nodes: 0, + input_nodes: 0, + output_nodes: 0, }; let mut max_depth_seen = 0; + // An operation is a query, mutation or subscription consisting of a set of selections for (count, (_name, oper)) in doc.operations.iter().enumerate() { let sel_set = &oper.node.selection_set; @@ -168,13 +179,20 @@ impl Extension for QueryLimitsChecker { } running_costs.depth = 0; - self.analyze_selection_set(&cfg.limits, &doc.fragments, sel_set, &mut running_costs)?; + self.analyze_selection_set( + &cfg.limits, + &doc.fragments, + sel_set, + &mut running_costs, + variables, + )?; max_depth_seen = max_depth_seen.max(running_costs.depth); } if ctx.data_opt::().is_some() { *self.validation_result.lock().await = Some(ValidationRes { - num_nodes: running_costs.num_nodes, + input_nodes: running_costs.input_nodes, + output_nodes: running_costs.output_nodes, depth: running_costs.depth, query_payload: query.len() as u32, num_variables: variables.len() as u32, @@ -182,7 +200,12 @@ impl Extension for QueryLimitsChecker { }); } if let Some(metrics) = ctx.data_opt::>() { - metrics.num_nodes.observe(running_costs.num_nodes as f64); + metrics + .input_nodes + .observe(running_costs.input_nodes as f64); + metrics + .output_nodes + .observe(running_costs.output_nodes as f64); metrics.query_depth.observe(running_costs.depth as f64); metrics.query_payload_size.observe(query.len() as 
f64); } @@ -191,22 +214,31 @@ impl Extension for QueryLimitsChecker { } impl QueryLimitsChecker { + /// Parse the selected fields in one operation and check if it conforms to configured limits. fn analyze_selection_set( &self, limits: &Limits, fragment_defs: &HashMap>, sel_set: &Positioned, cost: &mut ComponentCost, + variables: &Variables, ) -> ServerResult<()> { // Use BFS to analyze the query and count the number of nodes and the depth of the query + struct ToVisit<'s> { + selection: &'s Positioned, + parent_node_count: u64, + } // Queue to store the nodes at each level let mut que = VecDeque::new(); - for top_level_sel in sel_set.node.items.iter() { - que.push_back(top_level_sel); - cost.num_nodes += 1; - check_limits(limits, cost.num_nodes, cost.depth, Some(top_level_sel.pos))?; + for selection in sel_set.node.items.iter() { + que.push_back(ToVisit { + selection, + parent_node_count: 1, + }); + cost.input_nodes += 1; + check_limits(limits, cost, Some(selection.pos))?; } // Track the number of nodes at first level if any @@ -215,21 +247,37 @@ impl QueryLimitsChecker { while !que.is_empty() { // Signifies the start of a new level cost.depth += 1; - check_limits(limits, cost.num_nodes, cost.depth, None)?; + check_limits(limits, cost, None)?; while level_len > 0 { // Ok to unwrap since we checked for empty queue // and level_len > 0 - let curr_sel = que.pop_front().unwrap(); + let ToVisit { + selection, + parent_node_count, + } = que.pop_front().unwrap(); - match &curr_sel.node { + match &selection.node { Selection::Field(f) => { check_directives(&f.node.directives)?; + + let current_count = estimate_output_nodes_for_curr_node( + f, + variables, + limits.default_page_size, + ) * parent_node_count; + + cost.output_nodes += current_count; + for field_sel in f.node.selection_set.node.items.iter() { - que.push_back(field_sel); - cost.num_nodes += 1; - check_limits(limits, cost.num_nodes, cost.depth, Some(field_sel.pos))?; + que.push_back(ToVisit { + selection: 
field_sel, + parent_node_count: current_count, + }); + cost.input_nodes += 1; + check_limits(limits, cost, Some(field_sel.pos))?; } } + Selection::FragmentSpread(fs) => { let frag_name = &fs.node.fragment_name.node; let frag_def = fragment_defs.get(frag_name).ok_or_else(|| { @@ -247,23 +295,25 @@ impl QueryLimitsChecker { // Ideally web should cache the costs of fragments we've seen before // Will do as enhancement check_directives(&frag_def.node.directives)?; - for frag_sel in frag_def.node.selection_set.node.items.iter() { - que.push_back(frag_sel); - cost.num_nodes += 1; - check_limits(limits, cost.num_nodes, cost.depth, Some(frag_sel.pos))?; + for selection in frag_def.node.selection_set.node.items.iter() { + que.push_back(ToVisit { + selection, + parent_node_count, + }); + cost.input_nodes += 1; + check_limits(limits, cost, Some(selection.pos))?; } } + Selection::InlineFragment(fs) => { check_directives(&fs.node.directives)?; - for in_frag_sel in fs.node.selection_set.node.items.iter() { - que.push_back(in_frag_sel); - cost.num_nodes += 1; - check_limits( - limits, - cost.num_nodes, - cost.depth, - Some(in_frag_sel.pos), - )?; + for selection in fs.node.selection_set.node.items.iter() { + que.push_back(ToVisit { + selection, + parent_node_count, + }); + cost.input_nodes += 1; + check_limits(limits, cost, Some(selection.pos))?; } } } @@ -271,12 +321,13 @@ impl QueryLimitsChecker { } level_len = que.len(); } + Ok(()) } } -fn check_limits(limits: &Limits, nodes: u32, depth: u32, pos: Option) -> ServerResult<()> { - if nodes > limits.max_query_nodes { +fn check_limits(limits: &Limits, cost: &ComponentCost, pos: Option) -> ServerResult<()> { + if cost.input_nodes > limits.max_query_nodes { return Err(ServerError::new( format!( "Query has too many nodes. 
The maximum allowed is {}", @@ -286,7 +337,7 @@ fn check_limits(limits: &Limits, nodes: u32, depth: u32, pos: Option) -> Se )); } - if depth > limits.max_query_depth { + if cost.depth > limits.max_query_depth { return Err(ServerError::new( format!( "Query has too many levels of nesting. The maximum allowed is {}", @@ -296,6 +347,16 @@ fn check_limits(limits: &Limits, nodes: u32, depth: u32, pos: Option) -> Se )); } + if cost.output_nodes > limits.max_output_nodes { + return Err(ServerError::new( + format!( + "Query will result in too many output nodes. The maximum allowed is {}, estimated {}", + limits.max_output_nodes, cost.output_nodes + ), + pos, + )); + } + Ok(()) } @@ -327,3 +388,50 @@ fn check_directives(directives: &[Positioned]) -> ServerResult<()> { } Ok(()) } + +/// Given a node, estimate the number of output nodes it will produce. +fn estimate_output_nodes_for_curr_node( + f: &Positioned, + variables: &Variables, + default_page_size: u64, +) -> u64 { + if !is_connection(f) { + 1 + } else { + // If the args 'first' or 'last' is set, then we should use that as the count + let first_arg = f.node.get_argument("first"); + let last_arg = f.node.get_argument("last"); + + extract_limit(first_arg, variables) + .or_else(|| extract_limit(last_arg, variables)) + .unwrap_or(default_page_size) + } +} + +/// Try to extract a u64 value from the given argument, or return None on failure. +fn extract_limit(value: Option<&Positioned>, variables: &Variables) -> Option { + if let GqlValue::Variable(var) = &value?.node { + return match variables.get(var) { + Some(Value::Number(num)) => num.as_u64(), + _ => None, + }; + } + + let GqlValue::Number(value) = &value?.node else { + return None; + }; + value.as_u64() +} + +/// Checks if the given field is a connection field by whether it has 'edges' or 'nodes' selected. 
+/// This should typically not require checking more than the first element of the selection set +fn is_connection(f: &Positioned) -> bool { + for field_sel in f.node.selection_set.node.items.iter() { + if let Selection::Field(field) = &field_sel.node { + if CONNECTION_FIELDS.contains(&field.node.name.node.as_str()) { + return true; + } + } + } + false +} diff --git a/crates/sui-graphql-rpc/src/metrics.rs b/crates/sui-graphql-rpc/src/metrics.rs index ead6e731375ce..c725149e421a6 100644 --- a/crates/sui-graphql-rpc/src/metrics.rs +++ b/crates/sui-graphql-rpc/src/metrics.rs @@ -5,16 +5,21 @@ use prometheus::{register_histogram_with_registry, Histogram, Registry}; #[derive(Clone, Debug)] pub struct RequestMetrics { - pub(crate) num_nodes: Histogram, + pub(crate) input_nodes: Histogram, + pub(crate) output_nodes: Histogram, pub(crate) query_depth: Histogram, pub(crate) query_payload_size: Histogram, pub(crate) _db_query_cost: Histogram, } // TODO: finetune buckets as we learn more about the distribution of queries -const NUM_NODES_BUCKETS: &[f64] = &[ +const INPUT_NODES_BUCKETS: &[f64] = &[ 1., 2., 4., 8., 12., 16., 24., 32., 48., 64., 96., 128., 256., 512., 1024., ]; +const OUTPUT_NODES_BUCKETS: &[f64] = &[ + 100., 200., 400., 800., 1200., 1600., 2400., 3200., 4800., 6400., 9600., 12800., 25600., + 51200., 102400., +]; const QUERY_DEPTH_BUCKETS: &[f64] = &[ 1., 2., 4., 8., 12., 16., 24., 32., 48., 64., 96., 128., 256., 512., 1024., ]; @@ -29,10 +34,17 @@ const DB_QUERY_COST_BUCKETS: &[f64] = &[ impl RequestMetrics { pub fn new(registry: &Registry) -> Self { Self { - num_nodes: register_histogram_with_registry!( - "num_nodes", - "Number of nodes in the query", - NUM_NODES_BUCKETS.to_vec(), + input_nodes: register_histogram_with_registry!( + "input_nodes", + "Number of input nodes in the query", + INPUT_NODES_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + output_nodes: register_histogram_with_registry!( + "output_nodes", + "Number of output nodes in the response", + 
OUTPUT_NODES_BUCKETS.to_vec(), registry, ) .unwrap(), diff --git a/crates/sui-graphql-rpc/src/server/builder.rs b/crates/sui-graphql-rpc/src/server/builder.rs index 94438ae036bb5..bb41414556fae 100644 --- a/crates/sui-graphql-rpc/src/server/builder.rs +++ b/crates/sui-graphql-rpc/src/server/builder.rs @@ -655,17 +655,21 @@ pub mod tests { .build_schema(); let _ = schema.execute("{ chainIdentifier }").await; - assert_eq!(metrics2.num_nodes.get_sample_count(), 1); + assert_eq!(metrics2.input_nodes.get_sample_count(), 1); + assert_eq!(metrics2.output_nodes.get_sample_count(), 1); assert_eq!(metrics2.query_depth.get_sample_count(), 1); - assert_eq!(metrics2.num_nodes.get_sample_sum(), 1.); + assert_eq!(metrics2.input_nodes.get_sample_sum(), 1.); + assert_eq!(metrics2.output_nodes.get_sample_sum(), 1.); assert_eq!(metrics2.query_depth.get_sample_sum(), 1.); let _ = schema .execute("{ chainIdentifier protocolConfig { configs { value key }} }") .await; - assert_eq!(metrics2.num_nodes.get_sample_count(), 2); + assert_eq!(metrics2.input_nodes.get_sample_count(), 2); + assert_eq!(metrics2.output_nodes.get_sample_count(), 2); assert_eq!(metrics2.query_depth.get_sample_count(), 2); - assert_eq!(metrics2.num_nodes.get_sample_sum(), 2. + 4.); + assert_eq!(metrics2.input_nodes.get_sample_sum(), 2. + 4.); + assert_eq!(metrics2.output_nodes.get_sample_sum(), 2. + 4.); assert_eq!(metrics2.query_depth.get_sample_sum(), 1. 
+ 3.); } } diff --git a/crates/sui-graphql-rpc/tests/e2e_tests.rs b/crates/sui-graphql-rpc/tests/e2e_tests.rs index b1fe2eff401ff..2b704438fa504 100644 --- a/crates/sui-graphql-rpc/tests/e2e_tests.rs +++ b/crates/sui-graphql-rpc/tests/e2e_tests.rs @@ -135,7 +135,8 @@ mod tests { assert!(res.errors().is_empty()); let usage = res.usage().unwrap().unwrap(); - assert_eq!(*usage.get("nodes").unwrap(), 1); + assert_eq!(*usage.get("inputNodes").unwrap(), 1); + assert_eq!(*usage.get("outputNodes").unwrap(), 1); assert_eq!(*usage.get("depth").unwrap(), 1); assert_eq!(*usage.get("variables").unwrap(), 0); assert_eq!(*usage.get("fragments").unwrap(), 0); diff --git a/crates/sui-graphql-rpc/tests/examples_validation_tests.rs b/crates/sui-graphql-rpc/tests/examples_validation_tests.rs index 0a1eec9e66c54..32825355329cd 100644 --- a/crates/sui-graphql-rpc/tests/examples_validation_tests.rs +++ b/crates/sui-graphql-rpc/tests/examples_validation_tests.rs @@ -48,6 +48,7 @@ mod tests { cluster: &ExecutorCluster, group: &ExampleQueryGroup, max_nodes: &mut u64, + max_output_nodes: &mut u64, max_depth: &mut u64, max_payload: &mut u64, ) -> Vec { @@ -73,9 +74,15 @@ mod tests { .expect("Usage fetch should succeed") .unwrap_or_else(|| panic!("Usage should be present for query: {}", query.name)); - let nodes = *usage.get("nodes").unwrap_or_else(|| { + let nodes = *usage.get("inputNodes").unwrap_or_else(|| { panic!("Node usage should be present for query: {}", query.name) }); + let output_nodes = *usage.get("outputNodes").unwrap_or_else(|| { + panic!( + "Output node usage should be present for query: {}", + query.name + ) + }); let depth = *usage.get("depth").unwrap_or_else(|| { panic!("Depth usage should be present for query: {}", query.name) }); @@ -83,6 +90,7 @@ mod tests { panic!("Payload usage should be present for query: {}", query.name) }); *max_nodes = max(*max_nodes, nodes); + *max_output_nodes = max(*max_output_nodes, output_nodes); *max_depth = max(*max_depth, depth); 
*max_payload = max(*max_payload, payload); } @@ -95,7 +103,7 @@ mod tests { async fn test_single_all_examples_structure_valid() { let rng = StdRng::from_seed([12; 32]); let mut sim = Simulacrum::new_with_rng(rng); - let (mut max_nodes, mut max_depth, mut max_payload) = (0, 0, 0); + let (mut max_nodes, mut max_output_nodes, mut max_depth, mut max_payload) = (0, 0, 0, 0); sim.create_checkpoint(); @@ -116,6 +124,7 @@ mod tests { &cluster, &group, &mut max_nodes, + &mut max_output_nodes, &mut max_depth, &mut max_payload, ) @@ -131,6 +140,12 @@ mod tests { max_nodes, default_config.max_query_nodes ); + assert!( + max_output_nodes <= default_config.max_output_nodes, + "Max output nodes {} exceeds default limit {}", + max_output_nodes, + default_config.max_output_nodes + ); assert!( max_depth <= default_config.max_query_depth as u64, "Max depth {} exceeds default limit {}", @@ -152,7 +167,7 @@ mod tests { async fn test_bad_examples_fail() { let rng = StdRng::from_seed([12; 32]); let mut sim = Simulacrum::new_with_rng(rng); - let (mut max_nodes, mut max_depth, mut max_payload) = (0, 0, 0); + let (mut max_nodes, mut max_output_nodes, mut max_depth, mut max_payload) = (0, 0, 0, 0); sim.create_checkpoint(); @@ -170,6 +185,7 @@ mod tests { &cluster, &bad_examples, &mut max_nodes, + &mut max_output_nodes, &mut max_depth, &mut max_payload, ) diff --git a/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap b/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap index 0413b106071b0..384def2be3281 100644 --- a/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap +++ b/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap @@ -2098,13 +2098,14 @@ type ServiceConfig { """ The maximum number of output nodes in a GraphQL response. - If a node is a connection, it is counted as the specified 'first' or 'last' number of items, - or the default_page_size as set by the server. 
Non-connection nodes and fields are not included - in this count. + Non-connection nodes have a count of 1, while connection nodes are counted as + the specified 'first' or 'last' number of items, or the default_page_size + as set by the server if those arguments are not set. - The count of output nodes is multiplicative. For example, if the current node is a connection - with first: 10 and has a field to a connection with last: 20, the total estimated output nodes - would be 10 * 20 = 200. + Counts accumulate multiplicatively down the query tree. For example, if a query starts + with a connection of first: 10 and has a field to a connection with last: 20, the count + at the second level would be 200 nodes. This is then summed to the count of 10 nodes + at the first level, for a total of 210 nodes. """ maxOutputNodes: Int! """