Skip to content

Commit

Permalink
Authority overload monitor (MystenLabs#16050)
Browse files Browse the repository at this point in the history
Roll forward MystenLabs#15981 

Previously, the PR was causing CI failure because `max_txn_age_in_queue`
was used in the config. Therefore, although `overload_threshold_config`
has a default, since `max_txn_age_in_queue` is specified, serde expects
the rest of config to show up in the config file. Because we don't have
a serde default set for the rest of the field, it crashes.

This PR added serde default for all the field in OverloadThresholdConfig
in addition to the original MystenLabs#15981 .

I manually verified that it can parse the config correctly when only
partial config is set.
  • Loading branch information
halfprice authored Feb 2, 2024
1 parent 9898f10 commit 4a22454
Show file tree
Hide file tree
Showing 11 changed files with 485 additions and 11 deletions.
55 changes: 54 additions & 1 deletion crates/sui-config/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,16 +685,69 @@ pub struct TransactionKeyValueStoreWriteConfig {
/// stop processing new transactions and/or certificates until the congestion
/// resolves.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct OverloadThresholdConfig {
#[serde(default = "default_max_txn_age_in_queue")]
pub max_txn_age_in_queue: Duration,

// The interval of checking overload signal.
#[serde(default = "default_overload_monitor_interval")]
pub overload_monitor_interval: Duration,

// The execution queueing latency when entering load shedding mode.
#[serde(default = "default_execution_queue_latency_soft_limit")]
pub execution_queue_latency_soft_limit: Duration,

// The execution queueing latency when entering aggressive load shedding mode.
#[serde(default = "default_execution_queue_latency_hard_limit")]
pub execution_queue_latency_hard_limit: Duration,

// The maximum percentage of transactions to shed in load shedding mode.
#[serde(default = "default_max_load_shedding_percentage")]
pub max_load_shedding_percentage: u32,

// When in aggressive load shedding mode, the the minimum percentage of
// transactions to shed.
#[serde(default = "default_min_load_shedding_percentage_above_hard_limit")]
pub min_load_shedding_percentage_above_hard_limit: u32,
// TODO: Move other thresholds here as well, including `MAX_TM_QUEUE_LENGTH`
// and `MAX_PER_OBJECT_QUEUE_LENGTH`.
}

fn default_max_txn_age_in_queue() -> Duration {
Duration::from_secs(1)
}

fn default_overload_monitor_interval() -> Duration {
Duration::from_secs(10)
}

fn default_execution_queue_latency_soft_limit() -> Duration {
Duration::from_secs(1)
}

fn default_execution_queue_latency_hard_limit() -> Duration {
Duration::from_secs(10)
}

fn default_max_load_shedding_percentage() -> u32 {
95
}

fn default_min_load_shedding_percentage_above_hard_limit() -> u32 {
50
}

impl Default for OverloadThresholdConfig {
fn default() -> Self {
Self {
max_txn_age_in_queue: Duration::from_secs(1), // 1 second
max_txn_age_in_queue: default_max_txn_age_in_queue(),
overload_monitor_interval: default_overload_monitor_interval(),
execution_queue_latency_soft_limit: default_execution_queue_latency_soft_limit(),
execution_queue_latency_hard_limit: default_execution_queue_latency_hard_limit(),
max_load_shedding_percentage: default_max_load_shedding_percentage(),
min_load_shedding_percentage_above_hard_limit:
default_min_load_shedding_percentage_above_hard_limit(),
}
}
}
Expand Down
40 changes: 38 additions & 2 deletions crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ use crate::epoch::committee_store::CommitteeStore;
use crate::execution_driver::execution_process;
use crate::in_mem_execution_cache::{ExecutionCache, ExecutionCacheRead, ExecutionCacheWrite};
use crate::metrics::LatencyObserver;
use crate::metrics::RateTracker;
use crate::module_cache_metrics::ResolverMetrics;
use crate::overload_monitor::{overload_monitor, AuthorityOverloadInfo};
use crate::stake_aggregator::StakeAggregator;
use crate::state_accumulator::{StateAccumulator, WrappedObject};
use crate::subscription_handler::SubscriptionHandler;
Expand Down Expand Up @@ -237,6 +239,9 @@ pub struct AuthorityMetrics {
pub(crate) skipped_consensus_txns: IntCounter,
pub(crate) skipped_consensus_txns_cache_hit: IntCounter,

pub(crate) authority_overload_status: IntGauge,
pub(crate) authority_load_shedding_percentage: IntGauge,

/// Post processing metrics
post_processing_total_events_emitted: IntCounter,
post_processing_total_tx_indexed: IntCounter,
Expand Down Expand Up @@ -269,6 +274,18 @@ pub struct AuthorityMetrics {
// Tracks recent average txn queueing delay between when it is ready for execution
// until it starts executing.
pub execution_queueing_latency: LatencyObserver,

// Tracks the rate of transactions become ready for execution in transaction manager.
// The need for the Mutex is that the tracker is updated in transaction manager and read
// in the overload_monitor. There should be low mutex contention because
// transaction manager is single threaded and the read rate in overload_monitor is
// low. In the case where transaction manager becomes multi-threaded, we can
// create one rate tracker per thread.
pub txn_ready_rate_tracker: Arc<Mutex<RateTracker>>,

// Tracks the rate of transactions starts execution in execution driver.
// Similar reason for using a Mutex here as to `txn_ready_rate_tracker`.
pub execution_rate_tracker: Arc<Mutex<RateTracker>>,
}

// Override default Prom buckets for positive numbers in 0-50k range
Expand Down Expand Up @@ -453,6 +470,16 @@ impl AuthorityMetrics {
registry,
)
.unwrap(),
authority_overload_status: register_int_gauge_with_registry!(
"authority_overload_status",
"Whether authority is current experiencing overload and enters load shedding mode.",
registry)
.unwrap(),
authority_load_shedding_percentage: register_int_gauge_with_registry!(
"authority_load_shedding_percentage",
"The percentage of transactions is shed when the authority is in load shedding mode.",
registry)
.unwrap(),
transaction_manager_object_cache_misses: register_int_counter_with_registry!(
"transaction_manager_object_cache_misses",
"Number of object-availability cache misses in TransactionManager",
Expand Down Expand Up @@ -618,6 +645,8 @@ impl AuthorityMetrics {
registry
).unwrap(),
execution_queueing_latency: LatencyObserver::new(),
txn_ready_rate_tracker: Arc::new(Mutex::new(RateTracker::new(Duration::from_secs(10)))),
execution_rate_tracker: Arc::new(Mutex::new(RateTracker::new(Duration::from_secs(10)))),
}
}
}
Expand Down Expand Up @@ -684,6 +713,9 @@ pub struct AuthorityState {

/// Config for when we consider the node overloaded.
overload_threshold_config: OverloadThresholdConfig,

/// Current overload status in this authority. Updated periodically.
pub overload_info: AuthorityOverloadInfo,
}

/// The authority state encapsulates all state, drives execution, and ensures safety.
Expand Down Expand Up @@ -2452,17 +2484,21 @@ impl AuthorityState {
transaction_deny_config,
certificate_deny_config,
debug_dump_config,
overload_threshold_config,
overload_threshold_config: overload_threshold_config.clone(),
overload_info: AuthorityOverloadInfo::default(),
});

// Start a task to execute ready certificates.
let authority_state = Arc::downgrade(&state);
spawn_monitored_task!(execution_process(
authority_state,
rx_ready_certificates,
rx_execution_shutdown
rx_execution_shutdown,
));

let authority_state = Arc::downgrade(&state);
spawn_monitored_task!(overload_monitor(authority_state, overload_threshold_config));

// TODO: This doesn't belong to the constructor of AuthorityState.
state
.create_owner_index_if_empty(genesis_objects, &epoch_store)
Expand Down
5 changes: 5 additions & 0 deletions crates/sui-core/src/consensus_throughput_calculator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ mod tests {
}

#[test]
#[cfg_attr(msim, ignore)]
pub fn test_consensus_throughput_calculator() {
let metrics = Arc::new(AuthorityMetrics::new(&Registry::new()));
let max_observation_points: NonZeroU64 = NonZeroU64::new(3).unwrap();
Expand Down Expand Up @@ -513,6 +514,7 @@ mod tests {
}

#[test]
#[cfg_attr(msim, ignore)]
pub fn test_throughput_calculator_same_timestamp_observations() {
let metrics = Arc::new(AuthorityMetrics::new(&Registry::new()));
let max_observation_points: NonZeroU64 = NonZeroU64::new(2).unwrap();
Expand Down Expand Up @@ -543,6 +545,7 @@ mod tests {
}

#[test]
#[cfg_attr(msim, ignore)]
pub fn test_consensus_throughput_profiler() {
let metrics = Arc::new(AuthorityMetrics::new(&Registry::new()));
let throughput_profile_update_interval: TimestampSecs = 5;
Expand Down Expand Up @@ -610,6 +613,7 @@ mod tests {
}

#[test]
#[cfg_attr(msim, ignore)]
pub fn test_consensus_throughput_profiler_update_interval() {
let metrics = Arc::new(AuthorityMetrics::new(&Registry::new()));
let throughput_profile_update_interval: TimestampSecs = 5;
Expand Down Expand Up @@ -662,6 +666,7 @@ mod tests {
}

#[test]
#[cfg_attr(msim, ignore)]
pub fn test_consensus_throughput_profiler_cool_down() {
let metrics = Arc::new(AuthorityMetrics::new(&Registry::new()));
let throughput_profile_update_window: TimestampSecs = 3;
Expand Down
2 changes: 2 additions & 0 deletions crates/sui-core/src/execution_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ pub async fn execution_process(
}
}

authority.metrics.execution_rate_tracker.lock().record();

// Certificate execution can take significant time, so run it in a separate task.
spawn_monitored_task!(async move {
let _scope = monitored_scope("ExecutionDriver::task");
Expand Down
1 change: 1 addition & 0 deletions crates/sui-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub mod in_mem_execution_cache;
pub mod metrics;
pub mod module_cache_metrics;
pub mod mysticeti_adapter;
mod overload_monitor;
pub(crate) mod post_consensus_tx_reorder;
pub mod quorum_driver;
pub mod safe_client;
Expand Down
Loading

0 comments on commit 4a22454

Please sign in to comment.