From fba16698a8f23c0c1457cc8e9c268d371f4b3d6c Mon Sep 17 00:00:00 2001 From: Roy Dahan Date: Thu, 8 Feb 2024 22:39:42 +0200 Subject: [PATCH] fix(gemini): Increase instance size and max retries Currently some of our gemini tests are very flaky, failing with read validations due to "missing rows" on the cluster side. Since Gemini is random, some tests can be heavier than others. Looking at failed tests, especially with disruptive nemesis, one could easily notice that CPU utilization per core reaches 100%. In these cases, some of the validations fail with 1-4 attempts until there is a validation that fails all 5 attempts, mostly due to a missing row or several rows on the test cluster. However, connecting to the cluster afterwards, I could see all rows are available. This change first increases the instance size from i4i.large to i4i.2xlarge (i4i.xlarge for the non-disruptive nemesis test). Second, it increases the retries from 5 to 10, same as we have in c-s. --- test-cases/gemini/gemini-1tb-10h.yaml | 2 +- test-cases/gemini/gemini-3h-with-nemesis.yaml | 4 ++-- test-cases/gemini/gemini-3h-with-nondisruptive-nemesis.yaml | 4 ++-- test-cases/gemini/gemini-8h-large-num-columns.yaml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test-cases/gemini/gemini-1tb-10h.yaml b/test-cases/gemini/gemini-1tb-10h.yaml index bc82adcbc6..c65210be1d 100644 --- a/test-cases/gemini/gemini-1tb-10h.yaml +++ b/test-cases/gemini/gemini-1tb-10h.yaml @@ -15,7 +15,7 @@ nemesis_seed: '041' gemini_cmd: "gemini -d --duration 8h --warmup 2h -c 50 \ -m mixed -f --non-interactive --cql-features normal \ ---max-mutation-retries 5 --max-mutation-retries-backoff 500ms \ +--max-mutation-retries 10 --max-mutation-retries-backoff 500ms \ --async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \ --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" --oracle-replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}\" " diff --git 
a/test-cases/gemini/gemini-3h-with-nemesis.yaml b/test-cases/gemini/gemini-3h-with-nemesis.yaml index f8c132eb7a..bc87e56069 100644 --- a/test-cases/gemini/gemini-3h-with-nemesis.yaml +++ b/test-cases/gemini/gemini-3h-with-nemesis.yaml @@ -3,7 +3,7 @@ n_db_nodes: 3 n_test_oracle_db_nodes: 1 n_loaders: 1 n_monitor_nodes: 1 -instance_type_db: 'i4i.large' +instance_type_db: 'i4i.2xlarge' user_prefix: 'gemini-with-nemesis-3h-normal' @@ -18,7 +18,7 @@ nemesis_seed: '032' # the below cmd runs about 3 hours gemini_cmd: "gemini -d --duration 3h --warmup 30m \ -c 50 -m mixed -f --non-interactive --cql-features normal \ ---max-mutation-retries 5 --max-mutation-retries-backoff 500ms \ +--max-mutation-retries 10 --max-mutation-retries-backoff 500ms \ --async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \ --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" \ --oracle-replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}\"" diff --git a/test-cases/gemini/gemini-3h-with-nondisruptive-nemesis.yaml b/test-cases/gemini/gemini-3h-with-nondisruptive-nemesis.yaml index 3f149cb837..6fd1c317c0 100644 --- a/test-cases/gemini/gemini-3h-with-nondisruptive-nemesis.yaml +++ b/test-cases/gemini/gemini-3h-with-nondisruptive-nemesis.yaml @@ -3,7 +3,7 @@ n_db_nodes: 3 n_test_oracle_db_nodes: 1 n_loaders: 1 n_monitor_nodes: 1 -instance_type_db: 'i4i.large' +instance_type_db: 'i4i.xlarge' user_prefix: 'gemini-basic-3h' @@ -15,7 +15,7 @@ nemesis_interval: 5 # the below cmd runs about 3 hours gemini_cmd: "gemini -d --duration 10800s --warmup 1800s -c 50 \ -m mixed -f --non-interactive --cql-features normal \ ---max-mutation-retries 5 --max-mutation-retries-backoff 500ms \ +--max-mutation-retries 10 --max-mutation-retries-backoff 500ms \ --async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \ --replication-strategy \"{'class': 'NetworkTopologyStrategy', 
'replication_factor': '3'}\" \ --oracle-replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}\" " diff --git a/test-cases/gemini/gemini-8h-large-num-columns.yaml b/test-cases/gemini/gemini-8h-large-num-columns.yaml index 0ea2c600a0..4e69247127 100644 --- a/test-cases/gemini/gemini-8h-large-num-columns.yaml +++ b/test-cases/gemini/gemini-8h-large-num-columns.yaml @@ -20,7 +20,7 @@ gemini_cmd: "gemini -d --duration 7h --warmup 1h \ -c 10 -m mixed -f --non-interactive \ --cql-features normal --async-objects-stabilization-backoff 500ms \ --replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" \ ---max-mutation-retries 5 --max-mutation-retries-backoff 500ms \ +--max-mutation-retries 10 --max-mutation-retries-backoff 500ms \ --max-partition-keys 12 --min-partition-keys 8 \ --max-clustering-keys 20 --min-clustering-keys 12 \ --max-columns 100 --min-columns 80 --verbose"