Skip to content

Commit

Permalink
fix(cluster): don't execute SSH commands on dead nodes during terminate
Browse files Browse the repository at this point in the history
Use the same approach as Valerii used for k8s nodes: passing `scylla_shards`
to cluster.terminate_node()

This is used for k8s and ScyllaDB Cloud.
  • Loading branch information
enaydanov authored and fruch committed Oct 4, 2023
1 parent 1556348 commit a6b750c
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 30 deletions.
14 changes: 8 additions & 6 deletions sdcm/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def cpu_cores(self) -> Optional[int]:
return None

@property
def scylla_shards(self):
def scylla_shards(self) -> int:
"""
Priority of selecting number of shards for Scylla is defined in
<dist.common.scripts.scylla_util.scylla_cpuinfo.nr_shards> and has following order:
Expand Down Expand Up @@ -3405,18 +3405,20 @@ def destroy(self):
for node in self.nodes:
node.destroy()

def terminate_node(self, node):
def terminate_node(self, node: BaseNode, scylla_shards: int = 0) -> None:
# NOTE: BaseNode.scylla_shards uses SSH commands to get actual numbers which is not possible on a dead node.
# In such cases, a caller needs to get the number of shards before the node dies and provide it.
if node.ip_address not in self.dead_nodes_ip_address_list:
self.dead_nodes_list.append(DeadNode(
name=node.name,
public_ip=node.public_ip_address,
private_ip=node.private_ip_address,
ipv6_ip=node.ipv6_ip_address if self.test_config.IP_SSH_CONNECTIONS == "ipv6" else '',
ipv6_ip=node.ipv6_ip_address if self.test_config.IP_SSH_CONNECTIONS == "ipv6" else "",
ip_address=node.ip_address,
shards=node.scylla_shards,
shards=scylla_shards or node.scylla_shards,
termination_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
terminated_by_nemesis=node.running_nemesis))

terminated_by_nemesis=node.running_nemesis,
))
if node in self.nodes:
self.nodes.remove(node)
node.destroy()
Expand Down
25 changes: 1 addition & 24 deletions sdcm/cluster_k8s/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2709,36 +2709,13 @@ def _delete_k8s_rack(self, rack: int):
racks.pop(rack)
self.replace_scylla_cluster_value('/spec/datacenter/racks', racks)

def terminate_node(self, node: BasePodContainer, scylla_shards=""): # pylint: disable=arguments-differ
"""Terminate node.
:param node: 'node' object to be processed.
:param scylla_shards: expected to be the same as 'node.scylla_shards'.
Used to avoid remoter calls to the target node.
Useful when the node is unreachable by SSH on the moment of this method call.
"""
if node.ip_address not in self.dead_nodes_ip_address_list:
self.dead_nodes_list.append(DeadNode(
name=node.name,
public_ip=node.public_ip_address,
private_ip=node.private_ip_address,
ipv6_ip=node.ipv6_ip_address if self.test_config.IP_SSH_CONNECTIONS == "ipv6" else '',
ip_address=node.ip_address,
shards=scylla_shards or node.scylla_shards,
termination_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
terminated_by_nemesis=node.running_nemesis,
))
if node in self.nodes:
self.nodes.remove(node)
node.destroy()

def decommission(self, node: BaseScyllaPodContainer, timeout: int | float = None):
rack = node.rack
rack_nodes = self.get_rack_nodes(rack)
assert rack_nodes[-1] == node, "Can withdraw the last node only"
current_members = len(rack_nodes)

# NOTE: "scylla_shards" property uses remoter calls and we save it's result before
# NOTE: "scylla_shards" property uses remoter calls, and we save its result before
# the target scylla node gets killed using kubectl command which precedes the target GCE
# node deletion using "terminate_node" command.
scylla_shards = node.scylla_shards
Expand Down

0 comments on commit a6b750c

Please sign in to comment.