#!/bin/bash
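#
# Rolling replacement of the capture nodes.
# The script first checks that every capture and overprovisioner pod is
# Running and that the overprovisioner runs the same image as the captures.
# It then cordons all schedulable capture nodes, drains the overprovisioner
# nodes, and migrates capture nodes one by one: remove the kubestatic label,
# release the EIP, force-delete the capture pods so they reschedule on an
# overprovisioner node, drain the old node and wait for everything to be
# ready again.
#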
set -euo pipefail
OVERPROVISIONER_NAMESPACE=cluster-overprovisioner
OVERPROVISIONER_DEPLOYMENT=cluster-overprovisioner-captures-overprovisioner
CAPTURE_NAMESPACE=reference
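# Most used image across the running capture pods; compared below against the
# overprovisioner deployment image.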
CAPTURE_IMAGE=$(
kubectl -n "${CAPTURE_NAMESPACE}" get pod \
--selector app.kubernetes.io/name=capture \
-o jsonpath='{.items[*].spec.containers[*].image}' |
tr -s '[:space:]' '\n' | sort | uniq -c | sort -rn | awk 'NR==1 {print $2}'
)
# Colors
end="\033[0m"
black="\033[0;30m"
blackb="\033[1;30m"
white="\033[0;37m"
whiteb="\033[1;37m"
red="\033[0;31m"
redb="\033[1;31m"
green="\033[0;32m"
greenb="\033[1;32m"
yellow="\033[0;33m"
yellowb="\033[1;33m"
blue="\033[0;34m"
blueb="\033[1;34m"
purple="\033[0;35m"
purpleb="\033[1;35m"
lightblue="\033[0;36m"
lightblueb="\033[1;36m"
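# Print helpers: echo the given message in the matching color.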
function green {
echo -e "${green}${1}${end}"
}
function greenb {
echo -e "${greenb}${1}${end}"
}
function white {
echo -e "${white}${1}${end}"
}
function whiteb {
echo -e "${whiteb}${1}${end}"
}
function yellow {
echo -e "${yellow}${1}${end}"
}
function yellowb {
echo -e "${yellowb}${1}${end}"
}
function redb {
echo -e "${redb}${1}${end}"
}
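# Print an error in bold red and abort.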
function fail_and_exit {
redb "${1}"
exit 1
}
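# Drain a node, ignoring DaemonSets and deleting emptyDir data.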
function drain {
yellow "Draining ${1}"
kubectl drain "${1}" --ignore-daemonsets=true --delete-emptydir-data=true --timeout=120s --skip-wait-for-delete-timeout=1
green "${1} successfully drained."
}
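# Release every externalip bound to the given node by clearing its spec.nodeName.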
function releaseip {
local externalip_name
externalip_name=$(
kubectl get externalips \
-o jsonpath="{.items[?(@.spec.nodeName==\"${1}\")].metadata.name}"
)
for ip in ${externalip_name}; do
green "Disassociate EIP ${ip} from ${1}"
kubectl patch externalips "${ip}" --type=merge -p '{"spec": {"nodeName": ""}}'
done
}
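# Remove the kubestatic auto-assign label so no new EIP is attached to the node.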
function kubestatic_unlabel {
green "Remove kubestatic label on node ${1}"
kubectl label node "${1}" kubestatic.quortex.io/externalip-auto-assign-
}
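# Abort if any capture pod is not in a Running state.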
function check_capture_status {
echo -n "Checking that all capture pods are running... "
local pending_pods_count
pending_pods_count=$(
kubectl -n "${CAPTURE_NAMESPACE}" get pods \
--selector app.kubernetes.io/name=capture \
--field-selector status.phase!=Running -o name | wc -w
)
if [ "${pending_pods_count}" -ne 0 ]; then
fail_and_exit "All capture pods are not in a Running state, I can't continue."
fi
echo -e "${green}Ok${end}"
}
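# Abort if any captures-overprovisioner pod is not in a Running state.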
function check_overprovisioner_status {
echo -n "Checking that all captures-overprovisioner pods are running... "
local pending_pods_count
pending_pods_count=$(
kubectl -n "${OVERPROVISIONER_NAMESPACE}" get pods \
--selector app.cluster-overprovisioner/deployment=captures-overprovisioner \
--field-selector status.phase!=Running -o name | wc -w
)
if [ "${pending_pods_count}" -ne 0 ]; then
fail_and_exit "Some overprovisioner pods are in a Pending state, I can't continue."
fi
echo -e "${green}Ok${end}"
}
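# Warn if the overprovisioner deployment image differs from the most used capture image.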
function check_overprovisioner_image {
echo -n "Capture overprovisioner image... "
local overprovisioner_image
overprovisioner_image=$(
kubectl -n "${OVERPROVISIONER_NAMESPACE}" get deployment "${OVERPROVISIONER_DEPLOYMENT}" \
-o jsonpath='{.spec.template.spec.containers[0].image}'
)
if [ "${overprovisioner_image}" != "${CAPTURE_IMAGE}" ]; then
echo -e "${yellowb}You should set it to the most used capture image.${end}"
else
echo -e "${green}Ok${end}"
fi
}
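# Current Unix timestamp, used to time the migration steps.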
function now {
date +%s
}
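# Pre-flight checks and summary, then ask for confirmation before doing anything.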
command -v kubectl >/dev/null 2>&1 || fail_and_exit "kubectl CLI not found"
check_capture_status
check_overprovisioner_status
check_overprovisioner_image
white "Note that unschedulable capture nodes will be ignored."
echo
echo
whiteb "Kube context : $(kubectl config current-context)"
whiteb "Capture overprovisioner deployment : ${OVERPROVISIONER_DEPLOYMENT}"
whiteb "Capture overprovisioner namespace : ${OVERPROVISIONER_NAMESPACE}"
whiteb "Most used capture image : ${CAPTURE_IMAGE}"
echo -n "Continue? y/n "
read -r answer
if [[ "${answer}" != "y" ]]; then
fail_and_exit "Did not receive [y], exiting."
fi
# List the nodes that are currently schedulable
nodes_to_rollout=$(
kubectl get nodes \
--selector group=captures-fix-group \
--field-selector spec.unschedulable=false \
-o jsonpath="{.items[*]['metadata.name']}"
)
whiteb "The following nodes will be processed : ${nodes_to_rollout}"
green "Cordoning ${nodes_to_rollout} ..."
for node in ${nodes_to_rollout}; do
kubectl cordon "${node}"
done
green "Remove kubestatic label on ${nodes_to_rollout} ..."
for node in ${nodes_to_rollout}; do
kubectl label node "${node}" kubestatic.quortex.io/externalip-auto-assign-
done
# First rollout overprovisioner nodes
overprovisioner_nodes=$(
kubectl -n "${OVERPROVISIONER_NAMESPACE}" get pod \
--selector app.cluster-overprovisioner/deployment=captures-overprovisioner \
--sort-by=.spec.nodeName -o jsonpath="{.items[*]['spec.nodeName']}" | tr ' ' '\n' | uniq
)
green "Draining all nodes with overprovisioner"
for node in ${overprovisioner_nodes}; do
yellow "Draining ${node}"
kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --force --timeout=120s
done
green "Wait for capture overprovisioners to be rescheduled and ready"
kubectl -n "${OVERPROVISIONER_NAMESPACE}" wait pod --for=condition=ready --timeout=10m \
--selector "app.cluster-overprovisioner/deployment=captures-overprovisioner"
for node in ${nodes_to_rollout}; do
# Get some info about the cluster state
capture_pods=$(
kubectl get pods -A \
--selector app.kubernetes.io/name=capture \
--field-selector "spec.nodeName=${node}" \
-o jsonpath="{.items[*]['metadata.name']}"
)
# probably only overprovisioner pods on this node, skipping
if [[ -z "${capture_pods}" ]]; then
whiteb "Node ${node} does not have any captures, skipping"
continue
fi
whiteb "Migrating the node ${node}"
# Pick one of the overprovisioner nodes (any of them); the capture pods will be rescheduled onto it
overprovisioner_node=$(
kubectl -n "${OVERPROVISIONER_NAMESPACE}" get pod \
--selector app.cluster-overprovisioner/deployment=captures-overprovisioner \
-o jsonpath="{.items[0]['spec.nodeName']}"
)
# Start capture migration
stamp=$(now)
kubestatic_unlabel "${node}"
yellow "==> Unlabeling capture node took $(($(now) - stamp))s."
start_downtime=$(now)
releaseip "${node}"
yellow "==> Release EIP from capture node took $(($(now) - start_downtime))s."
stamp=$(now)
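# Force-delete the capture pods so they reschedule immediately, expected to land
# on the freed overprovisioner node.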
for pod in ${capture_pods}; do
kubectl -n "${CAPTURE_NAMESPACE}" delete pod --force=true "${pod}"
done
yellow "==> Force deleted ${capture_pods} in $(($(now) - stamp))s."
stamp=$(now)
releaseip "${overprovisioner_node}"
yellow "==> Release EIP from overprovisioner node took $(($(now) - stamp))s."
stamp=$(now)
drain "${node}"
yellow "==> Draining took $(($(now) - stamp))s."
# wait for capture pods to be scheduled on new node
stamp=$(now)
pending_capture_pods=$(
kubectl -n "${CAPTURE_NAMESPACE}" get pods \
--selector app.kubernetes.io/name=capture \
--field-selector status.phase!=Running \
-o jsonpath="{.items[*]['metadata.name']}"
)
for pod in ${pending_capture_pods}; do
green "Waiting for ${pod} to be scheduled..."
kubectl -n "${CAPTURE_NAMESPACE}" wait pod "${pod}" \
--for=condition=ready --timeout=5m
done
end_downtime=$(now)
yellow "==> Waiting for capture pod to be ready took $((end_downtime - stamp))s."
yellowb "==> capture pods on ${node} were unavailable for $((end_downtime - start_downtime)) seconds."
green "Wait for overprovisioners to be rescheduled and ready"
kubectl -n "${OVERPROVISIONER_NAMESPACE}" wait pod \
--for=condition=ready --timeout=10m \
--selector "app.cluster-overprovisioner/deployment=captures-overprovisioner"
done