-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-cluster.sh
executable file
·345 lines (292 loc) · 10.9 KB
/
build-cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#!/bin/bash
#
# Run this script on a cloud controller
#
# Usage: [CONFIG=<name>] build-cluster.sh CLUSTERNAME [SSH_PUB_KEY|PASSWORD]
#   CONFIG selects configs/<name>.sh next to this script; when it is unset
#   configs/default.sh is used.
#
#############
# Variables #
#############
# Get directory of script for locating templates and config
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Source application variables (paths are quoted so an install directory
# containing spaces or glob characters still resolves)
source "$DIR/settings.sh"
# Source cluster config variables
if [ -z "${CONFIG}" ] ; then
  source "$DIR/configs/default.sh"
else
  config_path="$DIR/configs/$CONFIG.sh"
  if [ -f "$config_path" ] ; then
    source "$config_path"
  else
    echo "Error loading $CONFIG"
    echo "Could not load $config_path"
    exit 1
  fi
fi
CLUSTERNAMEARG="$1"
# The second argument is stored as both the public key and the password; the
# auth type (AUTH) decides later which of the two gets validated.
# NOTE(review): in password mode SSH_PUB_KEY therefore also holds the password
# and is later handed to ansible as shared_ssh_key - confirm this is intended.
SSH_PUB_KEY="${2:-$SSH_PUB_KEY}"
PASSWORD="${2:-$PASSWORD}"
LOG="$DIR/log/deploy.log"
# Random 6 character [a-z0-9] suffix appended to the requested cluster name
SEED=$(head /dev/urandom | tr -dc a-z0-9 | head -c 6)
CLUSTERNAME="$CLUSTERNAMEARG-$SEED"
CHEADIP="Unknown"
#################
# Checking Args #
#################
# CLUSTERNAME always carries the "-$SEED" suffix and so is never empty; the
# value that has to be validated is the name supplied by the user.
if [ -z "${CLUSTERNAMEARG}" ] ; then
  echo "Provide cluster name"
  echo " build-cluster.sh CLUSTERNAME SSH_PUB_KEY"
  exit 1
# A single digit 2-8 (leading zeros tolerated). Matching the text instead of
# using -lt/-gt also rejects an unset or non-numeric COMPUTENODES, which the
# numeric test would silently let through.
elif ! [[ "$COMPUTENODES" =~ ^0*[2-8]$ ]] ; then
  echo "Number of nodes must be between 2 and 8"
  exit 1
fi
#############
# Functions #
#############
#######################################
# Validate the user supplied SSH public key (used when AUTH is "key").
# Globals:   SSH_PUB_KEY (read)
# Outputs:   a description of the problem on stdout when validation fails
# Returns:   0 when the key is acceptable; otherwise exits the script with 1
#######################################
function check_key() {
  local controller_key_file=/root/.ssh/id_rsa.pub
  local controller_key=""

  if [ -z "${SSH_PUB_KEY}" ] ; then
    echo "Provide ssh public key"
    echo " build-cluster.sh CLUSTERNAME SSH_PUB_KEY"
    exit 1
  fi
  # ssh-keygen -l only succeeds when it can parse the key it is given
  if ! printf '%s\n' "$SSH_PUB_KEY" | ssh-keygen -lf /dev/stdin > /dev/null 2>&1 ; then
    echo "Invalid SSH key"
    echo " The SSH key provided was not successfully validated by ssh-keygen"
    echo " It is most likely that a character or symbol is missing from the key"
    echo " Verify the key is correct and try running this script again"
    exit 1
  fi
  # Don't allow SSH_PUB_KEY to be set to the controller's pub key (as this is added via setup.sh on the deployed nodes)
  if [[ -r "$controller_key_file" ]] ; then
    controller_key=$(< "$controller_key_file")
  fi
  # Substring match, so the controller key is also caught when it is supplied
  # without its trailing comment. Only the right-hand side of == is a pattern;
  # wildcards on the left-hand side would be compared literally.
  if [[ -n "$controller_key" && "$controller_key" == *"$SSH_PUB_KEY"* ]] ; then
    echo "Provide ssh public key that is *not* this controller's public key."
    echo "This controller's key is automatically added to the compute nodes at deployment"
    echo "to allow ansible setup to run on nodes"
    exit 1
  fi
}
# Make sure a password was supplied (used when AUTH is "password").
# Reads PASSWORD; when it is empty the usage text is printed and the script
# exits with status 1, otherwise the function returns 0 without output.
function check_password() {
  [ -n "${PASSWORD}" ] && return 0
  printf '%s\n' "Provide ssh password" " build-cluster.sh CLUSTERNAME PASSWORD"
  exit 1
}
#######################################
# Build the cloud-config documents passed to the gateway and compute nodes.
# Globals:   AUTH, SSH_PUB_KEY, PASSWORD, CLUSTERNAMEARG (read)
#            CUSTOMDATAGW, CUSTOMDATANODE (written; base64, single line)
# Outputs:   nothing on stdout
#
# The user's key/password is embedded base64 encoded and decoded again on the
# node. The encoded form only contains [A-Za-z0-9+/=], so it cannot be word
# split or globbed here, cannot be reinterpreted by the node's shell and cannot
# break the YAML, whatever characters the secret contains.
# NOTE: base64 is an encoding, not protection - the password is still readable
# by anyone who can read the instance custom data.
#######################################
function generate_custom_data() {
  local controller_key secret_b64 auth_cmds common_yaml gateway_yaml node_yaml

  controller_key=$(cat /root/.ssh/id_rsa.pub)

  if [[ "$AUTH" == "key" ]] ; then
    secret_b64=$(printf '%s\n' "$SSH_PUB_KEY" | base64 -w 0)
    auth_cmds="  - echo $secret_b64 | base64 -d >> /home/flight/.ssh/authorized_keys"
  else
    secret_b64=$(printf '%s\n' "$PASSWORD" | base64 -w 0)
    auth_cmds=$(printf '%s\n' \
      "  - echo $secret_b64 | base64 -d | passwd --stdin flight" \
      "  - sed -i 's/^PasswordAuthentication .*/PasswordAuthentication yes/g' /etc/ssh/sshd_config" \
      "  - systemctl restart sshd")
  fi

  # Indentation is significant below: this is YAML. default_user must nest
  # under system_info and every runcmd entry must share the same indent.
  common_yaml=$(cat << EOF
#cloud-config
system_info:
  default_user:
    name: flight
runcmd:
  - echo "$controller_key" >> /root/.ssh/authorized_keys
$auth_cmds
  - timedatectl set-timezone Europe/London
  - grep -q "$CLUSTERNAMEARG" /etc/resolv.conf || sed -ri 's/^search (.*?)( pri.$CLUSTERNAMEARG.cluster.local|$)/search \1 pri.$CLUSTERNAMEARG.cluster.local/' /etc/resolv.conf
EOF
  )

  # Gateway: masquerade traffic from the cluster network and enable forwarding
  gateway_yaml=$(cat << EOF
$common_yaml
  - firewall-cmd --add-rich-rule='rule family="ipv4" source address="10.10.0.0/16" masquerade' --zone public --permanent
  - firewall-cmd --set-target=ACCEPT --zone public --permanent
  - firewall-cmd --add-interface eth0 --zone public --permanent
  - firewall-cmd --reload
  - echo "net.ipv4.ip_forward = 1" > /etc/sysctl.conf
  - echo 1 > /proc/sys/net/ipv4/ip_forward
EOF
  )

  # Compute nodes: firewalld is switched off
  node_yaml=$(cat << EOF
$common_yaml
  - systemctl disable firewalld && systemctl stop firewalld
EOF
  )

  CUSTOMDATAGW=$(printf '%s\n' "$gateway_yaml" | base64 -w 0)
  CUSTOMDATANODE=$(printf '%s\n' "$node_yaml" | base64 -w 0)
}
# Pre-flight checks for an Azure deployment.
# Reads AZURE_SOURCEIMAGE and AZURE_LOCATION and runs "az account show".
# Returns 0 when everything is in place; otherwise prints what is missing and
# exits the script with status 1.
function check_azure() {
  local missing=""

  # Required settings; the source image is assigned last so it is the one
  # reported when both are empty
  [ -n "${AZURE_LOCATION}" ] || missing="AZURE_LOCATION is not set in config.sh"
  [ -n "${AZURE_SOURCEIMAGE}" ] || missing="AZURE_SOURCEIMAGE is not set in config.sh"
  if [ -n "$missing" ] ; then
    echo "$missing"
    echo "Set this before running script again"
    exit 1
  fi

  # The CLI must already be logged in to an account
  az account show > /dev/null 2>&1 && return 0
  echo "Azure account not connected to CLI"
  echo "Run az login to connect your account"
  exit 1
}
#######################################
# Deploy the cluster on Azure and hand over to node customisation.
# Creates a resource group named after CLUSTERNAME, deploys the template into
# it, adds an A record for chead1, writes the ansible inventory
# /opt/flight/clusters/$CLUSTERNAME and finally calls run_customisation.
# Globals:   CLUSTERNAME, CLUSTERNAMEARG, COMPUTENODES, DIR, AZURE_*,
#            CUSTOMDATAGW, CUSTOMDATANODE (read); CHEADIP, CHEADFQDN (written)
# Returns:   exits the script with 1 when the deployment fails or the public
#            IP of chead1 cannot be determined
#######################################
function deploy_azure() {
  local inventory="/opt/flight/clusters/$CLUSTERNAME"
  local i node_name node_ip

  az group create --name "$CLUSTERNAME" --location "$AZURE_LOCATION"
  if ! az group deployment create --name "$CLUSTERNAME" --resource-group "$CLUSTERNAME" \
    --template-file "$DIR/$AZURE_TEMPLATE" \
    --parameters sourceimage="$AZURE_SOURCEIMAGE" \
    clustername="$CLUSTERNAMEARG" \
    computeNodesCount="$COMPUTENODES" \
    cheadinstancetype="$AZURE_GATEWAYINSTANCE" \
    computeinstancetype="$AZURE_COMPUTEINSTANCE" \
    customdatagw="$CUSTOMDATAGW" \
    customdatanode="$CUSTOMDATANODE" ; then
    echo "Azure deployment of $CLUSTERNAME failed"
    exit 1
  fi

  CHEADIP=$(az network public-ip show -g "$CLUSTERNAME" -n chead1pubIP --query "{address: ipAddress}" --output yaml | awk '{print $2}')
  # Without an address every later step (DNS, inventory, the ssh wait loop in
  # set_hostnames) would operate on an empty host, so stop here instead
  if [[ -z "$CHEADIP" || "$CHEADIP" == "null" ]] ; then
    echo "Could not determine public IP of chead1"
    exit 1
  fi
  az network dns record-set a add-record --resource-group "$AZURE_DOMAIN_RG" --zone-name "$AZURE_DOMAIN" --record-set-name "chead1.$CLUSTERNAME" --ipv4-address "$CHEADIP"
  CHEADFQDN="chead1.$CLUSTERNAME.$AZURE_DOMAIN"

  # Create ansible hosts file (compute nodes are reached through chead1)
  mkdir -p /opt/flight/clusters
  {
    printf '%s\n' "[head]" "chead1 ansible_host=$CHEADIP" "[nodes]"
    i=1
    while [ "$i" -le "$COMPUTENODES" ] ; do
      node_name="cnode0$i"
      # The query is quoted: unquoted, "[?virtualMachine]" is a shell glob
      node_ip=$(az vm list-ip-addresses -g "$CLUSTERNAME" -n "$node_name" --query '[?virtualMachine].virtualMachine.network.privateIpAddresses' --output tsv)
      printf '%s\n' "$node_name ansible_host=$node_ip ansible_ssh_common_args='-o ProxyJump=$CHEADIP'"
      i=$((i + 1))
    done
  } > "$inventory"

  # Customise nodes
  run_customisation
}
# Pre-flight checks for an AWS deployment.
# Reads AWS_SOURCEIMAGE and AWS_LOCATION and runs "aws sts get-caller-identity".
# Returns 0 when everything is in place; otherwise prints what is missing and
# exits the script with status 1.
function check_aws() {
  local missing=""

  # AWS variables are non-empty; the source image is assigned last so it is
  # the one reported when both are empty
  [ -n "${AWS_LOCATION}" ] || missing="AWS_LOCATION is not set in config.sh"
  [ -n "${AWS_SOURCEIMAGE}" ] || missing="AWS_SOURCEIMAGE is not set in config.sh"
  if [ -n "$missing" ] ; then
    echo "$missing"
    echo "Set this before running script again"
    exit 1
  fi

  # AWS credentials configured
  aws sts get-caller-identity > /dev/null 2>&1 && return 0
  echo "AWS account not connected to CLI"
  echo "Run aws configure to connect your account"
  exit 1
}
#######################################
# Deploy the cluster on AWS and hand over to node customisation.
# Deploys the CloudFormation stack named after CLUSTERNAME, adds an A record
# for chead1 through Route 53, writes the ansible inventory
# /opt/flight/clusters/$CLUSTERNAME and finally calls run_customisation.
# Globals:   CLUSTERNAME, CLUSTERNAMEARG, COMPUTENODES, DIR, AWS_*,
#            CUSTOMDATAGW, CUSTOMDATANODE (read); CHEADIP, CHEADFQDN (written)
# Returns:   exits the script with 1 when the deployment fails or the public
#            IP of chead1 cannot be determined
#######################################
function deploy_aws() {
  local inventory="/opt/flight/clusters/$CLUSTERNAME"
  local dns_batch i node_name instance_id node_ip

  # Deploy resources
  if ! aws cloudformation deploy --template-file "$DIR/$AWS_TEMPLATE" --stack-name "$CLUSTERNAME" \
    --region "$AWS_LOCATION" \
    --parameter-overrides sourceimage="$AWS_SOURCEIMAGE" \
    clustername="$CLUSTERNAMEARG" \
    computeNodesCount="$COMPUTENODES" \
    cheadinstancetype="$AWS_GATEWAYINSTANCE" \
    computeinstancetype="$AWS_COMPUTEINSTANCE" \
    customdatagw="$CUSTOMDATAGW" \
    customdatanode="$CUSTOMDATANODE" ; then
    echo "AWS deployment of $CLUSTERNAME failed"
    exit 1
  fi
  aws cloudformation wait stack-create-complete --stack-name "$CLUSTERNAME" --region "$AWS_LOCATION"

  # Ask the CLI for the value directly rather than scraping its JSON, so the
  # result does not depend on the configured default output format
  CHEADIP=$(aws cloudformation describe-stack-resources --region "$AWS_LOCATION" --stack-name "$CLUSTERNAME" --logical-resource-id chead1pubIP --query 'StackResources[].PhysicalResourceId' --output text)
  # Without an address every later step (DNS, inventory, the ssh wait loop in
  # set_hostnames) would operate on an empty host, so stop here instead
  if [[ -z "$CHEADIP" || "$CHEADIP" == "None" ]] ; then
    echo "Could not determine public IP of chead1"
    exit 1
  fi

  # DNS record for the head node; the change batch goes through a private
  # temporary file instead of a predictable name in /tmp
  dns_batch=$(mktemp) || { echo "Could not create temporary file for DNS change batch" ; exit 1 ; }
  cat << EOF > "$dns_batch"
{
  "Changes": [
    {
      "Action": "CREATE",
      "ResourceRecordSet": {
        "Name": "chead1.${CLUSTERNAME}.${AWS_DOMAIN}",
        "Type": "A",
        "TTL": 300,
        "ResourceRecords": [
          {
            "Value": "$CHEADIP"
          }
        ]
      }
    }
  ]
}
EOF
  aws route53 change-resource-record-sets --hosted-zone-id "$AWS_DOMAIN_ID" --change-batch "file://$dns_batch"
  rm -f "$dns_batch"
  CHEADFQDN="chead1.${CLUSTERNAME}.${AWS_DOMAIN}"

  # Create ansible hosts file (compute nodes are reached through chead1)
  mkdir -p /opt/flight/clusters
  {
    printf '%s\n' "[head]" "chead1 ansible_host=$CHEADIP" "[nodes]"
    i=1
    while [ "$i" -le "$COMPUTENODES" ] ; do
      node_name="cnode0$i"
      instance_id=$(aws cloudformation describe-stack-resources --region "$AWS_LOCATION" --stack-name "$CLUSTERNAME" --logical-resource-id "$node_name" --query 'StackResources[].PhysicalResourceId' --output text)
      node_ip=$(aws ec2 describe-instances --region "$AWS_LOCATION" --instance-ids "$instance_id" --query 'Reservations[*].Instances[*].[PrivateIpAddress]' --output text)
      printf '%s\n' "$node_name ansible_host=$node_ip ansible_ssh_common_args='-o ProxyJump=$CHEADIP'"
      i=$((i + 1))
    done
  } > "$inventory"

  # Customise nodes
  run_customisation
}
# Post-deployment customisation: set the hostname on every node, then run the
# ansible playbook. The second step runs regardless of the first one's status.
function run_customisation() {
  set_hostnames ; run_ansible
}
#######################################
# Wait for every host in the cluster inventory to accept SSH, then set its
# hostname to <name>.pri.<CLUSTERNAMEARG>.cluster.local.
# Globals:   CLUSTERNAME, CLUSTERNAMEARG (read)
# Inputs:    /opt/flight/clusters/$CLUSTERNAME, one host per line in the form
#            NAME ansible_host=IP [ansible_ssh_common_args='-o ProxyJump=IP']
# Note:      the wait for a host is unbounded (retry every 5 seconds)
#######################################
function set_hostnames() {
  local inventory="/opt/flight/clusters/$CLUSTERNAME"
  local name host_field opt_a opt_b _rest ip ssh_args_str
  local -a ssh_opts

  # Section headers ([head], [nodes]) and blank lines are filtered out
  while IFS=$' \t' read -r name host_field opt_a opt_b _rest ; do
    ip=${host_field##*ansible_host=}
    # An empty or unreadable inventory must not turn into "ssh ... exit",
    # where ssh would take "exit" as the host name and never succeed
    [[ -n "$name" && -n "$ip" ]] || continue

    # Fields 3 and 4 carry the optional ssh arguments; strip the variable name
    # and the surrounding single quotes, then split into separate words
    ssh_args_str="$opt_a $opt_b"
    ssh_args_str=${ssh_args_str##*ansible_ssh_common_args=}
    ssh_args_str=${ssh_args_str//\'/}
    read -r -a ssh_opts <<< "$ssh_args_str"

    # stdin comes from /dev/null so ssh cannot swallow the rest of the loop input
    until ssh -q -o StrictHostKeyChecking=no -o PasswordAuthentication=no "${ssh_opts[@]}" "$ip" exit </dev/null 2>/dev/null ; do
      sleep 5
    done
    ssh -q -o StrictHostKeyChecking=no -o PasswordAuthentication=no "${ssh_opts[@]}" "$ip" "hostnamectl set-hostname $name.pri.$CLUSTERNAMEARG.cluster.local" </dev/null 2>/dev/null
  done < <(grep -vE '^\[|^$' "$inventory")
}
#######################################
# Run the openflight.yml playbook against the cluster inventory.
# Globals:   FLIGHTENVDEV, FLIGHTENVPREPARE, ALCESBRANDING, ANSIBLE_PLAYBOOK_DIR,
#            CLUSTERNAME, CLUSTERNAMEARG, SSH_PUB_KEY, CHEADFQDN, LOG (read)
#            ANSIBLE_HOST_KEY_CHECKING (exported)
# Outputs:   the command about to be run, on stdout and appended to $LOG
# Returns:   status of ansible-playbook; exits the script with 1 when the
#            playbook directory cannot be entered
# Side effect: changes the working directory to ANSIBLE_PLAYBOOK_DIR
#######################################
function run_ansible() {
  local dev_var="" bootstrap_var="" branding_var="" extra_vars
  local inventory="/opt/flight/clusters/$CLUSTERNAME"

  # Determine if dev repos for openflight to be used
  if [ "$FLIGHTENVDEV" = "true" ] ; then
    dev_var="flightenv_dev=true"
  fi
  # Determine if extra flight env stuff to be run
  if [ "$FLIGHTENVPREPARE" = "true" ] ; then
    bootstrap_var="flightenv_bootstrap=true"
  fi
  # Determine if Alces branding to be setup
  if [ "$ALCESBRANDING" = "true" ] ; then
    branding_var="alces=true"
  fi

  # Run ansible playbook. An empty or wrong directory is fatal: a bare "cd"
  # would silently move to $HOME and run the playbook from there.
  if [[ -z "$ANSIBLE_PLAYBOOK_DIR" ]] || ! cd "$ANSIBLE_PLAYBOOK_DIR" ; then
    echo "Could not change to ansible playbook directory ($ANSIBLE_PLAYBOOK_DIR)"
    exit 1
  fi
  export ANSIBLE_HOST_KEY_CHECKING=false
  extra_vars="cluster_name=$CLUSTERNAMEARG compute_ip_range='10.10.0.0/255.255.0.0' shared_ssh_key='$SSH_PUB_KEY' flightweb_fqdn='$CHEADFQDN' $dev_var $bootstrap_var $branding_var"
  echo "$(date +'%Y-%m-%d %H-%M-%S') | $CLUSTERNAME | Start Ansible | ANSIBLE_HOST_KEY_CHECKING=false ansible-playbook -i $inventory --extra-vars \"$extra_vars\" openflight.yml" | tee -a "$LOG"
  ansible-playbook -i "$inventory" --extra-vars "$extra_vars" openflight.yml
}
#################
# Run Functions #
#################
# Validate the credentials for the selected auth type
case "$AUTH" in
  "key")
    check_key
    ;;
  "password")
    check_password
    ;;
  *)
    echo "Unrecognised auth type ($AUTH)"
    echo "Set to either 'key' or 'password'"
    exit 1
    ;;
esac
# Reject an unsupported platform up front, with a failing exit status, instead
# of logging a start and an end for a deployment that never happened
case "$PLATFORM" in
  "azure"|"aws")
    ;;
  *)
    echo "Unknown platform"
    exit 1
    ;;
esac
# tee cannot append to the log if its directory is missing
mkdir -p "$(dirname "$LOG")"
echo "$(date +'%Y-%m-%d %H-%M-%S') | $CLUSTERNAME | Start Deploy | $PLATFORM | Auth Method: $AUTH" | tee -a "$LOG"
generate_custom_data
case "$PLATFORM" in
  "azure")
    check_azure
    deploy_azure
    ;;
  "aws")
    check_aws
    deploy_aws
    ;;
esac
echo "$(date +'%Y-%m-%d %H-%M-%S') | $CLUSTERNAME | End Deploy | chead1 IP: $CHEADFQDN ($CHEADIP)" | tee -a "$LOG"