-
Notifications
You must be signed in to change notification settings - Fork 0
/
amazon-linux-2-docker-rei.sh
259 lines (227 loc) · 10.7 KB
/
amazon-linux-2-docker-rei.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/env bash
# Slightly modified version of 3INSTConfigurationScript, the cloud init script for gitlab runners
# found here: https://gl-public-templates.s3.us-west-2.amazonaws.com/cfn/v1.5.5/runner_configs/amazon-linux-2-docker.sh
# This is modified to mount /home/.m2 for improved maven caching, and to set up a cron job to clean up dangling docker volumes
# Executor type used when registering the runner and embedded in its tag list.
GITLABRunnerExecutor='docker'

# Instance metadata is read with a session token: the token is requested first
# (TTL 21600 seconds) and then presented on every metadata request.
#   --fail              an HTTP error yields an empty value instead of the error
#                       body being captured as if it were the requested data
#   -sS                 no progress meter in the log, but errors are still shown
#   --connect-timeout   do not hang for minutes if the endpoint is unreachable
IMDS_TOKEN="$(curl -sS --fail --connect-timeout 5 -X PUT \
  -H 'X-aws-ec2-metadata-token-ttl-seconds:21600' \
  http://169.254.169.254/latest/api/token)"
MYIP="$(curl -sS --fail --connect-timeout 5 \
  -H "X-aws-ec2-metadata-token:$IMDS_TOKEN" \
  http://169.254.169.254/latest/meta-data/local-ipv4)"
# The account id is taken from the line of the identity document that mentions
# "accountId": third whitespace-separated field, with quotes and commas removed.
MYACCOUNTID="$(curl -sS --fail --connect-timeout 5 \
  -H "X-aws-ec2-metadata-token:$IMDS_TOKEN" \
  http://169.254.169.254/latest/dynamic/instance-identity/document \
  | grep accountId | awk '{print $3}' | sed 's/[",]//g')"
# NOTE(review): MYINSTANCEID and AWS_REGION are not assigned anywhere in this
# file; presumably the caller that launches this script provides them - confirm.
RunnerName="$MYINSTANCEID-in-$MYACCOUNTID-at-$AWS_REGION"
# Append one timestamped line to /var/log/messages.
#   $1 - message text
# The line has the form "<date> <hostname> USERDATA_SCRIPT: <message>" and is
# also left in the global variable LOGSTRING.
logit() {
  printf -v LOGSTRING '%s %s USERDATA_SCRIPT: %s' \
    "$(date +"%_b %e %H:%M:%S")" "$(hostname)" "$1"
  # If /var/log/cloud-init-output.log or /var/log/messages is already collected
  # elsewhere (CloudFormation setups), the following write can be muted.
  printf '%s\n' "$LOGSTRING" >>/var/log/messages
}
# Preflight: verify that the GitLab host and the runner download bucket accept
# TCP connections before anything is installed.
logit "Preflight checks for required endpoints..."
# Space-separated host=port pairs. The GitLab host is the third "/"-separated
# field of the instance URL with any ":port" suffix removed; 443 is always used.
urlportpairlist="$(printf '%s\n' "$GITLABRunnerInstanceURL" | cut -d'/' -f3 | cut -d':' -f1)=443 gitlab-runner-downloads.s3.amazonaws.com=443"
failurecount=0
# Unquoted on purpose: the list is split into one word per host=port pair.
for urlportpair in $urlportpairlist; do
  # Split with parameter expansion instead of `set --`, which would overwrite
  # the script's own positional parameters.
  url=${urlportpair%%=*}
  port=${urlportpair#*=}
  logit "TCP Test of $url on $port"
  # Host and port are handed to the child shell as arguments rather than being
  # pasted into its command string, so they can never be parsed as shell code.
  if timeout 20 bash -c 'cat < /dev/null > "/dev/tcp/$1/$2"' _ "$url" "$port"; then
    logit " Connection to $url on port $port succeeded"
  else
    logit " Connection to $url on port $port failed"
    failurecount=$((failurecount + 1))
  fi
done
if [ "$failurecount" -gt 0 ]; then
  logit "$failurecount tcp connect tests failed. Please check all networking configuration for problems."
  if [ -f /opt/aws/bin/cfn-signal ]; then
    # NOTE(review): ${AWS::StackName} is not valid Bash on its own; it looks like a
    # CloudFormation placeholder that must be substituted before this runs - confirm.
    /opt/aws/bin/cfn-signal --success false --stack ${AWS::StackName} --resource InstanceASG --region $AWS_REGION --reason "Cant connect to GitLab or other endpoints"
  fi
  # The exit status is the number of endpoints that could not be reached.
  exit $failurecount
fi
# Pick the package manager: yum wins when present, apt-get is the fallback.
# PKGMGR is left untouched when neither command can be found.
if command -v yum >/dev/null; then
  PKGMGR='yum'
elif command -v apt-get >/dev/null; then
  PKGMGR='apt-get'
fi
# From this point on, stop the script as soon as a command fails.
set -e
# Docker is installed, configured and started only when no `docker` command is
# available yet; an existing installation is left exactly as it is.
if ! command -v docker >/dev/null; then
  printf '%s\n' "Docker not present, installing..."
  amazon-linux-extras install docker
  usermod -a -G docker ec2-user
  # Point the docker daemon at the registry mirror.
  cat > /etc/docker/daemon.json << EndOfDockerConfig
{
"registry-mirrors": ["https://docker-repo.rei-cloud.com"]
}
EndOfDockerConfig
  systemctl enable docker.service
  systemctl start docker.service
fi
# Comma-separated tag list for the runner: $RunnerOSTags, "glexecutor-<executor>"
# and the lower-cased $OSInstanceLinuxArch, followed by the lower-cased
# $GITLABRunnerTagList and "computetype-<lower-cased $COMPUTETYPE>" - each of the
# last two is appended only when its variable is non-empty.
RunnerCompleteTagList="${RunnerOSTags},glexecutor-${GITLABRunnerExecutor},${OSInstanceLinuxArch,,}"
RunnerCompleteTagList+="${GITLABRunnerTagList:+,${GITLABRunnerTagList,,}}"
RunnerCompleteTagList+="${COMPUTETYPE:+,computetype-${COMPUTETYPE,,}}"
# Installing and configuring Gitlab Runner
# mkdir -p succeeds on an existing directory, so no separate -d test is needed.
mkdir -p "$RunnerInstallRoot"
# --fail makes curl exit non-zero on an HTTP error (for example an unknown
# version or architecture), so `set -e` stops the script here instead of the
# error document being saved and marked executable as the runner binary.
curl --fail \
  "https://gitlab-runner-downloads.s3.amazonaws.com/${GITLABRunnerVersion,,}/binaries/gitlab-runner-linux-${OSInstanceLinuxArch}" \
  --output "$RunnerInstallRoot/gitlab-runner"
chmod +x "$RunnerInstallRoot/gitlab-runner"
# Create the gitlab-runner account (with docker group membership) only if the
# user does not exist yet.
if ! id -u "gitlab-runner" >/dev/null 2>&1; then
  useradd --comment 'GitLab Runner' --create-home gitlab-runner --shell /bin/bash
  usermod -a -G docker gitlab-runner
fi
"$RunnerInstallRoot/gitlab-runner" install --user="gitlab-runner" --working-directory="/gitlab-runner"
echo -e "\nRunning scripts as '$(whoami)'\n\n"
# Host directory that is later bind-mounted into job containers at /home/.m2.
mkdir -p /home/.m2/repository
# Register one runner per token in GITLABRunnerRegTokenList. The list is
# ";"-separated; the expansion is left unquoted on purpose so that the shell
# splits it into one word per token.
for RunnerRegToken in ${GITLABRunnerRegTokenList//;/ }; do
  # Token-dependent arguments are collected in an array so that every value
  # (token, tag list) reaches gitlab-runner as exactly one argument, without
  # relying on word-splitting of a concatenated string.
  if [[ $RunnerRegToken == *"glrt-"* ]]; then
    # Tokens containing "glrt-" are passed with --token only.
    TokenParameters=(--token "$RunnerRegToken")
    logit "New Runner Authentication Token used, the following parameters will be ignored because they are part of the runner registration process: tags, locked, run untagged"
  else
    TokenParameters=(
      --registration-token "$RunnerRegToken"
      --tag-list "$RunnerCompleteTagList"
      --locked=false
      --run-untagged=true
    )
  fi
  # The three --docker-volumes entries expose /certs/client, the host's
  # /home/.m2 directory and the host's docker daemon.json to job containers.
  "$RunnerInstallRoot/gitlab-runner" register \
    --non-interactive \
    --name "$RunnerName" \
    --config "$RunnerConfigToml" \
    --url "$GITLABRunnerInstanceURL" \
    "${TokenParameters[@]}" \
    --executor "$GITLABRunnerExecutor" \
    --cache-type "s3" \
    --cache-path "/" \
    --cache-shared="true" \
    --cache-s3-server-address "s3.amazonaws.com" \
    --cache-s3-bucket-name "$GITLABRunnerS3CacheBucket" \
    --cache-s3-bucket-location "$AWS_REGION" \
    --docker-volumes "/certs/client" \
    --docker-volumes "/home/.m2:/home/.m2" \
    --docker-volumes "/etc/docker/daemon.json:/etc/docker/daemon.json" \
    --docker-image "$GITLABRunnerDockerImage" \
    --docker-privileged \
    --docker-pull-policy if-not-present \
    --output-limit "16384"
done
# Rewrite every line of the runner config file that starts (after optional
# whitespace) with "concurrent" to "concurrent = $GITLABRunnerConcurrentJobs".
sed -i "s/^\s*concurrent.*/concurrent = $GITLABRunnerConcurrentJobs/g" $RunnerConfigToml
# Start gitlab-runner through its own `start` subcommand.
$RunnerInstallRoot/gitlab-runner start
# Tag this EC2 instance with the runner name, the GitLab URL and the runner tag
# list. The sed call rewrites each "," in the tag list to "\,"; presumably this
# keeps the AWS CLI from reading those commas as separators - TODO confirm.
aws ec2 create-tags --region $AWS_REGION --resources $MYINSTANCEID --tags Key=GitLabRunnerName,Value="$RunnerName" Key=GitLabURL,Value="$GITLABRunnerInstanceURL" Key=GitLabRunnerTags,Value="$(echo $RunnerCompleteTagList | sed 's/,/\\\,/g')"
# Set up cron job to remove dangling volumes.
# The current crontab (if any; errors from `crontab -l` are discarded) is
# re-installed with one extra entry that runs at minute 0 of every hour.
# NOTE(review): the entry is appended unconditionally, so running this script a
# second time would add a duplicate line.
(crontab -l 2>/dev/null; echo "0 * * * * docker volume ls -qf dangling=true | xargs --no-run-if-empty docker volume rm") | crontab -
# Left disabled: would unregister every runner configured on this host.
#$RunnerInstallRoot/gitlab-runner unregister --all-runners
# Termination monitoring: generate a helper script that checks whether this
# instance is being terminated by its Auto Scaling group and, if so, cleans up
# the runner and completes the lifecycle action.
#Escape $ for variables that should wait until script runtime to be expanded.
#Non-escaped $ will result in variable expansion DURING script writing which is used on purpose by this heredoc.
#This approach for termination hook is much simpler than those involving SNS or CloudWatch, but when deployed
# on many instances it can result in a lot of ASG Describe API calls (which may be rate limited).
# The helper is only generated when an ASG name is set, the monitoring interval
# is not "Disabled" and the instance is not waiting for a reboot.
if [ ! -z "$NAMEOFASG" ] && [ "$ASGSelfMonitorTerminationInterval" != "Disabled" ] && [ "$WaitingForReboot" != "true" ]; then
logit "Setting up termination monitoring because 5ASGSelfMonitorTerminationInterval is set to $ASGSelfMonitorTerminationInterval"
SCRIPTNAME=/etc/cron.d/MonitorTerminationHook.sh
# NOTE(review): SCRIPTFOLDER and SCRIPTBASENAME are not referenced again in the
# visible part of this file.
SCRIPTFOLDER=$(dirname $SCRIPTNAME)
SCRIPTBASENAME=$(basename $SCRIPTNAME)
#Heredoc script
# Expanded NOW, while the file is written (values become literals in the helper):
#   $MYINSTANCEID, $AWS_REGION, ${COMPUTETYPE,,}, $RunnerInstallRoot, $NAMEOFASG
# Expanded LATER, when the helper runs (escaped as \$ below):
#   the date/hostname substitutions, $1, $LOGSTRING and the
#   `aws autoscaling describe-auto-scaling-instances` lifecycle-state query
# Because ${COMPUTETYPE,,} is expanded now, the spot/non-spot test inside the
# helper ends up comparing two fixed strings.
cat << EndOfScript > $SCRIPTNAME
function logit() {
LOGSTRING="\$(date +'%_b %e %H:%M:%S') \$(hostname) TERMINATIONMON_SCRIPT: \$1"
echo "\$LOGSTRING"
echo "\$LOGSTRING" >> /var/log/messages
}
#These are resolved at script creation time to reduce api calls when this script runs every minute on instances.
if [[ "\$(aws autoscaling describe-auto-scaling-instances --instance-ids $MYINSTANCEID --region $AWS_REGION | jq --raw-output '.AutoScalingInstances[0] .LifecycleState')" == *"Terminating"* ]]; then
logit "This instance ($MYINSTANCEID) is being terminated, perform cleanup..."
if [ "${COMPUTETYPE,,}" != "spot" ]; then
logit "Instance is not spot compute, draining running jobs..."
$RunnerInstallRoot/gitlab-runner stop
else
logit "Instance is spot compute, deregistering runner immediately without draining running jobs..."
fi
$RunnerInstallRoot/gitlab-runner unregister --all-runners
#### PUT YOUR CLEANUP CODE HERE, DECIDE IF CLEANUP CODE SHOULD ERROR OUT OR SILENTLY FAIL (best effort cleanup)
aws autoscaling complete-lifecycle-action --region $AWS_REGION --lifecycle-action-result CONTINUE --instance-id $MYINSTANCEID --lifecycle-hook-name instance-terminating --auto-scaling-group-name $NAMEOFASG
logit "This instance ($MYINSTANCEID) is ready for termination"
logit "Lifecycle CONTINUE was sent to termination hook in ASG: $NAMEOFASG for this instance ($MYINSTANCEID)."
fi
EndOfScript
# NOTE(review): nothing in the visible part of this file makes $SCRIPTNAME
# executable or adds a cron entry that runs it, and the generated file starts
# without a shebang line - confirm how (and whether) the helper gets invoked.
fi
# CloudWatch agent setup: install the agent package, stop the service, write its
# configuration file, then enable the service and restart it.
echo "Settings up CloudWatch Metrics to Enable Scaling on Memory Utilization"
yum install -y amazon-cloudwatch-agent
systemctl stop amazon-cloudwatch-agent
# The heredoc delimiter is quoted, so the shell performs no expansion inside it:
# the ${aws:...} placeholders are written to the file literally (presumably they
# are resolved by the agent itself - TODO confirm).
# The configuration sets a 30 second collection interval, runs the agent as
# root and collects cpu, disk, diskio, mem, netstat and swap measurements.
cat << 'EndOfCWMetricsConfig' > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
{
"agent": {
"metrics_collection_interval": 30,
"run_as_user": "root"
},
"metrics": {
"aggregation_dimensions" : [["AutoScalingGroupName"], ["InstanceId"], ["InstanceType"], ["InstanceId","InstanceType"]],
"append_dimensions": {
"AutoScalingGroupName": "${aws:AutoScalingGroupName}",
"ImageId": "${aws:ImageId}",
"InstanceId": "${aws:InstanceId}",
"InstanceType": "${aws:InstanceType}"
},
"metrics_collected": {
"cpu": {
"measurement": [
"cpu_usage_idle",
"cpu_usage_iowait",
"cpu_usage_user",
"cpu_usage_system"
],
"metrics_collection_interval": 30,
"totalcpu": false
},
"disk": {
"measurement": [
"used_percent",
"inodes_free"
],
"metrics_collection_interval": 30,
"resources": [
"*"
]
},
"diskio": {
"measurement": [
"io_time",
"write_bytes",
"read_bytes",
"writes",
"reads"
],
"metrics_collection_interval": 30,
"resources": [
"*"
]
},
"mem": {
"measurement": [
"mem_used_percent"
],
"metrics_collection_interval": 30
},
"netstat": {
"measurement": [
"tcp_established",
"tcp_time_wait"
],
"metrics_collection_interval": 30
},
"swap": {
"measurement": [
"swap_used_percent"
],
"metrics_collection_interval": 30
}
}
}
}
EndOfCWMetricsConfig
# Enable the service and restart it now that the configuration file is in place.
systemctl enable amazon-cloudwatch-agent
systemctl restart amazon-cloudwatch-agent
#Debugging:
#Check if running: sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -m ec2 -a status
#config: cat /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
#log file: tail /opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log -f
#wizard saves: /opt/aws/amazon-cloudwatch-agent/bin/config.json
#amazon-linux-extras install -y epel; yum install -y stress-ng
#stress-ng --vm 1 --vm-bytes 75% --vm-method all --verify -t 10m -v
#stress-ng --vm-hang 2 --vm-keep --verify --timeout 600 --verbose --vm 2 --vm-bytes $(awk '/MemTotal/{printf "%d\n", $2;}' < /proc/meminfo)k
# --vm-method all
#stress-ng --vm-hang 2 --vm-keep --verify --timeout 600 --verbose --vm 2 --vm-bytes $(awk '/MemAvailable/{printf "%d\n", $2 * 0.9;}' < /proc/meminfo)k
#90% of available memory: $(awk '/MemAvailable/{printf "%d\n", $2 * 0.9;}' < /proc/meminfo)k
#100% of total memory: $(awk '/MemTotal/{printf "%d\n", $2;}' < /proc/meminfo)k
# cpus * 2: $(awk '/cpu cores/{printf "%d\n", $4 * 2;}' < /proc/cpuinfo)