-
Notifications
You must be signed in to change notification settings - Fork 1
/
docker-entrypoint.sh
executable file
·107 lines (94 loc) · 2.91 KB
/
docker-entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/bin/bash
# Abort the script once a caller's retry countdown has run out.
#
# Globals:
#   count - countdown variable of the caller's retry loop (read only).
#           The function acts only when it compares equal to 0; an unset
#           or empty value also compares equal to 0.
# Arguments:
#   $1 - message printed on stderr before exiting.
# Outputs:
#   An empty line on stdout, then the message on stderr.
# Returns:
#   0 (doing nothing) while count is non-zero; otherwise the shell exits
#   with status 1.
error_with_msg() {
    # Countdown still running: nothing to report.
    [[ "$count" -eq 0 ]] || return 0

    echo
    echo "$1" >&2
    exit 1
}
# Poll supervisord until a program reports the RUNNING state.
#
# Makes up to three attempts. Each attempt reads the second column of
# `supervisorctl status <program>` and prints it; a one-second pause
# follows every attempt that did not see RUNNING.
#
# Arguments:
#   $1 - supervisord program name.
# Outputs:
#   One "#> <program> is in the <state> state." line per attempt.
# Returns:
#   0 as soon as the program is RUNNING, 1 if it never got there.
function check_running_status {
    # Keep the loop counter and state out of the global scope:
    # error_with_msg decides whether to abort from a global `count`.
    local count status

    for count in {2..0}; do
        status=$(/usr/bin/supervisorctl status "$1" | awk '{print $2}')
        echo "#> $1 is in the $status state."
        if [[ "$status" = "RUNNING" ]]; then
            return 0
        fi
        sleep 1
    done
    return 1
}
# Probe a TCP port on localhost until something accepts the connection.
#
# Makes up to three attempts through bash's /dev/tcp redirection, with a
# one-second pause after every failed attempt.
#
# Arguments:
#   $1 - TCP port number to probe on localhost.
# Outputs:
#   "#> Port <n> is listening" or "#> Port <n> is not listening" per attempt.
# Returns:
#   0 as soon as a connection succeeds, 1 if every attempt failed.
function check_port_status {
    # Local so the caller's countdown (read by error_with_msg) is untouched.
    local count

    for count in {2..0}; do
        # stderr is silenced before the /dev/tcp redirection so that bash's
        # own connection error message is discarded as well.
        if echo 2>/dev/null >"/dev/tcp/localhost/$1"; then
            echo "#> Port $1 is listening"
            return 0
        fi
        echo "#> Port $1 is not listening"
        sleep 1
    done
    return 1
}
# Start one supervisord program and wait for it to come up.
#
# Arguments:
#   $1 - supervisord program name.
# Outputs:
#   A "## Starting <program>" banner, then whatever `supervisorctl start`
#   and check_running_status print.
# Returns:
#   The status of check_running_status.
function start_service {
    echo "## Starting $1"
    # Quoted so the name is passed on as exactly one word.
    /usr/bin/supervisorctl start "$1"
    check_running_status "$1"
}
# Wait until `sinfo` shows at least one line matching "up.*idle".
#
# Polls up to eleven times, one second apart, giving each `sinfo` call a
# one-second timeout. Matching lines are echoed to stdout by grep.
#
# Outputs:
#   A "waiting" banner and the matching sinfo line(s).
# Returns:
#   0 as soon as a matching line is seen. If every poll fails the script
#   is aborted through error_with_msg (exit status 1).
function check_cluster () {
    # Local, yet still visible to error_with_msg through dynamic scoping.
    local count

    echo "#> waiting for the cluster to become available"
    for count in {10..0}; do
        if grep -E "up.*idle" <(timeout 1 sinfo); then
            # Return here instead of falling through: a success on the very
            # last poll leaves count at 0, which error_with_msg would
            # otherwise mistake for an exhausted countdown.
            return 0
        fi
        sleep 1
    done

    # Only reached when every poll failed; count is 0 at this point.
    error_with_msg "Slurm partitions failed to start successfully."
}
# First boot only: when the MariaDB system database directory is missing,
# write the container-specific server options and initialise the data dir.
if [ ! -d "/var/lib/mysql/mysql" ]; then
    # printf puts each option on its own line. Bash's plain `echo` does not
    # expand "\n", so the previous form wrote the whole text as one line.
    # NOTE(review): with skip-name-resolve in effect MariaDB matches TCP
    # clients by IP address only, so the 'slurm'@'slurmctl' grant created
    # later in this script may no longer match - confirm how slurmdbd
    # connects.
    printf '%s\n' '[mysqld]' 'skip-host-cache' 'skip-name-resolve' > /etc/my.cnf.d/docker.cnf
    echo "#> Initializing database"
    # Report success only when the initialisation really succeeded.
    if ! /usr/bin/mysql_install_db --user=mysql &> /dev/null; then
        echo >&2 "#> Database initialization failed"
        exit 1
    fi
    echo "#> Database initialized [ OK ]"
fi
# First boot only: when the Slurm accounting database directory is missing,
# start a temporary MariaDB, create the slurm user, grants and database,
# then stop MariaDB again.
if [ ! -d "/var/lib/mysql/slurm_acct_db" ]; then
    /usr/bin/mysqld_safe &

    # Wait (up to 31 attempts, one second apart) for the server to answer.
    mariadb_up=false
    for count in {30..0}; do
        if echo "SELECT 1" | mysql &> /dev/null; then
            mariadb_up=true
            break
        fi
        echo "## Starting MariaDB to create Slurm account database"
        sleep 1
    done
    # Gate on the explicit flag: count is also 0 when the server answered on
    # the last attempt, which error_with_msg alone would report as a failure.
    # When the flag is false the loop ran out, so count is 0 and the call
    # aborts the script.
    if [[ "$mariadb_up" != true ]]; then
        error_with_msg "MariaDB did not start"
    fi

    echo "* Creating Slurm acct database"
    # NOTE(review): the account password is hard-coded here; it presumably
    # has to match the slurmdbd configuration - verify before changing.
    mysql -NBe "CREATE USER 'slurm'@'localhost' identified by 'password'"
    mysql -NBe "GRANT ALL ON slurm_acct_db.* to 'slurm'@'localhost' identified by 'password' with GRANT option"
    mysql -NBe "GRANT ALL ON slurm_acct_db.* to 'slurm'@'slurmctl' identified by 'password' with GRANT option"
    mysql -NBe "CREATE DATABASE slurm_acct_db"
    echo "## Slurm acct database created. Stopping MariaDB"
    pkill -f mysqld

    # Wait (up to 11 attempts, one second apart) until the server stops
    # answering.
    mariadb_down=false
    for count in {10..0}; do
        if echo "SELECT 1" | mysql &> /dev/null; then
            sleep 1
        else
            mariadb_down=true
            break
        fi
    done
    # Same reasoning as above: only an exhausted countdown is an error.
    if [[ "$mariadb_down" != true ]]; then
        error_with_msg "MariaDB did not stop"
    fi
fi
# Bring up the process manager, munged and the supervised programs, verify
# the ports and the cluster, then hand control to the container command.
echo "#> Starting supervisord process manager"
/usr/bin/supervisord --configuration /etc/supervisord.conf
# order of the programs is important
sudo chown munge:munge -R /run/munge # double check
# Plain `&`: the former `&!` is zsh's background-and-disown syntax, which
# bash reads as `&` followed by a stray `!`.
nohup sudo -u munge munged -F > /var/log/munged.log &
# munged: Info: Unauthorized credential for client UID=0 GID=0 // but works
for service in mysqld slurmdbd slurmctld slurmd_1 slurmd_2; do
    start_service "$service"
done
for port in 6817 6818 6819 6011 6012; do
    check_port_status "$port"
done
# Aborts the script if no partition becomes available in time.
check_cluster
echo "#> Cluster is now available"
# Replace this shell with the command passed to the entrypoint.
exec "$@"