Commit 65ed715: add changes for stackgres 1.14

Signed-off-by: Jesse Nelson <[email protected]>
jnels124 committed Dec 19, 2024
1 parent 5fd36f4, commit 65ed715

Showing 8 changed files with 210 additions and 193 deletions.
2 changes: 1 addition & 1 deletion charts/hedera-mirror-common/Chart.yaml
@@ -27,7 +27,7 @@ dependencies:
condition: stackgres.enabled
name: stackgres-operator
repository: https://stackgres.io/downloads/stackgres-k8s/stackgres/helm/
version: 1.13.0
version: 1.14.1
- condition: traefik.enabled
name: traefik
repository: https://helm.traefik.io/traefik
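After bumping the stackgres-operator dependency to 1.14.1, the chart lock can be refreshed locally; a minimal sketch, assuming the repository is reachable and the chart lives at charts/hedera-mirror-common:

# Refresh Chart.lock, pull the 1.14.1 dependency, then confirm the pin.
helm dependency update charts/hedera-mirror-common
helm dependency list charts/hedera-mirror-common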
45 changes: 44 additions & 1 deletion charts/hedera-mirror-common/values.yaml
@@ -417,10 +417,53 @@ promtail:
operator: Exists

stackgres:
collector:
config:
exporters:
prometheus:
metric_expiration: 2m
service:
pipelines:
metrics:
receivers:
- prometheus/prometheus # This appears to be corrected to just prometheus in 1.15+ and can be updated (possibly removed) when upgrading

Note: GitHub Actions lint reported a failure at charts/hedera-mirror-common/values.yaml line 429, col 39: [comments] too few spaces before comment.
resources:
limits:
memory: "4Gi"
cpu: "1500m"
requests:
memory: "3Gi"
cpu: "1000m"
prometheusOperator:
monitors:
- name: mirror-prometheus-prometheus
metadata:
namespace: common
name: mirror-prometheus-prometheus
spec:
podMetricsEndpoints:
- honorLabels: true
honorTimestamps: true
interval: 1m
metricRelabelings:
- action: drop
regex: ^(pg_stat.*_user.*|pg_table.*|pg_total_relation_size_bytes|pg.*)$
sourceLabels:
- __name__
path: /metrics
port: prom-http
scheme: https
tlsConfig:
ca:
secret:
key: tls.crt
name: mirror-prometheus-prometheus
serverName: stackgres-collector

enabled: false
operator:
image:
name: gcr.io/mirrornode/stackgres-operator
name: docker.io/xinatswirlds/stackgres-operator
prometheusRules:
enabled: true
DatabaseStorageFull:
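A quick way to sanity-check the monitor above once it is deployed (resource kind, name, and namespace here are assumptions based on these values, not part of the commit):

# List PodMonitors the StackGres operator created for its collector, then
# confirm the drop rule rendered with the regex configured above.
kubectl -n common get podmonitors.monitoring.coreos.com
kubectl -n common get podmonitors.monitoring.coreos.com mirror-prometheus-prometheus \
  -o jsonpath='{.spec.podMetricsEndpoints[0].metricRelabelings[0].regex}{"\n"}'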
15 changes: 9 additions & 6 deletions charts/hedera-mirror/templates/stackgres/stackgres-cluster.yaml
@@ -6,14 +6,17 @@ metadata:
name: {{ include "hedera-mirror.stackgres" . }}
namespace: {{ include "hedera-mirror.namespace" . }}
spec:
{{- if .Values.stackgres.backup.enabled }}
configurations:
{{- if .Values.stackgres.backup.enabled }}
backups:
{{- range $backupConfig := .Values.stackgres.backup.instances }}
- sgObjectStorage: {{ include "hedera-mirror.stackgres" $ }}-object-storage
{{$backupConfig | toYaml | nindent 6}}
{{- end }}
{{- end }}
{{- end }}
observability:
disableMetrics: {{ not .Values.stackgres.observability.enableMetricsExporter }}
prometheusAutobind: {{ .Values.stackgres.observability.enableMetricsExporter }}
coordinator:
configurations:
sgPoolingConfig: {{ include "hedera-mirror.stackgres" . }}-coordinator
@@ -23,7 +26,6 @@ spec:
scripts:
- sgScript: {{ include "hedera-mirror.stackgres" . }}-coordinator
pods:
disableMetricsExporter: {{ not .Values.stackgres.coordinator.enableMetricsExporter }}
disablePostgresUtil: {{ not .Values.stackgres.coordinator.enablePostgresUtil }}
persistentVolume: {{ .Values.stackgres.coordinator.persistentVolume | toYaml | nindent 8 }}
resources:
@@ -51,9 +53,11 @@ spec:
ssl:
enabled: false # Disable SSL to work around https://github.com/hashgraph/hedera-mirror-node/issues/9143
version: {{ .Values.stackgres.postgresVersion | quote }}
prometheusAutobind: {{ or .Values.stackgres.coordinator.enableMetricsExporter .Values.stackgres.worker.enableMetricsExporter }}
prometheusAutobind: {{ .Values.stackgres.observability.enableMetricsExporter }}
replication:
mode: {{ .Values.stackgres.coordinator.replication.mode }}
mode: {{ .Values.stackgres.replication.mode }}
initialization:
mode: {{ .Values.stackgres.replication.initialization.mode }}
shards:
clusters: {{ .Values.stackgres.worker.instances }}
configurations:
Expand All @@ -65,7 +69,6 @@ spec:
- sgScript: {{ include "hedera-mirror.stackgres" . }}-worker
overrides: {{ .Values.stackgres.worker.overrides | toYaml | nindent 6 }}
pods:
disableMetricsExporter: {{ not .Values.stackgres.worker.enableMetricsExporter }}
disablePostgresUtil: {{ not .Values.stackgres.worker.enablePostgresUtil }}
persistentVolume: {{ .Values.stackgres.worker.persistentVolume | toYaml | nindent 8 }}
resources:
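To eyeball how the relocated backups block, the new observability section, and the top-level replication settings render, the template can be generated locally; a sketch, assuming a repo checkout, a hypothetical release name of mirror, and that the cluster template is gated on stackgres.enabled:

# Render only the sharded cluster template and show the new sections.
helm dependency update charts/hedera-mirror
helm template mirror charts/hedera-mirror --set stackgres.enabled=true \
  --show-only templates/stackgres/stackgres-cluster.yaml |
  grep -E -A2 'observability:|replication:'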
4 changes: 2 additions & 2 deletions charts/hedera-mirror/values-prod.yaml
@@ -145,10 +145,10 @@ stackgres:
cronSchedule: "45 22 * * *"
compression: lz4
useVolumeSnapshot: true
fastVolumeSnapshot: false # Can be true once StackGres issue 2887 is resolved
fastVolumeSnapshot: true
volumeSnapshotClass: zfs
performance:
uploadDiskConcurrency: 1 # Can be 4 once StackGres issue 2887 is resolved
uploadDiskConcurrency: 1
objectStorage:
endpoint: http://mirror-minio.common:9000
bucket: stackgres-backup
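Since fastVolumeSnapshot now defaults to true for production, it is worth confirming the referenced snapshot class exists before a backup runs; a minimal check, assuming the class name zfs from the values above:

# Print the CSI driver backing the zfs VolumeSnapshotClass; an error here means
# fast volume snapshots cannot be taken.
kubectl get volumesnapshotclass zfs -o jsonpath='{.driver}{"\n"}'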
10 changes: 6 additions & 4 deletions charts/hedera-mirror/values.yaml
@@ -310,7 +310,6 @@ stackgres:
wal_init_zero: "off" # Not needed with ZFS
wal_recycle: "off" # Not needed with ZFS
work_mem: "12MB"
enableMetricsExporter: false
enablePostgresUtil: true
instances: 1
pgbouncer:
@@ -325,8 +324,6 @@ stackgres:
pool_mode: session
mirror_importer:
pool_mode: session
replication:
mode: sync-all
resources:
cpu: 100m
memory: 1Gi
@@ -342,9 +339,15 @@ stackgres:
- name: pg_trgm
version: "1.6"
nameOverride: citus
observability:
enableMetricsExporter: true
podAntiAffinity: true
postgresVersion: "16.2"
priorityClassName: ""
replication:
initialization:
mode: FromReplica
mode: sync-all
worker:
config:
autovacuum_max_workers: "2"
@@ -362,7 +365,6 @@ stackgres:
wal_init_zero: "off" # Not needed with ZFS
wal_recycle: "off" # Not needed with ZFS
work_mem: "12MB"
enableMetricsExporter: false
enablePostgresUtil: true
instances: 1
overrides: [] # Override shard(s) configuration
103 changes: 9 additions & 94 deletions docs/runbook/scripts/restore-stackgres-backup.sh
@@ -108,7 +108,8 @@ function createSGShardedClusterConfigWithRestore() {
"downloadDiskConcurrency": 1
}
}
EOF)
EOF
)
CLUSTER_CONFIG=$(echo "${sourceConfig}" | \
jq --argjson initialDataConfig "${initialDataConfig}" '.spec.initialData=$initialDataConfig')
log "Created SGShardedCluster configuration to restore backup ${BACKUP_TO_RESTORE}"
@@ -144,75 +145,6 @@ function findShardedCluster() {
doContinue
}

function fixClusterAuth() {
local sgPasswords=$(kubectl get secret "${CLUSTER}" -o json |
ksd |
jq -r '.stringData')
local superuserUsername=$(echo "${sgPasswords}" | jq -r '.["superuser-username"]')
local superuserPassword=$(echo "${sgPasswords}" | jq -r '.["superuser-password"]')
local replicationUsername=$(echo "${sgPasswords}" | jq -r '.["replication-username"]')
local replicationPassword=$(echo "${sgPasswords}" | jq -r '.["replication-password"]')
local authenticatorUsername=$(echo "${sgPasswords}" | jq -r '.["authenticator-username"]')
local authenticatorPassword=$(echo "${sgPasswords}" | jq -r '.["authenticator-password"]')
# Mirror Node Passwords
local mirrorNodePasswords=$(kubectl get secret "${HELM_RELEASE_NAME}-passwords" -o json |
ksd |
jq -r '.stringData')
local graphqlUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRAPHQL_DB_USERNAME')
local graphqlPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRAPHQL_DB_PASSWORD')
local grpcUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRPC_DB_USERNAME')
local grpcPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRPC_DB_PASSWORD')
local importerUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_USERNAME')
local importerPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_PASSWORD')
local ownerUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_OWNER')
local ownerPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_OWNERPASSWORD')
local restUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_REST_DB_USERNAME')
local restPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_REST_DB_PASSWORD')
local restJavaUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_RESTJAVA_DB_USERNAME')
local restJavaPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_RESTJAVA_DB_PASSWORD')
local rosettaUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_ROSETTA_DB_USERNAME')
local rosettaPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_ROSETTA_DB_PASSWORD')
local web3Username=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_WEB3_DB_USERNAME')
local web3Password=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_WEB3_DB_PASSWORD')
local dbName=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_NAME')
local sql=$(cat <<EOF
alter user ${superuserUsername} with password '${superuserPassword}';
alter user ${graphqlUsername} with password '${graphqlPassword}';
alter user ${grpcUsername} with password '${grpcPassword}';
alter user ${importerUsername} with password '${importerPassword}';
alter user ${ownerUsername} with password '${ownerPassword}';
alter user ${restUsername} with password '${restPassword}';
alter user ${restJavaUsername} with password '${restJavaPassword}';
alter user ${rosettaUsername} with password '${rosettaPassword}';
alter user ${web3Username} with password '${web3Password}';
alter user ${replicationUsername} with password '${replicationPassword}';
alter user ${authenticatorUsername} with password '${authenticatorPassword}';
\c ${dbName}
insert into pg_dist_authinfo(nodeid, rolename, authinfo)
values (0, '${superuserUsername}', 'password=${superuserPassword}'),
(0, '${graphqlUsername}', 'password=${graphqlPassword}'),
(0, '${grpcUsername}', 'password=${grpcPassword}'),
(0, '${importerUsername}', 'password=${importerPassword}'),
(0, '${ownerUsername}', 'password=${ownerPassword}'),
(0, '${restUsername}', 'password=${restPassword}'),
(0, '${restJavaUsername}', 'password=${restJavaPassword}'),
(0, '${rosettaUsername}', 'password=${rosettaPassword}'),
(0, '${web3Username}', 'password=${web3Password}') on conflict (nodeid, rolename)
do
update set authinfo = excluded.authinfo;
EOF)
log "Fixing passwords and pg_dist_authinfo for all pods in the cluster"
for pod in $(kubectl get pods -l 'app=StackGresCluster,role=master' -o name); do
log "Updating passwords and pg_dist_authinfo for ${pod}"
echo "$sql" | kubectl exec -i "${pod}" -c postgres-util -- psql -U "${superuserUsername}" -f -
done
checkCitusMetadataSyncStatus "${CURRENT_NAMESPACE}"
}
function getSnapshotHandle() {
local sgBackup=$1

@@ -249,7 +181,7 @@ function pickShardedBackup() {
echo "WARNING!!! You are about to restore an older backup, all later backups have to be removed before proceeding"
doContinue
count=$((backupIndex-1))
kubectl delete sgshardedbackups $(echo "${allBackups[@]:0:${count}}" | sed 's/[:TZ0-9\-]\+\///g')
kubectl delete sgshardedbackups $(echo "${allBackups[@]:0:${count}}" | sed -E 's/[:TZ0-9\-]+\///g')
log "Deleted ${count} most recent SGShardedBackups"
fi
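The switch to sed -E above keeps the same behaviour: it strips a leading timestamp prefix so only resource names reach kubectl delete. For illustration only, with an assumed entry format of <timestamp>/<name>:

# Prints "mirror-citus-backup-7": the [:TZ0-9-]+/ prefix is removed.
echo "2024-12-18T22:45:00Z/mirror-citus-backup-7" | sed -E 's/[:TZ0-9\-]+\///g'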

@@ -286,13 +218,13 @@ function recreateManagedCluster() {
flux resume helmrelease -n "${CURRENT_NAMESPACE}" "${HELM_RELEASE_NAME}" --timeout 30m
fi

waitForStackGresClusterPods
fixClusterAuth
unpauseCitus "${CURRENT_NAMESPACE}"
updateStackgresCreds "${CLUSTER}" "${CURRENT_NAMESPACE}"
routeTraffic "${CURRENT_NAMESPACE}"
log "SGShardedCluster ${CLUSTER} is ready"
}

function restoreBackup() {
function restoreBackup() {
log "Restoring..."

adjustCoordStorage
@@ -301,8 +233,9 @@ function restoreBackup() {
log "Creating SGShardedCluster with the restore configuration"
echo "${CLUSTER_CONFIG}" | kubectl apply -f -

waitForStackGresClusterPods
fixClusterAuth
unpauseCitus "${CURRENT_NAMESPACE}"
updateStackgresCreds "${CLUSTER}" "${CURRENT_NAMESPACE}"
checkCitusMetadataSyncStatus "${CURRENT_NAMESPACE}"
checkCoordinatorReplica

# Once again remove ownerReferences since in restore they will get updated with new owners
@@ -363,24 +296,6 @@ function swapPv() {
kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc "${pvcs[@]}" --timeout=-1s
}
function waitForStackGresClusterPods() {
log "Waiting for all StackGresCluster StatefulSets to be created"
while ! kubectl describe sgshardedclusters "${CLUSTER}" >/dev/null 2>&1; do
sleep 1
done
expectedTotal=$(($(kubectl get sgshardedclusters "${CLUSTER}" -o jsonpath='{.spec.shards.clusters}')+1))
while [[ "$(kubectl get sts -l 'app=StackGresCluster' -o name | wc -l)" -ne "${expectedTotal}" ]]; do
sleep 1
done
log "Waiting for all StackGresCluster pods to be ready"
for sts in $(kubectl get sts -l 'app=StackGresCluster' -o name); do
expected=$(kubectl get "${sts}" -o jsonpath='{.spec.replicas}')
kubectl wait --for=jsonpath='{.status.readyReplicas}'=${expected} "${sts}" --timeout=-1s
done
}
CURRENT_NAMESPACE=$(kubectl config view --minify --output 'jsonpath={..namespace}')
prepare
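With fixClusterAuth and waitForStackGresClusterPods gone, the restore flow leans on updateStackgresCreds and checkCitusMetadataSyncStatus from the shared runbook utilities. A rough manual equivalent of the sync check, assuming the default postgres superuser role and a database name of mirror_node (pod selector reused from the removed function above):

# Ask one of the Citus master pods whether workers have synced metadata.
kubectl exec "$(kubectl get pods -l 'app=StackGresCluster,role=master' -o name | head -n1)" \
  -c postgres-util -- psql -U postgres -d mirror_node \
  -c 'select nodename, metadatasynced from pg_dist_node;'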
