From 8156230bdd2ff04ca72bfdd2ed13e9c5c10d156a Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Thu, 3 Oct 2024 13:37:14 +0100 Subject: [PATCH 01/19] ref(actions): allow more flexibility on cached states usage --- .github/workflows/cd-deploy-nodes-gcp.yml | 55 ++++++++++++++----- .../workflows/scripts/gcp-get-cached-disks.sh | 33 +++++------ .github/workflows/sub-find-cached-disks.yml | 20 +++++-- 3 files changed, 69 insertions(+), 39 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index c096d531ddf..88d9f0b5497 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -30,17 +30,39 @@ on: workflow_dispatch: inputs: network: - default: 'Mainnet' + default: Mainnet description: 'Network to deploy: Mainnet or Testnet' required: true - log_file: - default: '' - description: 'Log to a file path rather than standard output' + type: choice + options: + - Mainnet + - Testnet + cached_disk_type: + default: tip + description: 'Type of cached disk to use' + required: true + type: choice + options: + - tip + - checkpoint + prefer_main_cached_state: + default: false + description: 'Prefer cached state from the main branch' + required: false + type: boolean + no_cached_disk: + default: false + description: 'Do not use a cached state disk' + required: false + type: boolean no_cache: description: 'Disable the Docker cache for this build' required: false type: boolean default: false + log_file: + default: '' + description: 'Log to a file path rather than standard output' push: # Skip main branch updates where Rust code and dependencies aren't modified. @@ -175,18 +197,19 @@ jobs: test_variables: '-e NETWORK -e ZEBRA_CONF_PATH="zebrad/tests/common/configs/v1.0.0-rc.2.toml"' network: ${{ inputs.network || vars.ZCASH_NETWORK }} - # Finds a `tip` cached state disk for zebra from the main branch + # Finds a cached state disk for zebra # # Passes the disk name to subsequent jobs using `cached_disk_name` output # get-disk-name: name: Get disk name uses: ./.github/workflows/sub-find-cached-disks.yml + if: ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} with: network: ${{ inputs.network || vars.ZCASH_NETWORK }} disk_prefix: zebrad-cache - disk_suffix: tip - prefer_main_cached_state: true + disk_suffix: ${{ inputs.cached_disk_type || 'tip' }} + prefer_main_cached_state: ${{ inputs.prefer_main_cached_state || (github.event_name == 'push' && github.ref_name == 'main' && true) || false }} # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the configured GCP region. @@ -250,14 +273,14 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 - # TODO we should implement the fixes from https://github.com/ZcashFoundation/zebra/pull/5670 here - # but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation - name: Create instance template for ${{ matrix.network }} run: | - NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" - DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd" + DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" + DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" + elif [ ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then + echo "No cached disk required" else echo "No cached disk found for ${{ matrix.network }} in main branch" exit 1 @@ -270,7 +293,7 @@ jobs: --image-family=cos-stable \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --create-disk="${DISK_PARAMS}" \ - --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \ + --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ --container-tty \ --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ @@ -363,10 +386,12 @@ jobs: # Create instance template from container image - name: Manual deploy of a single ${{ inputs.network }} instance running zebrad run: | - NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" - DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd" + DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" + DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" + elif [ ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then + echo "No cached disk required" else echo "No cached disk found for ${{ matrix.network }} in main branch" exit 1 @@ -379,7 +404,7 @@ jobs: --image-family=cos-stable \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --create-disk="${DISK_PARAMS}" \ - --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \ + --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ --container-tty \ --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 0f38addf10f..16687458c21 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -3,9 +3,9 @@ # This script finds a cached Google Cloud Compute image based on specific criteria. # # If there are multiple disks: -# - prefer images generated from the same commit, then +# - prefer images generated from the same branch, then # - if prefer_main_cached_state is true, prefer images from the `main` branch, then -# - use any images from any other branch or commit. +# - use any images from any other branch or branch. # # Within each of these categories: # - prefer newer images to older images @@ -20,7 +20,7 @@ echo "Extracting local state version..." LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) echo "STATE_VERSION: ${LOCAL_STATE_VERSION}" -# Function to find a cached disk image based on the git pattern (commit, main, or any branch) +# Function to find a cached disk image based on the git pattern (branch, main, or any branch) find_cached_disk_image() { local git_pattern="${1}" local git_source="${2}" @@ -43,31 +43,29 @@ find_cached_disk_image() { # Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then # Find the most suitable cached disk image - echo "Finding the most suitable cached disk image..." + echo "Finding a ${DISK_PREFIX}--${DISK_SUFFIX} cached disk image for ${NETWORK}..." CACHED_DISK_NAME="" - # First, try to find a cached disk image from the current commit - CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit") - - # If no cached disk image is found - if [[ -z "${CACHED_DISK_NAME}" ]]; then - # Check if main branch images are preferred - if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then - CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") - # Else, try to find one from any branch - else + # Check if main branch images are preferred + if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then + CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") + # Else, try to find a cached disk image from the current branch (or PR) + else + CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_REF}" "branch") + # If no cached disk image is found, try to find one from any branch + if [[ -z "${CACHED_DISK_NAME}" ]]; then CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch") fi fi # Handle case where no suitable disk image is found if [[ -z "${CACHED_DISK_NAME}" ]]; then - echo "No suitable cached state disk available." - echo "Cached state test jobs must depend on the cached state rebuild job." + echo "No suitable cached state disk available. Try running the cached state rebuild job." exit 1 + else + echo "Selected Disk: ${CACHED_DISK_NAME}" fi - echo "Selected Disk: ${CACHED_DISK_NAME}" else echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search." fi @@ -77,7 +75,6 @@ find_available_disk_type() { local base_name="${1}" local disk_type="${2}" local disk_pattern="${base_name}-cache" - local output_var="${base_name}_${disk_type}_disk" local disk_name disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) diff --git a/.github/workflows/sub-find-cached-disks.yml b/.github/workflows/sub-find-cached-disks.yml index c936d65f8bd..2c0d5f83350 100644 --- a/.github/workflows/sub-find-cached-disks.yml +++ b/.github/workflows/sub-find-cached-disks.yml @@ -74,20 +74,28 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 + # Performs formatting on disk name components. + # # Disk images in GCP are required to be in lowercase, but the blockchain network - # uses sentence case, so we need to downcase ${{ inputs.network }} + # uses sentence case, so we need to downcase ${{ inputs.network }}. + # + # Disk image names in GCP are limited to 63 characters, so we need to limit + # branch names to 12 characters. # - # Passes a lowercase Network name to subsequent steps using $NETWORK env variable - - name: Downcase network name for disks + # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable. + # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable. + - name: Format network name and branch name for disks run: | - NETWORK_CAPS=${{ inputs.network }} - echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV + NETWORK_CAPS="${{ inputs.network }}" + echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV" + LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}" + echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV" # Check if there are cached state disks available for subsequent jobs to use. - name: Check if cached state disks exists id: get-available-disks env: - GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }} + GITHUB_REF: ${{ env.SHORT_GITHUB_REF }} NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input DISK_PREFIX: ${{ inputs.disk_prefix }} DISK_SUFFIX: ${{ inputs.disk_suffix }} From 77c0e0d65126f8c68d4fc1b450f7c4601cfed44e Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Thu, 3 Oct 2024 14:18:02 +0100 Subject: [PATCH 02/19] chore: improve message --- .github/workflows/scripts/gcp-get-cached-disks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 16687458c21..4c295c99ec5 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -43,7 +43,7 @@ find_cached_disk_image() { # Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then # Find the most suitable cached disk image - echo "Finding a ${DISK_PREFIX}--${DISK_SUFFIX} cached disk image for ${NETWORK}..." + echo "Finding a ${DISK_PREFIX}-${DISK_SUFFIX} disk image for ${NETWORK}..." CACHED_DISK_NAME="" # Check if main branch images are preferred From 2f706bda3c767cf8173c767eeae6815d2421ed06 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Thu, 3 Oct 2024 15:06:21 +0100 Subject: [PATCH 03/19] fix(actions): deploy single instances even if no cached state is needed --- .github/workflows/cd-deploy-nodes-gcp.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 88d9f0b5497..bd8a3ae67ed 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -349,7 +349,8 @@ jobs: permissions: contents: 'read' id-token: 'write' - if: github.event_name == 'workflow_dispatch' + # Run even if we don't need a cached disk, but only when triggered by a workflow_dispatch + if: ${{ !failure(needs.get-disk-name) && github.event_name == 'workflow_dispatch' }} steps: - uses: actions/checkout@v4.2.1 From 5dee64794c4430f40244514bcf049e305c450840 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 9 Oct 2024 09:32:34 +0100 Subject: [PATCH 04/19] rev: apply suggestions from code review Co-authored-by: Marek --- .github/workflows/cd-deploy-nodes-gcp.yml | 4 ++-- .github/workflows/scripts/gcp-get-cached-disks.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index bd8a3ae67ed..202928b071b 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -279,7 +279,7 @@ jobs: DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" - elif [ ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then + elif [ ${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then echo "No cached disk required" else echo "No cached disk found for ${{ matrix.network }} in main branch" @@ -391,7 +391,7 @@ jobs: DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" - elif [ ${{ !inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then + elif [ ${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then echo "No cached disk required" else echo "No cached disk found for ${{ matrix.network }} in main branch" diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 4c295c99ec5..228f5ba4b8a 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -3,9 +3,9 @@ # This script finds a cached Google Cloud Compute image based on specific criteria. # # If there are multiple disks: -# - prefer images generated from the same branch, then -# - if prefer_main_cached_state is true, prefer images from the `main` branch, then -# - use any images from any other branch or branch. +# - if `PREFER_MAIN_CACHED_STATE` is "true", then select an image from the `main` branch, else +# - try to find a cached disk image from the current branch (or PR), else +# - try to find an image from any branch. # # Within each of these categories: # - prefer newer images to older images From dd55563fa809ec4a011862258a86aaf00c13c662 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 9 Oct 2024 13:16:23 +0100 Subject: [PATCH 05/19] fix: wrong use of `failure()` --- .github/workflows/cd-deploy-nodes-gcp.yml | 2 +- docker/docker-compose.full.yml | 153 ++++++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 docker/docker-compose.full.yml diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 202928b071b..784f40952c0 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -350,7 +350,7 @@ jobs: contents: 'read' id-token: 'write' # Run even if we don't need a cached disk, but only when triggered by a workflow_dispatch - if: ${{ !failure(needs.get-disk-name) && github.event_name == 'workflow_dispatch' }} + if: ${{ !failure() && github.event_name == 'workflow_dispatch' }} steps: - uses: actions/checkout@v4.2.1 diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml new file mode 100644 index 00000000000..815ad941ea6 --- /dev/null +++ b/docker/docker-compose.full.yml @@ -0,0 +1,153 @@ +version: "3.8" + +services: + zebra: + image: zfnd/zebra + platform: linux/amd64 + build: + context: ../ + dockerfile: docker/Dockerfile + target: runtime + restart: unless-stopped + deploy: + resources: + reservations: + cpus: "4" + memory: 16G + depends_on: + prometheus: + condition: service_started + grafana: + condition: service_started + env_file: + - .zebra.env + # Change this to the commmand you want to run, respecting the entrypoint.sh + # For example, to run the tests, use the following command: + # command: ["cargo", "test", "--locked", "--release", "--features", "${TEST_FEATURES}", "--package", "zebrad", "--test", "acceptance", "--", "--nocapture", "--include-ignored", "sync_large_checkpoints_"] + #! Uncomment the following line to use a zebrad.toml from the host machine + # NOTE: This will override the zebrad.toml in the image and make some variables irrelevant + # configs: + # - source: zebra_config + # target: /etc/zebrad/zebrad.toml + # uid: '2001' # Rust's container default user uid + # gid: '2001' # Rust's container default group gid + # mode: 0440 + volumes: + - zebrad-cache:/var/cache/zebrad-cache + - lwd-cache:/var/cache/lwd-cache + ports: + # Zebra uses the following inbound and outbound TCP ports + - "8232:8232" # Opens an RPC endpoint (for wallet storing and mining) + - "8233:8233" # Mainnet Network (for peer connections) + - "18233:18233" # Testnet Network + # - "9999:9999" # Metrics + # - "3000:3000" # Tracing + healthcheck: + start_period: 3m + interval: 15s + timeout: 10s + retries: 3 + # test: ["CMD-SHELL", "curl --data-binary '{\"jsonrpc\": \"1.0\", \"id\":\"curltest\", \"method\": \"getblockchaininfo\", \"params\": [] }' -H 'content-type: application/json' http://127.0.0.1:8232/ || exit 1"] + + lightwalletd: + image: electriccoinco/lightwalletd + platform: linux/amd64 + depends_on: + zebra: + condition: service_started + restart: unless-stopped + deploy: + resources: + reservations: + cpus: "4" + memory: 16G + env_file: + - .lightwalletd.env + configs: + - source: lwd_config + target: /etc/lightwalletd/zcash.conf + volumes: + - litewalletd-data:/var/lib/lightwalletd/db + # This setup with --no-tls-very-insecure is only for testing purposes + #! For production environments follow the guidelines here: https://github.com/zcash/lightwalletd#production-usage + command: > + --no-tls-very-insecure + --grpc-bind-addr=0.0.0.0:9067 + --http-bind-addr=0.0.0.0:9068 + --zcash-conf-path=/etc/lightwalletd/zcash.conf + --data-dir=/var/lib/lightwalletd/db + --log-file=/dev/stdout + --log-level=7 + ports: + - "9067:9067" # gRPC + - "9068:9068" # HTTP + + prometheus: + image: prom/prometheus + configs: + - source: prometheus_config + target: /etc/prometheus/prometheus.yml + volumes: + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.enable-lifecycle' + ports: + - "9090:9090" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:9090/status || exit 1 + start_period: 30s + interval: 10s + timeout: 15s + retries: 3 + + grafana: + image: grafana/grafana + volumes: + - grafana-data:/var/lib/grafana + - ../grafana/provisioning/:/etc/grafana/provisioning/ + # environment: + # GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} + depends_on: + prometheus: + condition: service_healthy + env_file: + - ../grafana/config.monitoring + ports: + - "3000:3000" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:3000 || exit 1 + interval: 30s + timeout: 10s + retries: 3 + +configs: + zebra_config: + # Change the following line to point to a zebrad.toml on your host machine + # to allow for easy configuration changes without rebuilding the image + file: ../zebrad/tests/common/configs/v1.0.0-rc.2.toml/ + + lwd_config: + # Change the following line to point to a zcash.conf on your host machine + # to allow for easy configuration changes without rebuilding the image + file: ./zcash-lightwalletd/zcash.conf + + prometheus_config: + file: ../prometheus.yaml + +volumes: + zebrad-cache: + driver: local + + lwd-cache: + driver: local + + litewalletd-data: + driver: local + + prometheus-data: + driver: local + + grafana-data: + driver: local From 301fa96c32fec3dd8d3471c1c36c4c6ceae020be Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Fri, 11 Oct 2024 19:57:24 +0100 Subject: [PATCH 06/19] chore: remove extra file --- docker/docker-compose.full.yml | 153 --------------------------------- 1 file changed, 153 deletions(-) delete mode 100644 docker/docker-compose.full.yml diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml deleted file mode 100644 index 815ad941ea6..00000000000 --- a/docker/docker-compose.full.yml +++ /dev/null @@ -1,153 +0,0 @@ -version: "3.8" - -services: - zebra: - image: zfnd/zebra - platform: linux/amd64 - build: - context: ../ - dockerfile: docker/Dockerfile - target: runtime - restart: unless-stopped - deploy: - resources: - reservations: - cpus: "4" - memory: 16G - depends_on: - prometheus: - condition: service_started - grafana: - condition: service_started - env_file: - - .zebra.env - # Change this to the commmand you want to run, respecting the entrypoint.sh - # For example, to run the tests, use the following command: - # command: ["cargo", "test", "--locked", "--release", "--features", "${TEST_FEATURES}", "--package", "zebrad", "--test", "acceptance", "--", "--nocapture", "--include-ignored", "sync_large_checkpoints_"] - #! Uncomment the following line to use a zebrad.toml from the host machine - # NOTE: This will override the zebrad.toml in the image and make some variables irrelevant - # configs: - # - source: zebra_config - # target: /etc/zebrad/zebrad.toml - # uid: '2001' # Rust's container default user uid - # gid: '2001' # Rust's container default group gid - # mode: 0440 - volumes: - - zebrad-cache:/var/cache/zebrad-cache - - lwd-cache:/var/cache/lwd-cache - ports: - # Zebra uses the following inbound and outbound TCP ports - - "8232:8232" # Opens an RPC endpoint (for wallet storing and mining) - - "8233:8233" # Mainnet Network (for peer connections) - - "18233:18233" # Testnet Network - # - "9999:9999" # Metrics - # - "3000:3000" # Tracing - healthcheck: - start_period: 3m - interval: 15s - timeout: 10s - retries: 3 - # test: ["CMD-SHELL", "curl --data-binary '{\"jsonrpc\": \"1.0\", \"id\":\"curltest\", \"method\": \"getblockchaininfo\", \"params\": [] }' -H 'content-type: application/json' http://127.0.0.1:8232/ || exit 1"] - - lightwalletd: - image: electriccoinco/lightwalletd - platform: linux/amd64 - depends_on: - zebra: - condition: service_started - restart: unless-stopped - deploy: - resources: - reservations: - cpus: "4" - memory: 16G - env_file: - - .lightwalletd.env - configs: - - source: lwd_config - target: /etc/lightwalletd/zcash.conf - volumes: - - litewalletd-data:/var/lib/lightwalletd/db - # This setup with --no-tls-very-insecure is only for testing purposes - #! For production environments follow the guidelines here: https://github.com/zcash/lightwalletd#production-usage - command: > - --no-tls-very-insecure - --grpc-bind-addr=0.0.0.0:9067 - --http-bind-addr=0.0.0.0:9068 - --zcash-conf-path=/etc/lightwalletd/zcash.conf - --data-dir=/var/lib/lightwalletd/db - --log-file=/dev/stdout - --log-level=7 - ports: - - "9067:9067" # gRPC - - "9068:9068" # HTTP - - prometheus: - image: prom/prometheus - configs: - - source: prometheus_config - target: /etc/prometheus/prometheus.yml - volumes: - - prometheus-data:/prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.enable-lifecycle' - ports: - - "9090:9090" - healthcheck: - test: wget --no-verbose --tries=1 --spider http://localhost:9090/status || exit 1 - start_period: 30s - interval: 10s - timeout: 15s - retries: 3 - - grafana: - image: grafana/grafana - volumes: - - grafana-data:/var/lib/grafana - - ../grafana/provisioning/:/etc/grafana/provisioning/ - # environment: - # GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} - depends_on: - prometheus: - condition: service_healthy - env_file: - - ../grafana/config.monitoring - ports: - - "3000:3000" - healthcheck: - test: wget --no-verbose --tries=1 --spider http://localhost:3000 || exit 1 - interval: 30s - timeout: 10s - retries: 3 - -configs: - zebra_config: - # Change the following line to point to a zebrad.toml on your host machine - # to allow for easy configuration changes without rebuilding the image - file: ../zebrad/tests/common/configs/v1.0.0-rc.2.toml/ - - lwd_config: - # Change the following line to point to a zcash.conf on your host machine - # to allow for easy configuration changes without rebuilding the image - file: ./zcash-lightwalletd/zcash.conf - - prometheus_config: - file: ../prometheus.yaml - -volumes: - zebrad-cache: - driver: local - - lwd-cache: - driver: local - - litewalletd-data: - driver: local - - prometheus-data: - driver: local - - grafana-data: - driver: local From e57daabd10acdc0f2b1e772cceebe19a5f0db627 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 14 Oct 2024 15:39:40 +0100 Subject: [PATCH 07/19] chore: `echo` the pattern for easier debugging --- .github/workflows/scripts/gcp-get-cached-disks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 228f5ba4b8a..0ca12d95b10 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -36,7 +36,7 @@ find_cached_disk_image() { echo "Description: ${disk_description}" >&2 echo "${disk_name}" # This is the actual return value when a disk is found else - echo "No ${git_source} disk found." >&2 + echo "No ${git_source} disk found with '${disk_search_pattern}' pattern." >&2 fi } From ba68afde7ccbe46450d6ea092e5546e0889e4a36 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 16 Oct 2024 12:01:22 +0100 Subject: [PATCH 08/19] chore: add extra details to image naming convention Addresses https://github.com/ZcashFoundation/zebra/pull/8908#discussion_r1797679586 --- .github/workflows/sub-deploy-integration-tests-gcp.yml | 1 + .github/workflows/sub-find-cached-disks.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/sub-deploy-integration-tests-gcp.yml b/.github/workflows/sub-deploy-integration-tests-gcp.yml index fb880a8a369..7266f60ea54 100644 --- a/.github/workflows/sub-deploy-integration-tests-gcp.yml +++ b/.github/workflows/sub-deploy-integration-tests-gcp.yml @@ -654,6 +654,7 @@ jobs: # (This is unlikely, because each image created by a workflow has a different name.) # # The image name must also be 63 characters or less. + # More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format # # Force the image creation (--force) as the disk is still attached even though is not being # used by the container. diff --git a/.github/workflows/sub-find-cached-disks.yml b/.github/workflows/sub-find-cached-disks.yml index 2c0d5f83350..a71237887e2 100644 --- a/.github/workflows/sub-find-cached-disks.yml +++ b/.github/workflows/sub-find-cached-disks.yml @@ -81,6 +81,8 @@ jobs: # # Disk image names in GCP are limited to 63 characters, so we need to limit # branch names to 12 characters. + # Check the `create-state-image` in `sub-deploy-integration-tests-gcp.yml` for more details in image names. + # More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format # # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable. # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable. From 50cd029a75d4e8dad88e76b3fc233a173b35e0cd Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 16 Oct 2024 12:06:05 +0100 Subject: [PATCH 09/19] ref(actions): use a better logic for disk image selection This supersedes https://github.com/ZcashFoundation/zebra/pull/8936/ using a different approach with `${VAR:-value}` --- .../workflows/scripts/gcp-get-cached-disks.sh | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 0ca12d95b10..9716dc9f5a7 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -34,31 +34,29 @@ find_cached_disk_image() { echo "Found ${git_source} Disk: ${disk_name}" >&2 disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)") echo "Description: ${disk_description}" >&2 - echo "${disk_name}" # This is the actual return value when a disk is found + echo "${disk_name}" # This is the actual return value when a disk is found else echo "No ${git_source} disk found with '${disk_search_pattern}' pattern." >&2 fi } -# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image +# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to +# find a cached disk image. if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then # Find the most suitable cached disk image echo "Finding a ${DISK_PREFIX}-${DISK_SUFFIX} disk image for ${NETWORK}..." CACHED_DISK_NAME="" - # Check if main branch images are preferred + # Try to find an image based on the `main` branch if that branch is preferred. if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") - # Else, try to find a cached disk image from the current branch (or PR) - else - CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_REF}" "branch") - # If no cached disk image is found, try to find one from any branch - if [[ -z "${CACHED_DISK_NAME}" ]]; then - CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch") - fi fi + # If no image was found, try to find one from the current branch (or PR). + CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-${GITHUB_REF}" "branch")} + # If we still have no image, try to find one from any branch. + CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")} - # Handle case where no suitable disk image is found + # Handle the case where no suitable disk image is found if [[ -z "${CACHED_DISK_NAME}" ]]; then echo "No suitable cached state disk available. Try running the cached state rebuild job." exit 1 @@ -84,10 +82,10 @@ find_available_disk_type() { echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2 disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)") echo "Description: ${disk_description}" >&2 - echo "true" # This is the actual return value when a disk is found + echo "true" # This is the actual return value when a disk is found else echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2 - echo "false" # This is the actual return value when no disk is found + echo "false" # This is the actual return value when no disk is found fi } if [[ -n "${NETWORK}" ]]; then From 71b83cf6c084ac248bb878f3dd63aa6e463fa081 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 9 Oct 2024 13:16:23 +0100 Subject: [PATCH 10/19] fix: wrong use of `failure()` --- docker/docker-compose.full.yml | 153 +++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 docker/docker-compose.full.yml diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml new file mode 100644 index 00000000000..815ad941ea6 --- /dev/null +++ b/docker/docker-compose.full.yml @@ -0,0 +1,153 @@ +version: "3.8" + +services: + zebra: + image: zfnd/zebra + platform: linux/amd64 + build: + context: ../ + dockerfile: docker/Dockerfile + target: runtime + restart: unless-stopped + deploy: + resources: + reservations: + cpus: "4" + memory: 16G + depends_on: + prometheus: + condition: service_started + grafana: + condition: service_started + env_file: + - .zebra.env + # Change this to the commmand you want to run, respecting the entrypoint.sh + # For example, to run the tests, use the following command: + # command: ["cargo", "test", "--locked", "--release", "--features", "${TEST_FEATURES}", "--package", "zebrad", "--test", "acceptance", "--", "--nocapture", "--include-ignored", "sync_large_checkpoints_"] + #! Uncomment the following line to use a zebrad.toml from the host machine + # NOTE: This will override the zebrad.toml in the image and make some variables irrelevant + # configs: + # - source: zebra_config + # target: /etc/zebrad/zebrad.toml + # uid: '2001' # Rust's container default user uid + # gid: '2001' # Rust's container default group gid + # mode: 0440 + volumes: + - zebrad-cache:/var/cache/zebrad-cache + - lwd-cache:/var/cache/lwd-cache + ports: + # Zebra uses the following inbound and outbound TCP ports + - "8232:8232" # Opens an RPC endpoint (for wallet storing and mining) + - "8233:8233" # Mainnet Network (for peer connections) + - "18233:18233" # Testnet Network + # - "9999:9999" # Metrics + # - "3000:3000" # Tracing + healthcheck: + start_period: 3m + interval: 15s + timeout: 10s + retries: 3 + # test: ["CMD-SHELL", "curl --data-binary '{\"jsonrpc\": \"1.0\", \"id\":\"curltest\", \"method\": \"getblockchaininfo\", \"params\": [] }' -H 'content-type: application/json' http://127.0.0.1:8232/ || exit 1"] + + lightwalletd: + image: electriccoinco/lightwalletd + platform: linux/amd64 + depends_on: + zebra: + condition: service_started + restart: unless-stopped + deploy: + resources: + reservations: + cpus: "4" + memory: 16G + env_file: + - .lightwalletd.env + configs: + - source: lwd_config + target: /etc/lightwalletd/zcash.conf + volumes: + - litewalletd-data:/var/lib/lightwalletd/db + # This setup with --no-tls-very-insecure is only for testing purposes + #! For production environments follow the guidelines here: https://github.com/zcash/lightwalletd#production-usage + command: > + --no-tls-very-insecure + --grpc-bind-addr=0.0.0.0:9067 + --http-bind-addr=0.0.0.0:9068 + --zcash-conf-path=/etc/lightwalletd/zcash.conf + --data-dir=/var/lib/lightwalletd/db + --log-file=/dev/stdout + --log-level=7 + ports: + - "9067:9067" # gRPC + - "9068:9068" # HTTP + + prometheus: + image: prom/prometheus + configs: + - source: prometheus_config + target: /etc/prometheus/prometheus.yml + volumes: + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.enable-lifecycle' + ports: + - "9090:9090" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:9090/status || exit 1 + start_period: 30s + interval: 10s + timeout: 15s + retries: 3 + + grafana: + image: grafana/grafana + volumes: + - grafana-data:/var/lib/grafana + - ../grafana/provisioning/:/etc/grafana/provisioning/ + # environment: + # GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} + depends_on: + prometheus: + condition: service_healthy + env_file: + - ../grafana/config.monitoring + ports: + - "3000:3000" + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:3000 || exit 1 + interval: 30s + timeout: 10s + retries: 3 + +configs: + zebra_config: + # Change the following line to point to a zebrad.toml on your host machine + # to allow for easy configuration changes without rebuilding the image + file: ../zebrad/tests/common/configs/v1.0.0-rc.2.toml/ + + lwd_config: + # Change the following line to point to a zcash.conf on your host machine + # to allow for easy configuration changes without rebuilding the image + file: ./zcash-lightwalletd/zcash.conf + + prometheus_config: + file: ../prometheus.yaml + +volumes: + zebrad-cache: + driver: local + + lwd-cache: + driver: local + + litewalletd-data: + driver: local + + prometheus-data: + driver: local + + grafana-data: + driver: local From 3a9062f5049dff8b73dc95e93cd3e058a380b61c Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Fri, 11 Oct 2024 15:04:16 +0100 Subject: [PATCH 11/19] feat: allow to output disks in a matrix job --- .github/workflows/cd-deploy-nodes-gcp.yml | 30 +++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 784f40952c0..bc21d601445 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -234,8 +234,6 @@ jobs: needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ] runs-on: ubuntu-latest timeout-minutes: 60 - env: - CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }} permissions: contents: 'read' id-token: 'write' @@ -253,14 +251,17 @@ jobs: # Makes the Zcash network name lowercase. # - # Labels in GCP are required to be in lowercase, but the blockchain network - # uses sentence case, so we need to downcase the network. + # Labels and disks in GCP are required to be in lowercase, but the blockchain network + # uses sentence case, so we need to downcase the ${{ matrix.network }}. # - # Passes the lowercase network to subsequent steps using $NETWORK env variable. + # Passes ${{ matrix.network }} to subsequent steps using $NETWORK env variable. + # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable. - name: Downcase network name for labels run: | NETWORK_CAPS="${{ matrix.network }}" echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV" + LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}" + echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV" # Setup gcloud CLI - name: Authenticate to Google Cloud @@ -273,6 +274,25 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 + # TODO: The sub-find-cached-disks.yml reusable workflow does not supports matrix strategy + # We need to find a way to pass the matching network from the `get-disk-name` job + # In the meanwhile we're repeating the logic of sub-find-cached-disks.yml here + - name: Check if cached state disks exists + id: get-available-disks + env: + GITHUB_REF: ${{ env.SHORT_GITHUB_REF }} + NETWORK: ${{ matrix.network }} # use lowercase version from env, not input + DISK_PREFIX: zebrad-cache + DISK_SUFFIX: ${{ inputs.cached_disk_type || 'tip' }} + PREFER_MAIN_CACHED_STATE: ${{ inputs.prefer_main_cached_state }} + run: | + source ./.github/workflows/scripts/gcp-get-cached-disks.sh + echo "state_version=${LOCAL_STATE_VERSION}" >> "${GITHUB_OUTPUT}" + echo "cached_disk_name=${CACHED_DISK_NAME}" >> "${GITHUB_OUTPUT}" + echo "lwd_tip_disk=${LWD_TIP_DISK}" >> "${GITHUB_OUTPUT}" + echo "zebra_tip_disk=${ZEBRA_TIP_DISK}" >> "${GITHUB_OUTPUT}" + echo "zebra_checkpoint_disk=${ZEBRA_CHECKPOINT_DISK}" >> "${GITHUB_OUTPUT}" + - name: Create instance template for ${{ matrix.network }} run: | DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" From b6edc5b88c02ad446c3fdb3a5fd077ed6c492650 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 25 Sep 2024 17:25:31 +0100 Subject: [PATCH 12/19] feat(cd): add external static IPs to release nodes --- .github/workflows/cd-deploy-nodes-gcp.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index bc21d601445..45052f43df1 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -237,7 +237,7 @@ jobs: permissions: contents: 'read' id-token: 'write' - if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') }} + if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') || github.event_name == 'workflow_dispatch' }} steps: - uses: actions/checkout@v4.2.1 @@ -293,8 +293,15 @@ jobs: echo "zebra_tip_disk=${ZEBRA_TIP_DISK}" >> "${GITHUB_OUTPUT}" echo "zebra_checkpoint_disk=${ZEBRA_CHECKPOINT_DISK}" >> "${GITHUB_OUTPUT}" + - name: Get IP address for long-running release nodes + # if: ${{ github.event_name == 'release' }} + run: echo "IP_ADDRESS=$(gcloud compute addresses describe zebra-${NETWORK} --region ${{ vars.GCP_REGION }} --format='value(address)')" >> "$GITHUB_ENV" + - name: Create instance template for ${{ matrix.network }} run: | + if [ -n "${{ env.IP_ADDRESS }}" ]; then + IP_FLAG="--address=${{ env.IP_ADDRESS }}" + fi DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then @@ -311,7 +318,7 @@ jobs: --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} ${IP_FLAG} \ --create-disk="${DISK_PARAMS}" \ --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ From c3e4c2f5959a149125a5d51e97148516b7ffb14f Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Thu, 26 Sep 2024 10:44:18 +0100 Subject: [PATCH 13/19] fix(ci): allow to specify the required cached state disk type --- .github/workflows/cd-deploy-nodes-gcp.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 45052f43df1..3df820a965f 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -237,6 +237,7 @@ jobs: permissions: contents: 'read' id-token: 'write' + # TODO: Remove `|| github.event_name == 'workflow_dispatch'` condition before merging if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') || github.event_name == 'workflow_dispatch' }} steps: From 9618efe6d68215801041bd42e66dfc486ec7731b Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 14 Oct 2024 16:39:00 +0100 Subject: [PATCH 14/19] fix: use downcase `NETOWRK` name --- .github/workflows/cd-deploy-nodes-gcp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 3df820a965f..e62613c2561 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -282,7 +282,7 @@ jobs: id: get-available-disks env: GITHUB_REF: ${{ env.SHORT_GITHUB_REF }} - NETWORK: ${{ matrix.network }} # use lowercase version from env, not input + NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input DISK_PREFIX: zebrad-cache DISK_SUFFIX: ${{ inputs.cached_disk_type || 'tip' }} PREFER_MAIN_CACHED_STATE: ${{ inputs.prefer_main_cached_state }} From b812f61326759df6af9834073562f9e4a00c4dc2 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 14 Oct 2024 17:54:51 +0100 Subject: [PATCH 15/19] fix(ci): do not add IP flag as an argument --- .github/workflows/cd-deploy-nodes-gcp.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index e62613c2561..72fb92f37fa 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -301,7 +301,7 @@ jobs: - name: Create instance template for ${{ matrix.network }} run: | if [ -n "${{ env.IP_ADDRESS }}" ]; then - IP_FLAG="--address=${{ env.IP_ADDRESS }}" + IP_FLAG="--address=${{ env.IP_ADDRESS }} \" fi DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" @@ -319,7 +319,8 @@ jobs: --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} ${IP_FLAG} \ + --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + ${IP_FLAG} --create-disk="${DISK_PARAMS}" \ --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ From e10bdb222a0e442be8ed346a689e76130645d61f Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Tue, 15 Oct 2024 15:24:30 +0100 Subject: [PATCH 16/19] fix: allow a dynamic `IP_FLAG` --- .github/workflows/cd-deploy-nodes-gcp.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 72fb92f37fa..6fbb3bcf390 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -301,10 +301,14 @@ jobs: - name: Create instance template for ${{ matrix.network }} run: | if [ -n "${{ env.IP_ADDRESS }}" ]; then - IP_FLAG="--address=${{ env.IP_ADDRESS }} \" + IP_FLAG="--address=${{ env.IP_ADDRESS }}" + else + IP_FLAG="" fi + DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" + if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" elif [ ${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }} ]; then @@ -313,6 +317,7 @@ jobs: echo "No cached disk found for ${{ matrix.network }} in main branch" exit 1 fi + gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK} \ --machine-type ${{ vars.GCP_SMALL_MACHINE }} \ --boot-disk-size 50GB \ @@ -320,7 +325,7 @@ jobs: --image-project=cos-cloud \ --image-family=cos-stable \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ - ${IP_FLAG} + ${IP_FLAG} \ --create-disk="${DISK_PARAMS}" \ --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ From 2748e38762fadd1025ed3456626a748398bcc900 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Tue, 15 Oct 2024 16:09:43 +0100 Subject: [PATCH 17/19] fix: gcloud CLI was using an incompatible parameter for subnetwork --- .github/workflows/cd-deploy-nodes-gcp.yml | 4 ++-- .github/workflows/manual-zcashd-deploy.yml | 2 +- .github/workflows/sub-deploy-integration-tests-gcp.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 6fbb3bcf390..bf4f94d1c4c 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -324,7 +324,7 @@ jobs: --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + --subnet=${{ vars.GCP_SUBNETWORK }} \ ${IP_FLAG} \ --create-disk="${DISK_PARAMS}" \ --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ @@ -437,7 +437,7 @@ jobs: --boot-disk-type=pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + --subnet=${{ vars.GCP_SUBNETWORK }} \ --create-disk="${DISK_PARAMS}" \ --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ diff --git a/.github/workflows/manual-zcashd-deploy.yml b/.github/workflows/manual-zcashd-deploy.yml index 05872f2532d..fc160aab766 100644 --- a/.github/workflows/manual-zcashd-deploy.yml +++ b/.github/workflows/manual-zcashd-deploy.yml @@ -73,7 +73,7 @@ jobs: --container-image electriccoinco/zcashd \ --container-env ZCASHD_NETWORK="${{ inputs.network }}" \ --machine-type ${{ vars.GCP_SMALL_MACHINE }} \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + --subnet=${{ vars.GCP_SUBNETWORK }} \ --service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \ --scopes cloud-platform \ --labels=app=zcashd,environment=prod,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \ diff --git a/.github/workflows/sub-deploy-integration-tests-gcp.yml b/.github/workflows/sub-deploy-integration-tests-gcp.yml index 7266f60ea54..4d63ee868e0 100644 --- a/.github/workflows/sub-deploy-integration-tests-gcp.yml +++ b/.github/workflows/sub-deploy-integration-tests-gcp.yml @@ -200,7 +200,7 @@ jobs: --create-disk="${DISK_PARAMS}" \ --container-image=gcr.io/google-containers/busybox \ --machine-type ${{ vars.GCP_LARGE_MACHINE }} \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ + --subnet=${{ vars.GCP_SUBNETWORK }} \ --scopes cloud-platform \ --metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \ --metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \ From 8f51de0f6d4eb05c43820e461e0686f8014e6568 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 16 Oct 2024 10:43:25 +0100 Subject: [PATCH 18/19] fix: use correct pipeline conditions --- .github/workflows/cd-deploy-nodes-gcp.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index bf4f94d1c4c..3811b9ac8cb 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -237,8 +237,7 @@ jobs: permissions: contents: 'read' id-token: 'write' - # TODO: Remove `|| github.event_name == 'workflow_dispatch'` condition before merging - if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') || github.event_name == 'workflow_dispatch' }} + if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') }} steps: - uses: actions/checkout@v4.2.1 @@ -295,7 +294,7 @@ jobs: echo "zebra_checkpoint_disk=${ZEBRA_CHECKPOINT_DISK}" >> "${GITHUB_OUTPUT}" - name: Get IP address for long-running release nodes - # if: ${{ github.event_name == 'release' }} + if: ${{ github.event_name == 'release' }} run: echo "IP_ADDRESS=$(gcloud compute addresses describe zebra-${NETWORK} --region ${{ vars.GCP_REGION }} --format='value(address)')" >> "$GITHUB_ENV" - name: Create instance template for ${{ matrix.network }} From 3e11e12f2f17d1226852d99f3177fdd3381b67a3 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Wed, 16 Oct 2024 14:08:01 +0100 Subject: [PATCH 19/19] chore: remove extra file --- docker/docker-compose.full.yml | 153 --------------------------------- 1 file changed, 153 deletions(-) delete mode 100644 docker/docker-compose.full.yml diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml deleted file mode 100644 index 815ad941ea6..00000000000 --- a/docker/docker-compose.full.yml +++ /dev/null @@ -1,153 +0,0 @@ -version: "3.8" - -services: - zebra: - image: zfnd/zebra - platform: linux/amd64 - build: - context: ../ - dockerfile: docker/Dockerfile - target: runtime - restart: unless-stopped - deploy: - resources: - reservations: - cpus: "4" - memory: 16G - depends_on: - prometheus: - condition: service_started - grafana: - condition: service_started - env_file: - - .zebra.env - # Change this to the commmand you want to run, respecting the entrypoint.sh - # For example, to run the tests, use the following command: - # command: ["cargo", "test", "--locked", "--release", "--features", "${TEST_FEATURES}", "--package", "zebrad", "--test", "acceptance", "--", "--nocapture", "--include-ignored", "sync_large_checkpoints_"] - #! Uncomment the following line to use a zebrad.toml from the host machine - # NOTE: This will override the zebrad.toml in the image and make some variables irrelevant - # configs: - # - source: zebra_config - # target: /etc/zebrad/zebrad.toml - # uid: '2001' # Rust's container default user uid - # gid: '2001' # Rust's container default group gid - # mode: 0440 - volumes: - - zebrad-cache:/var/cache/zebrad-cache - - lwd-cache:/var/cache/lwd-cache - ports: - # Zebra uses the following inbound and outbound TCP ports - - "8232:8232" # Opens an RPC endpoint (for wallet storing and mining) - - "8233:8233" # Mainnet Network (for peer connections) - - "18233:18233" # Testnet Network - # - "9999:9999" # Metrics - # - "3000:3000" # Tracing - healthcheck: - start_period: 3m - interval: 15s - timeout: 10s - retries: 3 - # test: ["CMD-SHELL", "curl --data-binary '{\"jsonrpc\": \"1.0\", \"id\":\"curltest\", \"method\": \"getblockchaininfo\", \"params\": [] }' -H 'content-type: application/json' http://127.0.0.1:8232/ || exit 1"] - - lightwalletd: - image: electriccoinco/lightwalletd - platform: linux/amd64 - depends_on: - zebra: - condition: service_started - restart: unless-stopped - deploy: - resources: - reservations: - cpus: "4" - memory: 16G - env_file: - - .lightwalletd.env - configs: - - source: lwd_config - target: /etc/lightwalletd/zcash.conf - volumes: - - litewalletd-data:/var/lib/lightwalletd/db - # This setup with --no-tls-very-insecure is only for testing purposes - #! For production environments follow the guidelines here: https://github.com/zcash/lightwalletd#production-usage - command: > - --no-tls-very-insecure - --grpc-bind-addr=0.0.0.0:9067 - --http-bind-addr=0.0.0.0:9068 - --zcash-conf-path=/etc/lightwalletd/zcash.conf - --data-dir=/var/lib/lightwalletd/db - --log-file=/dev/stdout - --log-level=7 - ports: - - "9067:9067" # gRPC - - "9068:9068" # HTTP - - prometheus: - image: prom/prometheus - configs: - - source: prometheus_config - target: /etc/prometheus/prometheus.yml - volumes: - - prometheus-data:/prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.enable-lifecycle' - ports: - - "9090:9090" - healthcheck: - test: wget --no-verbose --tries=1 --spider http://localhost:9090/status || exit 1 - start_period: 30s - interval: 10s - timeout: 15s - retries: 3 - - grafana: - image: grafana/grafana - volumes: - - grafana-data:/var/lib/grafana - - ../grafana/provisioning/:/etc/grafana/provisioning/ - # environment: - # GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} - depends_on: - prometheus: - condition: service_healthy - env_file: - - ../grafana/config.monitoring - ports: - - "3000:3000" - healthcheck: - test: wget --no-verbose --tries=1 --spider http://localhost:3000 || exit 1 - interval: 30s - timeout: 10s - retries: 3 - -configs: - zebra_config: - # Change the following line to point to a zebrad.toml on your host machine - # to allow for easy configuration changes without rebuilding the image - file: ../zebrad/tests/common/configs/v1.0.0-rc.2.toml/ - - lwd_config: - # Change the following line to point to a zcash.conf on your host machine - # to allow for easy configuration changes without rebuilding the image - file: ./zcash-lightwalletd/zcash.conf - - prometheus_config: - file: ../prometheus.yaml - -volumes: - zebrad-cache: - driver: local - - lwd-cache: - driver: local - - litewalletd-data: - driver: local - - prometheus-data: - driver: local - - grafana-data: - driver: local