diff --git a/.gitlab/.ci-linters.yml b/.gitlab/.ci-linters.yml index fd1af4a9868c5..2283d1833f5d7 100644 --- a/.gitlab/.ci-linters.yml +++ b/.gitlab/.ci-linters.yml @@ -42,6 +42,7 @@ needs-rules: - tests_windows_sysprobe_x64 - trigger_auto_staging_release - trigger_manual_prod_release + - generate_windows_gitlab_runner_bump_pr # Lists jobs that are allowed to not be within JOBOWNERS job-owners: diff --git a/.gitlab/JOBOWNERS b/.gitlab/JOBOWNERS index 2678bd7a5de6d..b8c045048e928 100644 --- a/.gitlab/JOBOWNERS +++ b/.gitlab/JOBOWNERS @@ -127,6 +127,7 @@ deploy_containers-cws-instrumentation* @DataDog/agent-security # Trigger release trigger_manual_prod_release @DataDog/agent-delivery trigger_auto_staging_release @DataDog/agent-delivery +generate_windows_gitlab_runner_bump_pr @DataDog/agent-delivery # Integration test integration_tests_windows* @DataDog/windows-agent diff --git a/.gitlab/trigger_release/trigger_release.yml b/.gitlab/trigger_release/trigger_release.yml index 0ecd5e0b02606..fa6d2c7402b60 100644 --- a/.gitlab/trigger_release/trigger_release.yml +++ b/.gitlab/trigger_release/trigger_release.yml @@ -55,3 +55,33 @@ trigger_manual_prod_release: - if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+-v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+){0,1}$/ when: never - !reference [.on_deploy_stable_or_beta_manual_auto_on_stable] + +include: + - https://gitlab-templates.ddbuild.io/slack-notifier/v3-sdm/template.yml + +.setup_github_app_agent_platform_auto_pr: + # GitHub App rate-limits are per-app. 
Since we are rarely calling the job, we are only using the instance 2 + - | + GITHUB_KEY_B64=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $MACOS_GITHUB_APP_2 key_b64) || exit $?; export GITHUB_KEY_B64 + GITHUB_APP_ID=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $MACOS_GITHUB_APP_2 app_id) || exit $?; export GITHUB_APP_ID + GITHUB_INSTALLATION_ID=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $MACOS_GITHUB_APP_2 installation_id) || exit $?; export GITHUB_INSTALLATION_ID + echo "Using GitHub App instance 2" + +generate_windows_gitlab_runner_bump_pr: + stage: trigger_release + extends: .slack-notifier-base + needs: ["trigger_auto_staging_release"] + tags: ["arch:amd64"] + rules: + - if: $DDR == "true" + when: never + - if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+-v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+){0,1}$/ + when: never + - if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$/ + + script: + # We are using the agent platform auto PR github app to access the buildenv repository (already used for macOS builds) + - !reference [.setup_github_app_agent_platform_auto_pr] + - python3 -m pip install -r requirements.txt -r tasks/libs/requirements-notifications.txt + - $S3_CP_CMD $S3_ARTIFACTS_URI/agent-version.cache . 
def _update_windows_runner_version(new_version=None, buildenv_ref="master"):
    """
    Run the buildenv `runner-bump.yml` workflow and announce the generated PR on Slack.

    Args:
        new_version: Value forwarded to the workflow as its version input.
            Mandatory in practice: `None` is rejected.
        buildenv_ref: Ref of the buildenv repository the workflow is dispatched on.

    Returns:
        The workflow conclusion string. Anything other than "success" is
        returned to the caller without downloading artifacts or notifying Slack.

    Raises:
        Exit: if `new_version` is None, or if the `PR_URL_ARTIFACT` file is
            missing or empty after the artifacts were downloaded.
    """
    if new_version is None:
        raise Exit(message="Buildenv workflow need the 'new_version' field value to be not None")

    run = trigger_buildenv_workflow(
        workflow_name="runner-bump.yml",
        github_action_ref=buildenv_ref,
        new_version=new_version,
    )
    # We are only waiting 0.5min between each status check because buildenv is much faster than macOS builds
    workflow_conclusion, workflow_url = follow_workflow_run(run, "DataDog/buildenv", 0.5)

    if workflow_conclusion != "success":
        # Job logs are only printed for an actual failure; other conclusions are just reported back.
        if workflow_conclusion == "failure":
            print_failed_jobs_logs(run)
        return workflow_conclusion

    print_workflow_conclusion(workflow_conclusion, workflow_url)

    # Artifacts are extracted in the current directory: 3 attempts, 5 seconds apart.
    download_with_retry(download_artifacts, run, ".", 3, 5, "DataDog/buildenv")

    # The download step gives no guarantee that this specific artifact exists,
    # so report a missing file the same way as an empty one instead of crashing.
    try:
        with open("PR_URL_ARTIFACT", encoding="utf-8") as f:
            pr_url = f.read().strip()
    except FileNotFoundError as e:
        raise Exit(message="Failed to fetch artifact from the workflow. (Missing artifact)") from e

    if not pr_url:
        raise Exit(message="Failed to fetch artifact from the workflow. (Empty artifact)")

    message = f":robobits: A new windows-runner bump PR to {new_version} has been generated. Please take a look :frog-review:\n:pr: {pr_url} :ty:"

    send_slack_message("ci-infra-support", message)
    return workflow_conclusion


@task
def update_windows_runner_version(
    ctx,
    new_version=None,
    buildenv_ref="master",
):
    """
    Trigger a workflow on the buildenv repository to bump windows gitlab runner

    When `new_version` is not provided, it is derived from `current_version(ctx, "7")`.
    Exits with code 1 when the workflow conclusion is not "success".
    """
    if new_version is None:
        new_version = str(current_version(ctx, "7"))

    conclusion = _update_windows_runner_version(new_version, buildenv_ref)
    if conclusion != "success":
        raise Exit(message=f"Buildenv workflow {conclusion}", code=1)


def trigger_buildenv_workflow(workflow_name="runner-bump.yml", github_action_ref="master", new_version=None):
    """
    Trigger a workflow to bump windows gitlab runner

    Args:
        workflow_name: Workflow file to dispatch on DataDog/buildenv.
        github_action_ref: Ref the workflow is dispatched on.
        new_version: Optional value sent as the "new-version" workflow input.

    Returns:
        The first workflow run found for this ref that started after the dispatch.

    Raises:
        Exit: if the dispatch is rejected or no matching run shows up in time.
    """
    inputs = {}
    if new_version is not None:
        inputs["new-version"] = new_version

    print(
        "Creating workflow on buildenv on commit {} with args:\n{}".format(  # noqa: FS002
            github_action_ref, "\n".join([f" - {name}: {value}" for name, value in inputs.items()])
        )
    )

    # Hack: get current time to only fetch workflows that started after now
    now = datetime.utcnow()

    gh = GithubAPI('DataDog/buildenv')
    result = gh.trigger_workflow(workflow_name, github_action_ref, inputs)

    if not result:
        print(f"Couldn't trigger workflow run. result={result}")
        raise Exit(code=1)

    # Since we can't get the workflow run id from a `create_dispatch` api call we are fetching the first running workflow after `now`.
    max_retries = 10
    recent_runs = gh.workflow_run_for_ref_after_date(workflow_name, github_action_ref, now)
    for _ in range(max_retries):
        if recent_runs:
            break
        # The run may take a few seconds to be listed after the dispatch
        sleep(3)
        recent_runs = gh.workflow_run_for_ref_after_date(workflow_name, github_action_ref, now)

    if not recent_runs:
        print("Couldn't get the run workflow")
        raise Exit(code=1)

    return recent_runs[0]
""" @@ -255,7 +292,7 @@ def download_artifacts(run, destination="."): # Create temp directory to store the artifact zips with tempfile.TemporaryDirectory() as tmpdir: - workflow = GithubAPI('DataDog/datadog-agent-macos-build') + workflow = GithubAPI(repository) for artifact in run_artifacts: # Download artifact print("Downloading artifact: ", artifact) @@ -281,14 +318,21 @@ def download_logs(run, destination="."): zip_ref.extractall(destination) -def download_with_retry(download_function, run, destination=".", retry_count=3, retry_interval=10): +def download_with_retry( + download_function, + run, + destination=".", + retry_count=3, + retry_interval=10, + repository="DataDog/datadog-agent-macos-build", +): import requests retry = retry_count while retry > 0: try: - download_function(run, destination) + download_function(run, destination, repository) print(color_message(f"Download successful for run {run.id} to {destination}", "blue")) return except (requests.exceptions.RequestException, ConnectionError):