Add workflow for on-demand benchmarking #13
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as shown in the GitHub Actions UI; also used in the concurrency group.
name: android-perf

# Triggers: a nightly schedule, the ciflow/perf-android tag, every pull request,
# and manual (on-demand) runs with the inputs declared below.
on:
  schedule:
    # Daily at 00:00 UTC. Quoted so the expression is always read as one string.
    - cron: '0 0 * * *'
  push:
    tags:
      - ciflow/perf-android
  pull_request:
  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      # Comma-separated list; the set-models job splits it on ",".
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        # Previously `false`, which is not a meaningful value for a string input
        # (it looks copied from the boolean `threadpool` input below).
        # NOTE(review): samsung_galaxy_s2x mirrors the Samsung Galaxy S2x device
        # pool used by benchmark-on-device - confirm the expected identifier.
        default: samsung_galaxy_s2x
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      threadpool:
        description: Run with threadpool?
        required: false
        type: boolean
        default: false
      # No default: the input is empty unless supplied by the caller.
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string
# Only one run per concurrency group is kept alive; a newer run in the same
# group cancels the one already in progress.
concurrency:
  cancel-in-progress: true
  # Group key, joined with "-":
  #   * workflow name
  #   * pull request number, or the ref name when there is no pull request
  #   * commit SHA when the ref is a branch (the expression yields false otherwise)
  #   * whether the run was started by workflow_dispatch
  #   * whether the run was started by the schedule
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}

# Workflow-wide default token permissions; individual jobs may override them.
permissions: read-all
jobs:
  # Converts the comma-separated model list into a JSON array that the
  # export-models job consumes as its build matrix.
  set-models:
    runs-on: linux.2xlarge
    outputs:
      models: ${{ steps.set-models.outputs.models }}
    steps:
      - name: Set models
        id: set-models
        shell: bash
        env:
          # `inputs` is empty for schedule/push/pull_request events, so fall back
          # to the same default as the workflow_dispatch input. String literals in
          # expressions must be single-quoted; the bare word made the workflow
          # file invalid. Passing the value through env also keeps user input
          # out of the script text itself.
          MODELS: ${{ inputs.models || 'stories110M' }}
        run: |
          if [[ -z "$MODELS" ]]; then
            echo "No models provided and failed to retrieve the default values."
            exit 1
          fi
          # jq -R reads the raw string, -c prints the array on a single line so it
          # fits the name=value format of $GITHUB_OUTPUT (replaces the deprecated
          # ::set-output workflow command).
          echo "models=$(echo "$MODELS" | jq -Rc 'split(",")')" >> "$GITHUB_OUTPUT"

  # Runs the export script once per model from set-models, via the shared
  # linux_job.yml reusable workflow.
  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: set-models
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-models.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      timeout: 60
      # Must match the artifact name downloaded by upload-models.
      upload-artifact: android-models
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
        echo "Exporting model: ${{ matrix.model }}"
        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
        # Test llama2
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "artifacts-to-be-uploaded"

  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
  upload-models:
    needs: export-models
    runs-on: linux.2xlarge
    steps:
      - name: Download the artifacts from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: android-models
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the artifacts
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the artifacts to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 1
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Builds the Android LLM demo app once per tokenizer.
  build-llm-demo:
    name: build-llm-demo
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: set-models
    strategy:
      matrix:
        tokenizer: [bpe, tiktoken]
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android
      submodules: 'true'
      # Pull requests build the PR head commit; every other event builds github.sha.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      # Must match the artifact name downloaded by upload-android-apps.
      upload-artifact: android-apps
      script: |
        set -eux
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
        # TODO: This needs to be replaced with a generic loader .apk
        # Build LLM Demo for Android
        bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}

  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
  upload-android-apps:
    needs: build-llm-demo
    runs-on: linux.2xlarge
    steps:
      - name: Download the artifacts from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: android-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the artifacts
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the artifacts to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Let's see how expensive this job is, we might want to tone it down by running it periodically
  benchmark-on-device:
    needs:
      - upload-models
      - upload-android-apps
    # Overrides the workflow-level read-all default for this job.
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
    strategy:
      matrix:
        # https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
        # mentions that tiktoken is only for Llama3. So, we can export it later in another archive
        # like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
        # updated to run Llama3
        tokenizer: [bpe]
    with:
      device-type: android
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      # This is the custom Android device pool that only includes Samsung Galaxy S2x
      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
      # Uploaded to S3 from the previous job, the name of the app comes from the project itself
      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
      # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
      # Uploaded to S3 from the previous job
      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/model.zip