diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 30b77ee38f1..5472fe477d5 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -13,6 +13,7 @@ MODEL_NAME=$1 # stories110M.pt
 BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
 MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
+UPLOAD_DIR=${5:-} # optional; directory that receives model.zip for upload
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
   echo "Expecting atleast 4 positional arguments"
   echo "Usage: [...]"
@@ -126,6 +127,17 @@ cleanup_files() {
   rm params.json
 }
 
+# Zip ${MODEL_NAME} and tokenizer.bin into model.zip and copy the archive into
+# ${UPLOAD_DIR}. Does nothing when UPLOAD_DIR is empty.
+upload_artifacts() {
+  if [[ -n "${UPLOAD_DIR}" ]]; then
+    echo "Uploading generated artifacts"
+    mkdir -p "${UPLOAD_DIR}"
+    zip -j "model.zip" "${MODEL_NAME}" tokenizer.bin
+    cp "model.zip" "${UPLOAD_DIR}"
+  fi
+}
+
 # Download and create artifacts.
 PARAMS="params.json"
 touch "${PARAMS}"
@@ -205,6 +217,7 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
   echo "Actual result: ${RESULT}"
   echo "Success"
 
+  upload_artifacts
   cleanup_files
 else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
diff --git a/.github/pytorch-probot.yml b/.github/pytorch-probot.yml
index f684d83fa51..8cf5e087c51 100644
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@@ -5,3 +5,4 @@ ciflow_push_tags:
 - ciflow/binaries
 - ciflow/binaries/all
 - ciflow/periodic
+- ciflow/perf-android
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
new file mode 100644
index 00000000000..593a4477279
--- /dev/null
+++ b/.github/workflows/android-perf.yml
@@ -0,0 +1,196 @@
+name: android-perf
+
+on:
+  schedule:
+    - cron: 0 0 * * *
+  # TODO: Remove 'pull_request' before merge
+  pull_request:
+  push:
+    tags:
+      - ciflow/perf-android
+  # Note: GitHub has an upper limit of 10 inputs
+  workflow_dispatch:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: stories110M
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        # NOTE(review): boolean-looking default on a string input; confirm the intended device name
+        default: false
+      delegates:
+        description: Backend delegates
+        required: false
+        type: string
+        default: xnnpack
+      threadpool:
+        description: Run with threadpool?
+        required: false
+        type: boolean
+        default: false
+      benchmark_configs:
+        description: The list of configs used by the benchmark
+        required: false
+        type: string
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+permissions: read-all
+
+jobs:
+  set-models:
+    runs-on: linux.2xlarge
+    outputs:
+      models: ${{ steps.set-models.outputs.models }}
+    steps:
+      - name: Set models
+        id: set-models
+        run: |
+          # Only workflow_dispatch supplies inputs; fall back to the default model otherwise
+          MODELS="${{ github.event.inputs.models || 'stories110M' }}"
+          echo "${MODELS}"
+          echo "models=$(echo "${MODELS}" | jq -Rc 'split(",")')" >> "${GITHUB_OUTPUT}"
+  export-models:
+    name: export-models
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    needs: set-models
+    strategy:
+      matrix:
+        model: ${{ fromJson(needs.set-models.outputs.models) }}
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      timeout: 60
+      upload-artifact: android-models
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+        echo "Exporting model: ${{ matrix.model }}"
+
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "artifacts-to-be-uploaded"
+  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
+  upload-models:
+    needs: export-models
+    runs-on: linux.2xlarge
+    steps:
+      - name: Download the artifacts from GitHub
+        uses: actions/download-artifact@v3
+        with:
+          # The name here needs to match the name of the upload-artifact parameter
+          name: android-models
+          path: ${{ runner.temp }}/artifacts/
+
+      - name: Verify the artifacts
+        shell: bash
+        working-directory: ${{ runner.temp }}/artifacts/
+        run: |
+          ls -lah ./
+
+      - name: Upload the artifacts to S3
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifact
+          retention-days: 1
+          if-no-files-found: ignore
+          path: ${{ runner.temp }}/artifacts/
+
+  build-llm-demo:
+    name: build-llm-demo
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        tokenizer: [bpe, tiktoken]
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12-android
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      upload-artifact: android-apps
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
+        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
+
+        # TODO: This needs to be replaced with a generic loader .apk
+        # Build LLM Demo for Android
+        bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
+  upload-android-apps:
+    needs: build-llm-demo
+    runs-on: linux.2xlarge
+    steps:
+      - name: Download the artifacts from GitHub
+        uses: actions/download-artifact@v3
+        with:
+          # The name here needs to match the name of the upload-artifact parameter
+          name: android-apps
+          path: ${{ runner.temp }}/artifacts/
+
+      - name: Verify the artifacts
+        shell: bash
+        working-directory: ${{ runner.temp }}/artifacts/
+        run: |
+          ls -lah ./
+
+      - name: Upload the artifacts to S3
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifact
+          retention-days: 14
+          if-no-files-found: ignore
+          path: ${{ runner.temp }}/artifacts/
+
+  # Let's see how expensive this job is, we might want to tone it down by running it periodically
+  benchmark-on-device:
+    needs:
+      - upload-models
+      - upload-android-apps
+    permissions:
+      id-token: write
+      contents: read
+    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
+    strategy:
+      matrix:
+        # https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
+        # mentions that tiktoken is only for Llama3. So, we can export it later in another archive
+        # like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
+        # updated to run Llama3
+        tokenizer: [bpe]
+    with:
+      device-type: android
+      runner: linux.2xlarge
+      test-infra-ref: ''
+      # This is the ARN of ExecuTorch project on AWS
+      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
+      # This is the custom Android device pool that only includes Samsung Galaxy S2x
+      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
+      # Uploaded to S3 from the previous job, the name of the app comes from the project itself
+      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
+      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
+      # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
+      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
+      # Uploaded to S3 from the previous job
+      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/model.zip