Add workflow for on-demand benchmarking #19

Workflow file for this run

.github/workflows/android-perf.yml at 8f406de

	# Workflow name; it is also interpolated into the concurrency group via github.workflow.
	name: android-perf

	# Triggers: a nightly schedule, every pull request, and manual (on-demand) dispatch.
	on:
	  schedule:
	    # Once a day at midnight.
	    - cron: '0 0 * * *'
	  pull_request:
	  # Note: GitHub has an upper limit of 10 inputs
	  workflow_dispatch:
	    inputs:
	      # Comma-separated list; the set-models job splits it into the job matrix.
	      models:
	        description: Models to be benchmarked
	        required: false
	        type: string
	        default: stories110M
	      devices:
	        description: Target devices to run benchmark
	        required: false
	        type: string
	        # A string input needs a string default (this was the boolean `false`).
	        # The value names the Samsung Galaxy S2x pool that benchmark-on-device targets.
	        default: samsung_galaxy_s2x
	      delegates:
	        description: Backend delegates
	        required: false
	        type: string
	        default: xnnpack
	      threadpool:
	        description: Run with threadpool?
	        required: false
	        type: boolean
	        default: false
	      benchmark_configs:
	        description: The list of configs used the benchmark
	        required: false
	        type: string

	# Runs that resolve to the same group cancel each other. The group is keyed on the
	# workflow name, the PR number (or ref name when there is no PR), the commit SHA for
	# branch refs, and whether the run was triggered manually or by the schedule.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	  cancel-in-progress: true

	# Workflow-wide token permissions; benchmark-on-device declares its own
	# permissions block (id-token: write, contents: read).
	permissions: read-all

	jobs:
	  # Turns the comma-separated `models` input into a JSON array that the
	  # export-models and benchmark-on-device matrices consume.
	  set-models:
	    runs-on: linux.2xlarge
	    outputs:
	      models: ${{ steps.set-models.outputs.models }}
	    steps:
	      - name: Set models
	        id: set-models
	        shell: bash
	        env:
	          # Handed over through the environment instead of being interpolated into
	          # the script text, so the input value is never parsed as shell code.
	          # Falls back to stories110M when the run has no `models` input.
	          MODELS: ${{ inputs.models || 'stories110M' }}
	        run: |
	          set -ex
	          echo "models=$(echo "${MODELS}" | jq -Rc 'split(",")')" >> "${GITHUB_OUTPUT}"

	  # Exports each requested model and uploads the result as the android-models artifact.
	  export-models:
	    name: export-models
	    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
	    needs: set-models
	    strategy:
	      matrix:
	        model: ${{ fromJson(needs.set-models.outputs.models) }}
	      fail-fast: false
	    with:
	      runner: linux.2xlarge
	      docker-image: executorch-ubuntu-22.04-clang12
	      submodules: 'true'
	      # Same ref expression as build-llm-demo, so both jobs build from the same commit.
	      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
	      timeout: 60
	      upload-artifact: android-models
	      script: |
	        # The generic Linux job chooses to use base env, not the one setup by the image
	        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
	        conda activate "${CONDA_ENV}"

	        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
	        echo "Exporting model: ${{ matrix.model }}"
	        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}

	        # Install requirements for export_llama
	        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
	        # Test llama2
	        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"

	  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
	  upload-models:
	    needs: export-models
	    runs-on: linux.2xlarge
	    steps:
	      - name: Download the artifacts from GitHub
	        uses: actions/download-artifact@v3
	        with:
	          # The name here needs to match the name of the upload-artifact parameter
	          name: android-models
	          path: ${{ runner.temp }}/artifacts/

	      - name: Verify the artifacts
	        shell: bash
	        working-directory: ${{ runner.temp }}/artifacts/
	        run: |
	          ls -lah ./

	      - name: Upload the artifacts to S3
	        uses: seemethere/upload-artifact-s3@v5
	        with:
	          s3-bucket: gha-artifacts
	          s3-prefix: |
	            ${{ github.repository }}/${{ github.run_id }}/artifact
	          retention-days: 1
	          if-no-files-found: ignore
	          path: ${{ runner.temp }}/artifacts/

	  # Builds the Android LLM demo app and uploads it as the android-apps artifact.
	  build-llm-demo:
	    name: build-llm-demo
	    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
	    needs: set-models
	    strategy:
	      matrix:
	        tokenizer: [bpe]
	    with:
	      runner: linux.2xlarge
	      docker-image: executorch-ubuntu-22.04-clang12-android
	      submodules: 'true'
	      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
	      timeout: 90
	      upload-artifact: android-apps
	      script: |
	        set -eux

	        # The generic Linux job chooses to use base env, not the one setup by the image
	        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
	        conda activate "${CONDA_ENV}"
	        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
	        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

	        # TODO: This needs to be replaced with a generic loader .apk
	        # Build LLM Demo for Android
	        bash build/build_android_llm_demo.sh "${{ matrix.tokenizer }}" "${ARTIFACTS_DIR_NAME}"

	  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
	  upload-android-apps:
	    needs: build-llm-demo
	    runs-on: linux.2xlarge
	    steps:
	      - name: Download the artifacts from GitHub
	        uses: actions/download-artifact@v3
	        with:
	          # The name here needs to match the name of the upload-artifact parameter
	          name: android-apps
	          path: ${{ runner.temp }}/artifacts/

	      - name: Verify the artifacts
	        shell: bash
	        working-directory: ${{ runner.temp }}/artifacts/
	        run: |
	          ls -lah ./

	      - name: Upload the artifacts to S3
	        uses: seemethere/upload-artifact-s3@v5
	        with:
	          s3-bucket: gha-artifacts
	          s3-prefix: |
	            ${{ github.repository }}/${{ github.run_id }}/artifact
	          retention-days: 14
	          if-no-files-found: ignore
	          path: ${{ runner.temp }}/artifacts/

	  # Let's see how expensive this job is, we might want to tone it down by running it periodically
	  benchmark-on-device:
	    permissions:
	      id-token: write
	      contents: read
	    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
	    needs:
	      - set-models
	      - upload-models
	      - upload-android-apps
	    strategy:
	      matrix:
	        model: ${{ fromJson(needs.set-models.outputs.models) }}
	      # Matches export-models: one model failing should not cancel the other benchmarks.
	      fail-fast: false
	    with:
	      device-type: android
	      runner: linux.2xlarge
	      test-infra-ref: ''
	      # This is the ARN of ExecuTorch project on AWS
	      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
	      # This is the custom Android device pool that only includes Samsung Galaxy S2x
	      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
	      # Uploaded to S3 from the previous job, the name of the app comes from the project itself.
	      # Unlike models, there is a limited number of build flavors for apps, and the model controls
	      # whether it should build with the bpe or tiktoken tokenizer. It's okay to build all possible
	      # apps with all possible flavors in job "build-llm-demo". However, in this job, once a model
	      # is given, there is only one app+flavor that could load and run the model.
	      # TODO: Hard code llm_demo_bpe for now in this job.
	      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
	      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
	      # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
	      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
	      # Uploaded to S3 from the previous job
	      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}/model.zip

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add workflow for on-demand benchmarking #19

Workflow file

Add workflow for on-demand benchmarking #19

Jobs

Run details

Workflow file for this run