From da54a2f2b2435cf73013d6ea75d68c469da0c3be Mon Sep 17 00:00:00 2001
From: Guang Yang
Date: Thu, 15 Aug 2024 17:58:16 -0700
Subject: [PATCH] Add the example non-genai qnn model to ci and benchinfra

* .ci/scripts/build-qnn-sdk.sh: turn on `set -u`, resolve EXECUTORCH_ROOT
  two directories above the script, and add set_up_aot(), which configures
  cmake-out, builds the PyQnnManagerAdaptor / PyQnnWrapperAdaptor targets,
  copies the resulting .so files into backends/qualcomm/python and copies
  the fbs schemas into exir/_serialize.
* .ci/scripts/test.sh: read an optional 4th argument as UPLOAD_DIR, default
  PYTHON_EXECUTABLE to python3, add test_model_with_qnn() (compile-only
  export of dl3 for SM8550) and prepare_artifacts_upload(), which zips
  EXPORTED_MODEL into UPLOAD_DIR when UPLOAD_DIR is non-empty.
* .github/workflows/android-perf.yml: use the -android docker image, set up
  the QNN SDK when the delegate is "qnn", derive DELEGATE_CONFIG from the
  matrix delegate, and send non-llama models through test.sh. The llama*
  pattern is deliberately left unquoted: inside [[ ]] a quoted right-hand
  side is compared as a literal string and would never glob-match.
* .github/workflows/trunk.yml: add a test-qnn-model job that runs
  test.sh for dl3 with the "qnn" backend.

---
 .ci/scripts/build-qnn-sdk.sh       | 32 +++++++++++++++++++++++--
 .ci/scripts/test.sh                | 38 ++++++++++++++++++++++++++++++
 .github/workflows/android-perf.yml | 35 ++++++++++++++++++++-------
 .github/workflows/trunk.yml        | 23 ++++++++++++++++++
 4 files changed, 117 insertions(+), 11 deletions(-)

diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
index d912069b06..260072f734 100644
--- a/.ci/scripts/build-qnn-sdk.sh
+++ b/.ci/scripts/build-qnn-sdk.sh
@@ -5,15 +5,43 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-set -ex
+set -eux

 build_qnn_backend() {
   echo "Start building qnn backend."
   export ANDROID_NDK_ROOT=/opt/ndk
   export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
-  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

   bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
 }

+set_up_aot() {
+  cd "$EXECUTORCH_ROOT"
+  if [ ! -d "cmake-out" ]; then
+      mkdir cmake-out
+  fi
+  pushd cmake-out
+  cmake .. \
+      -DCMAKE_INSTALL_PREFIX="$PWD" \
+      -DEXECUTORCH_BUILD_QNN=ON \
+      -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
+      -DEXECUTORCH_BUILD_SDK=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+      -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+      -DPYTHON_EXECUTABLE=python3 \
+      -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+  cmake --build "$PWD" --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j"$(nproc)"
+  # install Python APIs to correct import path
+  # The filename might vary depending on your Python and host version.
+  cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so "$EXECUTORCH_ROOT/backends/qualcomm/python"
+  cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so "$EXECUTORCH_ROOT/backends/qualcomm/python"
+  popd
+
+  # Workaround for fbs files in exir/_serialize
+  cp schema/program.fbs exir/_serialize/program.fbs
+  cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+}
+
 build_qnn_backend
+set_up_aot
diff --git a/.ci/scripts/test.sh b/.ci/scripts/test.sh
index ad02fdc79d..bdb71921bf 100755
--- a/.ci/scripts/test.sh
+++ b/.ci/scripts/test.sh
@@ -28,10 +28,25 @@ if [[ -z "${BACKEND:-}" ]]; then
   exit 1
 fi

+UPLOAD_DIR=${4:-}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
 which "${PYTHON_EXECUTABLE}"
+
 # Just set this variable here, it's cheap even if we use buck2
 CMAKE_OUTPUT_DIR=cmake-out

+prepare_artifacts_upload() {
+  if [ -n "$UPLOAD_DIR" ]; then
+    echo "Preparing for uploading generated artifacts"
+    zip -j model.zip "${EXPORTED_MODEL}"
+    mkdir -p "${UPLOAD_DIR}"
+    mv model.zip "${UPLOAD_DIR}"
+  fi
+}
+
 build_cmake_executor_runner() {
   echo "Building executor_runner"
   (rm -rf ${CMAKE_OUTPUT_DIR} \
@@ -129,9 +144,30 @@ test_model_with_xnnpack() {
   fi
 }

+test_model_with_qnn() {
+  source "$(dirname "${BASH_SOURCE[0]}")/build-qnn-sdk.sh"
+  echo "ANDROID_NDK_ROOT: $ANDROID_NDK_ROOT"
+  echo "QNN_SDK_ROOT: $QNN_SDK_ROOT"
+  echo "EXECUTORCH_ROOT: $EXECUTORCH_ROOT"
+
+  export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
+  export PYTHONPATH=$EXECUTORCH_ROOT/..
+
+  if [[ "${MODEL_NAME}" == "dl3" ]]; then
+    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.deeplab_v3 -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only --download
+    EXPORTED_MODEL=./${CMAKE_OUTPUT_DIR}/deeplab_v3/dlv3_qnn.pte
+  fi
+}
+
 if [[ "${BACKEND}" == "portable" ]]; then
   echo "Testing ${MODEL_NAME} with portable kernels..."
   test_model
+elif [[ "${BACKEND}" == "qnn" ]]; then
+  echo "Testing ${MODEL_NAME} with qnn..."
+  test_model_with_qnn
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
 else
   set +e
   if [[ "${BACKEND}" == *"quantization"* ]]; then
@@ -153,5 +189,7 @@ else
   if [[ -n "${Q_ERROR:-}" ]] || [[ -n "${D_ERROR:-}" ]] || [[ -n "${Q_D_ERROR:-}" ]]; then
     echo "Portable q8 ${Q_ERROR:-ok}," "Delegation fp32 ${D_ERROR:-ok}," "Delegation q8 ${Q_D_ERROR:-ok}"
     exit 1
+  else
+    prepare_artifacts_upload
   fi
 fi
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index fcc3421772..86219a817e 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -135,24 +135,41 @@ jobs:
       fail-fast: false
     with:
       runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
+      docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
+        echo "::group::Setting up dev environment"
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-
+        if [[ ${{ matrix.delegate }} == "qnn" ]]; then
+          PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+          PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        fi
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
-        echo "Exporting model: ${{ matrix.model }}"
-        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
+        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
+        echo "::endgroup::"
+
+        echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
+        BUILD_MODE="cmake"
+        DTYPE="fp32"
+        if [[ ${{ matrix.delegate }} == "qnn" ]]; then
+          DELEGATE_CONFIG="qnn"
+        elif [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
+          DELEGATE_CONFIG="xnnpack+custom+qe"
+        fi

-        # TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates.
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"\
+        if [[ ${{ matrix.model }} == llama* ]]; then
+          # Install requirements for export_llama
+          PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+          # Test llama2
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+        else
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+        fi
+        echo "::endgroup::"

   # Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
   upload-models:
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 9b28d26048..969ea3d361 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -270,3 +270,26 @@ jobs:
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
         # Test llama2
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+
+  test-qnn-model:
+    name: test-qnn-model
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        model: [dl3]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12-android
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh ${{ matrix.model }} "cmake" "qnn"