Add performance statistics for speculative decoding #2991

Workflow file for this run

.github/workflows/llm_bench-python.yml at 9c441ec

	# This workflow will install Python dependencies, run tests and lint with a single version of Python
	# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

	name: llm_bench Python Test

	# Triggers: manual dispatch, pull requests, merge-queue groups, and pushes
	# to master or any releases/** branch.
	on:
	  workflow_dispatch:
	  pull_request:
	  merge_group:
	  push:
	    branches:
	      - master
	      - 'releases/**'

	permissions: read-all  # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions

	# One concurrency group per ref; for push events the run id is used instead,
	# so each push gets its own group.
	concurrency:
	  # github.ref is not unique in post-commit
	  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-llm-bench-python
	  cancel-in-progress: true

	jobs:
	  # Runs the openvino_provider action (wheels for ubuntu22, latest available
	  # commit) and republishes its artifact name, wheel source and version as
	  # job outputs for the jobs that list it in `needs`.
	  openvino_download:
	    name: Download OpenVINO
	    outputs:
	      status: ${{ steps.openvino_download.outcome }}
	      ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
	      ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
	      ov_version: ${{ steps.openvino_download.outputs.ov_version }}
	    timeout-minutes: 10
	    defaults:
	      run:
	        shell: bash
	    runs-on: aks-linux-2-cores-8gb
	    container:
	      image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
	      volumes:
	        - /mount:/mount
	        - ${{ github.workspace }}:${{ github.workspace }}

	    steps:
	      # NOTE(review): this action tracks the master branch, while every other
	      # action in this file is pinned to a commit SHA - confirm this is intended.
	      - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
	        id: openvino_download
	        with:
	          platform: ubuntu22
	          commit_packages_to_provide: wheels
	          revision: latest_available_commit

	  # Installs the package and llm_bench requirements, lints llm_bench and
	  # who_what_benchmark with flake8, runs benchmark.py against several models
	  # on CPU, then runs the who_what_benchmark pytest suite.
	  build:
	    defaults:
	      run:
	        shell: bash
	    runs-on: ubuntu-22.04
	    strategy:
	      fail-fast: false
	      matrix:
	        python-version: ["3.11"]
	    needs: [ openvino_download ]
	    env:
	      OV_INSTALL_DIR: ${{ github.workspace }}/ov
	      SRC_DIR: ${{ github.workspace }}
	      LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench
	      WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark

	    steps:
	      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          submodules: recursive
	      - name: Set up Python ${{ matrix.python-version }}
	        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
	        with:
	          python-version: ${{ matrix.python-version }}
	      - name: Download OpenVINO package
	        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
	        with:
	          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
	          path: ${{ env.OV_INSTALL_DIR }}
	          merge-multiple: true
	      - name: Install dependencies
	        run: |
	          python -m pip install --upgrade pip
	          python -m pip install flake8 pytest black
	          python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
	          python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
	          GIT_CLONE_PROTECTION_ACTIVE=false python -m pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
	        working-directory: ${{ env.OV_INSTALL_DIR }}
	      - name: Lint with flake8
	        run: |
	          # stop the build if there are Python syntax errors or undefined names
	          python -m flake8 ${{ env.LLM_BENCH_PYPATH }} --config=${{ env.LLM_BENCH_PYPATH }}/setup.cfg
	          python -m flake8 ${{ env.WWB_PATH }} --config=${{ env.WWB_PATH }}/setup.cfg
	      # The next two steps run only after an earlier step has failed: they
	      # reformat llm_bench with black and upload the resulting git diff.
	      - name: Create code style diff for samples
	        if: failure()
	        run: |
	          python -m black -l 160 -S ${{ env.LLM_BENCH_PYPATH }}/
	          git diff > llm.bench_diff.diff
	      - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
	        if: failure()
	        with:
	          name: llm.bench_diff
	          path: llm.bench_diff.diff
	      - name: Test native pytorch model on Linux
	        run: |
	          git clone --depth 1 https://huggingface.co/katuni4ka/tiny-random-qwen
	          python ./tools/llm_bench/benchmark.py -m tiny-random-qwen -d cpu -n 1 -f pt -ic 20
	          rm -rf tiny-random-qwen
	        env:
	          # Quoted so the value is an explicit string rather than a YAML integer.
	          GIT_LFS_SKIP_SMUDGE: "0"
	      - name: Test tiny-random-baichuan2 on Linux Optimum Intel
	        run: |
	          optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ -d cpu -n 1 --optimum -ic 10
	          rm -rf ./ov_models/tiny-random-baichuan2
	      # The three LCM_Dreamshaper steps share ./ov_models/lcm_dreamshaper_v7:
	      # the first downloads it and the last removes it.
	      - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux Optimum Intel
	        run: |
	          huggingface-cli download OpenVINO/LCM_Dreamshaper_v7-int8-ov --local-dir ov_models/lcm_dreamshaper_v7
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum -ic 4
	      - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI
	        run: |
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 -ic 4
	      - name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI and LoRA
	        run: |
	          wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 -ic 4
	          rm -rf ./ov_models/lcm_dreamshaper_v7/
	      # Exports TinyLlama twice (FP16 as the main model, INT8 as the draft
	      # model) and benchmarks once with --assistant_confidence_threshold and
	      # once with --num_assistant_tokens.
	      - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
	        run: |
	          optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
	          optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --assistant_confidence_threshold 0.4 -ic 20
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --num_assistant_tokens 5 -ic 20
	          rm -rf ov_models/TinyLlama-1.1B-Chat-v1.0
	      # Clones the dataset repo with LFS smudging skipped, pulls a single
	      # audio archive, and benchmarks whisper-tiny with and without --optimum.
	      - name: Test whisper-tiny on Linux
	        run: |
	          GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
	          cd multilingual_librispeech
	          git lfs pull -I /data/mls_polish/train/audio/3283_1447_000.tar.gz
	          mkdir data/mls_polish/train/audio/3283_1447_000
	          tar zxvf data/mls_polish/train/audio/3283_1447_000.tar.gz -C data/mls_polish/train/audio/3283_1447_000/
	          cd ..
	          optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1 --optimum
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media multilingual_librispeech/data/mls_polish/train/audio/3283_1447_000/3283_1447_000000.flac -d cpu -n 1
	          rm -rf ./ov_models/whisper-tiny
	          rm -rf multilingual_librispeech
	      - name: Test InternVL2-1B on Linux
	        run: |
	          optimum-cli export openvino --model OpenGVLab/InternVL2-1B ./ov_models/internvl2-1B --task image-text-to-text --trust-remote-code
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20
	          python ./tools/llm_bench/benchmark.py -m ./ov_models/internvl2-1B --media https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --prompt "What is unusual on this image?" -ic 20 --optimum
	          rm -rf ./ov_models/internvl2-1B
	      - name: WWB Tests
	        run: |
	          pip install git+https://github.com/huggingface/optimum-intel.git
	          GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
	          python -m pytest -v ${{ env.WWB_PATH }}/tests

	  # Runs convert.py with --stateful on TinyLlama and greps the produced
	  # openvino_model.xml for beam_idx, then runs the who_what_benchmark tests.
	  stateful:
	    defaults:
	      run:
	        shell: bash
	    runs-on: ubuntu-22.04
	    needs: [ openvino_download ]
	    env:
	      OV_INSTALL_DIR: ${{ github.workspace }}/ov
	      SRC_DIR: ${{ github.workspace }}
	      LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench
	      WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark

	    steps:
	      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          submodules: recursive
	      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
	        with:
	          python-version: "3.11"
	      - name: Download OpenVINO package
	        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
	        with:
	          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
	          path: ${{ env.OV_INSTALL_DIR }}
	          merge-multiple: true
	      - name: Test stateful
	        run: |
	          python -m pip install ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
	          python -m pip install ${{ env.SRC_DIR }} -v ${{ needs.openvino_download.outputs.ov_wheel_source }}
	          GIT_CLONE_PROTECTION_ACTIVE=false python -m pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt ${{ needs.openvino_download.outputs.ov_wheel_source }}
	          python ${{ env.LLM_BENCH_PYPATH }}/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ${{ env.SRC_DIR }} --stateful
	          grep beam_idx ${{ env.SRC_DIR }}/pytorch/dldt/FP32/openvino_model.xml
	        working-directory: ${{ env.OV_INSTALL_DIR }}
	      - name: WWB Tests
	        run: |
	          pip install pytest
	          pip install git+https://github.com/huggingface/optimum-intel.git
	          GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
	          python -m pytest -v ${{ env.WWB_PATH }}/tests

	  # Aggregate status job: always runs, and exits 1 when any job listed in
	  # `needs` reported 'failure' or 'cancelled'.
	  Overall_Status:
	    name: ci/gha_overall_status_llm_bench
	    needs: [openvino_download, build, stateful]
	    if: ${{ always() }}
	    runs-on: ubuntu-latest
	    steps:
	      - name: Check status of all jobs
	        if: >-
	          ${{
	            contains(needs.*.result, 'failure') ||
	            contains(needs.*.result, 'cancelled')
	          }}
	        run: exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add performance statistics for speculative decoding #2991

Workflow file

Add performance statistics for speculative decoding #2991

Jobs

Run details

Workflow file for this run