LLM tests restructuring (#1440)
- Merged chat scenario tests into test_llm_pipeline.py
- Created a dedicated test_continuous_batching.py file with CB-specific
tests (in addition to test_llm_pipeline.py, which covers basic LLM
pipeline functionality)

CVS-159921
ilya-lavrenov authored Dec 27, 2024
1 parent 8fe0ff5 commit 82b44fa
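
For orientation, a minimal sketch of how the reorganized suites might be run locally; the two file paths come from this commit, while the pytest invocation itself is illustrative:

import sys
import pytest

# Basic LLM pipeline functionality, now including the former chat-scenario tests.
rc = pytest.main(["-v", "tests/python_tests/test_llm_pipeline.py"])
# Continuous-batching-specific behaviour lives in its own file.
rc |= pytest.main(["-v", "tests/python_tests/test_continuous_batching.py"])
sys.exit(rc)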
Showing 15 changed files with 418 additions and 388 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
@@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
@@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
@@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
@@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
@@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
@@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
defaults:
run:
shell: bash
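The matrix above splits the suite so the Whisper tests run separately from everything else. An illustrative local equivalent of the 'LLM & VLM' entry, assuming pytest is invoked from the repository root:

import pytest

# Run everything under tests/python_tests except the Whisper suite,
# mirroring the 'LLM & VLM' matrix entry.
pytest.main([
    "-v", "tests/python_tests",
    "--ignore", "tests/python_tests/test_whisper_pipeline.py",
])
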
8 changes: 4 additions & 4 deletions .github/workflows/mac.yml
@@ -178,7 +178,7 @@ jobs:
if: |
always() &&
(needs.openvino_download.outputs.status == 'success' || needs.openvino_build.result == 'success')
timeout-minutes: 90
timeout-minutes: 120
defaults:
run:
shell: bash
@@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
@@ -290,7 +290,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/:$PYTHONPATH"

@@ -300,7 +300,7 @@
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_package:
name: OpenVINO genai extension (install to OpenVINO package)
8 changes: 4 additions & 4 deletions .github/workflows/windows.yml
@@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
@@ -301,7 +301,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

@@ -310,7 +310,7 @@
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
@@ -366,7 +366,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_vlm_api.py
python -m pytest -v ./tests/python_tests/test_vlm_pipeline.py
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

12 changes: 4 additions & 8 deletions src/cpp/src/llm_pipeline.cpp
@@ -703,8 +703,7 @@ std::pair<ov::AnyMap, ov::genai::ModelConfigDesc> split_model_descr(const ov::An
ov::genai::LLMPipeline::LLMPipeline(
const ov::InferRequest& request,
const ov::genai::Tokenizer& tokenizer,
OptionalGenerationConfig generation_config
) {
OptionalGenerationConfig generation_config) {
auto start_time = std::chrono::steady_clock::now();
m_pimpl = std::make_unique<StatefulLLMPipeline>(request, tokenizer, generation_config);
auto stop_time = std::chrono::steady_clock::now();
@@ -715,8 +714,7 @@ ov::genai::LLMPipeline::LLMPipeline(
const std::filesystem::path& models_path,
const ov::genai::Tokenizer& tokenizer,
const std::string& device,
const ov::AnyMap& properties
){
const ov::AnyMap& properties) {
auto start_time = std::chrono::steady_clock::now();
if (properties.find(ov::genai::scheduler_config.name()) != properties.end() ||
properties.find(utils::DRAFT_MODEL_ARG_NAME) != properties.end() ||
@@ -735,8 +733,7 @@ ov::genai::LLMPipeline::LLMPipeline(
ov::genai::LLMPipeline::LLMPipeline(
const std::filesystem::path& models_path,
const std::string& device,
const ov::AnyMap& config
){
const ov::AnyMap& config) {
auto start_time = std::chrono::steady_clock::now();

if (config.find(ov::genai::scheduler_config.name()) != config.end() ||
@@ -759,8 +756,7 @@ ov::genai::LLMPipeline::LLMPipeline(
const ov::genai::Tokenizer& tokenizer,
const std::string& device,
const ov::AnyMap& config,
const ov::genai::GenerationConfig& generation_config
){
const ov::genai::GenerationConfig& generation_config) {
auto [core_properties, plugin_config] = ov::genai::utils::split_core_compile_config(config);

auto start_time = std::chrono::steady_clock::now();
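The constructors above dispatch on the supplied properties: when scheduler_config or a draft model (among other triggers) is present in the property map, the pipeline is backed by the continuous batching implementation rather than the stateful one. A sketch of triggering that path from Python, assuming the openvino_genai package; the model path and cache size are placeholders:

import openvino_genai as ov_genai

scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.cache_size = 2  # KV cache size in GB (placeholder value)

# Passing scheduler_config routes construction to the continuous batching backend.
pipe = ov_genai.LLMPipeline("./TinyLlama-1.1B-Chat-v1.0", "CPU",
                            scheduler_config=scheduler_config)
print(pipe.generate("Why is the sky blue?", max_new_tokens=32))
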
14 changes: 1 addition & 13 deletions tests/python_tests/common.py
@@ -364,18 +364,6 @@ def run_continuous_batching(
return output


def read_models_list(file_name: str):
models = []
with open(file_name) as f:
for model_name in f:
model_name = model_name.strip()
# skip comment in model scope file
if model_name.startswith('#'):
continue
models.append(model_name)
return models


def compare_results(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig):
if generation_config.is_beam_search():
assert len(hf_result.m_scores) == len(ov_result.m_scores)
@@ -447,7 +435,7 @@ def generate_and_compare_with_reference_text(models_path: Path, prompts: List[st
assert ref_text == ov_text


def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = None, generation_config = None):
def run_continuous_batching_pipeline_test(tmp_path: str, model_id: str, scheduler_params: dict = None, generation_config = None):
prompts, generation_configs = get_test_dataset()
scheduler_config = get_scheduler_config(scheduler_params)

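A hypothetical caller of the renamed helper, to show the intended shape of a continuous batching test; the model id and the scheduler_params value are illustrative:

from common import run_continuous_batching_pipeline_test

def test_dynamic_split_fuse(tmp_path):
    # scheduler_params keys mirror the SchedulerConfig fields that
    # get_scheduler_config() consumes.
    run_continuous_batching_pipeline_test(
        tmp_path,
        "facebook/opt-125m",
        scheduler_params={"dynamic_split_fuse": True},
    )
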
29 changes: 15 additions & 14 deletions tests/python_tests/ov_genai_test_utils.py
@@ -32,7 +32,7 @@ def get_models_list():
"HuggingFaceH4/zephyr-7b-beta",
"ikala/redpajama-3b-chat",
"mistralai/Mistral-7B-v0.1",

# "meta-llama/Llama-2-7b-chat-hf", # Cannot be downloaded without access token
# "google/gemma-2b-it", # Cannot be downloaded without access token.
# "google/gemma-7b-it", # Cannot be downloaded without access token.
@@ -49,7 +49,7 @@
model_ids = precommit_models
else:
model_ids = nightly_models

if pytest.selected_model_ids:
model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')]
# pytest.set_trace()
@@ -82,30 +82,30 @@ def get_chat_models_list():
@functools.lru_cache(1)
def read_model(params, **tokenizer_kwargs):
model_id, path = params

from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer
hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

if (path / "openvino_model.xml").exists():
opt_model = OVModelForCausalLM.from_pretrained(path, trust_remote_code=True,
opt_model = OVModelForCausalLM.from_pretrained(path, trust_remote_code=True,
compile=False, device='CPU')
else:
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
with_detokenizer=True,
**tokenizer_kwargs)
openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

# to store tokenizer config jsons with special tokens
hf_tokenizer.save_pretrained(path)
opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,

opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,
compile=False, device='CPU', load_in_8bit=False)
opt_model.generation_config.save_pretrained(path)
opt_model.config.save_pretrained(path)
opt_model.save_pretrained(path)

return (
model_id,
path,
@@ -116,11 +116,11 @@ def read_model(params, **tokenizer_kwargs):


# in OpenVINO GenAI this parameter is called stop_criteria,
# while in HF it's called early_stopping.
# while in HF it's called early_stopping.
# HF values True, False and "never" correspond to OV GenAI values "EARLY", "HEURISTIC" and "NEVER"
STOP_CRITERIA_MAP = {
ov_genai.StopCriteria.NEVER: "never",
ov_genai.StopCriteria.EARLY: True,
ov_genai.StopCriteria.NEVER: "never",
ov_genai.StopCriteria.EARLY: True,
ov_genai.StopCriteria.HEURISTIC: False
}

@@ -137,6 +137,7 @@ def model_tmp_path(tmpdir_factory):
shutil.copy(src_file, temp_path / src_file.name)
yield model_id, Path(temp_path)


@pytest.fixture(scope="module")
def model_tokenizers_path_tmp_path(tmpdir_factory):
model_id, path, _, _, _ = read_model(get_models_list()[0])
@@ -146,7 +147,7 @@ def model_tokenizers_path_tmp_path(tmpdir_factory):
# There was no easy way to add tokens to IR in tests, so we remove them
# and set tokens in configs and to check if they are read and validated correctly.
import openvino as ov

# copy openvino converted model and tokenizers
for pattern in ['*.xml', '*.bin']:
for src_file in path.glob(pattern):
Expand All @@ -162,7 +163,7 @@ def model_tokenizers_path_tmp_path(tmpdir_factory):
ov_model.set_rt_info("eos_token_id", "")
ov_model.set_rt_info("chat_template", "")
ov.save_model(ov_model, str(temp_path / src_file.name))

if src_file in ['openvino_tokenizer.bin', 'openvino_detokenizer.bin']:
continue
if src_file.is_file():
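As the comment above notes, OpenVINO GenAI's stop_criteria and HF's early_stopping name the same beam-search knob. A sketch of how STOP_CRITERIA_MAP might be consumed when building HF generate() kwargs; everything except the map itself is illustrative:

import openvino_genai as ov_genai
from ov_genai_test_utils import STOP_CRITERIA_MAP

ov_stop_criteria = ov_genai.StopCriteria.HEURISTIC

hf_generate_kwargs = {
    "num_beams": 4,
    # HF expects True / False / "never" here.
    "early_stopping": STOP_CRITERIA_MAP[ov_stop_criteria],
}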