[Good First Issue] Verify baichuan2-7b-chat with GenAI text_generation #328

Closed · wants to merge 5 commits
42 changes: 42 additions & 0 deletions .github/workflows/causal_lm_cpp.yml
@@ -265,6 +265,48 @@ jobs:
convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt

cpp-beam_search_causal_lm-Baichuan2-7B-Chat:
runs-on: ubuntu-20.04-16-cores
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Install OpenVINO
run: |
mkdir ./ov/
curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
- name: Download, convert and build
run: |
source ./ov/setupvars.sh
python -m pip install -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id baichuan-inc/Baichuan2-7B-Chat --output_dir ./Baichuan2-7B-Chat/ --precision FP16 &
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
wait
- name: Run and Compare
run: |
source ./ov/setupvars.sh
convert_tokenizer ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ --output ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Baichuan2-7B-Chat/pytorch/dldt/FP16/ "69" > ./pred_baichuan2.txt
python -c "
import transformers
with open('pred_baichuan2.txt', 'r') as file:
predictions = file.read()

tokenizer = transformers.AutoTokenizer.from_pretrained('baichuan-inc/Baichuan2-7B-Chat',trust_remote_code=True)
tokenized = tokenizer('69', return_tensors='pt')
for beam in transformers.AutoModelForCausalLM.from_pretrained('baichuan-inc/Baichuan2-7B-Chat',trust_remote_code=True).generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
idx = predictions.find(ref)
Contributor:

Now we get an error on this line:

RuntimeError: Missing ref='69696969696969696969' from predictions
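
For reference, the comparison that raises this error (from the inline `python -c` script further down in this diff) reduces to the following sketch, where `predictions` is the C++ beam-search output and each `ref` is a Hugging Face reference beam:

```python
# Distilled from the workflow's inline script: every reference beam must occur
# in the C++ predictions; each matched span is removed so that duplicated
# beams must each be matched separately.
def check(predictions: str, refs: list[str]) -> None:
    for ref in refs:
        idx = predictions.find(ref)
        if idx == -1:
            raise RuntimeError(f'Missing {ref=} from predictions')
        predictions = predictions[:idx] + predictions[idx + len(ref):]
```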

Contributor Author:

Yes, the same thing happened locally. I'm not sure whether that's the real problem; could it be due to the random nature of generation? I'm looking into it.

Contributor:

[screenshot: transformers generation warnings]

Maybe we need to address these warnings? Let's try to fully override the GenerationConfig for the model.
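
One way to act on that suggestion, as a minimal untested sketch (the assumption that Baichuan2's bundled config carries sampling fields is mine, not confirmed in this thread):

```python
import transformers

model = transformers.AutoModelForCausalLM.from_pretrained(
    'baichuan-inc/Baichuan2-7B-Chat', trust_remote_code=True)
# Assumption: the checkpoint ships a generation_config with sampling fields
# (e.g. temperature, top_p) that emit warnings when do_sample=False.
# Rebuilding a clean GenerationConfig from the model config overrides them,
# so the arguments passed to generate() fully determine decoding.
model.generation_config = transformers.GenerationConfig.from_model_config(model.config)
```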

Contributor Author:

Great suggestion; these warnings could be the clue. I'll try to understand these parameters and make the change.

if -1 == idx:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
echo 69 passed

cpp-beam_search_causal_lm-Phi-2:
runs-on: ubuntu-20.04-16-cores
steps:
12 changes: 7 additions & 5 deletions text_generation/causal_lm/cpp/README.md
@@ -140,13 +140,15 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro
4. https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4
[Qwen-7B-Chat-Int4 - Torch not compiled with CUDA enabled](../../../llm_bench/python/doc/NOTES.md#qwen-7b-chat-int4---torch-not-compiled-with-cuda-enabled)
in case of `AssertionError`
-7. Dolly
+7. Baichuan
+   1. https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
+8. Dolly
    1. https://huggingface.co/databricks/dolly-v2-3b
-8. Phi
+9. Phi
    1. https://huggingface.co/microsoft/phi-2
    2. https://huggingface.co/microsoft/phi-1_5
-9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
-10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
-11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+10. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
+11. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+12. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)

This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
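
For instance, a hypothetical export of one of the models listed above via `optimum-intel` (the model id and output directory are illustrative, not part of this PR):

```python
# Sketch: export a decoder-only causal LM to OpenVINO IR with optimum-intel,
# then save the tokenizer alongside it for the pipeline to pick up.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = 'databricks/dolly-v2-3b'  # illustrative; any model with the same signature
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True)
ov_model.save_pretrained('./dolly-v2-3b/')
AutoTokenizer.from_pretrained(model_id).save_pretrained('./dolly-v2-3b/')
```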