From 931aa6b0820da6499217678918400b3d6887cc68 Mon Sep 17 00:00:00 2001 From: Uranus <109661872+UranusSeven@users.noreply.github.com> Date: Wed, 6 Sep 2023 22:16:44 +0800 Subject: [PATCH] FEAT: support baichuan2 (#425) --- README.md | 2 + README_ja_JP.md | 2 + README_zh_CN.md | 2 + doc/source/models/builtin/baichuan-2-chat.rst | 51 ++++++++++ doc/source/models/builtin/baichuan-2.rst | 48 ++++++++++ doc/source/models/builtin/index.rst | 4 + xinference/model/llm/llm_family.json | 92 +++++++++++++++++++ xinference/model/llm/pytorch/baichuan.py | 2 +- xinference/model/llm/pytorch/core.py | 1 + 9 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 doc/source/models/builtin/baichuan-2-chat.rst create mode 100644 doc/source/models/builtin/baichuan-2.rst diff --git a/README.md b/README.md index fd5e072468..1354acae0a 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,9 @@ $ xinference registrations | Type | Name | Language | Ability | |------|---------------------|--------------|------------------------| | LLM | baichuan | ['en', 'zh'] | ['embed', 'generate'] | +| LLM | baichuan-2 | ['en', 'zh'] | ['embed', 'generate'] | | LLM | baichuan-chat | ['en', 'zh'] | ['embed', 'chat'] | +| LLM | baichuan-2-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm2 | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm2-32k | ['en', 'zh'] | ['embed', 'chat'] | diff --git a/README_ja_JP.md b/README_ja_JP.md index bb530a0859..9838d8d23f 100644 --- a/README_ja_JP.md +++ b/README_ja_JP.md @@ -174,7 +174,9 @@ $ xinference registrations | Type | Name | Language | Ability | |------|---------------------|--------------|------------------------| | LLM | baichuan | ['en', 'zh'] | ['embed', 'generate'] | +| LLM | baichuan-2 | ['en', 'zh'] | ['embed', 'generate'] | | LLM | baichuan-chat | ['en', 'zh'] | ['embed', 'chat'] | +| LLM | baichuan-2-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm | ['en', 'zh'] | ['embed', 
'chat'] | | LLM | chatglm2 | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm2-32k | ['en', 'zh'] | ['embed', 'chat'] | diff --git a/README_zh_CN.md b/README_zh_CN.md index 7960d061ad..71e020e3fd 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -177,7 +177,9 @@ $ xinference registrations | Type | Name | Language | Ability | |------|---------------------|--------------|------------------------| | LLM | baichuan | ['en', 'zh'] | ['embed', 'generate'] | +| LLM | baichuan-2 | ['en', 'zh'] | ['embed', 'generate'] | | LLM | baichuan-chat | ['en', 'zh'] | ['embed', 'chat'] | +| LLM | baichuan-2-chat | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm2 | ['en', 'zh'] | ['embed', 'chat'] | | LLM | chatglm2-32k | ['en', 'zh'] | ['embed', 'chat'] | diff --git a/doc/source/models/builtin/baichuan-2-chat.rst b/doc/source/models/builtin/baichuan-2-chat.rst new file mode 100644 index 0000000000..0bbeb8affc --- /dev/null +++ b/doc/source/models/builtin/baichuan-2-chat.rst @@ -0,0 +1,51 @@ +.. _models_builtin_baichuan_2_chat: + +=============== +Baichuan-2-Chat +=============== + +- **Context Length:** 4096 +- **Model Name:** baichuan-2-chat +- **Languages:** en, zh +- **Abilities:** embed, generate, chat +- **Description:** Baichuan2-chat is a fine-tuned version of the Baichuan LLM, specializing in chatting. + +Specifications +^^^^^^^^^^^^^^ + +Model Spec 1 (pytorch, 7 Billion) ++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 7 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** baichuan-inc/Baichuan2-7B-Chat +- **Model Revision:** 2ce891951e000c36c65442608a0b95fd09b405dc + +Execute the following command to launch the model, remember to replace `${quantization}` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name baichuan-2-chat --size-in-billions 7 --model-format pytorch --quantization ${quantization} + +.. 
note:: + + Not supported on macOS. + + +Model Spec 2 (pytorch, 13 Billion) ++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 13 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** baichuan-inc/Baichuan2-13B-Chat +- **Model Revision:** a56c793eb7a721ab6c270f779024e0375e8afd4a + +Execute the following command to launch the model, remember to replace `${quantization}` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name baichuan-2-chat --size-in-billions 13 --model-format pytorch --quantization ${quantization} + +.. note:: + + Not supported on macOS. diff --git a/doc/source/models/builtin/baichuan-2.rst b/doc/source/models/builtin/baichuan-2.rst new file mode 100644 index 0000000000..bfb08a6949 --- /dev/null +++ b/doc/source/models/builtin/baichuan-2.rst @@ -0,0 +1,48 @@ +.. _models_builtin_baichuan_2: + +========== +Baichuan-2 +========== + +- **Context Length:** 4096 +- **Model Name:** baichuan-2 +- **Languages:** en, zh +- **Abilities:** embed, generate +- **Description:** Baichuan2 is an open-source Transformer based LLM that is trained on both Chinese and English data. + +Specifications +^^^^^^^^^^^^^^ + +Model Spec 1 (pytorch, 7 Billion) ++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 7 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** baichuan-inc/Baichuan2-7B-Base + +Execute the following command to launch the model, remember to replace `${quantization}` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name baichuan-2 --size-in-billions 7 --model-format pytorch --quantization ${quantization} + +.. note:: + + Not supported on macOS. 
+ +Model Spec 2 (pytorch, 13 Billion) ++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 13 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** baichuan-inc/Baichuan2-13B-Base + +Execute the following command to launch the model, remember to replace `${quantization}` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name baichuan-2 --size-in-billions 13 --model-format pytorch --quantization ${quantization} + +.. note:: + + Not supported on macOS. diff --git a/doc/source/models/builtin/index.rst b/doc/source/models/builtin/index.rst index 4244b10f28..a002025037 100644 --- a/doc/source/models/builtin/index.rst +++ b/doc/source/models/builtin/index.rst @@ -11,6 +11,7 @@ Text Generation Models ++++++++++++++++++++++ - :ref:`Baichuan ` +- :ref:`Baichuan-2 <models_builtin_baichuan_2>` - :ref:`Falcon ` - :ref:`InternLM ` - :ref:`Llama-2 ` @@ -21,6 +22,7 @@ Chat & Instruction-following Models +++++++++++++++++++++++++++++++++++ - :ref:`Baichuan Chat ` +- :ref:`Baichuan-2 Chat <models_builtin_baichuan_2_chat>` - :ref:`ChatGLM ` - :ref:`ChatGLM2 ` - :ref:`ChatGLM2-32k ` @@ -57,7 +59,9 @@ Code Assistant Models :hidden: baichuan-chat + baichuan-2-chat baichuan + baichuan-2 chatglm chatglm2-32k chatglm2 diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 61f90fdac1..2095bfb09f 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -1376,6 +1376,7 @@ "version": 1, "context_length": 100000, "model_name": "code-llama-instruct", + "model_description": "Code-Llama-Instruct is an instruct-tuned version of the Code-Llama LLM.", "model_lang": [ "en" ], @@ -1430,5 +1431,96 @@ 2 ] } + }, + { + "version": 1, + "context_length": 4096, + "model_name": "baichuan-2-chat", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "embed", + "generate", + "chat" + ], + "model_description": "Baichuan2-chat is a fine-tuned version of the Baichuan LLM, specializing in 
chatting.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "baichuan-inc/Baichuan2-7B-Chat", + "model_revision": "2ce891951e000c36c65442608a0b95fd09b405dc" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 13, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "baichuan-inc/Baichuan2-13B-Chat", + "model_revision": "a56c793eb7a721ab6c270f779024e0375e8afd4a" + } + ], + "prompt_style": { + "style_name": "NO_COLON_TWO", + "system_prompt": "", + "roles": [ + "", + "" + ], + "intra_message_sep": "", + "inter_message_sep": "", + "stop_token_ids": [ + 2, + 195 + ] + } + }, + { + "version": 1, + "context_length": 4096, + "model_name": "baichuan-2", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "embed", + "generate" + ], + "model_description": "Baichuan2 is an open-source Transformer based LLM that is trained on both Chinese and English data.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "baichuan-inc/Baichuan2-7B-Base", + "model_revision": "f2cc3a689c5eba7dc7fd3757d0175d312d167604" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 13, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "baichuan-inc/Baichuan2-13B-Base", + "model_revision": "fa88072fee36e36282287410e00897df2f59e09b" + } + ] } ] diff --git a/xinference/model/llm/pytorch/baichuan.py b/xinference/model/llm/pytorch/baichuan.py index 6484a1e19d..8d78f60b0c 100644 --- a/xinference/model/llm/pytorch/baichuan.py +++ b/xinference/model/llm/pytorch/baichuan.py @@ -69,7 +69,7 @@ def _load_model(self, kwargs: dict): def match(cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1") -> bool: if llm_spec.model_format != "pytorch": return False - if "baichuan-chat" not in llm_family.model_name: + if llm_family.model_name not in 
["baichuan-chat", "baichuan-2-chat"]: return False if "chat" not in llm_family.model_ability: return False diff --git a/xinference/model/llm/pytorch/core.py b/xinference/model/llm/pytorch/core.py index 17be2d4530..668f8fa66d 100644 --- a/xinference/model/llm/pytorch/core.py +++ b/xinference/model/llm/pytorch/core.py @@ -438,6 +438,7 @@ def match(cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1") -> bool: return False if llm_family.model_name in [ "baichuan-chat", + "baichuan-2-chat", "vicuna-v1.3", "falcon", "falcon-instruct",