
feat(model): Support AquilaChat2-34B (#747)
Close #704
Aries-ckt authored Oct 30, 2023
2 parents 52d6095 + 95d3f52 commit 70464c7
Showing 14 changed files with 234 additions and 85 deletions.
9 changes: 9 additions & 0 deletions .env.template
@@ -23,6 +23,15 @@ WEB_SERVER_PORT=7860
#*******************************************************************#
# LLM_MODEL, see /pilot/configs/model_config.LLM_MODEL_CONFIG
LLM_MODEL=vicuna-13b-v1.5
## LLM model path. By default, DB-GPT reads the model path from LLM_MODEL_CONFIG based on LLM_MODEL.
## You can also specify the model path explicitly via LLM_MODEL_PATH.
## DB-GPT resolves the model path in the following order, from highest to lowest priority:
## 1. environment variable with key {LLM_MODEL}_MODEL_PATH (avoids conflicts when running multiple models)
## 2. environment variable with key MODEL_PATH
## 3. environment variable with key LLM_MODEL_PATH
## 4. the config in /pilot/configs/model_config.LLM_MODEL_CONFIG
# LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
MAX_POSITION_EMBEDDINGS=4096
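The resolution order above can be illustrated with a minimal Python sketch. The env-key normalization for `{LLM_MODEL}_MODEL_PATH` (upper-cased, with `-` and `.` mapped to `_`) is an assumption for illustration, not necessarily DB-GPT's exact rule:

```python
import os

# Hypothetical stand-in for pilot/configs/model_config.LLM_MODEL_CONFIG.
LLM_MODEL_CONFIG = {"vicuna-13b-v1.5": "/app/models/vicuna-13b-v1.5"}


def resolve_model_path(llm_model: str) -> str:
    # 1. Model-specific key, e.g. VICUNA_13B_V1_5_MODEL_PATH (assumed normalization).
    specific_key = llm_model.replace("-", "_").replace(".", "_").upper() + "_MODEL_PATH"
    # 2.-3. Generic keys, tried in priority order.
    for key in (specific_key, "MODEL_PATH", "LLM_MODEL_PATH"):
        path = os.getenv(key)
        if path:
            return path
    # 4. Fall back to the static config.
    return LLM_MODEL_CONFIG[llm_model]


print(resolve_model_path("vicuna-13b-v1.5"))  # /app/models/vicuna-13b-v1.5
```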
28 changes: 19 additions & 9 deletions docs/getting_started/faq/llm/llm_faq.md
@@ -1,6 +1,6 @@
LLM USE FAQ
==================================
##### Q1:how to use openai chatgpt service
##### Q1: how to use openai chatgpt service
change your LLM_MODEL in `.env`
````shell
LLM_MODEL=proxyllm
@@ -15,7 +15,7 @@ PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions

make sure your OpenAI API_KEY is available

##### Q2 What difference between `python dbgpt_server --light` and `python dbgpt_server`
##### Q2: What difference between `python dbgpt_server --light` and `python dbgpt_server`
```{note}
* `python dbgpt_server --light`: dbgpt_server does not start the LLM service. Users can deploy the LLM service separately with `python llmserver`, and dbgpt_server accesses it by setting the LLM_SERVER environment variable in `.env`. The purpose is to allow the dbgpt backend service and the LLM service to be deployed separately.
@@ -35,7 +35,7 @@ python pilot/server/dbgpt_server.py --light
```


##### Q3 How to use MultiGPUs
##### Q3: How to use MultiGPUs

DB-GPT uses all available GPUs by default. You can modify the setting `CUDA_VISIBLE_DEVICES=0,1` in the `.env` file
to use specific GPU IDs.
@@ -52,17 +52,17 @@ CUDA_VISIBLE_DEVICES=3,4,5,6 python3 pilot/server/dbgpt_server.py

You can modify the setting `MAX_GPU_MEMORY=xxGiB` in the `.env` file to configure the maximum memory used by each GPU.

##### Q4 Not Enough Memory
##### Q4: Not Enough Memory

DB-GPT supports 8-bit and 4-bit quantization.

You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` in the `.env` file to use quantization (8-bit quantization is enabled by default).

Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit quantization can run with 48 GB of VRAM.

Note: you need to install the latest dependencies according to [requirements.txt](https://github.com/eosphoros-ai/DB-GPT/blob/main/requirements.txt).
Note: you need to install the quantization dependencies with `pip install -e ".[quantization]"`

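For context, 8-bit and 4-bit loading in this stack typically goes through Hugging Face Transformers with bitsandbytes; a sketch under that assumption (not DB-GPT's exact loading code):

```python
# Sketch: quantized model loading via transformers + bitsandbytes.
# Assumes `pip install transformers bitsandbytes accelerate`.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/app/models/vicuna-13b-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_8bit=True,  # or load_in_4bit=True for 4-bit quantization
    device_map="auto",  # spread layers across the available GPUs
)
```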
##### Q5 How to Add LLM Service dynamic local mode
##### Q5: How to Add LLM Service dynamic local mode

DB-GPT now supports switching among multiple LLM services, so model services can be added dynamically:

@@ -75,7 +75,7 @@ eg: dbgpt model start --model_name chatglm2-6b --model_path /root/DB-GPT/models/
chatgpt
eg: dbgpt model start --model_name chatgpt_proxyllm --model_path chatgpt_proxyllm --proxy_api_key ${OPENAI_KEY} --proxy_server_url {OPENAI_URL}
```
##### Q6 How to Add LLM Service dynamic in remote mode
##### Q6: How to Add LLM Service dynamic in remote mode
If you deploy the LLM service on a remote machine instance and want to add that model service to the dbgpt server for management,

use `dbgpt start worker` and set `--controller_addr`.
@@ -88,13 +88,13 @@ eg: dbgpt start worker --model_name vicuna-13b-v1.5 \
```

##### Q7 dbgpt command not found
##### Q7: dbgpt command not found

```commandline
pip install -e "pip install -e ".[default]"
```

##### Q8 When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.
##### Q8: When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.

```commandline
@@ -103,4 +103,14 @@ pip install -e ".[default]"
automatically determined
```

##### Q9: How to customize model path and prompt template

DB-GPT will read the model path from `pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`.
Of course, you can use the environment variable `LLM_MODEL_PATH` to specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model prompt template.

```
LLM_MODEL=vicuna-13b-v1.5
LLM_MODEL_PATH=/app/models/vicuna-13b-v1.5
# LLM_PROMPT_TEMPLATE=vicuna_v1.1
```
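DB-GPT's local serving follows FastChat's conversation-template conventions, so a name like `vicuna_v1.1` generally corresponds to a FastChat template; a sketch of what that template produces, assuming `fschat` is installed (DB-GPT may wrap this differently):

```python
# Sketch: inspecting the vicuna_v1.1 conversation template via FastChat.
from fastchat.conversation import get_conv_template

conv = get_conv_template("vicuna_v1.1")
conv.append_message(conv.roles[0], "Show me all tables in the database.")
conv.append_message(conv.roles[1], None)  # leave the assistant turn open
print(conv.get_prompt())  # the final prompt string sent to the model
```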

126 changes: 78 additions & 48 deletions docs/locales/zh_CN/LC_MESSAGES/getting_started/faq/llm/llm_faq.po
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-10-20 22:29+0800\n"
"POT-Creation-Date: 2023-10-30 11:37+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -19,34 +19,36 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"

#: ../../getting_started/faq/llm/llm_faq.md:1 54763acec7da4deb90669195c54ec3a1
#: ../../getting_started/faq/llm/llm_faq.md:1 98e23f85313c45169ff2ba7f80193356
msgid "LLM USE FAQ"
msgstr "LLM模型使用FAQ"

#: ../../getting_started/faq/llm/llm_faq.md:3 66f73fd2ee7b462e92d3f263792a5e33
msgid "Q1:how to use openai chatgpt service"
#: ../../getting_started/faq/llm/llm_faq.md:3 0d49acfb4af947cb969b249346b00d33
#, fuzzy
msgid "Q1: how to use openai chatgpt service"
msgstr "我怎么使用OPENAI服务"

#: ../../getting_started/faq/llm/llm_faq.md:4 9d178d8462b74cb188bbacf2ac2ac12b
#: ../../getting_started/faq/llm/llm_faq.md:4 7010fec33e264987a29de86c54da93e8
#, fuzzy
msgid "change your LLM_MODEL in `.env`"
msgstr "通过在.env文件设置LLM_MODEL"

#: ../../getting_started/faq/llm/llm_faq.md:9 f7ca82f257be4ac09639a7f8af5e83eb
#: ../../getting_started/faq/llm/llm_faq.md:9 0982d6d5d0b3434fb00698aaf675f3f3
msgid "set your OPENAPI KEY"
msgstr "set your OPENAPI KEY"

#: ../../getting_started/faq/llm/llm_faq.md:16 d6255b20dce34a2690df7e2af3505d97
#: ../../getting_started/faq/llm/llm_faq.md:16 63650494c1574de09c007e1d470dd53d
msgid "make sure your openapi API_KEY is available"
msgstr "确认openapi API_KEY是否可用"

#: ../../getting_started/faq/llm/llm_faq.md:18 6f1c6dbdb31f4210a6d21f0f3a6ae589
#: ../../getting_started/faq/llm/llm_faq.md:18 5721ec71e344499d96c55b7e531d7c08
#, fuzzy
msgid ""
"Q2 What difference between `python dbgpt_server --light` and `python "
"Q2: What difference between `python dbgpt_server --light` and `python "
"dbgpt_server`"
msgstr "Q2 `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"
msgstr "Q2: `python dbgpt_server --light` 和 `python dbgpt_server`的区别是什么?"

#: ../../getting_started/faq/llm/llm_faq.md:20 b839771ae9e34e998b0edf8d69deabdd
#: ../../getting_started/faq/llm/llm_faq.md:20 76a650f195dd40b6a3a3564030cdc040
msgid ""
"`python dbgpt_server --light` dbgpt_server does not start the llm "
"service. Users can deploy the llm service separately by using `python "
@@ -58,125 +60,153 @@ msgstr ""
"用户可以通过`python "
"llmserver`单独部署模型服务,dbgpt_server通过LLM_SERVER环境变量来访问模型服务。目的是为了可以将dbgpt后台服务和大模型服务分离部署。"

#: ../../getting_started/faq/llm/llm_faq.md:22 aba39cef6fe84799bcd03e8f36c41296
#: ../../getting_started/faq/llm/llm_faq.md:22 8cd87e3504784d9e891e1fb96c79e143
msgid ""
"`python dbgpt_server` dbgpt_server service and the llm service are "
"deployed on the same instance. when dbgpt_server starts the service, it "
"also starts the llm service at the same time."
msgstr "`python dbgpt_server` 是将后台服务和模型服务部署在同一台实例上.dbgpt_server在启动服务的时候同时开启模型服务."

#: ../../getting_started/faq/llm/llm_faq.md:27 c65270d479af49e28e99b35a7932adbd
#: ../../getting_started/faq/llm/llm_faq.md:27 58a6eaf57e6d425685f67058b1a642d4
msgid ""
"If you want to access an external LLM service(deployed by DB-GPT), you "
"need to"
msgstr "如果模型服务部署(通过DB-GPT部署)在别的机器,想通过dbgpt服务访问模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:29 da153e6d18c543f28e0c4e85618e3d3d
#: ../../getting_started/faq/llm/llm_faq.md:29 67ac8823ca2e49ba9c833368e2cfb53c
msgid ""
"1.set the variables LLM_MODEL=YOUR_MODEL_NAME, "
"MODEL_SERVER=YOUR_MODEL_SERVER(eg:http://localhost:5000) in the .env "
"file."
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:31 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:31 e5c066bcdf0649a1b33bbfc7fd3b1a66
msgid "2.execute dbgpt_server.py in light mode"
msgstr "2.execute dbgpt_server.py light 模式"

#: ../../getting_started/faq/llm/llm_faq.md:33 8f4b9401ac4f4a25a7479bee9ef5e8c1
#: ../../getting_started/faq/llm/llm_faq.md:33 402ff01d7ee94d97be4a0eb964e39b97
msgid "python pilot/server/dbgpt_server.py --light"
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:38 69e1064cd7554ce6b49da732f800eacc
#: ../../getting_started/faq/llm/llm_faq.md:38 86190c689d8f4d9a9b58d904e0b5867b
#, fuzzy
msgid "Q3 How to use MultiGPUs"
msgstr "Q2 怎么使用 MultiGPUs"
msgid "Q3: How to use MultiGPUs"
msgstr "Q3: 怎么使用 MultiGPUs"

#: ../../getting_started/faq/llm/llm_faq.md:40 6de3f105ce96430db5756f38bbd9ca12
#: ../../getting_started/faq/llm/llm_faq.md:40 6b08cff88750440b98956203d8b8a084
msgid ""
"DB-GPT will use all available gpu by default. And you can modify the "
"setting `CUDA_VISIBLE_DEVICES=0,1` in `.env` file to use the specific gpu"
" IDs."
msgstr "DB-GPT默认加载可利用的gpu,你也可以通过修改 在`.env`文件 `CUDA_VISIBLE_DEVICES=0,1`来指定gpu IDs"

#: ../../getting_started/faq/llm/llm_faq.md:43 87cb9bfb20af4b259d719df797c42a7d
#: ../../getting_started/faq/llm/llm_faq.md:43 93b39089e5be4475b9e90e7813f5a7d9
msgid ""
"Optionally, you can also specify the gpu ID to use before the starting "
"command, as shown below:"
msgstr "你也可以指定gpu ID启动"

#: ../../getting_started/faq/llm/llm_faq.md:53 bcfa35cda6304ee5ab9a775a2d4eda63
#: ../../getting_started/faq/llm/llm_faq.md:53 62e3074c109d401fa4bf1ddbdc6c7be1
msgid ""
"You can modify the setting `MAX_GPU_MEMORY=xxGib` in `.env` file to "
"configure the maximum memory used by each GPU."
msgstr "同时你可以通过在.env文件设置`MAX_GPU_MEMORY=xxGib`修改每个GPU的最大使用内存"

#: ../../getting_started/faq/llm/llm_faq.md:55 a05c5484927844c8bb4791f0a9ccc82e
#: ../../getting_started/faq/llm/llm_faq.md:55 d235bd83545c476f8e12572658d1c723
#, fuzzy
msgid "Q4 Not Enough Memory"
msgstr "Q3 机器显存不够 "
msgid "Q4: Not Enough Memory"
msgstr "Q4: 机器显存不够 "

#: ../../getting_started/faq/llm/llm_faq.md:57 fe17a023b6eb4a92b1b927e1b94e3784
#: ../../getting_started/faq/llm/llm_faq.md:57 b3243ed9147f42bba987d7f9b778e66f
msgid "DB-GPT supported 8-bit quantization and 4-bit quantization."
msgstr "DB-GPT 支持 8-bit quantization 和 4-bit quantization."

#: ../../getting_started/faq/llm/llm_faq.md:59 76c3684c10864b8e87e5c2255b6c0b7f
#: ../../getting_started/faq/llm/llm_faq.md:59 1ddb9f94ab994bfebfee46d1c19888d4
msgid ""
"You can modify the setting `QUANTIZE_8bit=True` or `QUANTIZE_4bit=True` "
"in `.env` file to use quantization(8-bit quantization is enabled by "
"default)."
msgstr "你可以通过在.env文件设置`QUANTIZE_8bit=True` or `QUANTIZE_4bit=True`"

#: ../../getting_started/faq/llm/llm_faq.md:61 c5d849a38f1a4f0687bbcffb6699dc39
#: ../../getting_started/faq/llm/llm_faq.md:61 54b85daa3fb24b17b67a6da31d2be8b0
msgid ""
"Llama-2-70b with 8-bit quantization can run with 80 GB of VRAM, and 4-bit"
" quantization can run with 48 GB of VRAM."
msgstr ""
"Llama-2-70b with 8-bit quantization 可以运行在 80 GB VRAM机器, 4-bit "
"quantization可以运行在 48 GB VRAM"

#: ../../getting_started/faq/llm/llm_faq.md:63 867329a5e3b0403083e96f72b8747fb2
#: ../../getting_started/faq/llm/llm_faq.md:63 097d680aed184fee9eceebee55a47ac1
msgid ""
"Note: you need to install the latest dependencies according to "
"[requirements.txt](https://github.com/eosphoros-ai/DB-"
"GPT/blob/main/requirements.txt)."
"Note: you need to install the quantization dependencies with `pip install"
" -e \".[quantization]\"`"
msgstr ""

#: ../../getting_started/faq/llm/llm_faq.md:65 60ceee25e9fb4ddba40c5306bfb0a82f
#: ../../getting_started/faq/llm/llm_faq.md:65 f3a51056043c49eb84471040f2b364aa
#, fuzzy
msgid "Q5 How to Add LLM Service dynamic local mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q5: How to Add LLM Service dynamic local mode"
msgstr "Q5: 怎样动态新增模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:67 c99eb7f7ae844884a8f0da94238ea7e0
#: ../../getting_started/faq/llm/llm_faq.md:67 43ee6b0f23814c94a4ddb2429801a5e1
msgid ""
"Now DB-GPT through multi-llm service switch, so how to add llm service "
"dynamic,"
msgstr "DB-GPT支持多个模型服务切换, 怎样添加一个模型服务呢"

#: ../../getting_started/faq/llm/llm_faq.md:78 cd89b8a2075f4407b8036a74151a6377
#: ../../getting_started/faq/llm/llm_faq.md:78 c217bbf0d2b6425fa7a1c691b7704a8d
#, fuzzy
msgid "Q6 How to Add LLM Service dynamic in remote mode"
msgstr "Q5 怎样动态新增模型服务"
msgid "Q6: How to Add LLM Service dynamic in remote mode"
msgstr "Q6: 怎样动态新增模型服务"

#: ../../getting_started/faq/llm/llm_faq.md:79 8833ce89465848259b08ef0a4fa68d96
#: ../../getting_started/faq/llm/llm_faq.md:79 195bdaa937a94c7aa0d8c6e1a5430d6e
msgid ""
"If you deploy llm service in remote machine instance, and you want to "
"add model service to dbgpt server to manage"
msgstr "如果你想在远程机器实例部署大模型服务并添加到本地dbgpt_server进行管理"

#: ../../getting_started/faq/llm/llm_faq.md:81 992eb37e3cca48829636c15ba3ec2ee8
#: ../../getting_started/faq/llm/llm_faq.md:81 c64098b838a94821963a1d16e56497ff
msgid "use dbgpt start worker and set --controller_addr."
msgstr "使用1`dbgpt start worker`命令并设置注册地址--controller_addr"

#: ../../getting_started/faq/llm/llm_faq.md:91 0d06d7d6dd3d4780894ecd914c89b5a2
#: ../../getting_started/faq/llm/llm_faq.md:91 cb12d5e9d9d24f14abc3ebea877a4b24
#, fuzzy
msgid "Q7 dbgpt command not found"
msgstr "Q6 dbgpt command not found"
msgid "Q7: dbgpt command not found"
msgstr "Q7: dbgpt command not found"

#: ../../getting_started/faq/llm/llm_faq.md:97 5d9beed0d95a4503a43d0e025664273b
#: ../../getting_started/faq/llm/llm_faq.md:97 f95cdccfa82d4b3eb2a23dd297131faa
#, fuzzy
msgid ""
"Q8 When starting the worker_manager on a cloud server and registering it "
"with the controller, it is noticed that the worker's exposed IP is a "
"Q8: When starting the worker_manager on a cloud server and registering it"
" with the controller, it is noticed that the worker's exposed IP is a "
"private IP instead of a public IP, which leads to the inability to access"
" the service."
msgstr "云服务器启动worker_manager注册到controller时,发现worker暴露的ip是私网ip, 没有以公网ip暴露,导致服务访问不到"
msgstr ""
"Q8: 云服务器启动worker_manager注册到controller时,发现worker暴露的ip是私网ip, "
"没有以公网ip暴露,导致服务访问不到"

#: ../../getting_started/faq/llm/llm_faq.md:106
#: 739a2983f3484acf98e877dc12f4ccda
msgid "Q9: How to customize model path and prompt template"
msgstr "Q9: 如何自定义模型路径和 prompt 模板"

#: ../../getting_started/faq/llm/llm_faq.md:108
#: 8b82a33a311649c7850c30c00c987c72
#, fuzzy
msgid ""
"DB-GPT will read the model path from "
"`pilot.configs.model_config.LLM_MODEL_CONFIG` based on the `LLM_MODEL`. "
"Of course, you can use the environment variable `LLM_MODEL_PATH` to "
"specify the model path and `LLM_PROMPT_TEMPLATE` to specify your model "
"prompt template."
msgstr ""
"DB-GPT 会根据 `LLM_MODEL` 从 `pilot.configs.model_config.LLM_MODEL_CONFIG` "
"中读取模型路径。当然,你可以使用环境 `LLM_MODEL_PATH` 来指定模型路径,以及使用 `LLM_PROMPT_TEMPLATE` "
"来指定模型的 prompt 模板。"

#~ msgid ""
#~ "Note: you need to install the "
#~ "latest dependencies according to "
#~ "[requirements.txt](https://github.com/eosphoros-ai/DB-"
#~ "GPT/blob/main/requirements.txt)."
#~ msgstr ""

2 changes: 2 additions & 0 deletions pilot/configs/config.py
@@ -194,6 +194,8 @@ def __init__(self) -> None:

### LLM Model Service Configuration
self.LLM_MODEL = os.getenv("LLM_MODEL", "vicuna-13b-v1.5")
self.LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH")

### Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
### When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
### We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.
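A hedged sketch of how a caller might combine the new `LLM_MODEL_PATH` setting with the static mapping (illustrative fallback logic, not necessarily the actual call site):

```python
from pilot.configs.config import Config
from pilot.configs.model_config import LLM_MODEL_CONFIG

CFG = Config()
# Prefer the explicit path from the environment, else the static config.
model_path = CFG.LLM_MODEL_PATH or LLM_MODEL_CONFIG.get(CFG.LLM_MODEL)
```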
6 changes: 4 additions & 2 deletions pilot/model/cluster/controller/controller.py
@@ -13,7 +13,7 @@
_api_remote as api_remote,
_sync_api_remote as sync_api_remote,
)
from pilot.utils.utils import setup_logging
from pilot.utils.utils import setup_logging, setup_http_service_logging

logger = logging.getLogger(__name__)

@@ -149,6 +149,7 @@ def initialize_controller(
else:
import uvicorn

setup_http_service_logging()
app = FastAPI()
app.include_router(router, prefix="/api", tags=["Model"])
uvicorn.run(app, host=host, port=port, log_level="info")
@@ -179,7 +180,8 @@ def run_model_controller():
parser = EnvArgumentParser()
env_prefix = "controller_"
controller_params: ModelControllerParameters = parser.parse_args_into_dataclass(
ModelControllerParameters, env_prefix=env_prefix
ModelControllerParameters,
env_prefixes=[env_prefix],
)

setup_logging(
2 changes: 1 addition & 1 deletion pilot/model/cluster/worker/default_worker.py
@@ -76,7 +76,7 @@ def parse_parameters(self, command_args: List[str] = None) -> ModelParameters:
model_type = self.llm_adapter.model_type()
model_params: ModelParameters = model_args.parse_args_into_dataclass(
param_cls,
env_prefix=env_prefix,
env_prefixes=[env_prefix, "LLM_"],
command_args=command_args,
model_name=self.model_name,
model_path=self.model_path,
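The switch from `env_prefix` to `env_prefixes` lets a single parameter be filled from several environment namespaces, which is what makes the generic `LLM_MODEL_PATH` variable work alongside model-specific ones. A rough sketch of the implied lookup (assumed semantics, not the real `EnvArgumentParser` internals):

```python
import os
from typing import List, Optional


def env_lookup(field_name: str, env_prefixes: List[str]) -> Optional[str]:
    """Assumed behavior: try each prefix in order until a value is found."""
    for prefix in env_prefixes:
        # e.g. prefix "LLM_" + field "model_path" -> "LLM_MODEL_PATH"
        value = os.getenv((prefix + field_name).upper())
        if value is not None:
            return value
    return None


# With env_prefixes=[env_prefix, "LLM_"], a model-specific variable such as
# VICUNA_13B_V1_5_MODEL_PATH wins, and LLM_MODEL_PATH acts as the fallback.
print(env_lookup("model_path", ["vicuna_13b_v1_5_", "LLM_"]))
```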
(Diffs for the remaining 8 changed files not shown.)
