-
Notifications
You must be signed in to change notification settings - Fork 569
/
docker-compose.yaml
96 lines (91 loc) · 3.13 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
---
# Docker Compose definition for the RAG pipeline:
#   - chain-server:   the RAG API (retrieval + generation), port 8081
#   - rag-playground: the web UI (speech mode by default), port 8090
# The vector DB (Milvus) and the NIM microservices (LLM inference,
# embedding, reranking) are composed in via `include` below.
include:
  # `path` as a list merges the listed files into one included project.
  - path:
      - docker-compose-vectordb.yaml
      - docker-compose-nim-ms.yaml

services:
  chain-server:
    container_name: chain-server
    image: chain-server:${TAG:-latest}
    build:
      context: ./
      dockerfile: src/chain_server/Dockerfile
      args:
        APP_PATH: 'app_chain'
    volumes:
      # Prompt templates mounted from the host so they can be edited
      # without rebuilding the image.
      - ./prompt.yaml:/prompt.yaml
    command: --port 8081 --host 0.0.0.0
    environment:
      APP_PATH: 'app_chain'
      APP_VECTORSTORE_URL: "http://milvus:19530"
      APP_VECTORSTORE_NAME: "milvus"
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      # Leave the *_SERVERURL values blank to use the NVIDIA API Catalog
      # endpoints instead of locally hosted NIM services.
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
      APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
      APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
      APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l
      # Numeric env values are quoted so the YAML parser hands Compose
      # strings rather than ints/floats.
      # NOTE(review): 506 looks like a typo for 512 — confirm intent.
      APP_TEXTSPLITTER_CHUNKSIZE: "506"
      APP_TEXTSPLITTER_CHUNKOVERLAP: "200"
      APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"}  # Leave it blank to avoid using ranking
      APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints}
      APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""}
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      APP_RETRIEVER_TOPK: "4"
      APP_RETRIEVER_SCORETHRESHOLD: "0.25"
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_DB: ${POSTGRES_DB:-api}
      COLLECTION_NAME: ${COLLECTION_NAME:-ifu_rag}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      LOGLEVEL: ${LOGLEVEL:-INFO}
    ports:
      - "8081:8081"
    expose:
      - "8081"
    shm_size: 5gb
    depends_on:
      # `required: false` (Compose >= 2.20) lets chain-server start even
      # when the local NIM services are not deployed (API Catalog mode).
      nemollm-embedding:
        condition: service_healthy
        required: false
      nemollm-inference:
        condition: service_healthy
        required: false
      ranking-ms:
        condition: service_healthy
        required: false

  rag-playground:
    container_name: rag-playground
    image: rag-playground:${TAG:-latest}
    build:
      context: src/rag_playground/
      dockerfile: Dockerfile
      args:
        PLAYGROUND_MODE: ${PLAYGROUND_MODE:-speech}
    command: --port 8090
    environment:
      APP_SERVERURL: http://chain-server
      # Quoted: Compose environment values are strings, not ints.
      APP_SERVERPORT: "8081"
      APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      # Quoted so the YAML parser does not coerce this into a boolean
      # (per the Compose spec, boolean-looking env values must be quoted).
      ENABLE_TRACING: "false"
      # if locally hosting Riva:
      #RIVA_API_URI: <riva-ip-address>:50051
      #TTS_SAMPLE_RATE: 48000
      # if using Riva API Endpoint on NVIDIA API Catalog
      RIVA_API_URI: grpc.nvcf.nvidia.com:443
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      RIVA_ASR_FUNCTION_ID: 1598d209-5e27-4d3c-8079-4751568b1081
      RIVA_TTS_FUNCTION_ID: 0149dedb-2be8-4195-b9a0-e57e0e14f972
      TTS_SAMPLE_RATE: "48000"
    ports:
      - "8090:8090"
    expose:
      - "8090"
    depends_on:
      - chain-server

networks:
  default:
    name: nvidia-rag