Add Llama 3.1 Nemotron Instruct (70B) model on Together AI (#3172)

stanford-crfm · Nov 23, 2024 · ee10b8f
1 parent 141588e
commit ee10b8f
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 0 deletions.
diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
@@ -2164,6 +2164,16 @@ model_deployments:
       args:
         together_model: meta-llama/Meta-Llama-Guard-3-8B
 
+  # NVIDIA  
+  - name: together/llama-3.1-nemotron-70b-instruct
+    model_name: nvidia/llama-3.1-nemotron-70b-instruct
+    tokenizer_name: nvidia/llama-3.1-nemotron-70b-instruct
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
+
   # 01.AI
   - name: together/yi-6b
     model_name: 01-ai/yi-6b

diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
@@ -2173,6 +2173,16 @@ models:
     release_date: 2024-06-17
     tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: nvidia/llama-3.1-nemotron-70b-instruct
+    display_name: Llama 3.1 Nemotron Instruct (70B)
+    description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
+    creator_organization_name: NVIDIA
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-10-02
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
   # OpenAI
 
   ## GPT 2 Models

diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml
@@ -473,6 +473,14 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
+  - name: nvidia/llama-3.1-nemotron-70b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
+    end_of_text_token: "<|eot_id|>"
+    prefix_token: "<|begin_of_text|>"
+
   # OpenAI
   - name: openai/cl100k_base
     tokenizer_spec: