diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
index 95214d068b..10f688461a 100644
--- a/src/helm/config/model_deployments.yaml
+++ b/src/helm/config/model_deployments.yaml
@@ -2152,6 +2152,15 @@ model_deployments:
       args:
         together_model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
 
+  - name: together/llama-3.3-70b-instruct-turbo
+    model_name: meta/llama-3.3-70b-instruct-turbo
+    tokenizer_name: meta/llama-3.3-70b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: meta-llama/Llama-3.3-70B-Instruct-Turbo
+
   - name: together/llama-guard-7b
     model_name: meta/llama-guard-7b
     tokenizer_name: meta-llama/Llama-2-7b-hf
diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
index c3bb0f54b8..7fc8457beb 100644
--- a/src/helm/config/model_metadata.yaml
+++ b/src/helm/config/model_metadata.yaml
@@ -1656,6 +1656,15 @@ models:
     release_date: 2024-09-25
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: meta/llama-3.3-70b-instruct-turbo
+    display_name: Llama 3.3 Instruct Turbo (70B)
+    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-12-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: meta/llama-3-8b-chat
     display_name: Llama 3 Instruct (8B)
     description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml
index ef7dda6d76..cbf96457f1 100644
--- a/src/helm/config/tokenizer_configs.yaml
+++ b/src/helm/config/tokenizer_configs.yaml
@@ -349,6 +349,14 @@ tokenizer_configs:
     prefix_token: "<|begin_of_text|>"
     end_of_text_token: "<|eot_id|>"
 
+  - name: meta/llama-3.3-70b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Llama-3.3-70B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|eot_id|>"
+
   # 01-ai
   - name: 01-ai/Yi-6B
     tokenizer_spec:
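
The new tokenizer entry delegates to `HuggingFaceTokenizer` with the gated `meta-llama/Llama-3.3-70B-Instruct` repo. A minimal sketch for sanity-checking that entry, assuming `transformers` is installed and you have been granted access to the gated repo (this is a local verification aid, not part of the change itself):

```python
# Verify that the Hugging Face tokenizer referenced by the new
# tokenizer_configs.yaml entry resolves and exposes the special tokens
# the config declares (prefix_token / end_of_text_token).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.3-70B-Instruct")

# Expected per the config above: "<|begin_of_text|>" and "<|eot_id|>".
print(tokenizer.bos_token)
print(tokenizer.eos_token)
```

With all three entries in place, the deployment should be selectable in HELM run entries via `model=meta/llama-3.3-70b-instruct-turbo`, which resolves to the `together/llama-3.3-70b-instruct-turbo` deployment defined in `model_deployments.yaml`.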