From 0d8eab5eede5035ba12f97641df4ef261300d065 Mon Sep 17 00:00:00 2001 From: Yifan Mai Date: Thu, 19 Dec 2024 22:05:50 -0800 Subject: [PATCH] Update run entries config for Nova --- .../run_entries_lite_20240424_nova.conf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/helm/benchmark/presentation/run_entries_lite_20240424_nova.conf b/src/helm/benchmark/presentation/run_entries_lite_20240424_nova.conf index 812c23d454..31aebf7554 100644 --- a/src/helm/benchmark/presentation/run_entries_lite_20240424_nova.conf +++ b/src/helm/benchmark/presentation/run_entries_lite_20240424_nova.conf @@ -9,8 +9,8 @@ entries: [ {description: "narrative_qa:model=text,output_format_instructions=narrative_qa", priority: 1} # NaturalQuestions - {description: "natural_qa:model=text,mode=openbook_longans,output_format_instructions=natural_qa_short_answer", priority: 1} - {description: "natural_qa:model=text,mode=closedbook,output_format_instructions=natural_qa_short_answer", priority: 1} + {description: "natural_qa:model=text,mode=openbook_longans,output_format_instructions=natural_qa_short_answer_no_prefix", priority: 1} + {description: "natural_qa:model=text,mode=closedbook,output_format_instructions=natural_qa_short_answer_no_prefix", priority: 1} # OpenbookQA {description: "commonsense:model=text_code,dataset=openbookqa,method=multiple_choice_joint,output_format_instructions=openbookqa", priority: 1} @@ -35,11 +35,11 @@ entries: [ {description: "gsm:model=text_code,stop=none", priority: 2} # LegalBench - {description: "legalbench:model=text_code,subset=abercrombie,output_format_instructions=legalbench_abercrombie_no_prefix,stop=none", priority: 2} - {description: "legalbench:model=text_code,subset=corporate_lobbying,output_format_instructions=legalbench_yes_or_no_no_prefix,stop=none", priority: 2} - {description: "legalbench:model=text_code,subset=international_citizenship_questions,output_format_instructions=legalbench_yes_or_no_no_prefix,stop=none", priority: 2} - {description: "legalbench:model=text_code,subset=function_of_decision_section,output_format_instructions=legalbench_function_of_decision_section_no_prefix,stop=none", priority: 2} - {description: "legalbench:model=text_code,subset=proa,output_format_instructions=legalbench_yes_or_no_no_prefix,stop=none", priority: 2} + {description: "legalbench:model=text_code,subset=abercrombie,output_format_instructions=legalbench_no_prefix,stop=none", priority: 2} + {description: "legalbench:model=text_code,subset=corporate_lobbying,output_format_instructions=legalbench_no_prefix,stop=none", priority: 2} + {description: "legalbench:model=text_code,subset=international_citizenship_questions,output_format_instructions=legalbench_no_prefix,stop=none", priority: 2} + {description: "legalbench:model=text_code,subset=function_of_decision_section,output_format_instructions=legalbench_no_prefix,stop=none", priority: 2} + {description: "legalbench:model=text_code,subset=proa,output_format_instructions=legalbench_no_prefix,stop=none", priority: 2} # MedQA {description: "med_qa:model=text_code,output_format_instructions=med_qa", priority: 2}