Skip to content

Commit

Permalink
Please enter the commit message for your changes. Lines starting
Browse files Browse the repository at this point in the history
  • Loading branch information
siyagoel committed Dec 6, 2024
1 parent 33a65de commit 4388c8e
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 280 deletions.
93 changes: 0 additions & 93 deletions src/helm/benchmark/metrics/chain_of_thought_metric_correctness.py

This file was deleted.

1 change: 0 additions & 1 deletion src/helm/benchmark/run_specs/lite_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ def get_mmlu_pro_spec(subject: str, use_chain_of_thought: str = "False", use_few
input_prefix="What is the correct answer to this question: ",
input_suffix="\nChoices:\n",
output_prefix="",
reference_prefix="(A) ",
global_suffix=("Format your response as follows: " '"The correct answer is (insert answer here)".'),
)
return RunSpec(
Expand Down
93 changes: 0 additions & 93 deletions src/helm/benchmark/scenarios/mmlu_pro.py

This file was deleted.

93 changes: 0 additions & 93 deletions src/helm/benchmark/scenarios/mmlu_scenario_pro.py

This file was deleted.

14 changes: 14 additions & 0 deletions src/helm/benchmark/static/schema_lite_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,20 @@ run_groups:
when: "?"
language: English

- name: ifeval
display_name: IFEval
description: IFEval
metric_groups:
- accuracy
- efficiency
- general_information
environment:
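main_name: ifeval_strict_accuracy
# NOTE: a duplicate key `main_name: chain_of_thought_correct` ("non-CoT", copied from the GPQA group) was dropped here; YAML mapping keys must be unique and the last one would otherwise win.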
main_split: test
taxonomy:
task: "?"

- name: gpqa
display_name: GPQA
description: GPQA
Expand Down

0 comments on commit 4388c8e

Please sign in to comment.