Skip to content

Commit

Permalink
Add Legal Contract Summarization scenario (#3131)
Browse files Browse the repository at this point in the history
Co-authored-by: Mikio Takeuchi <[email protected]>
Co-authored-by: Ryo Kawahara <[email protected]>
  • Loading branch information
3 people authored Nov 12, 2024
1 parent 070d36a commit 6bb3662
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 0 deletions.
25 changes: 25 additions & 0 deletions src/helm/benchmark/run_specs/enterprise_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
get_generation_adapter_spec,
)
from helm.benchmark.metrics.common_metric_specs import (
get_basic_metric_specs,
get_classification_metric_specs,
get_exact_match_metric_specs,
)
Expand Down Expand Up @@ -34,3 +35,27 @@ def get_news_headline_spec(category: str) -> RunSpec:
metric_specs=get_exact_match_metric_specs() + get_classification_metric_specs(),
groups=["gold_commodity_news"],
)


@run_spec_function("legal_contract_summarization")
def get_legal_contract_spec() -> RunSpec:
    """Build the run spec for the Legal Contract Summarization scenario.

    The spec pairs ``LegalContractSummarizationScenario`` with a generation
    adapter that asks for a plain-English summary (at most 100 tokens,
    stopping at the first blank line) and requests the ``rouge_1`` and
    ``rouge_2`` metrics.
    """
    # The run name and its single group share the same identifier.
    run_name = "legal_contract_summarization"

    scenario = ScenarioSpec(
        class_name="helm.benchmark.scenarios.legal_contract_summarization_scenario.LegalContractSummarizationScenario",
        args={},
    )

    adapter = get_generation_adapter_spec(
        instructions="Summarize the legal document in plain English.",
        input_noun="Document",
        output_noun="Summary",
        max_tokens=100,
        stop_sequences=["\n\n"],
    )

    metrics = get_basic_metric_specs(["rouge_1", "rouge_2"])

    return RunSpec(
        name=run_name,
        scenario_spec=scenario,
        adapter_spec=adapter,
        metric_specs=metrics,
        groups=[run_name],
    )
129 changes: 129 additions & 0 deletions src/helm/benchmark/scenarios/legal_contract_summarization_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import os
import pandas as pd
import json
import re

from typing import List
from helm.common.general import ensure_file_downloaded, ensure_directory_exists
from helm.benchmark.scenarios.scenario import (
Input,
Scenario,
Instance,
Reference,
TRAIN_SPLIT,
TEST_SPLIT,
CORRECT_TAG,
Output,
)


class LegalContractSummarizationScenario(Scenario):
    """Legal Contract Summarization

    A legal contract summarization benchmark based on the paper
    Plain English Summarization of Contracts (Manor & Li, NAACL 2019),
    which presented a dataset of legal text snippets paired with summaries
    written in plain English.

    @inproceedings{manor-li-2019-plain,
        title = "Plain {E}nglish Summarization of Contracts",
        author = "Manor, Laura  and
            Li, Junyi Jessy",
        editor = "Aletras, Nikolaos  and
            Ash, Elliott  and
            Barrett, Leslie  and
            Chen, Daniel  and
            Meyers, Adam  and
            Preotiuc-Pietro, Daniel  and
            Rosenberg, David  and
            Stent, Amanda",
        booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2019",
        month = jun,
        year = "2019",
        address = "Minneapolis, Minnesota",
        publisher = "Association for Computational Linguistics",
        url = "https://aclanthology.org/W19-2201",
        doi = "10.18653/v1/W19-2201",
        pages = "1--11",
        abstract = "Unilateral legal contracts, such as terms of service, play a substantial role in modern digital life. However, few read these documents before accepting the terms within, as they are too long and the language too complicated. We propose the task of summarizing such legal documents in plain English, which would enable users to have a better understanding of the terms they are accepting. We propose an initial dataset of legal text snippets paired with summaries written in plain English. We verify the quality of these summaries manually, and show that they involve heavy abstraction, compression, and simplification. Initial experiments show that unsupervised extractive summarization methods do not perform well on this task due to the level of abstraction and style differences. We conclude with a call for resource and technique development for simplification and style transfer for legal language.",
    }
    """  # noqa: E501

    # Fraction of the (non-null) records sampled into the train split; the
    # remaining records form the test split.
    TRAIN_RATIO: float = 0.2
    # Column names read from the downloaded JSON records.
    ARTICLE_COLUMN_NAME = "original_text"
    SUMMARY_COLUMN_NAME = "reference_summary"
    ID_COLUMN_NAME = "uid"

    name = "legal_contract_summarization"
    description = (
        "Plain English Summarization of Contracts [(Manor et al., 2019)](https://aclanthology.org/W19-2201.pdf)."
    )
    tags = ["summarization", "legal"]

    def __init__(self):
        """
        Initializes the scenario.
        """
        super().__init__()

    @staticmethod
    def _clean(text: str) -> str:
        """Collapse every run of whitespace in ``text`` into a single space."""
        return re.sub(r"\s+", " ", text)

    def _load_dataset(self, output_path: str):
        """Download the dataset if needed and split it into train and test frames.

        Args:
            output_path: Directory under which a ``data`` subdirectory is
                created to hold the downloaded JSON file.

        Returns:
            A dict mapping ``TRAIN_SPLIT`` and ``TEST_SPLIT`` to pandas
            DataFrames. Rows with a missing article, summary or id are dropped
            before splitting.
        """
        data_dir = os.path.join(output_path, "data")
        ensure_directory_exists(data_dir)

        source_url = "https://raw.githubusercontent.com/lauramanor/legal_summarization/master/all_v1.json"
        target_path = os.path.join(data_dir, os.path.basename(source_url))
        ensure_file_downloaded(
            source_url=source_url,
            target_path=target_path,
        )

        # Read as UTF-8 explicitly: without `encoding`, open() falls back to
        # the platform's locale encoding, which can mis-decode (or fail on)
        # non-ASCII text in the JSON file.
        with open(target_path, encoding="utf-8") as f:
            json_data = json.load(f)

        # The records are the values of the top-level JSON object; its keys
        # are not used.
        target_df = pd.DataFrame.from_records(list(json_data.values()))
        target_df = target_df.dropna(
            subset=[
                LegalContractSummarizationScenario.ARTICLE_COLUMN_NAME,
                LegalContractSummarizationScenario.SUMMARY_COLUMN_NAME,
                LegalContractSummarizationScenario.ID_COLUMN_NAME,
            ]
        )

        # Split randomly (works better than split by order). The fixed
        # random_state keeps the split reproducible across runs.
        train_df = target_df.sample(frac=LegalContractSummarizationScenario.TRAIN_RATIO, random_state=0)
        # Everything not sampled into train becomes test, shuffled with the same seed.
        test_df = target_df.drop(train_df.index).sample(frac=1, random_state=0)

        return {TRAIN_SPLIT: train_df, TEST_SPLIT: test_df}

    def get_instances(self, output_path: str) -> List[Instance]:
        """Create one ``Instance`` per dataset row, for both splits.

        Args:
            output_path: Directory passed through to ``_load_dataset`` for
                downloading and caching the data.

        Returns:
            A list of instances. Each one uses the whitespace-normalized
            contract text as input and the whitespace-normalized reference
            summary as its single correct reference.
        """
        dataset = self._load_dataset(output_path)

        instances: List[Instance] = []

        for split, split_data in dataset.items():
            for example in split_data.itertuples():
                # Local names avoid shadowing the `id` and `input` builtins.
                instance_id = getattr(example, LegalContractSummarizationScenario.ID_COLUMN_NAME)
                article = LegalContractSummarizationScenario._clean(
                    getattr(example, LegalContractSummarizationScenario.ARTICLE_COLUMN_NAME)
                )
                summary = LegalContractSummarizationScenario._clean(
                    getattr(example, LegalContractSummarizationScenario.SUMMARY_COLUMN_NAME)
                )
                instances.append(
                    Instance(
                        id=instance_id,
                        input=Input(text=article),
                        references=[Reference(output=Output(text=summary), tags=[CORRECT_TAG])],
                        split=split,
                    )
                )

        return instances
36 changes: 36 additions & 0 deletions src/helm/benchmark/static/schema_enterprise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ metrics:
short_display_name: EM
description: Fraction of instances that the predicted output matches a correct reference up to light processing.
lower_is_better: false
# ROUGE overlap metrics (higher is better).
# NOTE(review): the legal_contract_summarization run spec in this change requests
# only rouge_1 and rouge_2; rouge_l is declared here as well - confirm whether
# another run spec produces it.
- name: rouge_1
display_name: ROUGE-1
description: Average ROUGE score [(Lin, 2004)](https://aclanthology.org/W04-1013/) based on 1-gram overlap.
lower_is_better: false
- name: rouge_2
display_name: ROUGE-2
description: Average ROUGE score [(Lin, 2004)](https://aclanthology.org/W04-1013/) based on 2-gram overlap.
lower_is_better: false
- name: rouge_l
display_name: ROUGE-L
description: Average ROUGE score [(Lin, 2004)](https://aclanthology.org/W04-1013/) based on longest common subsequence overlap.
lower_is_better: false

############################################################
perturbations: []
Expand Down Expand Up @@ -109,3 +121,27 @@ run_groups:
who: financial journalists
when: 2000-2019
language: English

# Parent run group for legal-domain scenarios; its members are listed under subgroups.
- name: legal_scenarios
display_name: Legal Scenarios
description: Scenarios for the legal domain
category: All scenarios
subgroups:
- legal_contract_summarization

# Run group for the legal contract summarization scenario.
# The environment section selects rouge_2 on the test split as the main metric.
- name: legal_contract_summarization
display_name: Legal Contract Summarization
description: Plain English Summarization of Contracts [(Manor et al., 2019)](https://aclanthology.org/W19-2201.pdf).
metric_groups:
- accuracy
- efficiency
- general_information
environment:
main_name: rouge_2
main_split: test
taxonomy:
task: summarization
what: legal contracts (e.g. terms of service, license agreements)
who: lawyers
when: before 2019
language: English

0 comments on commit 6bb3662

Please sign in to comment.