# BayesTensor's picture
# Upload folder using huggingface_hub
# 9d5b280 verified
import re
from itertools import product
import evaluate
import transformers.data.metrics.squad_metrics as squad_metrics
from lm_eval.utils import general_detokenize
def lowercase_first_letter(text):
    """Return ``text`` with its first character lowercased.

    The rest of the string is left untouched. An empty string is returned
    unchanged rather than raising ``IndexError``.
    """
    # Slicing (instead of indexing) is safe on an empty string.
    return text[:1].lower() + text[1:]
def process_doc_nli(dataset):
    """Normalise the ``premise`` / ``hypothesis`` fields of every document.

    For each document:
      * both fields are passed through ``general_detokenize`` and stripped;
      * one trailing ``.``, ``,``, ``!`` or ``?`` is dropped from the premise;
      * the hypothesis gets a lowercase first letter and a final ``.``.

    Returns the result of ``dataset.map`` applied with that transformation.
    """
    trailing_marks = (".", ",", "!", "?")

    def process_fn(doc):
        premise = general_detokenize(doc["premise"]).strip()
        hypothesis = general_detokenize(doc["hypothesis"]).strip()

        # The premise loses its closing punctuation mark, if it has one.
        if premise.endswith(trailing_marks):
            premise = premise[:-1]

        # The hypothesis starts in lowercase and always ends with a dot.
        hypothesis = lowercase_first_letter(hypothesis)
        if not hypothesis.endswith("."):
            hypothesis += "."

        doc["premise"] = premise
        doc["hypothesis"] = hypothesis
        return doc

    return dataset.map(process_fn)
def process_results_coqcat(doc, results):
    """Score one prediction against the gold answers of the last turn.

    The gold list starts with the main answer for the last question and is
    extended with every additional answer that is not already present
    (compared case-insensitively). Only the first line of ``results[0]`` is
    used as the prediction.

    With several golds, each gold is left out in turn and the prediction is
    scored against the remaining ones (taking the maximum); the totals are
    then averaged over the number of golds. With a single gold, the
    prediction is scored against it directly.

    Returns a dict with the averaged ``"em"`` and ``"f1"`` values.
    """
    last_turn = len(doc["questions"]) - 1
    references = [doc["answers"]["input_text"][last_turn]]

    extra_annotations = doc.get("additional_answers")
    if extra_annotations:
        for annotation in extra_annotations.values():
            candidate = annotation["input_text"][last_turn]
            if candidate.lower() not in map(str.lower, references):
                references.append(candidate)

    prediction = results[0].strip().split("\n")[0]

    def _best(metric, golds):
        # Best score of the prediction over a group of gold answers.
        return max(metric(gold, prediction) for gold in golds)

    if len(references) > 1:
        # Leave-one-out groups: every gold is excluded exactly once.
        gold_groups = [
            references[:skip] + references[skip + 1 :]
            for skip in range(len(references))
        ]
    else:
        gold_groups = [references]

    em_total = 0.0
    f1_total = 0.0
    for golds in gold_groups:
        em_total += _best(squad_metrics.compute_exact, golds)
        f1_total += _best(squad_metrics.compute_f1, golds)

    denominator = max(1, len(references))
    return {
        "em": em_total / denominator,
        "f1": f1_total / denominator,
    }
def process_results_qa(doc, results):
    """Compare the first result against the first reference answer of ``doc``.

    Returns a dict with the ``"f1"`` and ``"exact_match"`` values computed by
    ``squad_metrics`` for ``doc["answers"][0]["text"]`` versus ``results[0]``.
    """
    prediction = results[0]
    gold_text = doc["answers"][0]["text"]
    return {
        "f1": squad_metrics.compute_f1(gold_text, prediction),
        "exact_match": squad_metrics.compute_exact(gold_text, prediction),
    }
def process_doc_cabreu(dataset):
    """Collapse runs of spaces in the content and summaries of every document.

    The ``content`` field and each of the nine summaries (three summary types
    times three annotator keys) have consecutive spaces replaced by a single
    space. Returns the result of ``dataset.map`` with that transformation.
    """
    summary_types = ("abstractive", "extractive", "extreme")
    annotators = ("a1", "a2", "a3")

    def _squeeze_spaces(text):
        # Any run of one or more spaces becomes exactly one space.
        return re.sub(r" +", " ", text)

    def process_fn(doc):
        doc["content"] = _squeeze_spaces(doc["content"])
        for kind, annotator in product(summary_types, annotators):
            original = doc["summaries"][kind][annotator]
            doc["summaries"][kind][annotator] = _squeeze_spaces(original)
        return doc

    return dataset.map(process_fn)
def process_docs_paraphrases(dataset):
    """Drop incomplete sentence pairs and normalise the remaining ones.

    Documents whose ``sentence1`` or ``sentence2`` is ``None`` or ``""`` are
    filtered out. For the remaining documents:
      * both sentences are passed through ``general_detokenize`` and stripped;
      * one trailing ``.``, ``,`` or ``;`` is removed from ``sentence1``;
      * ``sentence2`` gets a lowercase first letter (it is meant to be used
        after "Yes, ...").

    Returns the filtered and mapped dataset.
    """

    def _has_both_sentences(doc):
        # Single source of truth for the "usable document" predicate.
        return doc["sentence1"] not in [None, ""] and doc["sentence2"] not in [
            None,
            "",
        ]

    def _process_doc(doc):
        # Only called on documents that passed the filter, so both sentences
        # are known to be non-empty here.
        sentence1 = general_detokenize(doc["sentence1"]).strip()
        sentence2 = general_detokenize(doc["sentence2"]).strip()

        # Remove final punctuation mark in the first sentence
        if sentence1.endswith((".", ",", ";")):
            sentence1 = sentence1[:-1]

        doc["sentence1"] = sentence1
        # Start the second sentence in lowercase (to be used after "Yes, ...")
        doc["sentence2"] = lowercase_first_letter(sentence2)
        return doc

    return dataset.filter(_has_both_sentences).map(_process_doc)
def process_docs_copa_ca(dataset):
    """Lowercase the first letter of ``choice1`` and ``choice2`` in every doc.

    Returns the result of ``dataset.map`` with that transformation.
    """

    def _process_doc(doc):
        for field in ("choice1", "choice2"):
            doc[field] = lowercase_first_letter(doc[field])
        return doc

    return dataset.map(_process_doc)
def rouge1(items):
    """Return ``items`` unchanged.

    Pass-through kept for efficiency: no per-item work is done here.
    """
    return items
def rouge1_agg(items):
    """Compute the ROUGE-1 score over all collected items (higher is better).

    Args:
        items: iterable of entries whose first element is the reference and
            whose second element is the prediction.

    Returns:
        The ``"rouge1"`` value reported by the ``rouge`` metric loaded through
        ``evaluate``.
    """
    # Transpose once: column 0 holds the references, column 1 the predictions.
    columns = list(zip(*items))
    refs = columns[0]
    preds = columns[1]
    rouge_scorer = evaluate.load("rouge")
    return rouge_scorer.compute(predictions=preds, references=refs)["rouge1"]