""" |
|
QuAC: Question Answering in Context |
|
https://arxiv.org/abs/1808.07036 |
|
|
|
Question Answering in Context (QuAC) is a dataset for modeling, understanding, and |
|
participating in information seeking dialog. Data instances consist of an interactive |
|
dialog between two crowd workers: (1) a student who poses a sequence of freeform |
|
questions to learn as much as possible about a hidden Wikipedia text, and (2) |
|
a teacher who answers the questions by providing short excerpts (spans) from the text. |
|
|
|
Homepage: https://quac.ai/ |
|
""" |
|
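# Note: each flattened instance is expected to expose at least the fields this
# task reads below ("title", "section_title", "paragraph", "question",
# "answer"). A hypothetical instance, with placeholder values for illustration
# only:
#
#     {
#         "title": "<article title>",
#         "section_title": "<section title>",
#         "paragraph": "<the hidden section text>",
#         "question": "<one student question from the dialog>",
#         "answer": "<the teacher's answer span>",
#     }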
import inspect
import lm_eval.datasets.quac.quac
from lm_eval.base import Task


_CITATION = """ |
|
@article{choi2018quac, |
|
title={Quac: Question answering in context}, |
|
author={Choi, Eunsol and He, He and Iyyer, Mohit and Yatskar, Mark and Yih, Wen-tau and Choi, Yejin and Liang, Percy and Zettlemoyer, Luke}, |
|
journal={arXiv preprint arXiv:1808.07036}, |
|
year={2018} |
|
} |
|
""" |
|
|
|
|
|
class QuAC(Task):
    VERSION = 0
    # QuAC ships with a local `datasets` loading script; DATASET_PATH points at
    # that module's file so the harness loads the data from it rather than from
    # the Hugging Face Hub.
    DATASET_PATH = inspect.getfile(lm_eval.datasets.quac.quac)
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        if self._training_docs is None:
            self._training_docs = list(map(self._process_doc, self.dataset["train"]))
        return self._training_docs

    def validation_docs(self):
        return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
        raise NotImplementedError("QuAC has no test docs.")

    def _process_doc(self, doc):
        # Fold the section title into the article title, producing
        # "<article title> - <section title>" for the prompt header.
        doc["title"] = doc["title"] + " - " + doc["section_title"]
        return doc

    def doc_to_text(self, doc):
        # Prompt template (title already includes the section, via _process_doc):
        #   TITLE: <article title> - <section title>
        #   PARAGRAPH: <paragraph>
        #
        #   Q: <question>
        #
        #   A:
        return (
            "TITLE: "
            + doc["title"]
            + "\n"
            + "PARAGRAPH: "
            + doc["paragraph"]
            + "\n\n"
            + "Q: "
            + doc["question"]
            + "\n\n"
            + "A: "
        )

    def should_decontaminate(self):
        return True

    def doc_to_decontamination_query(self, doc):
        return doc["paragraph"]

    def doc_to_target(self, doc):
        return doc["answer"]

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the
            natural language description, the few-shot examples, and the question
            part of the document for `doc`.
        """
        raise NotImplementedError("Evaluation not implemented")

    def process_results(self, doc, results):
        """Take a single document and the LM results and evaluate, returning a
        dict where keys are the names of submetrics and values are the values of
        the metric for that one document.

        :param doc:
            The document as returned from training_docs, validation_docs, or test_docs.
        :param results:
            The results of the requests created in construct_requests.
        """
        raise NotImplementedError("Evaluation not implemented")

    def aggregation(self):
        """
        :returns: {str: [float] -> float}
            A dictionary where keys are the names of submetrics and values are
            functions that aggregate a list of metrics
        """
        raise NotImplementedError("Evaluation not implemented")

    def higher_is_better(self):
        """
        :returns: {str: bool}
            A dictionary where keys are the names of submetrics and values are
            whether a higher value of the submetric is better
        """
        raise NotImplementedError("Evaluation not implemented")