lvwerra (HF staff) committed
Commit 9de4a89
Parent: b2436ac

Update Space (evaluate main: c447fc8e)

Files changed (2):
  1. requirements.txt +1 -1
  2. squad_v2.py +4 -23
requirements.txt CHANGED
@@ -1 +1 @@
-git+https://github.com/huggingface/evaluate@e4a2724377909fe2aeb4357e3971e5a569673b39
+git+https://github.com/huggingface/evaluate@c447fc8eda9c62af501bfdc6988919571050d950
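(This pin advances the evaluate dependency from revision e4a2724 to c447fc8, the main-branch commit named in the commit message; judging from the squad_v2.py diff below, it is the revision that drops the experimental metric Config API.)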
squad_v2.py CHANGED
@@ -13,8 +13,6 @@
 # limitations under the License.
 """ SQuAD v2 metric. """
 
-from dataclasses import dataclass
-
 import datasets
 
 import evaluate
@@ -89,26 +87,13 @@ Examples:
 """
 
 
-@dataclass
-class SquadV2Config(evaluate.info.Config):
-
-    name: str = "default"
-
-    no_answer_threshold: float = 1.0
-
-
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class SquadV2(evaluate.Metric):
-
-    CONFIG_CLASS = SquadV2Config
-    ALLOWED_CONFIG_NAMES = ["default"]
-
-    def _info(self, config):
+    def _info(self):
         return evaluate.MetricInfo(
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
-            config=config,
             features=datasets.Features(
                 {
                     "predictions": {
@@ -128,7 +113,7 @@ class SquadV2(evaluate.Metric):
             reference_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
         )
 
-    def _compute(self, predictions, references):
+    def _compute(self, predictions, references, no_answer_threshold=1.0):
         no_answer_probabilities = {p["id"]: p["no_answer_probability"] for p in predictions}
         dataset = [{"paragraphs": [{"qas": references}]}]
         predictions = {p["id"]: p["prediction_text"] for p in predictions}
@@ -138,12 +123,8 @@ class SquadV2(evaluate.Metric):
         no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
 
         exact_raw, f1_raw = get_raw_scores(dataset, predictions)
-        exact_thresh = apply_no_ans_threshold(
-            exact_raw, no_answer_probabilities, qid_to_has_ans, self.config.no_answer_threshold
-        )
-        f1_thresh = apply_no_ans_threshold(
-            f1_raw, no_answer_probabilities, qid_to_has_ans, self.config.no_answer_threshold
-        )
+        exact_thresh = apply_no_ans_threshold(exact_raw, no_answer_probabilities, qid_to_has_ans, no_answer_threshold)
+        f1_thresh = apply_no_ans_threshold(f1_raw, no_answer_probabilities, qid_to_has_ans, no_answer_threshold)
         out_eval = make_eval_dict(exact_thresh, f1_thresh)
 
         if has_ans_qids:
 
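Note: with this commit, the no-answer threshold moves from the removed SquadV2Config dataclass to a keyword argument of _compute (default 1.0), so callers pass it per call. A minimal usage sketch of the updated interface, assuming the metric is loaded through the standard evaluate.load("squad_v2") entry point (the example ids and answer values are illustrative):

import evaluate

squad_v2_metric = evaluate.load("squad_v2")

# SQuAD v2 predictions carry a no-answer probability alongside the predicted text.
predictions = [
    {
        "id": "56ddde6b9a695914005b9628",
        "prediction_text": "1976",
        "no_answer_probability": 0.0,
    }
]

# References use the SQuAD answer format; an empty "text" list marks an unanswerable question.
references = [
    {
        "id": "56ddde6b9a695914005b9628",
        "answers": {"text": ["1976"], "answer_start": [97]},
    }
]

# no_answer_threshold is now a compute() keyword argument rather than a config field:
# predictions whose no_answer_probability exceeds it are scored as predicting "no answer".
results = squad_v2_metric.compute(
    predictions=predictions,
    references=references,
    no_answer_threshold=1.0,
)
print(results["exact"], results["f1"])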