Robert
committed on
Commit
•
8fe5a80
1
Parent(s):
2827202
Small calculation fixes. Current exact match: 0.02, F1-score: 0.12
Browse files
- base_model/main.py +2 -2
- base_model/retriever.py +4 -6
base_model/main.py
CHANGED
@@ -15,6 +15,6 @@ if __name__ == '__main__':
|
|
15 |
print() # Newline
|
16 |
|
17 |
# Compute overall performance
|
18 |
-
exact_match, f1_score
|
19 |
-
print(f"Exact match: {exact_match}
|
20 |
f"F1-score: {f1_score:.02f}")
|
|
|
15 |
print() # Newline
|
16 |
|
17 |
# Compute overall performance
|
18 |
+
exact_match, f1_score = r.evaluate()
|
19 |
+
print(f"Exact match: {exact_match:.02f}\n"
|
20 |
f"F1-score: {f1_score:.02f}")
|
base_model/retriever.py
CHANGED
@@ -7,7 +7,6 @@ from transformers import (
|
|
7 |
from datasets import load_dataset
|
8 |
import torch
|
9 |
import os.path
|
10 |
-
import numpy
|
11 |
|
12 |
import evaluate
|
13 |
|
@@ -125,9 +124,8 @@ class Retriever:
|
|
125 |
entire dataset.
|
126 |
|
127 |
Returns:
|
128 |
-
|
129 |
float: overall F1-score
|
130 |
-
int: total amount of questions handled
|
131 |
"""
|
132 |
questions_ds = load_dataset("GroNLP/ik-nlp-22_slp", name="questions")['test']
|
133 |
questions = questions_ds['question']
|
@@ -142,7 +140,7 @@ class Retriever:
|
|
142 |
scores += score[0]
|
143 |
predictions.append(result['text'][0])
|
144 |
|
145 |
-
|
146 |
-
|
147 |
|
148 |
-
return
|
|
|
7 |
from datasets import load_dataset
|
8 |
import torch
|
9 |
import os.path
|
|
|
10 |
|
11 |
import evaluate
|
12 |
|
|
|
124 |
entire dataset.
|
125 |
|
126 |
Returns:
|
127 |
+
float: overall exact match
|
128 |
float: overall F1-score
|
|
|
129 |
"""
|
130 |
questions_ds = load_dataset("GroNLP/ik-nlp-22_slp", name="questions")['test']
|
131 |
questions = questions_ds['question']
|
|
|
140 |
scores += score[0]
|
141 |
predictions.append(result['text'][0])
|
142 |
|
143 |
+
exact_matches = [evaluate.compute_exact_match(predictions[i], answers[i]) for i in range(len(answers))]
|
144 |
+
f1_scores = [evaluate.compute_f1(predictions[i], answers[i]) for i in range(len(answers))]
|
145 |
|
146 |
+
return sum(exact_matches) / len(exact_matches), sum(f1_scores) / len(f1_scores)
|