gardarjuto committed on
Commit
fa8bb65
1 Parent(s): 7fdb5f5

Add WikiQA

Browse files
Files changed (1) hide show
  1. src/about.py +7 -0
src/about.py CHANGED
@@ -17,6 +17,7 @@ class Tasks(Enum):
17
  task2 = Task("icelandic_inflection_all", "exact_match,get-answer", "Inflection (1-shot)")
18
  task5 = Task("icelandic_belebele", "exact_match,get-answer", "Belebele (IS)")
19
  task6 = Task("icelandic_arc_challenge", "exact_match,get-answer", "ARC-Challenge-IS")
 
20
 
21
  # ---------------------------------------------------
22
 
@@ -64,5 +65,11 @@ This is the Icelandic subset (900 examples) of the Belebele benchmark, a multipl
64
  A machine-translated version of the ARC-Challenge multiple-choice question-answering dataset. For this benchmark, we use the test set which contains 1.23k examples.
65
  - Link to dataset: https://huggingface.co/datasets/mideind/icelandic-arc-challenge
66
 
 
 
 
 
 
 
67
  """
68
 
 
17
  task2 = Task("icelandic_inflection_all", "exact_match,get-answer", "Inflection (1-shot)")
18
  task5 = Task("icelandic_belebele", "exact_match,get-answer", "Belebele (IS)")
19
  task6 = Task("icelandic_arc_challenge", "exact_match,get-answer", "ARC-Challenge-IS")
20
+ task7 = Task("icelandic_wiki_qa", "lm_judge_score,get-answer", "WikiQA-IS")
21
 
22
  # ---------------------------------------------------
23
 
 
65
  A machine-translated version of the ARC-Challenge multiple-choice question-answering dataset. For this benchmark, we use the test set which contains 1.23k examples.
66
  - Link to dataset: https://huggingface.co/datasets/mideind/icelandic-arc-challenge
67
 
68
+ ### WikiQA-IS
69
+ The Icelandic WikiQA dataset is a collection of 1.1k question-answer pairs from the Icelandic Wikipedia, meant to evaluate models' knowledge of Icelandic culture and history.
70
+ They were collected by making GPT-4o generate questions and answers
71
+ given Icelandic Wikipedia articles as context. All examples were then manually verified and corrected where necessary. For evaluation, we prompt GPT-4o to
72
+ compare the generated answer to the original answer for semantic similarity and rate the answer on the following scale: (0, "poor"), (1, "fair"), (2, "excellent").
73
+ - Link to dataset: https://huggingface.co/datasets/mideind/icelandic_wiki_qa
74
  """
75