djstrong committed on
Commit
9655a7c
1 Parent(s): 68bbe4a

add polqa tasks

Browse files
Files changed (1) hide show
  1. src/about.py +24 -18
src/about.py CHANGED
@@ -32,7 +32,10 @@ class Tasks(Enum):
32
  task17 = Task("polish_cbd_regex", "f1,score-first", "cbd_g", "generate_until")
33
  task18 = Task("polish_klej_ner_multiple_choice", "acc,none", "klej_ner_mc", "multiple_choice")
34
  task19 = Task("polish_klej_ner_regex", "exact_match,score-first", "klej_ner_g", "generate_until")
35
- task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "polish_poleval2018_task3_test_10k", "other")
 
 
 
36
 
37
  NUM_FEWSHOT = 0 # Change with your few shot
38
  # ---------------------------------------------------
@@ -81,25 +84,28 @@ or join our [Discord SpeakLeash](https://discord.gg/3G9DVM39)
81
 
82
  | Task | Dataset | Metric | Type |
83
  |---------------------------------|---------------------------------------|-----------|-----------------|
84
- | belebele_pol_Latn | facebook/belebele | accuracy | multiple_choice |
85
  | polemo2_in | allegro/klej-polemo2-in | accuracy | generate_until |
86
- | polemo2_in_multiple_choice | allegro/klej-polemo2-in | accuracy | multiple_choice |
87
  | polemo2_out | allegro/klej-polemo2-out | accuracy | generate_until |
88
- | polemo2_out_multiple_choice | allegro/klej-polemo2-out | accuracy | multiple_choice |
89
- | polish_8tags_multiple_choice | sdadas/8tags | accuracy | multiple_choice |
90
- | polish_8tags_regex | sdadas/8tags | accuracy | generate_until |
91
- | polish_belebele_regex | facebook/belebele | accuracy | generate_until |
92
- | polish_dyk_multiple_choice | allegro/klej-dyk | binary F1 | multiple_choice |
93
- | polish_dyk_regex | allegro/klej-dyk | binary F1 | generate_until |
94
- | polish_ppc_multiple_choice | sdadas/ppc | accuracy | multiple_choice |
95
- | polish_ppc_regex | sdadas/ppc | accuracy | generate_until |
96
- | polish_psc_multiple_choice | allegro/klej-psc | binary F1 | multiple_choice |
97
- | polish_psc_regex | allegro/klej-psc | binary F1 | generate_until |
98
- | polish_cbd_multiple_choice | ptaszynski/PolishCyberbullyingDataset | macro F1 | multiple_choice |
99
- | polish_cbd_regex | ptaszynski/PolishCyberbullyingDataset | macro F1 | generate_until |
100
- | polish_klej_ner_multiple_choice | allegro/klej-nkjp-ner | accuracy | multiple_choice |
101
- | polish_klej_ner_regex | allegro/klej-nkjp-ner | accuracy | generate_until |
102
- | polish_poleval2018_task3_test_10k | enelpol/poleval2018_task3_test_10k | word perplexity | other |
 
 
 
 
103
 
104
  ## Reproducibility
105
  To reproduce our results, you need to clone the repository:
 
32
  task17 = Task("polish_cbd_regex", "f1,score-first", "cbd_g", "generate_until")
33
  task18 = Task("polish_klej_ner_multiple_choice", "acc,none", "klej_ner_mc", "multiple_choice")
34
  task19 = Task("polish_klej_ner_regex", "exact_match,score-first", "klej_ner_g", "generate_until")
35
+ task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "poleval2018_task3_test_10k", "other")
36
+ task21 = Task("polish_polqa_reranking_multiple_choice", "acc,none", "polqa_reranking_mc", "other") # multiple_choice
37
+ task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "other") # generate_until
38
+ task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "other") # generate_until
39
 
40
  NUM_FEWSHOT = 0 # Change with your few shot
41
  # ---------------------------------------------------
 
84
 
85
  | Task | Dataset | Metric | Type |
86
  |---------------------------------|---------------------------------------|-----------|-----------------|
 
87
  | polemo2_in | allegro/klej-polemo2-in | accuracy | generate_until |
88
+ | polemo2_in_mc | allegro/klej-polemo2-in | accuracy | multiple_choice |
89
  | polemo2_out | allegro/klej-polemo2-out | accuracy | generate_until |
90
+ | polemo2_out_mc | allegro/klej-polemo2-out | accuracy | multiple_choice |
91
+ | 8tags_mc | sdadas/8tags | accuracy | multiple_choice |
92
+ | 8tags_g | sdadas/8tags | accuracy | generate_until |
93
+ | belebele_mc | facebook/belebele | accuracy | multiple_choice |
94
+ | belebele_g | facebook/belebele | accuracy | generate_until |
95
+ | dyk_mc | allegro/klej-dyk | binary F1 | multiple_choice |
96
+ | dyk_g | allegro/klej-dyk | binary F1 | generate_until |
97
+ | ppc_mc | sdadas/ppc | accuracy | multiple_choice |
98
+ | ppc_g | sdadas/ppc | accuracy | generate_until |
99
+ | psc_mc | allegro/klej-psc | binary F1 | multiple_choice |
100
+ | psc_g | allegro/klej-psc | binary F1 | generate_until |
101
+ | cbd_mc | ptaszynski/PolishCyberbullyingDataset | macro F1 | multiple_choice |
102
+ | cbd_g | ptaszynski/PolishCyberbullyingDataset | macro F1 | generate_until |
103
+ | klej_ner_mc | allegro/klej-nkjp-ner | accuracy | multiple_choice |
104
+ | klej_ner_g | allegro/klej-nkjp-ner | accuracy | generate_until |
105
+ | poleval2018_task3_test_10k | enelpol/poleval2018_task3_test_10k | word perplexity | other |
106
+ | polqa_reranking_mc | ipipan/polqa | accuracy | other |
107
+ | polqa_open_book_g | ipipan/polqa | levenshtein | other |
108
+ | polqa_closed_book_g | ipipan/polqa | levenshtein | other |
109
 
110
  ## Reproducibility
111
  To reproduce our results, you need to clone the repository: