Muennighoff committed on
Commit
cd12b7e
1 Parent(s): be9a366
Files changed (4) hide show
  1. eval_ru.sh +792 -0
  2. launch.sh +53 -0
  3. sbatch_mtf_4b_ru.sh +147 -0
  4. train_ru.txt +1 -0
eval_ru.sh ADDED
@@ -0,0 +1,792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH --job-name=evaluate_t0
3
+ #SBATCH --nodes=1
4
+ #SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
5
+ #SBATCH --cpus-per-task=32 # number of cores per tasks
6
+ #SBATCH --hint=nomultithread # we get physical cores not logical
7
+ #SBATCH --gpus-per-node=mi250:1
8
+ #SBATCH --time 5:00:00 # maximum execution time (HH:MM:SS)
9
+ #SBATCH --output=logs/%j.out # output file name
10
+ #SBATCH -e logs/%j.err
11
+ #SBATCH --account=project_462000119
12
+ #SBATCH -p pilot
13
+ #SBATCH --exclusive=user
14
+
15
+ # VALIDATION:
16
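+ # --array=0-168 (NOTE(review): DATASETS_AND_CONFIGS_VAL below has 153 entries, i.e. indices 0-152 — confirm this range)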
17
+
18
+ # L1
19
+ # --array=0-169
20
+
21
+ # L2
22
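+ # --array=0-84 (NOTE(review): DATASETS_AND_CONFIGS_L2 below has 80 entries, i.e. indices 0-79 — confirm this range)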
23
+
24
+ # MT L1
25
+ # --array=0-69
26
+
27
+ # MT L2
28
+ # --array=0-89
29
+
30
+ # XNLIMTHT:
31
+ # --array=0-79
32
+
33
+
34
+ set -x -e # -x: trace every command to stderr; -e: abort on the first failing command
35
+
36
+ # source $six_ALL_CCFRWORK/start-py38-pt111
37
+ # conda activate thomas_t_zero_evaluation
38
+ source /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/bin/activate # load the venv's activate script into this shell
39
+
40
+ # CHECKPOINT_PATH=/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3/bloom-6b3
41
+ # CHECKPOINT_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/global_step1000
42
+ # CHECKPOINT_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/bloomz7b1
43
+ CHECKPOINT_PATH=bloomz-560m # overwritten by the assignments below
44
+ CHECKPOINT_PATH=bloomz-7b1-500m-ru # overwritten by the assignments below
45
+ CHECKPOINT_PATH=bloomz-7b1-100m-ru # overwritten by the assignments below
46
+ CHECKPOINT_PATH=bloomz-7b1-xp3ru # overwritten by the assignment below
47
+ CHECKPOINT_PATH=bloomz-7b1-4b-ru # effective value: the last assignment wins (relative path, resolved against the working directory)
48
+ # WORKDIR=/gpfswork/rech/six/commun/code/tr13f-6B3-ml-t0
49
+
50
+ # pushd $WORKDIR
51
+
52
+ export TRANSFORMERS_OFFLINE=1 # documented transformers offline-mode switch
53
+ export HF_TRANSFORMERS_OFFLINE=1 # NOTE(review): does not look like a documented Hugging Face variable (TRANSFORMERS_OFFLINE / HF_HUB_OFFLINE are) — confirm it is read anywhere
54
+ export HF_DATASETS_OFFLINE=1 # documented datasets offline-mode switch
55
+ export HF_DATASETS_CACHE=/scratch/project_462000119/ds_cache # datasets cache location on scratch storage
56
+
57
+ OUTPUT_DIR=$CHECKPOINT_PATH/evaluation # "evaluation" subdirectory under the selected checkpoint path
58
+ mkdir -p $OUTPUT_DIR # create it (and any missing parents); expansion is unquoted, so the path must not contain whitespace or glob characters
59
+
60
+ # Validation
61
+ DATASETS_AND_CONFIGS_VAL=(
62
+ head_qa,en,en,"multiple_choice_q_and_a_index_with_context_en",validation
63
+ head_qa,en,en,"multiple_choice_q_and_a_en",validation
64
+ head_qa,en,en,"multiple_choice_q_and_a_index_en",validation
65
+ head_qa,en,en,"multiple_choice_a_and_q_with_context_en",validation
66
+ head_qa,en,en,"multiple_choice_a_and_q_en",validation
67
+ head_qa,es,en,"multiple_choice_q_and_a_index_with_context_en",validation
68
+ head_qa,es,en,"multiple_choice_q_and_a_en",validation
69
+ head_qa,es,en,"multiple_choice_q_and_a_index_en",validation
70
+ head_qa,es,en,"multiple_choice_a_and_q_with_context_en",validation
71
+ head_qa,es,en,"multiple_choice_a_and_q_en",validation
72
+ climate_fever,None,None,"first_evidence_and_claim_itemization",test
73
+ climate_fever,None,None,"claim_and_all_supporting_evidences",test
74
+ climate_fever,None,None,"fifth_evidence_and_claim_itemization",test
75
+ climate_fever,None,None,"third_evidence_claim_pair",test
76
+ climate_fever,None,None,"second_evidence_and_claim_itemization",test
77
+ codah,codah,None,"interrogative_instruction_after_sentence_and_choices",train
78
+ codah,codah,None,"affirmative_instruction_before_sentence_and_choices",train
79
+ codah,codah,None,"affirmative_instruction_after_sentence_and_choices",train
80
+ aqua_rat,raw,None,"select_the_best_option",validation
81
+ aqua_rat,raw,None,"answer_quiz",validation
82
+ aqua_rat,raw,None,"Answer questions from options",validation
83
+ commonsense_qa,None,None,"answer_given_question_without_options",validation
84
+ commonsense_qa,None,None,"question_answering",validation
85
+ commonsense_qa,None,None,"most_suitable_answer",validation
86
+ amazon_reviews_multi,en,en,"prompt_title_to_star",validation
87
+ amazon_reviews_multi,en,en,"prompt_review_to_star",validation
88
+ amazon_reviews_multi,en,en,"prompt_body_title_to_star",validation
89
+ amazon_reviews_multi,zh,en,"prompt_title_to_star",validation
90
+ amazon_reviews_multi,zh,en,"prompt_review_to_star",validation
91
+ amazon_reviews_multi,zh,en,"prompt_body_title_to_star",validation
92
+ amazon_reviews_multi,fr,en,"prompt_title_to_star",validation
93
+ amazon_reviews_multi,fr,en,"prompt_review_to_star",validation
94
+ amazon_reviews_multi,fr,en,"prompt_body_title_to_star",validation
95
+ amazon_reviews_multi,es,en,"prompt_title_to_star",validation
96
+ amazon_reviews_multi,es,en,"prompt_review_to_star",validation
97
+ amazon_reviews_multi,es,en,"prompt_body_title_to_star",validation
98
+ art,None,None,"choose_hypothesis_options",validation
99
+ art,None,None,"choose_hypothesis_believable",validation
100
+ art,None,None,"choose_hypothesis",validation
101
+ art,None,None,"choose_hypothesis_desc",validation
102
+ art,None,None,"choose_hypothesis_likely",validation
103
+ banking77,None,None,"help_page_topic",test
104
+ banking77,None,None,"direct_to_which_department",test
105
+ banking77,None,None,"rephrase_as_banking_term",test
106
+ blbooksgenre,title_genre_classifiction,None,"multi-choice",train
107
+ blbooksgenre,title_genre_classifiction,None,"premise_context_first",train
108
+ blbooksgenre,title_genre_classifiction,None,"classify",train
109
+ blimp,adjunct_island,None,"grammatical_between_1_2",train
110
+ blimp,adjunct_island,None,"grammatical_between_A_B",train
111
+ blimp,adjunct_island,None,"grammatical_which_one_1_2",train
112
+ blimp,adjunct_island,None,"single_sentence_bad_yes_no",train
113
+ blimp,adjunct_island,None,"single_sentence_good_yes_no",train
114
+ conv_ai_3,None,None,"clarification_needed",validation
115
+ conv_ai_3,None,None,"score_give_number",validation
116
+ conv_ai_3,None,None,"ambiguous",validation
117
+ conv_ai_3,None,None,"directly_answer",validation
118
+ conv_ai_3,None,None,"score_how_much",validation
119
+ craigslist_bargains,None,None,"good deal for seller no list price implicit",validation
120
+ craigslist_bargains,None,None,"good deal for seller no list price",validation
121
+ craigslist_bargains,None,None,"good deal for seller",validation
122
+ craigslist_bargains,None,None,"best deal",validation
123
+ ecthr_cases,alleged-violation-prediction,None,"implicit_advice_number",validation
124
+ ecthr_cases,alleged-violation-prediction,None,"ecthr_alleged_articles_declaration_at_end",validation
125
+ ecthr_cases,alleged-violation-prediction,None,"ecthr_alleged_articles_question_at_start",validation
126
+ ecthr_cases,alleged-violation-prediction,None,"implicit_judgment_paragraph",validation
127
+ ecthr_cases,alleged-violation-prediction,None,"confirm number of violated articles",validation
128
+ emo,None,None,"persons_describe",validation
129
+ emo,None,None,"final_message",validation
130
+ emo,None,None,"what_emotion_do_you_think",validation
131
+ emo,None,None,"emotional_state",validation
132
+ emo,None,None,"dialogue_between",validation
133
+ emotion,None,None,"choose_the_best_emotion_label",test
134
+ emotion,None,None,"reply_with_emoation_label",test
135
+ emotion,None,None,"answer_with_class_label",test
136
+ emotion,None,None,"answer_question_with_emotion_label",test
137
+ financial_phrasebank,sentences_allagree,None,"share_price_option",train
138
+ financial_phrasebank,sentences_allagree,None,"sentiment",train
139
+ financial_phrasebank,sentences_allagree,None,"word_comes_to_mind",train
140
+ financial_phrasebank,sentences_allagree,None,"complementary_industries",train
141
+ financial_phrasebank,sentences_allagree,None,"bullish_neutral_bearish",train
142
+ glue,cola,None,"Make sense yes no",validation
143
+ glue,cola,None,"is_this_correct",validation
144
+ glue,cola,None,"editing",validation
145
+ glue,cola,None,"Following sentence acceptable",validation
146
+ glue,cola,None,"Previous sentence acceptable",validation
147
+ glue,sst2,None,"positive negative after",validation
148
+ glue,sst2,None,"review",validation
149
+ glue,sst2,None,"said",validation
150
+ glue,sst2,None,"following positive negative",validation
151
+ glue,sst2,None,"happy or mad",validation
152
+ health_fact,None,None,"claim_veracity_classification_after_reading_I_believe",validation
153
+ health_fact,None,None,"claim_explanation_classification",validation
154
+ health_fact,None,None,"claim_veracity_classification_tell_me",validation
155
+ hlgd,None,None,"is_same_event_with_time_interrogative_related",validation
156
+ hlgd,None,None,"is_same_event_interrogative_talk",validation
157
+ hlgd,None,None,"is_same_event_with_time_interrogative_talk",validation
158
+ hlgd,None,None,"is_same_event_refer",validation
159
+ hlgd,None,None,"is_same_event_editor_asks",validation
160
+ hyperpartisan_news_detection,byarticle,None,"consider_does_it_follow_a_hyperpartisan_argumentation",train
161
+ hyperpartisan_news_detection,byarticle,None,"follows_hyperpartisan_argumentation",train
162
+ hyperpartisan_news_detection,byarticle,None,"consume_with_caution",train
163
+ hyperpartisan_news_detection,byarticle,None,"extreme_left_wing_or_right_wing",train
164
+ hyperpartisan_news_detection,byarticle,None,"consider_it_exhibits_extreme_one_sidedness",train
165
+ liar,None,None,"Given statement guess category",validation
166
+ lince,sa_spaeng,None,"original poster expressed sentiment",validation
167
+ lince,sa_spaeng,None,"sentiment trying to express",validation
168
+ lince,sa_spaeng,None,"express sentiment",validation
169
+ lince,sa_spaeng,None,"negation template",validation
170
+ lince,sa_spaeng,None,"the author seem",validation
171
+ math_qa,None,None,"choose_correct_og",test
172
+ math_qa,None,None,"pick_the_correct",test
173
+ math_qa,None,None,"first_choice_then_problem",test
174
+ math_qa,None,None,"problem_set_type",test
175
+ math_qa,None,None,"gre_problem",test
176
+ movie_rationales,None,None,"Standard binary sentiment analysis",validation
177
+ movie_rationales,None,None,"Evidences sentiment classification",validation
178
+ movie_rationales,None,None,"Evidences + review",validation
179
+ movie_rationales,None,None,"Generate evidences and sentiment",validation
180
+ mwsc,None,None,"in-the-sentence-question-first",validation
181
+ mwsc,None,None,"what-think",validation
182
+ mwsc,None,None,"in-the-sentence",validation
183
+ mwsc,None,None,"options-or",validation
184
+ mwsc,None,None,"is-correct",validation
185
+ poem_sentiment,None,None,"positive_or_negative_sentiment_variation_2",validation
186
+ poem_sentiment,None,None,"question_answer_format",validation
187
+ poem_sentiment,None,None,"guess_sentiment_without_options_variation_1",validation
188
+ poem_sentiment,None,None,"positive_or_negative_sentiment_variation_1",validation
189
+ poem_sentiment,None,None,"most_appropriate_sentiment",validation
190
+ onestop_english,None,None,"esl_context",train
191
+ onestop_english,None,None,"ara_context",train
192
+ onestop_english,None,None,"determine_reading_level_from_the_first_three_sentences",train
193
+ onestop_english,None,None,"esl_variation",train
194
+ onestop_english,None,None,"assess",train
195
+ pubmed_qa,pqa_labeled,None,"Long Answer to Final Decision",train
196
+ pubmed_qa,pqa_labeled,None,"Question Answering (Short)",train
197
+ riddle_sense,None,None,"most_suitable_answer",validation
198
+ riddle_sense,None,None,"answer_given_question_without_options",validation
199
+ riddle_sense,None,None,"question_to_answer_index",validation
200
+ riddle_sense,None,None,"question_answering",validation
201
+ scicite,None,None,"Classify intent w/section (select choice)",validation
202
+ scicite,None,None,"Classify intent (choices first)",validation
203
+ scicite,None,None,"Classify intent (select choice)",validation
204
+ scicite,None,None,"Classify intent",validation
205
+ scicite,None,None,"can_describe",validation
206
+ selqa,answer_selection_analysis,None,"is-he-talking-about",validation
207
+ selqa,answer_selection_analysis,None,"would-make-sense-qu-rand",validation
208
+ selqa,answer_selection_analysis,None,"make-sense-rand",validation
209
+ selqa,answer_selection_analysis,None,"which-answer-1st-vs-random",validation
210
+ snips_built_in_intents,None,None,"voice_intent",train
211
+ snips_built_in_intents,None,None,"categorize_query",train
212
+ snips_built_in_intents,None,None,"intent_query",train
213
+ snips_built_in_intents,None,None,"categorize_query_brief",train
214
+ snips_built_in_intents,None,None,"query_intent",train
215
+ )
216
+
217
+ DATASETS_AND_CONFIGS_L1MISS=( # the same five story_cloze 2016 entries are also listed in DATASETS_AND_CONFIGS_L1; fields are presumably dataset,config,template-config,prompt-name,split — TODO confirm against the code that parses them
218
+ story_cloze,2016,None,"Story Continuation and Options",validation
219
+ story_cloze,2016,None,"Answer Given options",validation
220
+ story_cloze,2016,None,"Novel Correct Ending",validation
221
+ story_cloze,2016,None,"Generate Ending",validation
222
+ story_cloze,2016,None,"Choose Story Ending",validation
223
+ )
224
+
225
+ DATASETS_AND_CONFIGS_L1=(
226
+ super_glue,copa,None,"best_option",validation
227
+ super_glue,copa,None,"C1 or C2? premise, so/because…",validation
228
+ super_glue,copa,None,"i_am_hesitating",validation
229
+ super_glue,copa,None,"cause_effect",validation
230
+ super_glue,copa,None,"plausible_alternatives",validation
231
+ super_glue,rte,None,"MNLI crowdsource",validation
232
+ super_glue,rte,None,"GPT-3 style",validation
233
+ super_glue,rte,None,"does it follow that",validation
234
+ super_glue,rte,None,"should assume",validation
235
+ super_glue,rte,None,"guaranteed true",validation
236
+ anli,dev_r1,None,"guaranteed/possible/impossible",dev_r1
237
+ anli,dev_r1,None,"MNLI crowdsource",dev_r1
238
+ anli,dev_r1,None,"GPT-3 style",dev_r1
239
+ anli,dev_r1,None,"justified in saying",dev_r1
240
+ anli,dev_r1,None,"can we infer",dev_r1
241
+ anli,dev_r2,None,"guaranteed/possible/impossible",dev_r2
242
+ anli,dev_r2,None,"MNLI crowdsource",dev_r2
243
+ anli,dev_r2,None,"GPT-3 style",dev_r2
244
+ anli,dev_r2,None,"justified in saying",dev_r2
245
+ anli,dev_r2,None,"can we infer",dev_r2
246
+ anli,dev_r3,None,"guaranteed/possible/impossible",dev_r3
247
+ anli,dev_r3,None,"MNLI crowdsource",dev_r3
248
+ anli,dev_r3,None,"GPT-3 style",dev_r3
249
+ anli,dev_r3,None,"justified in saying",dev_r3
250
+ anli,dev_r3,None,"can we infer",dev_r3
251
+ super_glue,cb,None,"guaranteed/possible/impossible",validation
252
+ super_glue,cb,None,"MNLI crowdsource",validation
253
+ super_glue,cb,None,"GPT-3 style",validation
254
+ super_glue,cb,None,"justified in saying",validation
255
+ super_glue,cb,None,"can we infer",validation
256
+ winogrande,winogrande_xl,None,"underscore refer to",validation
257
+ winogrande,winogrande_xl,None,"Replace",validation
258
+ winogrande,winogrande_xl,None,"stand for",validation
259
+ winogrande,winogrande_xl,None,"does underscore refer to",validation
260
+ winogrande,winogrande_xl,None,"True or False",validation
261
+ story_cloze,2016,None,"Story Continuation and Options",validation
262
+ story_cloze,2016,None,"Answer Given options",validation
263
+ story_cloze,2016,None,"Novel Correct Ending",validation
264
+ story_cloze,2016,None,"Generate Ending",validation
265
+ story_cloze,2016,None,"Choose Story Ending",validation
266
+ Muennighoff/xstory_cloze,ar,en,"Story Continuation and Options",validation
267
+ Muennighoff/xstory_cloze,ar,en,"Answer Given options",validation
268
+ Muennighoff/xstory_cloze,ar,en,"Novel Correct Ending",validation
269
+ Muennighoff/xstory_cloze,ar,en,"Generate Ending",validation
270
+ Muennighoff/xstory_cloze,ar,en,"Choose Story Ending",validation
271
+ Muennighoff/xstory_cloze,es,en,"Story Continuation and Options",validation
272
+ Muennighoff/xstory_cloze,es,en,"Answer Given options",validation
273
+ Muennighoff/xstory_cloze,es,en,"Novel Correct Ending",validation
274
+ Muennighoff/xstory_cloze,es,en,"Generate Ending",validation
275
+ Muennighoff/xstory_cloze,es,en,"Choose Story Ending",validation
276
+ Muennighoff/xstory_cloze,eu,en,"Story Continuation and Options",validation
277
+ Muennighoff/xstory_cloze,eu,en,"Answer Given options",validation
278
+ Muennighoff/xstory_cloze,eu,en,"Novel Correct Ending",validation
279
+ Muennighoff/xstory_cloze,eu,en,"Generate Ending",validation
280
+ Muennighoff/xstory_cloze,eu,en,"Choose Story Ending",validation
281
+ Muennighoff/xstory_cloze,id,en,"Story Continuation and Options",validation
282
+ Muennighoff/xstory_cloze,id,en,"Answer Given options",validation
283
+ Muennighoff/xstory_cloze,id,en,"Novel Correct Ending",validation
284
+ Muennighoff/xstory_cloze,id,en,"Generate Ending",validation
285
+ Muennighoff/xstory_cloze,id,en,"Choose Story Ending",validation
286
+ Muennighoff/xstory_cloze,hi,en,"Story Continuation and Options",validation
287
+ Muennighoff/xstory_cloze,hi,en,"Answer Given options",validation
288
+ Muennighoff/xstory_cloze,hi,en,"Novel Correct Ending",validation
289
+ Muennighoff/xstory_cloze,hi,en,"Generate Ending",validation
290
+ Muennighoff/xstory_cloze,hi,en,"Choose Story Ending",validation
291
+ Muennighoff/xstory_cloze,sw,en,"Story Continuation and Options",validation
292
+ Muennighoff/xstory_cloze,sw,en,"Answer Given options",validation
293
+ Muennighoff/xstory_cloze,sw,en,"Novel Correct Ending",validation
294
+ Muennighoff/xstory_cloze,sw,en,"Generate Ending",validation
295
+ Muennighoff/xstory_cloze,sw,en,"Choose Story Ending",validation
296
+ Muennighoff/xstory_cloze,te,en,"Story Continuation and Options",validation
297
+ Muennighoff/xstory_cloze,te,en,"Answer Given options",validation
298
+ Muennighoff/xstory_cloze,te,en,"Novel Correct Ending",validation
299
+ Muennighoff/xstory_cloze,te,en,"Generate Ending",validation
300
+ Muennighoff/xstory_cloze,te,en,"Choose Story Ending",validation
301
+ Muennighoff/xstory_cloze,zh,en,"Story Continuation and Options",validation
302
+ Muennighoff/xstory_cloze,zh,en,"Answer Given options",validation
303
+ Muennighoff/xstory_cloze,zh,en,"Novel Correct Ending",validation
304
+ Muennighoff/xstory_cloze,zh,en,"Generate Ending",validation
305
+ Muennighoff/xstory_cloze,zh,en,"Choose Story Ending",validation
306
+ xnli,ar,en,"guaranteed/possible/impossible",validation
307
+ xnli,ar,en,"MNLI crowdsource",validation
308
+ xnli,ar,en,"GPT-3 style",validation
309
+ xnli,ar,en,"justified in saying",validation
310
+ xnli,ar,en,"can we infer",validation
311
+ xnli,en,en,"guaranteed/possible/impossible",validation
312
+ xnli,en,en,"MNLI crowdsource",validation
313
+ xnli,en,en,"GPT-3 style",validation
314
+ xnli,en,en,"justified in saying",validation
315
+ xnli,en,en,"can we infer",validation
316
+ xnli,es,en,"guaranteed/possible/impossible",validation
317
+ xnli,es,en,"MNLI crowdsource",validation
318
+ xnli,es,en,"GPT-3 style",validation
319
+ xnli,es,en,"justified in saying",validation
320
+ xnli,es,en,"can we infer",validation
321
+ xnli,fr,en,"guaranteed/possible/impossible",validation
322
+ xnli,fr,en,"MNLI crowdsource",validation
323
+ xnli,fr,en,"GPT-3 style",validation
324
+ xnli,fr,en,"justified in saying",validation
325
+ xnli,fr,en,"can we infer",validation
326
+ xnli,hi,en,"guaranteed/possible/impossible",validation
327
+ xnli,hi,en,"MNLI crowdsource",validation
328
+ xnli,hi,en,"GPT-3 style",validation
329
+ xnli,hi,en,"justified in saying",validation
330
+ xnli,hi,en,"can we infer",validation
331
+ xnli,sw,en,"guaranteed/possible/impossible",validation
332
+ xnli,sw,en,"MNLI crowdsource",validation
333
+ xnli,sw,en,"GPT-3 style",validation
334
+ xnli,sw,en,"justified in saying",validation
335
+ xnli,sw,en,"can we infer",validation
336
+ xnli,ur,en,"guaranteed/possible/impossible",validation
337
+ xnli,ur,en,"MNLI crowdsource",validation
338
+ xnli,ur,en,"GPT-3 style",validation
339
+ xnli,ur,en,"justified in saying",validation
340
+ xnli,ur,en,"can we infer",validation
341
+ xnli,vi,en,"guaranteed/possible/impossible",validation
342
+ xnli,vi,en,"MNLI crowdsource",validation
343
+ xnli,vi,en,"GPT-3 style",validation
344
+ xnli,vi,en,"justified in saying",validation
345
+ xnli,vi,en,"can we infer",validation
346
+ xnli,zh,en,"guaranteed/possible/impossible",validation
347
+ xnli,zh,en,"MNLI crowdsource",validation
348
+ xnli,zh,en,"GPT-3 style",validation
349
+ xnli,zh,en,"justified in saying",validation
350
+ xnli,zh,en,"can we infer",validation
351
+ xcopa,id,en,"best_option",validation
352
+ xcopa,id,en,"C1 or C2? premise, so/because…",validation
353
+ xcopa,id,en,"i_am_hesitating",validation
354
+ xcopa,id,en,"cause_effect",validation
355
+ xcopa,id,en,"plausible_alternatives",validation
356
+ xcopa,sw,en,"best_option",validation
357
+ xcopa,sw,en,"C1 or C2? premise, so/because…",validation
358
+ xcopa,sw,en,"i_am_hesitating",validation
359
+ xcopa,sw,en,"cause_effect",validation
360
+ xcopa,sw,en,"plausible_alternatives",validation
361
+ xcopa,ta,en,"best_option",validation
362
+ xcopa,ta,en,"C1 or C2? premise, so/because…",validation
363
+ xcopa,ta,en,"i_am_hesitating",validation
364
+ xcopa,ta,en,"cause_effect",validation
365
+ xcopa,ta,en,"plausible_alternatives",validation
366
+ xcopa,vi,en,"best_option",validation
367
+ xcopa,vi,en,"C1 or C2? premise, so/because…",validation
368
+ xcopa,vi,en,"i_am_hesitating",validation
369
+ xcopa,vi,en,"cause_effect",validation
370
+ xcopa,vi,en,"plausible_alternatives",validation
371
+ xcopa,zh,en,"best_option",validation
372
+ xcopa,zh,en,"C1 or C2? premise, so/because…",validation
373
+ xcopa,zh,en,"i_am_hesitating",validation
374
+ xcopa,zh,en,"cause_effect",validation
375
+ xcopa,zh,en,"plausible_alternatives",validation
376
+ Muennighoff/xwinograd,en,en,"underscore refer to",test
377
+ Muennighoff/xwinograd,en,en,"Replace",test
378
+ Muennighoff/xwinograd,en,en,"stand for",test
379
+ Muennighoff/xwinograd,en,en,"does underscore refer to",test
380
+ Muennighoff/xwinograd,en,en,"True or False",test
381
+ Muennighoff/xwinograd,fr,en,"underscore refer to",test
382
+ Muennighoff/xwinograd,fr,en,"Replace",test
383
+ Muennighoff/xwinograd,fr,en,"stand for",test
384
+ Muennighoff/xwinograd,fr,en,"does underscore refer to",test
385
+ Muennighoff/xwinograd,fr,en,"True or False",test
386
+ Muennighoff/xwinograd,pt,en,"underscore refer to",test
387
+ Muennighoff/xwinograd,pt,en,"Replace",test
388
+ Muennighoff/xwinograd,pt,en,"stand for",test
389
+ Muennighoff/xwinograd,pt,en,"does underscore refer to",test
390
+ Muennighoff/xwinograd,pt,en,"True or False",test
391
+ Muennighoff/xwinograd,zh,en,"underscore refer to",test
392
+ Muennighoff/xwinograd,zh,en,"Replace",test
393
+ Muennighoff/xwinograd,zh,en,"stand for",test
394
+ Muennighoff/xwinograd,zh,en,"does underscore refer to",test
395
+ Muennighoff/xwinograd,zh,en,"True or False",test
396
+ )
397
+
398
+ DATASETS_AND_CONFIGS_L2=(
399
+ Muennighoff/xstory_cloze,ru,en,"Story Continuation and Options",validation
400
+ Muennighoff/xstory_cloze,ru,en,"Answer Given options",validation
401
+ Muennighoff/xstory_cloze,ru,en,"Novel Correct Ending",validation
402
+ Muennighoff/xstory_cloze,ru,en,"Generate Ending",validation
403
+ Muennighoff/xstory_cloze,ru,en,"Choose Story Ending",validation
404
+ Muennighoff/xstory_cloze,my,en,"Story Continuation and Options",validation
405
+ Muennighoff/xstory_cloze,my,en,"Answer Given options",validation
406
+ Muennighoff/xstory_cloze,my,en,"Novel Correct Ending",validation
407
+ Muennighoff/xstory_cloze,my,en,"Generate Ending",validation
408
+ Muennighoff/xstory_cloze,my,en,"Choose Story Ending",validation
409
+ xnli,bg,en,"guaranteed/possible/impossible",validation
410
+ xnli,bg,en,"MNLI crowdsource",validation
411
+ xnli,bg,en,"GPT-3 style",validation
412
+ xnli,bg,en,"justified in saying",validation
413
+ xnli,bg,en,"can we infer",validation
414
+ xnli,de,en,"guaranteed/possible/impossible",validation
415
+ xnli,de,en,"MNLI crowdsource",validation
416
+ xnli,de,en,"GPT-3 style",validation
417
+ xnli,de,en,"justified in saying",validation
418
+ xnli,de,en,"can we infer",validation
419
+ xnli,el,en,"guaranteed/possible/impossible",validation
420
+ xnli,el,en,"MNLI crowdsource",validation
421
+ xnli,el,en,"GPT-3 style",validation
422
+ xnli,el,en,"justified in saying",validation
423
+ xnli,el,en,"can we infer",validation
424
+ xnli,ru,en,"guaranteed/possible/impossible",validation
425
+ xnli,ru,en,"MNLI crowdsource",validation
426
+ xnli,ru,en,"GPT-3 style",validation
427
+ xnli,ru,en,"justified in saying",validation
428
+ xnli,ru,en,"can we infer",validation
429
+ xnli,th,en,"guaranteed/possible/impossible",validation
430
+ xnli,th,en,"MNLI crowdsource",validation
431
+ xnli,th,en,"GPT-3 style",validation
432
+ xnli,th,en,"justified in saying",validation
433
+ xnli,th,en,"can we infer",validation
434
+ xnli,tr,en,"guaranteed/possible/impossible",validation
435
+ xnli,tr,en,"MNLI crowdsource",validation
436
+ xnli,tr,en,"GPT-3 style",validation
437
+ xnli,tr,en,"justified in saying",validation
438
+ xnli,tr,en,"can we infer",validation
439
+ Muennighoff/xwinograd,ru,en,"underscore refer to",test
440
+ Muennighoff/xwinograd,ru,en,"Replace",test
441
+ Muennighoff/xwinograd,ru,en,"stand for",test
442
+ Muennighoff/xwinograd,ru,en,"does underscore refer to",test
443
+ Muennighoff/xwinograd,ru,en,"True or False",test
444
+ Muennighoff/xwinograd,jp,en,"underscore refer to",test
445
+ Muennighoff/xwinograd,jp,en,"Replace",test
446
+ Muennighoff/xwinograd,jp,en,"stand for",test
447
+ Muennighoff/xwinograd,jp,en,"does underscore refer to",test
448
+ Muennighoff/xwinograd,jp,en,"True or False",test
449
+ xcopa,et,en,"best_option",validation
450
+ xcopa,et,en,"C1 or C2? premise, so/because…",validation
451
+ xcopa,et,en,"i_am_hesitating",validation
452
+ xcopa,et,en,"cause_effect",validation
453
+ xcopa,et,en,"plausible_alternatives",validation
454
+ xcopa,ht,en,"best_option",validation
455
+ xcopa,ht,en,"C1 or C2? premise, so/because…",validation
456
+ xcopa,ht,en,"i_am_hesitating",validation
457
+ xcopa,ht,en,"cause_effect",validation
458
+ xcopa,ht,en,"plausible_alternatives",validation
459
+ xcopa,it,en,"best_option",validation
460
+ xcopa,it,en,"C1 or C2? premise, so/because…",validation
461
+ xcopa,it,en,"i_am_hesitating",validation
462
+ xcopa,it,en,"cause_effect",validation
463
+ xcopa,it,en,"plausible_alternatives",validation
464
+ xcopa,qu,en,"best_option",validation
465
+ xcopa,qu,en,"C1 or C2? premise, so/because…",validation
466
+ xcopa,qu,en,"i_am_hesitating",validation
467
+ xcopa,qu,en,"cause_effect",validation
468
+ xcopa,qu,en,"plausible_alternatives",validation
469
+ xcopa,th,en,"best_option",validation
470
+ xcopa,th,en,"C1 or C2? premise, so/because…",validation
471
+ xcopa,th,en,"i_am_hesitating",validation
472
+ xcopa,th,en,"cause_effect",validation
473
+ xcopa,th,en,"plausible_alternatives",validation
474
+ xcopa,tr,en,"best_option",validation
475
+ xcopa,tr,en,"C1 or C2? premise, so/because…",validation
476
+ xcopa,tr,en,"i_am_hesitating",validation
477
+ xcopa,tr,en,"cause_effect",validation
478
+ xcopa,tr,en,"plausible_alternatives",validation
479
+ )
480
+
481
+ DATASETS_AND_CONFIGS_MT_L1=(
482
+ Muennighoff/xstory_cloze,ar,ar,"Story Continuation and Options_armt",validation
483
+ Muennighoff/xstory_cloze,ar,ar,"Answer Given options_armt",validation
484
+ Muennighoff/xstory_cloze,ar,ar,"Novel Correct Ending_armt",validation
485
+ Muennighoff/xstory_cloze,ar,ar,"Generate Ending_armt",validation
486
+ Muennighoff/xstory_cloze,ar,ar,"Choose Story Ending_armt",validation
487
+ Muennighoff/xstory_cloze,es,es,"Story Continuation and Options_esmt",validation
488
+ Muennighoff/xstory_cloze,es,es,"Answer Given options_esmt",validation
489
+ Muennighoff/xstory_cloze,es,es,"Novel Correct Ending_esmt",validation
490
+ Muennighoff/xstory_cloze,es,es,"Generate Ending_esmt",validation
491
+ Muennighoff/xstory_cloze,es,es,"Choose Story Ending_esmt",validation
492
+ Muennighoff/xstory_cloze,eu,eu,"Story Continuation and Options_eumt",validation
493
+ Muennighoff/xstory_cloze,eu,eu,"Answer Given options_eumt",validation
494
+ Muennighoff/xstory_cloze,eu,eu,"Novel Correct Ending_eumt",validation
495
+ Muennighoff/xstory_cloze,eu,eu,"Generate Ending_eumt",validation
496
+ Muennighoff/xstory_cloze,eu,eu,"Choose Story Ending_eumt",validation
497
+ Muennighoff/xstory_cloze,id,id,"Story Continuation and Options_idmt",validation
498
+ Muennighoff/xstory_cloze,id,id,"Answer Given options_idmt",validation
499
+ Muennighoff/xstory_cloze,id,id,"Novel Correct Ending_idmt",validation
500
+ Muennighoff/xstory_cloze,id,id,"Generate Ending_idmt",validation
501
+ Muennighoff/xstory_cloze,id,id,"Choose Story Ending_idmt",validation
502
+ Muennighoff/xstory_cloze,hi,hi,"Story Continuation and Options_himt",validation
503
+ Muennighoff/xstory_cloze,hi,hi,"Answer Given options_himt",validation
504
+ Muennighoff/xstory_cloze,hi,hi,"Novel Correct Ending_himt",validation
505
+ Muennighoff/xstory_cloze,hi,hi,"Generate Ending_himt",validation
506
+ Muennighoff/xstory_cloze,hi,hi,"Choose Story Ending_himt",validation
507
+ Muennighoff/xstory_cloze,sw,sw,"Story Continuation and Options_swmt",validation
508
+ Muennighoff/xstory_cloze,sw,sw,"Answer Given options_swmt",validation
509
+ Muennighoff/xstory_cloze,sw,sw,"Novel Correct Ending_swmt",validation
510
+ Muennighoff/xstory_cloze,sw,sw,"Generate Ending_swmt",validation
511
+ Muennighoff/xstory_cloze,sw,sw,"Choose Story Ending_swmt",validation
512
+ Muennighoff/xstory_cloze,te,te,"Story Continuation and Options_temt",validation
513
+ Muennighoff/xstory_cloze,te,te,"Answer Given options_temt",validation
514
+ Muennighoff/xstory_cloze,te,te,"Novel Correct Ending_temt",validation
515
+ Muennighoff/xstory_cloze,te,te,"Generate Ending_temt",validation
516
+ Muennighoff/xstory_cloze,te,te,"Choose Story Ending_temt",validation
517
+ Muennighoff/xstory_cloze,zh,zh,"Story Continuation and Options_zhmt",validation
518
+ Muennighoff/xstory_cloze,zh,zh,"Answer Given options_zhmt",validation
519
+ Muennighoff/xstory_cloze,zh,zh,"Novel Correct Ending_zhmt",validation
520
+ Muennighoff/xstory_cloze,zh,zh,"Generate Ending_zhmt",validation
521
+ Muennighoff/xstory_cloze,zh,zh,"Choose Story Ending_zhmt",validation
522
+ Muennighoff/xwinograd,fr,fr,"underscore refer to_frmt",test
523
+ Muennighoff/xwinograd,fr,fr,"Replace_frmt",test
524
+ Muennighoff/xwinograd,fr,fr,"stand for_frmt",test
525
+ Muennighoff/xwinograd,fr,fr,"does underscore refer to_frmt",test
526
+ Muennighoff/xwinograd,fr,fr,"True or False_frmt",test
527
+ Muennighoff/xwinograd,pt,pt,"underscore refer to_ptmt",test
528
+ Muennighoff/xwinograd,pt,pt,"Replace_ptmt",test
529
+ Muennighoff/xwinograd,pt,pt,"stand for_ptmt",test
530
+ Muennighoff/xwinograd,pt,pt,"does underscore refer to_ptmt",test
531
+ Muennighoff/xwinograd,pt,pt,"True or False_ptmt",test
532
+ Muennighoff/xwinograd,zh,zh,"underscore refer to_zhmt",test
533
+ Muennighoff/xwinograd,zh,zh,"Replace_zhmt",test
534
+ Muennighoff/xwinograd,zh,zh,"stand for_zhmt",test
535
+ Muennighoff/xwinograd,zh,zh,"does underscore refer to_zhmt",test
536
+ Muennighoff/xwinograd,zh,zh,"True or False_zhmt",test
537
+ xcopa,id,id,"best_option_idmt",validation
538
+ xcopa,id,id,"C1 or C2? premise_idmt",validation
539
+ xcopa,id,id,"i_am_hesitating_idmt",validation
540
+ xcopa,id,id,"cause_effect_idmt",validation
541
+ xcopa,id,id,"plausible_alternatives_idmt",validation
542
+ xcopa,sw,sw,"best_option_swmt",validation
543
+ xcopa,sw,sw,"C1 or C2? premise_swmt",validation
544
+ xcopa,sw,sw,"i_am_hesitating_swmt",validation
545
+ xcopa,sw,sw,"cause_effect_swmt",validation
546
+ xcopa,sw,sw,"plausible_alternatives_swmt",validation
547
+ xcopa,ta,ta,"best_option_tamt",validation
548
+ xcopa,ta,ta,"C1 or C2? premise_tamt",validation
549
+ xcopa,ta,ta,"i_am_hesitating_tamt",validation
550
+ xcopa,ta,ta,"cause_effect_tamt",validation
551
+ xcopa,ta,ta,"plausible_alternatives_tamt",validation
552
+ xcopa,vi,vi,"best_option_vimt",validation
553
+ xcopa,vi,vi,"C1 or C2? premise_vimt",validation
554
+ xcopa,vi,vi,"i_am_hesitating_vimt",validation
555
+ xcopa,vi,vi,"cause_effect_vimt",validation
556
+ xcopa,vi,vi,"plausible_alternatives_vimt",validation
557
+ xcopa,zh,zh,"best_option_zhmt",validation
558
+ xcopa,zh,zh,"C1 or C2? premise_zhmt",validation
559
+ xcopa,zh,zh,"i_am_hesitating_zhmt",validation
560
+ xcopa,zh,zh,"cause_effect_zhmt",validation
561
+ xcopa,zh,zh,"plausible_alternatives_zhmt",validation
562
+ )
563
+
564
+ DATASETS_AND_CONFIGS_ZHHT=( # zh-only entries whose template names end in _zhht; each entry is dataset,config,template_config,template,split (split on ',' by the read at the end of this script)
565
+ Muennighoff/xstory_cloze,zh,zh,"Story Continuation and Options_zhht",validation
566
+ Muennighoff/xstory_cloze,zh,zh,"Answer Given options_zhht",validation
567
+ Muennighoff/xstory_cloze,zh,zh,"Novel Correct Ending_zhht",validation
568
+ Muennighoff/xstory_cloze,zh,zh,"Generate Ending_zhht",validation
569
+ Muennighoff/xstory_cloze,zh,zh,"Choose Story Ending_zhht",validation
570
+ Muennighoff/xwinograd,zh,zh,"underscore refer to_zhht",test
571
+ Muennighoff/xwinograd,zh,zh,"Replace_zhht",test
572
+ Muennighoff/xwinograd,zh,zh,"stand for_zhht",test
573
+ Muennighoff/xwinograd,zh,zh,"does underscore refer to_zhht",test
574
+ Muennighoff/xwinograd,zh,zh,"True or False_zhht",test
575
+ xcopa,zh,zh,"best_option_zhht",validation
576
+ xcopa,zh,zh,"C1 or C2? premise_zhht",validation
577
+ xcopa,zh,zh,"i_am_hesitating_zhht",validation
578
+ xcopa,zh,zh,"cause_effect_zhht",validation
579
+ xcopa,zh,zh,"plausible_alternatives_zhht",validation
580
+ )
581
+
582
+ DATASETS_AND_CONFIGS_XNLIHTMT=( # xnli validation entries for ar, es, fr, hi, ur, sw, vi, zh: five *_<lang>ht templates, then the same five as *_<lang>mt (presumably human- vs machine-translated prompts; confirm)
583
+ xnli,ar,ar,"guaranteed/possible/impossible_arht",validation
584
+ xnli,ar,ar,"MNLI crowdsource_arht",validation
585
+ xnli,ar,ar,"GPT-3 style_arht",validation
586
+ xnli,ar,ar,"justified in saying_arht",validation
587
+ xnli,ar,ar,"can we infer_arht",validation
588
+ xnli,ar,ar,"guaranteed/possible/impossible_armt",validation
589
+ xnli,ar,ar,"MNLI crowdsource_armt",validation
590
+ xnli,ar,ar,"GPT-3 style_armt",validation
591
+ xnli,ar,ar,"justified in saying_armt",validation
592
+ xnli,ar,ar,"can we infer_armt",validation
593
+ xnli,es,es,"guaranteed/possible/impossible_esht",validation
594
+ xnli,es,es,"MNLI crowdsource_esht",validation
595
+ xnli,es,es,"GPT-3 style_esht",validation
596
+ xnli,es,es,"justified in saying_esht",validation
597
+ xnli,es,es,"can we infer_esht",validation
598
+ xnli,es,es,"guaranteed/possible/impossible_esmt",validation
599
+ xnli,es,es,"MNLI crowdsource_esmt",validation
600
+ xnli,es,es,"GPT-3 style_esmt",validation
601
+ xnli,es,es,"justified in saying_esmt",validation
602
+ xnli,es,es,"can we infer_esmt",validation
603
+ xnli,fr,fr,"guaranteed/possible/impossible_frht",validation
604
+ xnli,fr,fr,"MNLI crowdsource_frht",validation
605
+ xnli,fr,fr,"GPT-3 style_frht",validation
606
+ xnli,fr,fr,"justified in saying_frht",validation
607
+ xnli,fr,fr,"can we infer_frht",validation
608
+ xnli,fr,fr,"guaranteed/possible/impossible_frmt",validation
609
+ xnli,fr,fr,"MNLI crowdsource_frmt",validation
610
+ xnli,fr,fr,"GPT-3 style_frmt",validation
611
+ xnli,fr,fr,"justified in saying_frmt",validation
612
+ xnli,fr,fr,"can we infer_frmt",validation
613
+ xnli,hi,hi,"guaranteed/possible/impossible_hiht",validation
614
+ xnli,hi,hi,"MNLI crowdsource_hiht",validation
615
+ xnli,hi,hi,"GPT-3 style_hiht",validation
616
+ xnli,hi,hi,"justified in saying_hiht",validation
617
+ xnli,hi,hi,"can we infer_hiht",validation
618
+ xnli,hi,hi,"guaranteed/possible/impossible_himt",validation
619
+ xnli,hi,hi,"MNLI crowdsource_himt",validation
620
+ xnli,hi,hi,"GPT-3 style_himt",validation
621
+ xnli,hi,hi,"justified in saying_himt",validation
622
+ xnli,hi,hi,"can we infer_himt",validation
623
+ xnli,ur,ur,"guaranteed/possible/impossible_urht",validation
624
+ xnli,ur,ur,"MNLI crowdsource_urht",validation
625
+ xnli,ur,ur,"GPT-3 style_urht",validation
626
+ xnli,ur,ur,"justified in saying_urht",validation
627
+ xnli,ur,ur,"can we infer_urht",validation
628
+ xnli,ur,ur,"guaranteed/possible/impossible_urmt",validation
629
+ xnli,ur,ur,"MNLI crowdsource_urmt",validation
630
+ xnli,ur,ur,"GPT-3 style_urmt",validation
631
+ xnli,ur,ur,"justified in saying_urmt",validation
632
+ xnli,ur,ur,"can we infer_urmt",validation
633
+ xnli,sw,sw,"guaranteed/possible/impossible_swht",validation
634
+ xnli,sw,sw,"MNLI crowdsource_swht",validation
635
+ xnli,sw,sw,"GPT-3 style_swht",validation
636
+ xnli,sw,sw,"justified in saying_swht",validation
637
+ xnli,sw,sw,"can we infer_swht",validation
638
+ xnli,sw,sw,"guaranteed/possible/impossible_swmt",validation
639
+ xnli,sw,sw,"MNLI crowdsource_swmt",validation
640
+ xnli,sw,sw,"GPT-3 style_swmt",validation
641
+ xnli,sw,sw,"justified in saying_swmt",validation
642
+ xnli,sw,sw,"can we infer_swmt",validation
643
+ xnli,vi,vi,"guaranteed/possible/impossible_viht",validation
644
+ xnli,vi,vi,"MNLI crowdsource_viht",validation
645
+ xnli,vi,vi,"GPT-3 style_viht",validation
646
+ xnli,vi,vi,"justified in saying_viht",validation
647
+ xnli,vi,vi,"can we infer_viht",validation
648
+ xnli,vi,vi,"guaranteed/possible/impossible_vimt",validation
649
+ xnli,vi,vi,"MNLI crowdsource_vimt",validation
650
+ xnli,vi,vi,"GPT-3 style_vimt",validation
651
+ xnli,vi,vi,"justified in saying_vimt",validation
652
+ xnli,vi,vi,"can we infer_vimt",validation
653
+ xnli,zh,zh,"guaranteed/possible/impossible_zhht",validation
654
+ xnli,zh,zh,"MNLI crowdsource_zhht",validation
655
+ xnli,zh,zh,"GPT-3 style_zhht",validation
656
+ xnli,zh,zh,"justified in saying_zhht",validation
657
+ xnli,zh,zh,"can we infer_zhht",validation
658
+ xnli,zh,zh,"guaranteed/possible/impossible_zhmt",validation
659
+ xnli,zh,zh,"MNLI crowdsource_zhmt",validation
660
+ xnli,zh,zh,"GPT-3 style_zhmt",validation
661
+ xnli,zh,zh,"justified in saying_zhmt",validation
662
+ xnli,zh,zh,"can we infer_zhmt",validation
663
+ )
664
+
665
+ DATASETS_AND_CONFIGS_MT_L2=( # *_<lang>mt template entries: xstory_cloze (my, ru, sw, te), xwinograd (jp, ru), xcopa (et, ht, it, qu, th, tr), xnli (bg, de, el, ru, th, tr)
666
+ Muennighoff/xstory_cloze,my,my,"Story Continuation and Options_mymt",validation
667
+ Muennighoff/xstory_cloze,my,my,"Answer Given options_mymt",validation
668
+ Muennighoff/xstory_cloze,my,my,"Novel Correct Ending_mymt",validation
669
+ Muennighoff/xstory_cloze,my,my,"Generate Ending_mymt",validation
670
+ Muennighoff/xstory_cloze,my,my,"Choose Story Ending_mymt",validation
671
+ Muennighoff/xstory_cloze,ru,ru,"Story Continuation and Options_rumt",validation
672
+ Muennighoff/xstory_cloze,ru,ru,"Answer Given options_rumt",validation
673
+ Muennighoff/xstory_cloze,ru,ru,"Novel Correct Ending_rumt",validation
674
+ Muennighoff/xstory_cloze,ru,ru,"Generate Ending_rumt",validation
675
+ Muennighoff/xstory_cloze,ru,ru,"Choose Story Ending_rumt",validation
676
+ Muennighoff/xstory_cloze,sw,sw,"Story Continuation and Options_swmt",validation
677
+ Muennighoff/xstory_cloze,sw,sw,"Answer Given options_swmt",validation
678
+ Muennighoff/xstory_cloze,sw,sw,"Novel Correct Ending_swmt",validation
679
+ Muennighoff/xstory_cloze,sw,sw,"Generate Ending_swmt",validation
680
+ Muennighoff/xstory_cloze,sw,sw,"Choose Story Ending_swmt",validation
681
+ Muennighoff/xstory_cloze,te,te,"Story Continuation and Options_temt",validation
682
+ Muennighoff/xstory_cloze,te,te,"Answer Given options_temt",validation
683
+ Muennighoff/xstory_cloze,te,te,"Novel Correct Ending_temt",validation
684
+ Muennighoff/xstory_cloze,te,te,"Generate Ending_temt",validation
685
+ Muennighoff/xstory_cloze,te,te,"Choose Story Ending_temt",validation
686
+ Muennighoff/xwinograd,jp,jp,"underscore refer to_jpmt",test
687
+ Muennighoff/xwinograd,jp,jp,"Replace_jpmt",test
688
+ Muennighoff/xwinograd,jp,jp,"stand for_jpmt",test
689
+ Muennighoff/xwinograd,jp,jp,"does underscore refer to_jpmt",test
690
+ Muennighoff/xwinograd,jp,jp,"True or False_jpmt",test
691
+ Muennighoff/xwinograd,ru,ru,"underscore refer to_rumt",test
692
+ Muennighoff/xwinograd,ru,ru,"Replace_rumt",test
693
+ Muennighoff/xwinograd,ru,ru,"stand for_rumt",test
694
+ Muennighoff/xwinograd,ru,ru,"does underscore refer to_rumt",test
695
+ Muennighoff/xwinograd,ru,ru,"True or False_rumt",test
696
+ xcopa,et,et,"best_option_etmt",validation
697
+ xcopa,et,et,"C1 or C2? premise_etmt",validation
698
+ xcopa,et,et,"i_am_hesitating_etmt",validation
699
+ xcopa,et,et,"cause_effect_etmt",validation
700
+ xcopa,et,et,"plausible_alternatives_etmt",validation
701
+ xcopa,ht,ht,"best_option_htmt",validation
702
+ xcopa,ht,ht,"C1 or C2? premise_htmt",validation
703
+ xcopa,ht,ht,"i_am_hesitating_htmt",validation
704
+ xcopa,ht,ht,"cause_effect_htmt",validation
705
+ xcopa,ht,ht,"plausible_alternatives_htmt",validation
706
+ xcopa,it,it,"best_option_itmt",validation
707
+ xcopa,it,it,"C1 or C2? premise_itmt",validation
708
+ xcopa,it,it,"i_am_hesitating_itmt",validation
709
+ xcopa,it,it,"cause_effect_itmt",validation
710
+ xcopa,it,it,"plausible_alternatives_itmt",validation
711
+ xcopa,qu,qu,"best_option_qumt",validation
712
+ xcopa,qu,qu,"C1 or C2? premise_qumt",validation
713
+ xcopa,qu,qu,"i_am_hesitating_qumt",validation
714
+ xcopa,qu,qu,"cause_effect_qumt",validation
715
+ xcopa,qu,qu,"plausible_alternatives_qumt",validation
716
+ xcopa,th,th,"best_option_thmt",validation
717
+ xcopa,th,th,"C1 or C2? premise_thmt",validation
718
+ xcopa,th,th,"i_am_hesitating_thmt",validation
719
+ xcopa,th,th,"cause_effect_thmt",validation
720
+ xcopa,th,th,"plausible_alternatives_thmt",validation
721
+ xcopa,tr,tr,"best_option_trmt",validation
722
+ xcopa,tr,tr,"C1 or C2? premise_trmt",validation
723
+ xcopa,tr,tr,"i_am_hesitating_trmt",validation
724
+ xcopa,tr,tr,"cause_effect_trmt",validation
725
+ xcopa,tr,tr,"plausible_alternatives_trmt",validation
726
+ xnli,bg,bg,"guaranteed/possible/impossible_bgmt",validation
727
+ xnli,bg,bg,"MNLI crowdsource_bgmt",validation
728
+ xnli,bg,bg,"GPT-3 style_bgmt",validation
729
+ xnli,bg,bg,"justified in saying_bgmt",validation
730
+ xnli,bg,bg,"can we infer_bgmt",validation
731
+ xnli,de,de,"guaranteed/possible/impossible_demt",validation
732
+ xnli,de,de,"MNLI crowdsource_demt",validation
733
+ xnli,de,de,"GPT-3 style_demt",validation
734
+ xnli,de,de,"justified in saying_demt",validation
735
+ xnli,de,de,"can we infer_demt",validation
736
+ xnli,el,el,"guaranteed/possible/impossible_elmt",validation
737
+ xnli,el,el,"MNLI crowdsource_elmt",validation
738
+ xnli,el,el,"GPT-3 style_elmt",validation
739
+ xnli,el,el,"justified in saying_elmt",validation
740
+ xnli,el,el,"can we infer_elmt",validation
741
+ xnli,ru,ru,"guaranteed/possible/impossible_rumt",validation
742
+ xnli,ru,ru,"MNLI crowdsource_rumt",validation
743
+ xnli,ru,ru,"GPT-3 style_rumt",validation
744
+ xnli,ru,ru,"justified in saying_rumt",validation
745
+ xnli,ru,ru,"can we infer_rumt",validation
746
+ xnli,th,th,"guaranteed/possible/impossible_thmt",validation
747
+ xnli,th,th,"MNLI crowdsource_thmt",validation
748
+ xnli,th,th,"GPT-3 style_thmt",validation
749
+ xnli,th,th,"justified in saying_thmt",validation
750
+ xnli,th,th,"can we infer_thmt",validation
751
+ xnli,tr,tr,"guaranteed/possible/impossible_trmt",validation
752
+ xnli,tr,tr,"MNLI crowdsource_trmt",validation
753
+ xnli,tr,tr,"GPT-3 style_trmt",validation
754
+ xnli,tr,tr,"justified in saying_trmt",validation
755
+ xnli,tr,tr,"can we infer_trmt",validation
756
+ )
757
+
758
+ DATASETS_AND_CONFIGS_RU=( # ru dataset configs paired with the en template config and unsuffixed template names; 15 entries (indices 0-14), looked up by SLURM_ARRAY_TASK_ID right after this list
759
+ Muennighoff/xstory_cloze,ru,en,"Story Continuation and Options",validation
760
+ Muennighoff/xstory_cloze,ru,en,"Answer Given options",validation
761
+ Muennighoff/xstory_cloze,ru,en,"Novel Correct Ending",validation
762
+ Muennighoff/xstory_cloze,ru,en,"Generate Ending",validation
763
+ Muennighoff/xstory_cloze,ru,en,"Choose Story Ending",validation
764
+ Muennighoff/xwinograd,ru,en,"underscore refer to",test
765
+ Muennighoff/xwinograd,ru,en,"Replace",test
766
+ Muennighoff/xwinograd,ru,en,"stand for",test
767
+ Muennighoff/xwinograd,ru,en,"does underscore refer to",test
768
+ Muennighoff/xwinograd,ru,en,"True or False",test
769
+ xnli,ru,en,"guaranteed/possible/impossible",validation
770
+ xnli,ru,en,"MNLI crowdsource",validation
771
+ xnli,ru,en,"GPT-3 style",validation
772
+ xnli,ru,en,"justified in saying",validation
773
+ xnli,ru,en,"can we infer",validation
774
+ )
775
+
776
+ DATASET_AND_CONFIG=${DATASETS_AND_CONFIGS_RU[$SLURM_ARRAY_TASK_ID]} # each Slurm array task evaluates exactly one entry of the list
777
+ echo "$DATASET_AND_CONFIG" # log the entry this array task will evaluate
778
+ [ -n "$DATASET_AND_CONFIG" ] || { echo "No DATASETS_AND_CONFIGS_RU entry for SLURM_ARRAY_TASK_ID=${SLURM_ARRAY_TASK_ID:-unset}" >&2; exit 1; }
779
+ # Run T0 evaluation
780
+ # For PrefixLM add --prefixlm
781
+ IFS=',' read -r dataset_name dataset_config_name template_config_name template_name split <<< "${DATASET_AND_CONFIG}"
782
+ python promptsource/t-zero/evaluation/run_eval.py \
783
+ --dataset_name "$dataset_name" \
784
+ --dataset_config_name "$dataset_config_name" \
785
+ --template_config_name "$template_config_name" \
786
+ --template_name "$template_name" \
787
+ --split "$split" \
788
+ --model_name_or_path "$CHECKPOINT_PATH" \
789
+ --output_dir "$OUTPUT_DIR" \
790
+ --per_device_eval_batch_size 4 \
791
+ --max_length 2048 \
792
+ --dtype float16
launch.sh ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Launch script using torch.distributed.run(). Used by slurm
4
+ # scripts, don't invoke directly.
5
+ # Arguments: "$@" is passed through unchanged to torch.distributed.run.
6
+ # Samuel's fix for apparent error in SLURM initialization
7
+ if [ "$SLURM_LOCALID" -eq 0 ]; then
8
+ rm -rf /dev/shm/* # only local task 0 clears shared memory; the other tasks just wait below
9
+ rocm-smi || true
10
+ else
11
+ sleep 2
12
+ fi
13
+
14
+ export NCCL_SOCKET_IFNAME=hsn0,hsn1,hsn2,hsn3
15
+ export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
16
+ export FI_CXI_DEFAULT_CQ_SIZE=131072
17
+
18
+ # debugging (noisy)
19
+ #export NCCL_DEBUG=INFO
20
+ #export RCCL_KERNEL_COLL_TRACE_ENABLE=1
21
+ #export NCCL_DEBUG_SUBSYS=INIT,COLL
22
+ # Reset the module environment, then load the toolchain and ROCm/RCCL modules.
23
+ module --quiet purge
24
+ module load cray-python
25
+
26
+ module load CrayEnv
27
+ module load PrgEnv-cray/8.3.3
28
+ module load craype-accel-amd-gfx90a
29
+ module load cray-python
30
+
31
+ module use /pfs/lustrep2/projappl/project_462000125/samantao-public/mymodules
32
+ module load suse-repo-deps/sam-default
33
+ module load rocm/sam-5.2.3.lua
34
+ module load rccl/sam-develop.lua
35
+ module load aws-ofi-rccl/sam-default.lua
36
+
37
+ source /scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/bin/activate
38
+ # Rendezvous endpoint: first hostname of the job's node list; the port stays 9999 unless MASTER_PORT is already set.
39
+ MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
40
+ MASTER_PORT=${MASTER_PORT:-9999}
41
+
42
+ echo "Launching on $SLURMD_NODENAME ($SLURM_PROCID/$SLURM_JOB_NUM_NODES)," \
43
+ "master $MASTER_NODE port $MASTER_PORT," \
44
+ "GPUs $SLURM_GPUS_ON_NODE," \
45
+ "CUDA: $(python -c 'import torch; print(torch.cuda.is_available())')"
46
+ # SLURM_PROCID is used as the node rank; one worker process is started per GPU reported on the node.
47
+ python -u -m torch.distributed.run \
48
+ --nnodes "$SLURM_JOB_NUM_NODES" \
49
+ --nproc_per_node "$SLURM_GPUS_ON_NODE" \
50
+ --node_rank="$SLURM_PROCID" \
51
+ --master_addr "$MASTER_NODE" \
52
+ --master_port "$MASTER_PORT" \
53
+ "$@"
sbatch_mtf_4b_ru.sh ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH --nodes=8
3
+ #SBATCH --ntasks-per-node=1
4
+ #SBATCH --cpus-per-task=32
5
+ #SBATCH --mem=256G
6
+ #SBATCH -p pilot
7
+ #SBATCH -t 48:00:00
8
+ #SBATCH --gpus-per-node=mi250:8
9
+ #SBATCH --exclusive=user
10
+ #SBATCH --hint=nomultithread
11
+ #SBATCH --account=project_462000119
12
+ #SBATCH -o logs/%j.out
13
+ #SBATCH -e logs/%j.err
14
+ # Slurm batch script: writes a DeepSpeed JSON config, then starts Megatron-DeepSpeed/finetune_t0.py through launch.sh via srun.
15
+ # if run without sbatch, invoke here
16
+ #if [ -z $SLURM_JOB_ID ]; then
17
+ # mkdir -p logs
18
+ # sbatch "$0"
19
+ # exit
20
+ #fi
21
+ # Run name; suffixes the kill-switch, checkpoint and tensorboard paths below. NOTE(review): the script name says 4b but VARIANT is 7b1ru2 - confirm which is intended.
22
+ VARIANT=7b1ru2
23
+
24
+ set -euo pipefail
25
+
26
+ # symlink logs/latest.out and logs/latest.err
27
+ ln -f -s "${SLURM_JOB_ID}.out" logs/latest.out
28
+ ln -f -s "${SLURM_JOB_ID}.err" logs/latest.err
29
+
30
+ KILL_SWITCH_PATH=kill-switch-$VARIANT
31
+ CHECKPOINT_PATH=checkpoints_$VARIANT
32
+ TENSORBOARD_PATH=tensorboard_$VARIANT
33
+
34
+ # Data
35
+ TOKENIZER_NAME_OR_PATH=bigscience/tokenizer
36
+
37
+ TRAIN_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3ru_train.txt
38
+ VALID_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3_validation_ru.txt
39
+
40
+ PP_SIZE=1
41
+ TP_SIZE=1
42
+ # Global batch = micro batch * total GPU count * gradient accumulation steps.
43
+ MICRO_BATCH_SIZE=2
44
+ GRADIENT_ACCUMULATION_STEPS=16
45
+ WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES))
46
+ GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS))
47
+
48
+ # Model parameters
49
+ NLAYERS=30
50
+ NHIDDEN=4096
51
+ NHEADS=32
52
+ SEQ_LEN=2048
53
+
54
+ TRAIN_SAMPLES=6_348_800
55
+
56
+ SAVE_INTERVAL=500
57
+
58
+ ZERO_STAGE=1
59
+ # One DeepSpeed config file per job, named after the Slurm job id.
60
+ mkdir -p ds_configs
61
+ config_json="ds_configs/$SLURM_JOB_ID.json"
62
+
63
+ cat <<EOT > "$config_json"
64
+ {
65
+ "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
66
+ "train_batch_size": $GLOBAL_BATCH_SIZE,
67
+ "gradient_clipping": 1.0,
68
+ "zero_optimization": {
69
+ "stage": $ZERO_STAGE
70
+ },
71
+ "fp16": {
72
+ "enabled": true,
73
+ "loss_scale": 0,
74
+ "loss_scale_window": 500,
75
+ "hysteresis": 2,
76
+ "min_loss_scale": 1,
77
+ "initial_scale_power": 12
78
+ },
79
+ "steps_per_print": 2000,
80
+ "wall_clock_breakdown": false
81
+ }
82
+ EOT
83
+ # CMD is one flat string that is deliberately word-split when handed to srun, so no value in it may contain whitespace.
84
+ # NOTE(review): --clip-grad 1.0 appears twice in CMD with the same value; redundant.
85
+ CMD=" \
86
+ Megatron-DeepSpeed/finetune_t0.py \
87
+ --tensor-model-parallel-size $TP_SIZE \
88
+ --pipeline-model-parallel-size $PP_SIZE \
89
+ --num-layers $NLAYERS \
90
+ --hidden-size $NHIDDEN \
91
+ --num-attention-heads $NHEADS \
92
+ --seq-length $SEQ_LEN \
93
+ --max-position-embeddings $SEQ_LEN \
94
+ --micro-batch-size $MICRO_BATCH_SIZE \
95
+ --global-batch-size $GLOBAL_BATCH_SIZE \
96
+ --train-samples $TRAIN_SAMPLES \
97
+ --tokenizer-type PretrainedFromHF \
98
+ --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
99
+ --init-method-std 0.0048 \
100
+ --embed-layernorm \
101
+ --fp16 \
102
+ --seed 42 \
103
+ --position-embedding-type alibi \
104
+ --abort-on-unmet-fused-kernel-constraints \
105
+ --clip-grad 1.0 \
106
+ --kill-switch-path $KILL_SWITCH_PATH \
107
+ --checkpoint-activations \
108
+ --pad-vocab-size-to 250880 \
109
+ --optimizer adam \
110
+ --adam-beta1 0.9 \
111
+ --adam-beta2 0.95 \
112
+ --adam-eps 1e-8 \
113
+ --lr 2e-5 \
114
+ --lr-decay-style constant \
115
+ --lr-warmup-samples 0 \
116
+ --clip-grad 1.0 \
117
+ --weight-decay 1e-4 \
118
+ --no-load-optim \
119
+ --reset-progress \
120
+ --norm-target-loss \
121
+ --log-interval 10 \
122
+ --save-interval $SAVE_INTERVAL \
123
+ --eval-interval 500 \
124
+ --eval-iters 1 \
125
+ --tensorboard-dir $TENSORBOARD_PATH \
126
+ --tensorboard-queue-size 5 \
127
+ --log-timers-to-tensorboard \
128
+ --log-batch-size-to-tensorboard \
129
+ --log-validation-ppl-to-tensorboard \
130
+ --save $CHECKPOINT_PATH \
131
+ --load $CHECKPOINT_PATH \
132
+ --train-weighted-split-paths-path $TRAIN_DATA_PATH \
133
+ --valid-weighted-split-paths-path $VALID_DATA_PATH \
134
+ --dataloader-type single \
135
+ --data-impl mmap \
136
+ --deepspeed \
137
+ --deepspeed_config $config_json \
138
+ --zero-stage $ZERO_STAGE \
139
+ "
140
+ # Left unquoted on purpose: word splitting collapses the padding inside CMD into single spaces for the log.
141
+ echo $CMD
142
+
143
+ echo "START $SLURM_JOB_ID: $(date)"
144
+ # $CMD must stay unquoted here so that each flag and value reaches launch.sh as a separate argument.
145
+ srun --label launch.sh $CMD
146
+
147
+ echo "END $SLURM_JOB_ID: $(date)"
train_ru.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ "train: 1 0:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3rumegds/xp3_ru"