system HF staff commited on
Commit
96fe417
1 Parent(s): e5017a1

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +1461 -0
config.json ADDED
@@ -0,0 +1,1461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_num_labels": 2,
3
+ "accumulate_gradients": 4,
4
+ "ae_steps": [],
5
+ "amp": 2,
6
+ "architectures": [
7
+ "XLMForQuestionAnswering"
8
+ ],
9
+ "asm": false,
10
+ "attention_dropout": 0.1,
11
+ "batch_size": 16,
12
+ "beam_size": 1,
13
+ "bos_index": 0,
14
+ "bos_token_id": 0,
15
+ "bptt": 256,
16
+ "bt_src_langs": [],
17
+ "bt_steps": [],
18
+ "causal": false,
19
+ "clip_grad_norm": 1.0,
20
+ "clm_steps": [],
21
+ "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"",
22
+ "context_size": 0,
23
+ "data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k",
24
+ "debug": false,
25
+ "debug_slurm": false,
26
+ "debug_train": false,
27
+ "do_sample": false,
28
+ "dropout": 0.1,
29
+ "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
30
+ "early_stopping": false,
31
+ "emb_dim": 1280,
32
+ "embed_init_std": 0.02209708691207961,
33
+ "encoder_only": true,
34
+ "end_n_top": 5,
35
+ "eos_index": 1,
36
+ "eos_token_ids": null,
37
+ "epoch_size": 200000,
38
+ "eval_bleu": false,
39
+ "eval_only": false,
40
+ "exp_id": "16656234",
41
+ "exp_name": "xlm_17_100_big.3",
42
+ "finetuning_task": null,
43
+ "fp16": true,
44
+ "gelu_activation": true,
45
+ "global_rank": 0,
46
+ "group_by_size": true,
47
+ "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
48
+ "id2label": {
49
+ "0": "LABEL_0",
50
+ "1": "LABEL_1"
51
+ },
52
+ "id2lang": {
53
+ "0": "af",
54
+ "1": "als",
55
+ "10": "be",
56
+ "11": "bg",
57
+ "12": "bn",
58
+ "13": "br",
59
+ "14": "bs",
60
+ "15": "ca",
61
+ "16": "ceb",
62
+ "17": "ckb",
63
+ "18": "cs",
64
+ "19": "cy",
65
+ "2": "am",
66
+ "20": "da",
67
+ "21": "de",
68
+ "22": "el",
69
+ "23": "en",
70
+ "24": "eo",
71
+ "25": "es",
72
+ "26": "et",
73
+ "27": "eu",
74
+ "28": "fa",
75
+ "29": "fi",
76
+ "3": "an",
77
+ "30": "fr",
78
+ "31": "fy",
79
+ "32": "ga",
80
+ "33": "gan",
81
+ "34": "gl",
82
+ "35": "gu",
83
+ "36": "he",
84
+ "37": "hi",
85
+ "38": "hr",
86
+ "39": "hu",
87
+ "4": "ang",
88
+ "40": "hy",
89
+ "41": "ia",
90
+ "42": "id",
91
+ "43": "is",
92
+ "44": "it",
93
+ "45": "ja",
94
+ "46": "jv",
95
+ "47": "ka",
96
+ "48": "kk",
97
+ "49": "kn",
98
+ "5": "ar",
99
+ "50": "ko",
100
+ "51": "ku",
101
+ "52": "la",
102
+ "53": "lb",
103
+ "54": "lt",
104
+ "55": "lv",
105
+ "56": "mk",
106
+ "57": "ml",
107
+ "58": "mn",
108
+ "59": "mr",
109
+ "6": "arz",
110
+ "60": "ms",
111
+ "61": "my",
112
+ "62": "nds",
113
+ "63": "ne",
114
+ "64": "nl",
115
+ "65": "nn",
116
+ "66": "no",
117
+ "67": "oc",
118
+ "68": "pl",
119
+ "69": "pt",
120
+ "7": "ast",
121
+ "70": "ro",
122
+ "71": "ru",
123
+ "72": "scn",
124
+ "73": "sco",
125
+ "74": "sh",
126
+ "75": "si",
127
+ "76": "simple",
128
+ "77": "sk",
129
+ "78": "sl",
130
+ "79": "sq",
131
+ "8": "az",
132
+ "80": "sr",
133
+ "81": "sv",
134
+ "82": "sw",
135
+ "83": "ta",
136
+ "84": "te",
137
+ "85": "th",
138
+ "86": "tl",
139
+ "87": "tr",
140
+ "88": "tt",
141
+ "89": "uk",
142
+ "9": "bar",
143
+ "90": "ur",
144
+ "91": "uz",
145
+ "92": "vi",
146
+ "93": "war",
147
+ "94": "wuu",
148
+ "95": "yi",
149
+ "96": "zh",
150
+ "97": "zh_classical",
151
+ "98": "zh_min_nan",
152
+ "99": "zh_yue"
153
+ },
154
+ "init_std": 0.02,
155
+ "is_decoder": false,
156
+ "is_encoder": true,
157
+ "is_master": true,
158
+ "is_slurm_job": true,
159
+ "label2id": {
160
+ "LABEL_0": 0,
161
+ "LABEL_1": 1
162
+ },
163
+ "lambda_ae": 1.0,
164
+ "lambda_ae_config": null,
165
+ "lambda_bt": 1.0,
166
+ "lambda_bt_config": null,
167
+ "lambda_clm": 1.0,
168
+ "lambda_clm_config": null,
169
+ "lambda_mlm": 1.0,
170
+ "lambda_mlm_config": null,
171
+ "lambda_mt": 1.0,
172
+ "lambda_mt_config": null,
173
+ "lambda_pc": 1.0,
174
+ "lambda_pc_config": null,
175
+ "lang2id": {
176
+ "af": 0,
177
+ "als": 1,
178
+ "am": 2,
179
+ "an": 3,
180
+ "ang": 4,
181
+ "ar": 5,
182
+ "arz": 6,
183
+ "ast": 7,
184
+ "az": 8,
185
+ "bar": 9,
186
+ "be": 10,
187
+ "bg": 11,
188
+ "bn": 12,
189
+ "br": 13,
190
+ "bs": 14,
191
+ "ca": 15,
192
+ "ceb": 16,
193
+ "ckb": 17,
194
+ "cs": 18,
195
+ "cy": 19,
196
+ "da": 20,
197
+ "de": 21,
198
+ "el": 22,
199
+ "en": 23,
200
+ "eo": 24,
201
+ "es": 25,
202
+ "et": 26,
203
+ "eu": 27,
204
+ "fa": 28,
205
+ "fi": 29,
206
+ "fr": 30,
207
+ "fy": 31,
208
+ "ga": 32,
209
+ "gan": 33,
210
+ "gl": 34,
211
+ "gu": 35,
212
+ "he": 36,
213
+ "hi": 37,
214
+ "hr": 38,
215
+ "hu": 39,
216
+ "hy": 40,
217
+ "ia": 41,
218
+ "id": 42,
219
+ "is": 43,
220
+ "it": 44,
221
+ "ja": 45,
222
+ "jv": 46,
223
+ "ka": 47,
224
+ "kk": 48,
225
+ "kn": 49,
226
+ "ko": 50,
227
+ "ku": 51,
228
+ "la": 52,
229
+ "lb": 53,
230
+ "lt": 54,
231
+ "lv": 55,
232
+ "mk": 56,
233
+ "ml": 57,
234
+ "mn": 58,
235
+ "mr": 59,
236
+ "ms": 60,
237
+ "my": 61,
238
+ "nds": 62,
239
+ "ne": 63,
240
+ "nl": 64,
241
+ "nn": 65,
242
+ "no": 66,
243
+ "oc": 67,
244
+ "pl": 68,
245
+ "pt": 69,
246
+ "ro": 70,
247
+ "ru": 71,
248
+ "scn": 72,
249
+ "sco": 73,
250
+ "sh": 74,
251
+ "si": 75,
252
+ "simple": 76,
253
+ "sk": 77,
254
+ "sl": 78,
255
+ "sq": 79,
256
+ "sr": 80,
257
+ "sv": 81,
258
+ "sw": 82,
259
+ "ta": 83,
260
+ "te": 84,
261
+ "th": 85,
262
+ "tl": 86,
263
+ "tr": 87,
264
+ "tt": 88,
265
+ "uk": 89,
266
+ "ur": 90,
267
+ "uz": 91,
268
+ "vi": 92,
269
+ "war": 93,
270
+ "wuu": 94,
271
+ "yi": 95,
272
+ "zh": 96,
273
+ "zh_classical": 97,
274
+ "zh_min_nan": 98,
275
+ "zh_yue": 99
276
+ },
277
+ "lang_id": 0,
278
+ "langs": [
279
+ "en",
280
+ "es",
281
+ "fr",
282
+ "de",
283
+ "zh",
284
+ "ru",
285
+ "pt",
286
+ "it",
287
+ "ar",
288
+ "ja",
289
+ "id",
290
+ "tr",
291
+ "nl",
292
+ "pl",
293
+ "simple",
294
+ "fa",
295
+ "vi",
296
+ "sv",
297
+ "ko",
298
+ "he",
299
+ "ro",
300
+ "no",
301
+ "hi",
302
+ "uk",
303
+ "cs",
304
+ "fi",
305
+ "hu",
306
+ "th",
307
+ "da",
308
+ "ca",
309
+ "el",
310
+ "bg",
311
+ "sr",
312
+ "ms",
313
+ "bn",
314
+ "hr",
315
+ "sl",
316
+ "zh_yue",
317
+ "az",
318
+ "sk",
319
+ "eo",
320
+ "ta",
321
+ "sh",
322
+ "lt",
323
+ "et",
324
+ "ml",
325
+ "la",
326
+ "bs",
327
+ "sq",
328
+ "arz",
329
+ "af",
330
+ "ka",
331
+ "mr",
332
+ "eu",
333
+ "tl",
334
+ "ang",
335
+ "gl",
336
+ "nn",
337
+ "ur",
338
+ "kk",
339
+ "be",
340
+ "hy",
341
+ "te",
342
+ "lv",
343
+ "mk",
344
+ "zh_classical",
345
+ "als",
346
+ "is",
347
+ "wuu",
348
+ "my",
349
+ "sco",
350
+ "mn",
351
+ "ceb",
352
+ "ast",
353
+ "cy",
354
+ "kn",
355
+ "br",
356
+ "an",
357
+ "gu",
358
+ "bar",
359
+ "uz",
360
+ "lb",
361
+ "ne",
362
+ "si",
363
+ "war",
364
+ "jv",
365
+ "ga",
366
+ "zh_min_nan",
367
+ "oc",
368
+ "ku",
369
+ "sw",
370
+ "nds",
371
+ "ckb",
372
+ "ia",
373
+ "yi",
374
+ "fy",
375
+ "scn",
376
+ "gan",
377
+ "tt",
378
+ "am"
379
+ ],
380
+ "layer_norm_eps": 1e-12,
381
+ "length_penalty": 1,
382
+ "lg_sampling_factor": 0.7,
383
+ "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
384
+ "local_rank": 0,
385
+ "mask_index": 5,
386
+ "mask_token_id": 0,
387
+ "master_addr": "learnfair0332",
388
+ "master_port": 11363,
389
+ "max_batch_size": 0,
390
+ "max_epoch": 100000,
391
+ "max_len": 200,
392
+ "max_length": 20,
393
+ "max_position_embeddings": 512,
394
+ "max_vocab": 200000,
395
+ "min_count": 0,
396
+ "mlm_steps": [
397
+ [
398
+ "en",
399
+ null
400
+ ],
401
+ [
402
+ "es",
403
+ null
404
+ ],
405
+ [
406
+ "fr",
407
+ null
408
+ ],
409
+ [
410
+ "de",
411
+ null
412
+ ],
413
+ [
414
+ "zh",
415
+ null
416
+ ],
417
+ [
418
+ "ru",
419
+ null
420
+ ],
421
+ [
422
+ "pt",
423
+ null
424
+ ],
425
+ [
426
+ "it",
427
+ null
428
+ ],
429
+ [
430
+ "ar",
431
+ null
432
+ ],
433
+ [
434
+ "ja",
435
+ null
436
+ ],
437
+ [
438
+ "id",
439
+ null
440
+ ],
441
+ [
442
+ "tr",
443
+ null
444
+ ],
445
+ [
446
+ "nl",
447
+ null
448
+ ],
449
+ [
450
+ "pl",
451
+ null
452
+ ],
453
+ [
454
+ "simple",
455
+ null
456
+ ],
457
+ [
458
+ "fa",
459
+ null
460
+ ],
461
+ [
462
+ "vi",
463
+ null
464
+ ],
465
+ [
466
+ "sv",
467
+ null
468
+ ],
469
+ [
470
+ "ko",
471
+ null
472
+ ],
473
+ [
474
+ "he",
475
+ null
476
+ ],
477
+ [
478
+ "ro",
479
+ null
480
+ ],
481
+ [
482
+ "no",
483
+ null
484
+ ],
485
+ [
486
+ "hi",
487
+ null
488
+ ],
489
+ [
490
+ "uk",
491
+ null
492
+ ],
493
+ [
494
+ "cs",
495
+ null
496
+ ],
497
+ [
498
+ "fi",
499
+ null
500
+ ],
501
+ [
502
+ "hu",
503
+ null
504
+ ],
505
+ [
506
+ "th",
507
+ null
508
+ ],
509
+ [
510
+ "da",
511
+ null
512
+ ],
513
+ [
514
+ "ca",
515
+ null
516
+ ],
517
+ [
518
+ "el",
519
+ null
520
+ ],
521
+ [
522
+ "bg",
523
+ null
524
+ ],
525
+ [
526
+ "sr",
527
+ null
528
+ ],
529
+ [
530
+ "ms",
531
+ null
532
+ ],
533
+ [
534
+ "bn",
535
+ null
536
+ ],
537
+ [
538
+ "hr",
539
+ null
540
+ ],
541
+ [
542
+ "sl",
543
+ null
544
+ ],
545
+ [
546
+ "zh_yue",
547
+ null
548
+ ],
549
+ [
550
+ "az",
551
+ null
552
+ ],
553
+ [
554
+ "sk",
555
+ null
556
+ ],
557
+ [
558
+ "eo",
559
+ null
560
+ ],
561
+ [
562
+ "ta",
563
+ null
564
+ ],
565
+ [
566
+ "sh",
567
+ null
568
+ ],
569
+ [
570
+ "lt",
571
+ null
572
+ ],
573
+ [
574
+ "et",
575
+ null
576
+ ],
577
+ [
578
+ "ml",
579
+ null
580
+ ],
581
+ [
582
+ "la",
583
+ null
584
+ ],
585
+ [
586
+ "bs",
587
+ null
588
+ ],
589
+ [
590
+ "sq",
591
+ null
592
+ ],
593
+ [
594
+ "arz",
595
+ null
596
+ ],
597
+ [
598
+ "af",
599
+ null
600
+ ],
601
+ [
602
+ "ka",
603
+ null
604
+ ],
605
+ [
606
+ "mr",
607
+ null
608
+ ],
609
+ [
610
+ "eu",
611
+ null
612
+ ],
613
+ [
614
+ "tl",
615
+ null
616
+ ],
617
+ [
618
+ "ang",
619
+ null
620
+ ],
621
+ [
622
+ "gl",
623
+ null
624
+ ],
625
+ [
626
+ "nn",
627
+ null
628
+ ],
629
+ [
630
+ "ur",
631
+ null
632
+ ],
633
+ [
634
+ "kk",
635
+ null
636
+ ],
637
+ [
638
+ "be",
639
+ null
640
+ ],
641
+ [
642
+ "hy",
643
+ null
644
+ ],
645
+ [
646
+ "te",
647
+ null
648
+ ],
649
+ [
650
+ "lv",
651
+ null
652
+ ],
653
+ [
654
+ "mk",
655
+ null
656
+ ],
657
+ [
658
+ "zh_classical",
659
+ null
660
+ ],
661
+ [
662
+ "als",
663
+ null
664
+ ],
665
+ [
666
+ "is",
667
+ null
668
+ ],
669
+ [
670
+ "wuu",
671
+ null
672
+ ],
673
+ [
674
+ "my",
675
+ null
676
+ ],
677
+ [
678
+ "sco",
679
+ null
680
+ ],
681
+ [
682
+ "mn",
683
+ null
684
+ ],
685
+ [
686
+ "ceb",
687
+ null
688
+ ],
689
+ [
690
+ "ast",
691
+ null
692
+ ],
693
+ [
694
+ "cy",
695
+ null
696
+ ],
697
+ [
698
+ "kn",
699
+ null
700
+ ],
701
+ [
702
+ "br",
703
+ null
704
+ ],
705
+ [
706
+ "an",
707
+ null
708
+ ],
709
+ [
710
+ "gu",
711
+ null
712
+ ],
713
+ [
714
+ "bar",
715
+ null
716
+ ],
717
+ [
718
+ "uz",
719
+ null
720
+ ],
721
+ [
722
+ "lb",
723
+ null
724
+ ],
725
+ [
726
+ "ne",
727
+ null
728
+ ],
729
+ [
730
+ "si",
731
+ null
732
+ ],
733
+ [
734
+ "war",
735
+ null
736
+ ],
737
+ [
738
+ "jv",
739
+ null
740
+ ],
741
+ [
742
+ "ga",
743
+ null
744
+ ],
745
+ [
746
+ "zh_min_nan",
747
+ null
748
+ ],
749
+ [
750
+ "oc",
751
+ null
752
+ ],
753
+ [
754
+ "ku",
755
+ null
756
+ ],
757
+ [
758
+ "sw",
759
+ null
760
+ ],
761
+ [
762
+ "nds",
763
+ null
764
+ ],
765
+ [
766
+ "ckb",
767
+ null
768
+ ],
769
+ [
770
+ "ia",
771
+ null
772
+ ],
773
+ [
774
+ "yi",
775
+ null
776
+ ],
777
+ [
778
+ "fy",
779
+ null
780
+ ],
781
+ [
782
+ "scn",
783
+ null
784
+ ],
785
+ [
786
+ "gan",
787
+ null
788
+ ],
789
+ [
790
+ "tt",
791
+ null
792
+ ],
793
+ [
794
+ "am",
795
+ null
796
+ ]
797
+ ],
798
+ "model_type": "xlm",
799
+ "mono_dataset": {
800
+ "af": {
801
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth",
802
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth",
803
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth"
804
+ },
805
+ "als": {
806
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth",
807
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth",
808
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth"
809
+ },
810
+ "am": {
811
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth",
812
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth",
813
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth"
814
+ },
815
+ "an": {
816
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth",
817
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth",
818
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth"
819
+ },
820
+ "ang": {
821
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth",
822
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth",
823
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth"
824
+ },
825
+ "ar": {
826
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth",
827
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth",
828
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth"
829
+ },
830
+ "arz": {
831
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth",
832
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth",
833
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth"
834
+ },
835
+ "ast": {
836
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth",
837
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth",
838
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth"
839
+ },
840
+ "az": {
841
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth",
842
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth",
843
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth"
844
+ },
845
+ "bar": {
846
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth",
847
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth",
848
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth"
849
+ },
850
+ "be": {
851
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth",
852
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth",
853
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth"
854
+ },
855
+ "bg": {
856
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth",
857
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth",
858
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth"
859
+ },
860
+ "bn": {
861
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth",
862
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth",
863
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth"
864
+ },
865
+ "br": {
866
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth",
867
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth",
868
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth"
869
+ },
870
+ "bs": {
871
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth",
872
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth",
873
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth"
874
+ },
875
+ "ca": {
876
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth",
877
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth",
878
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth"
879
+ },
880
+ "ceb": {
881
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth",
882
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth",
883
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth"
884
+ },
885
+ "ckb": {
886
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth",
887
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth",
888
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth"
889
+ },
890
+ "cs": {
891
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth",
892
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth",
893
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth"
894
+ },
895
+ "cy": {
896
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth",
897
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth",
898
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth"
899
+ },
900
+ "da": {
901
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth",
902
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth",
903
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth"
904
+ },
905
+ "de": {
906
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth",
907
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth",
908
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth"
909
+ },
910
+ "el": {
911
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth",
912
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth",
913
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth"
914
+ },
915
+ "en": {
916
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth",
917
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth",
918
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth"
919
+ },
920
+ "eo": {
921
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth",
922
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth",
923
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth"
924
+ },
925
+ "es": {
926
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth",
927
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth",
928
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth"
929
+ },
930
+ "et": {
931
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth",
932
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth",
933
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth"
934
+ },
935
+ "eu": {
936
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth",
937
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth",
938
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth"
939
+ },
940
+ "fa": {
941
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth",
942
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth",
943
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth"
944
+ },
945
+ "fi": {
946
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth",
947
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth",
948
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth"
949
+ },
950
+ "fr": {
951
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth",
952
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth",
953
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth"
954
+ },
955
+ "fy": {
956
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth",
957
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth",
958
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth"
959
+ },
960
+ "ga": {
961
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth",
962
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth",
963
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth"
964
+ },
965
+ "gan": {
966
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth",
967
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth",
968
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth"
969
+ },
970
+ "gl": {
971
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth",
972
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth",
973
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth"
974
+ },
975
+ "gu": {
976
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth",
977
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth",
978
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth"
979
+ },
980
+ "he": {
981
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth",
982
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth",
983
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth"
984
+ },
985
+ "hi": {
986
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth",
987
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth",
988
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth"
989
+ },
990
+ "hr": {
991
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth",
992
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth",
993
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth"
994
+ },
995
+ "hu": {
996
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth",
997
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth",
998
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth"
999
+ },
1000
+ "hy": {
1001
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth",
1002
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth",
1003
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth"
1004
+ },
1005
+ "ia": {
1006
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth",
1007
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth",
1008
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth"
1009
+ },
1010
+ "id": {
1011
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth",
1012
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth",
1013
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth"
1014
+ },
1015
+ "is": {
1016
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth",
1017
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth",
1018
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth"
1019
+ },
1020
+ "it": {
1021
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth",
1022
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth",
1023
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth"
1024
+ },
1025
+ "ja": {
1026
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth",
1027
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth",
1028
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth"
1029
+ },
1030
+ "jv": {
1031
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth",
1032
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth",
1033
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth"
1034
+ },
1035
+ "ka": {
1036
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth",
1037
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth",
1038
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth"
1039
+ },
1040
+ "kk": {
1041
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth",
1042
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth",
1043
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth"
1044
+ },
1045
+ "kn": {
1046
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth",
1047
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth",
1048
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth"
1049
+ },
1050
+ "ko": {
1051
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth",
1052
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth",
1053
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth"
1054
+ },
1055
+ "ku": {
1056
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth",
1057
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth",
1058
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth"
1059
+ },
1060
+ "la": {
1061
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth",
1062
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth",
1063
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth"
1064
+ },
1065
+ "lb": {
1066
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth",
1067
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth",
1068
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth"
1069
+ },
1070
+ "lt": {
1071
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth",
1072
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth",
1073
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth"
1074
+ },
1075
+ "lv": {
1076
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth",
1077
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth",
1078
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth"
1079
+ },
1080
+ "mk": {
1081
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth",
1082
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth",
1083
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth"
1084
+ },
1085
+ "ml": {
1086
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth",
1087
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth",
1088
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth"
1089
+ },
1090
+ "mn": {
1091
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth",
1092
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth",
1093
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth"
1094
+ },
1095
+ "mr": {
1096
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth",
1097
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth",
1098
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth"
1099
+ },
1100
+ "ms": {
1101
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth",
1102
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth",
1103
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth"
1104
+ },
1105
+ "my": {
1106
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth",
1107
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth",
1108
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth"
1109
+ },
1110
+ "nds": {
1111
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth",
1112
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth",
1113
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth"
1114
+ },
1115
+ "ne": {
1116
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth",
1117
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth",
1118
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth"
1119
+ },
1120
+ "nl": {
1121
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth",
1122
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth",
1123
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth"
1124
+ },
1125
+ "nn": {
1126
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth",
1127
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth",
1128
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth"
1129
+ },
1130
+ "no": {
1131
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth",
1132
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth",
1133
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth"
1134
+ },
1135
+ "oc": {
1136
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth",
1137
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth",
1138
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth"
1139
+ },
1140
+ "pl": {
1141
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth",
1142
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth",
1143
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth"
1144
+ },
1145
+ "pt": {
1146
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth",
1147
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth",
1148
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth"
1149
+ },
1150
+ "ro": {
1151
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth",
1152
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth",
1153
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth"
1154
+ },
1155
+ "ru": {
1156
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth",
1157
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth",
1158
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth"
1159
+ },
1160
+ "scn": {
1161
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth",
1162
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth",
1163
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth"
1164
+ },
1165
+ "sco": {
1166
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth",
1167
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth",
1168
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth"
1169
+ },
1170
+ "sh": {
1171
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth",
1172
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth",
1173
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth"
1174
+ },
1175
+ "si": {
1176
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth",
1177
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth",
1178
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth"
1179
+ },
1180
+ "simple": {
1181
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth",
1182
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth",
1183
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth"
1184
+ },
1185
+ "sk": {
1186
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth",
1187
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth",
1188
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth"
1189
+ },
1190
+ "sl": {
1191
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth",
1192
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth",
1193
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth"
1194
+ },
1195
+ "sq": {
1196
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth",
1197
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth",
1198
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth"
1199
+ },
1200
+ "sr": {
1201
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth",
1202
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth",
1203
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth"
1204
+ },
1205
+ "sv": {
1206
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth",
1207
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth",
1208
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth"
1209
+ },
1210
+ "sw": {
1211
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth",
1212
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth",
1213
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth"
1214
+ },
1215
+ "ta": {
1216
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth",
1217
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth",
1218
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth"
1219
+ },
1220
+ "te": {
1221
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth",
1222
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth",
1223
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth"
1224
+ },
1225
+ "th": {
1226
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth",
1227
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth",
1228
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth"
1229
+ },
1230
+ "tl": {
1231
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth",
1232
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth",
1233
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth"
1234
+ },
1235
+ "tr": {
1236
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth",
1237
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth",
1238
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth"
1239
+ },
1240
+ "tt": {
1241
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth",
1242
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth",
1243
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth"
1244
+ },
1245
+ "uk": {
1246
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth",
1247
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth",
1248
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth"
1249
+ },
1250
+ "ur": {
1251
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth",
1252
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth",
1253
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth"
1254
+ },
1255
+ "uz": {
1256
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth",
1257
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth",
1258
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth"
1259
+ },
1260
+ "vi": {
1261
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth",
1262
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth",
1263
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth"
1264
+ },
1265
+ "war": {
1266
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth",
1267
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth",
1268
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth"
1269
+ },
1270
+ "wuu": {
1271
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth",
1272
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth",
1273
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth"
1274
+ },
1275
+ "yi": {
1276
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth",
1277
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth",
1278
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth"
1279
+ },
1280
+ "zh": {
1281
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth",
1282
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth",
1283
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth"
1284
+ },
1285
+ "zh_classical": {
1286
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth",
1287
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth",
1288
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth"
1289
+ },
1290
+ "zh_min_nan": {
1291
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth",
1292
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth",
1293
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth"
1294
+ },
1295
+ "zh_yue": {
1296
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth",
1297
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth",
1298
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth"
1299
+ }
1300
+ },
1301
+ "mono_list": [
1302
+ "en",
1303
+ "es",
1304
+ "fr",
1305
+ "de",
1306
+ "zh",
1307
+ "ru",
1308
+ "pt",
1309
+ "it",
1310
+ "ar",
1311
+ "ja",
1312
+ "id",
1313
+ "tr",
1314
+ "nl",
1315
+ "pl",
1316
+ "simple",
1317
+ "fa",
1318
+ "vi",
1319
+ "sv",
1320
+ "ko",
1321
+ "he",
1322
+ "ro",
1323
+ "no",
1324
+ "hi",
1325
+ "uk",
1326
+ "cs",
1327
+ "fi",
1328
+ "hu",
1329
+ "th",
1330
+ "da",
1331
+ "ca",
1332
+ "el",
1333
+ "bg",
1334
+ "sr",
1335
+ "ms",
1336
+ "bn",
1337
+ "hr",
1338
+ "sl",
1339
+ "zh_yue",
1340
+ "az",
1341
+ "sk",
1342
+ "eo",
1343
+ "ta",
1344
+ "sh",
1345
+ "lt",
1346
+ "et",
1347
+ "ml",
1348
+ "la",
1349
+ "bs",
1350
+ "sq",
1351
+ "arz",
1352
+ "af",
1353
+ "ka",
1354
+ "mr",
1355
+ "eu",
1356
+ "tl",
1357
+ "ang",
1358
+ "gl",
1359
+ "nn",
1360
+ "ur",
1361
+ "kk",
1362
+ "be",
1363
+ "hy",
1364
+ "te",
1365
+ "lv",
1366
+ "mk",
1367
+ "zh_classical",
1368
+ "als",
1369
+ "is",
1370
+ "wuu",
1371
+ "my",
1372
+ "sco",
1373
+ "mn",
1374
+ "ceb",
1375
+ "ast",
1376
+ "cy",
1377
+ "kn",
1378
+ "br",
1379
+ "an",
1380
+ "gu",
1381
+ "bar",
1382
+ "uz",
1383
+ "lb",
1384
+ "ne",
1385
+ "si",
1386
+ "war",
1387
+ "jv",
1388
+ "ga",
1389
+ "zh_min_nan",
1390
+ "oc",
1391
+ "ku",
1392
+ "sw",
1393
+ "nds",
1394
+ "ckb",
1395
+ "ia",
1396
+ "yi",
1397
+ "fy",
1398
+ "scn",
1399
+ "gan",
1400
+ "tt",
1401
+ "am"
1402
+ ],
1403
+ "mt_steps": [],
1404
+ "multi_gpu": true,
1405
+ "multi_node": true,
1406
+ "n_gpu_per_node": 8,
1407
+ "n_heads": 16,
1408
+ "n_langs": 100,
1409
+ "n_layers": 16,
1410
+ "n_nodes": 4,
1411
+ "node_id": 0,
1412
+ "num_beams": 1,
1413
+ "num_return_sequences": 1,
1414
+ "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
1415
+ "output_attentions": false,
1416
+ "output_hidden_states": false,
1417
+ "output_past": true,
1418
+ "pad_index": 2,
1419
+ "pad_token_id": 2,
1420
+ "para_dataset": {},
1421
+ "para_list": [],
1422
+ "pc_steps": [],
1423
+ "pruned_heads": {},
1424
+ "ref_paths": {},
1425
+ "reload_checkpoint": "",
1426
+ "reload_emb": "",
1427
+ "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
1428
+ "repetition_penalty": 1.0,
1429
+ "sample_alpha": 0.5,
1430
+ "save_periodic": 0,
1431
+ "share_inout_emb": true,
1432
+ "sinusoidal_embeddings": false,
1433
+ "split_data": true,
1434
+ "start_n_top": 5,
1435
+ "stopping_criterion": "_valid_zh_mlm_ppl,25",
1436
+ "summary_activation": null,
1437
+ "summary_first_dropout": 0.1,
1438
+ "summary_proj_to_labels": true,
1439
+ "summary_type": "first",
1440
+ "summary_use_proj": true,
1441
+ "temperature": 1.0,
1442
+ "tokens_per_batch": -1,
1443
+ "top_k": 50,
1444
+ "top_p": 1.0,
1445
+ "torchscript": false,
1446
+ "unk_index": 3,
1447
+ "use_bfloat16": false,
1448
+ "use_lang_emb": false,
1449
+ "use_memory": false,
1450
+ "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
1451
+ "vocab_size": 200000,
1452
+ "word_blank": 0.0,
1453
+ "word_dropout": 0.0,
1454
+ "word_keep": 0.1,
1455
+ "word_mask": 0.8,
1456
+ "word_mask_keep_rand": "0.8,0.1,0.1",
1457
+ "word_pred": 0.15,
1458
+ "word_rand": 0.1,
1459
+ "word_shuffle": 0.0,
1460
+ "world_size": 32
1461
+ }