system HF staff committed on
Commit
42852cc
1 Parent(s): f605667

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +224 -208
config.json CHANGED
@@ -1,58 +1,133 @@
1
  {
 
 
 
2
  "architectures": [
3
  "XLMWithLMHeadModel"
4
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237",
6
- "exp_name": "xlm_17_100_big.3",
7
- "save_periodic": 0,
 
 
 
 
 
 
8
  "exp_id": "16656237",
 
9
  "fp16": true,
10
- "amp": 2,
11
- "encoder_only": true,
12
- "emb_dim": 1280,
13
- "n_layers": 16,
14
- "n_heads": 16,
15
- "dropout": 0.1,
16
- "attention_dropout": 0.1,
17
  "gelu_activation": true,
18
- "share_inout_emb": true,
19
- "sinusoidal_embeddings": false,
20
- "use_lang_emb": false,
21
- "use_memory": false,
22
- "asm": false,
23
- "context_size": 0,
24
- "word_pred": 0.15,
25
- "sample_alpha": 0.5,
26
- "word_mask_keep_rand": "0.8,0.1,0.1",
27
- "word_shuffle": 0.0,
28
- "word_dropout": 0.0,
29
- "word_blank": 0.0,
30
- "data_path": "/private/home/aconneau/projects/XLM/data/wiki/17/175k",
31
- "lgs": "en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi",
32
- "max_vocab": 200000,
33
- "min_count": 0,
34
- "lg_sampling_factor": 0.7,
35
- "bptt": 256,
36
- "max_len": 200,
37
  "group_by_size": true,
38
- "batch_size": 16,
39
- "max_batch_size": 0,
40
- "tokens_per_batch": -1,
41
- "split_data": true,
42
- "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
43
- "clip_grad_norm": 1.0,
44
- "epoch_size": 200000,
45
- "max_epoch": 100000,
46
- "stopping_criterion": "_valid_zh_mlm_ppl,25",
47
- "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
48
- "accumulate_gradients": 4,
49
- "lambda_mlm": 1.0,
50
- "lambda_clm": 1.0,
51
- "lambda_pc": 1.0,
 
 
 
 
 
 
 
 
 
 
52
  "lambda_ae": 1.0,
53
- "lambda_mt": 1.0,
54
  "lambda_bt": 1.0,
55
- "clm_steps": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "mlm_steps": [
57
  [
58
  "en",
@@ -123,198 +198,94 @@
123
  null
124
  ]
125
  ],
126
- "mt_steps": [],
127
- "ae_steps": [],
128
- "bt_steps": [],
129
- "pc_steps": [],
130
- "reload_emb": "",
131
- "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth",
132
- "reload_checkpoint": "",
133
- "beam_size": 1,
134
- "length_penalty": 1,
135
- "early_stopping": false,
136
- "eval_bleu": false,
137
- "eval_only": false,
138
- "debug_train": false,
139
- "debug_slurm": false,
140
- "debug": false,
141
- "local_rank": 0,
142
- "master_port": 14148,
143
- "langs": [
144
- "en",
145
- "fr",
146
- "es",
147
- "de",
148
- "it",
149
- "pt",
150
- "nl",
151
- "sv",
152
- "pl",
153
- "ru",
154
- "ar",
155
- "tr",
156
- "zh",
157
- "ja",
158
- "ko",
159
- "hi",
160
- "vi"
161
- ],
162
- "id2lang": {
163
- "0": "ar",
164
- "1": "de",
165
- "2": "en",
166
- "3": "es",
167
- "4": "fr",
168
- "5": "hi",
169
- "6": "it",
170
- "7": "ja",
171
- "8": "ko",
172
- "9": "nl",
173
- "10": "pl",
174
- "11": "pt",
175
- "12": "ru",
176
- "13": "sv",
177
- "14": "tr",
178
- "15": "vi",
179
- "16": "zh"
180
- },
181
- "lang2id": {
182
- "ar": 0,
183
- "de": 1,
184
- "en": 2,
185
- "es": 3,
186
- "fr": 4,
187
- "hi": 5,
188
- "it": 6,
189
- "ja": 7,
190
- "ko": 8,
191
- "nl": 9,
192
- "pl": 10,
193
- "pt": 11,
194
- "ru": 12,
195
- "sv": 13,
196
- "tr": 14,
197
- "vi": 15,
198
- "zh": 16
199
- },
200
- "n_langs": 17,
201
- "bt_src_langs": [],
202
  "mono_dataset": {
 
 
 
 
 
 
 
 
 
 
203
  "en": {
 
204
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.en.pth",
205
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.en.pth",
206
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.en.pth"
207
- },
208
- "fr": {
209
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.fr.pth",
210
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.fr.pth",
211
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.fr.pth"
212
  },
213
  "es": {
 
214
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.es.pth",
215
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.es.pth",
216
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.es.pth"
217
  },
218
- "de": {
219
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.de.pth",
220
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.de.pth",
221
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.de.pth"
 
 
 
 
 
222
  },
223
  "it": {
 
224
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.it.pth",
225
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.it.pth",
226
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.it.pth"
227
  },
228
- "pt": {
229
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pt.pth",
230
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pt.pth",
231
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pt.pth"
 
 
 
 
 
232
  },
233
  "nl": {
 
234
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.nl.pth",
235
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.nl.pth",
236
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.nl.pth"
237
- },
238
- "sv": {
239
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.sv.pth",
240
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.sv.pth",
241
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.sv.pth"
242
  },
243
  "pl": {
 
244
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pl.pth",
245
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pl.pth",
246
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pl.pth"
 
 
 
 
247
  },
248
  "ru": {
 
249
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ru.pth",
250
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ru.pth",
251
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ru.pth"
252
  },
253
- "ar": {
254
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ar.pth",
255
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ar.pth",
256
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ar.pth"
257
  },
258
  "tr": {
 
259
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.tr.pth",
260
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.tr.pth",
261
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.tr.pth"
262
- },
263
- "zh": {
264
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.zh.pth",
265
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.zh.pth",
266
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.zh.pth"
267
- },
268
- "ja": {
269
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ja.pth",
270
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ja.pth",
271
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ja.pth"
272
- },
273
- "ko": {
274
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ko.pth",
275
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ko.pth",
276
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ko.pth"
277
- },
278
- "hi": {
279
- "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.hi.pth",
280
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.hi.pth",
281
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.hi.pth"
282
  },
283
  "vi": {
 
284
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.vi.pth",
285
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.vi.pth",
286
- "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.vi.pth"
 
 
 
 
287
  }
288
  },
289
- "para_dataset": {},
290
- "word_mask": 0.8,
291
- "word_keep": 0.1,
292
- "word_rand": 0.1,
293
- "is_slurm_job": true,
294
- "n_nodes": 4,
295
- "node_id": 0,
296
- "global_rank": 0,
297
- "world_size": 32,
298
- "n_gpu_per_node": 8,
299
- "master_addr": "learnfair1605",
300
- "is_master": true,
301
- "multi_node": true,
302
- "multi_gpu": true,
303
- "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/17/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,fr,es,de,it,pt,nl,sv,pl,ru,ar,tr,zh,ja,ko,hi,vi' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656237 --master_port 14148 --exp_id \"16656237\"",
304
- "n_words": 200000,
305
- "bos_index": 0,
306
- "eos_index": 1,
307
- "pad_index": 2,
308
- "unk_index": 3,
309
- "mask_index": 5,
310
- "lambda_clm_config": null,
311
- "lambda_mlm_config": null,
312
- "lambda_pc_config": null,
313
- "lambda_ae_config": null,
314
- "lambda_mt_config": null,
315
- "lambda_bt_config": null,
316
- "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237/hypotheses",
317
- "ref_paths": {},
318
  "mono_list": [
319
  "en",
320
  "fr",
@@ -334,5 +305,50 @@
334
  "hi",
335
  "vi"
336
  ],
337
- "para_list": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  }
 
1
  {
2
+ "accumulate_gradients": 4,
3
+ "ae_steps": [],
4
+ "amp": 2,
5
  "architectures": [
6
  "XLMWithLMHeadModel"
7
  ],
8
+ "asm": false,
9
+ "attention_dropout": 0.1,
10
+ "batch_size": 16,
11
+ "beam_size": 1,
12
+ "bos_index": 0,
13
+ "bos_token_id": 0,
14
+ "bptt": 256,
15
+ "bt_src_langs": [],
16
+ "bt_steps": [],
17
+ "causal": false,
18
+ "clip_grad_norm": 1.0,
19
+ "clm_steps": [],
20
+ "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/17/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,fr,es,de,it,pt,nl,sv,pl,ru,ar,tr,zh,ja,ko,hi,vi' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656237 --master_port 14148 --exp_id \"16656237\"",
21
+ "context_size": 0,
22
+ "data_path": "/private/home/aconneau/projects/XLM/data/wiki/17/175k",
23
+ "debug": false,
24
+ "debug_slurm": false,
25
+ "debug_train": false,
26
+ "dropout": 0.1,
27
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237",
28
+ "emb_dim": 1280,
29
+ "embed_init_std": 0.02209708691207961,
30
+ "encoder_only": true,
31
+ "end_n_top": 5,
32
+ "eos_index": 1,
33
+ "epoch_size": 200000,
34
+ "eval_bleu": false,
35
+ "eval_only": false,
36
  "exp_id": "16656237",
37
+ "exp_name": "xlm_17_100_big.3",
38
  "fp16": true,
 
 
 
 
 
 
 
39
  "gelu_activation": true,
40
+ "global_rank": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  "group_by_size": true,
42
+ "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237/hypotheses",
43
+ "id2lang": {
44
+ "0": "ar",
45
+ "1": "de",
46
+ "10": "pl",
47
+ "11": "pt",
48
+ "12": "ru",
49
+ "13": "sv",
50
+ "14": "tr",
51
+ "15": "vi",
52
+ "16": "zh",
53
+ "2": "en",
54
+ "3": "es",
55
+ "4": "fr",
56
+ "5": "hi",
57
+ "6": "it",
58
+ "7": "ja",
59
+ "8": "ko",
60
+ "9": "nl"
61
+ },
62
+ "init_std": 0.02,
63
+ "is_encoder": true,
64
+ "is_master": true,
65
+ "is_slurm_job": true,
66
  "lambda_ae": 1.0,
67
+ "lambda_ae_config": null,
68
  "lambda_bt": 1.0,
69
+ "lambda_bt_config": null,
70
+ "lambda_clm": 1.0,
71
+ "lambda_clm_config": null,
72
+ "lambda_mlm": 1.0,
73
+ "lambda_mlm_config": null,
74
+ "lambda_mt": 1.0,
75
+ "lambda_mt_config": null,
76
+ "lambda_pc": 1.0,
77
+ "lambda_pc_config": null,
78
+ "lang2id": {
79
+ "ar": 0,
80
+ "de": 1,
81
+ "en": 2,
82
+ "es": 3,
83
+ "fr": 4,
84
+ "hi": 5,
85
+ "it": 6,
86
+ "ja": 7,
87
+ "ko": 8,
88
+ "nl": 9,
89
+ "pl": 10,
90
+ "pt": 11,
91
+ "ru": 12,
92
+ "sv": 13,
93
+ "tr": 14,
94
+ "vi": 15,
95
+ "zh": 16
96
+ },
97
+ "lang_id": 0,
98
+ "langs": [
99
+ "en",
100
+ "fr",
101
+ "es",
102
+ "de",
103
+ "it",
104
+ "pt",
105
+ "nl",
106
+ "sv",
107
+ "pl",
108
+ "ru",
109
+ "ar",
110
+ "tr",
111
+ "zh",
112
+ "ja",
113
+ "ko",
114
+ "hi",
115
+ "vi"
116
+ ],
117
+ "layer_norm_eps": 1e-12,
118
+ "lg_sampling_factor": 0.7,
119
+ "lgs": "en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi",
120
+ "local_rank": 0,
121
+ "mask_index": 5,
122
+ "mask_token_id": 0,
123
+ "master_addr": "learnfair1605",
124
+ "master_port": 14148,
125
+ "max_batch_size": 0,
126
+ "max_epoch": 100000,
127
+ "max_len": 200,
128
+ "max_position_embeddings": 512,
129
+ "max_vocab": 200000,
130
+ "min_count": 0,
131
  "mlm_steps": [
132
  [
133
  "en",
 
198
  null
199
  ]
200
  ],
201
+ "model_type": "xlm",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  "mono_dataset": {
203
+ "ar": {
204
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ar.pth",
205
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ar.pth",
206
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ar.pth"
207
+ },
208
+ "de": {
209
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.de.pth",
210
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.de.pth",
211
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.de.pth"
212
+ },
213
  "en": {
214
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.en.pth",
215
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.en.pth",
216
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.en.pth"
 
 
 
 
 
 
217
  },
218
  "es": {
219
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.es.pth",
220
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.es.pth",
221
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.es.pth"
 
222
  },
223
+ "fr": {
224
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.fr.pth",
225
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.fr.pth",
226
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.fr.pth"
227
+ },
228
+ "hi": {
229
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.hi.pth",
230
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.hi.pth",
231
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.hi.pth"
232
  },
233
  "it": {
234
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.it.pth",
235
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.it.pth",
236
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.it.pth"
 
237
  },
238
+ "ja": {
239
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ja.pth",
240
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ja.pth",
241
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ja.pth"
242
+ },
243
+ "ko": {
244
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ko.pth",
245
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ko.pth",
246
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ko.pth"
247
  },
248
  "nl": {
249
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.nl.pth",
250
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.nl.pth",
251
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.nl.pth"
 
 
 
 
 
 
252
  },
253
  "pl": {
254
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pl.pth",
255
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pl.pth",
256
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pl.pth"
257
+ },
258
+ "pt": {
259
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pt.pth",
260
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pt.pth",
261
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pt.pth"
262
  },
263
  "ru": {
264
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ru.pth",
265
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ru.pth",
266
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ru.pth"
 
267
  },
268
+ "sv": {
269
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.sv.pth",
270
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.sv.pth",
271
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.sv.pth"
272
  },
273
  "tr": {
274
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.tr.pth",
275
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.tr.pth",
276
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.tr.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  },
278
  "vi": {
279
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.vi.pth",
280
  "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.vi.pth",
281
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.vi.pth"
282
+ },
283
+ "zh": {
284
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.zh.pth",
285
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.zh.pth",
286
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.zh.pth"
287
  }
288
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  "mono_list": [
290
  "en",
291
  "fr",
 
305
  "hi",
306
  "vi"
307
  ],
308
+ "mt_steps": [],
309
+ "multi_gpu": true,
310
+ "multi_node": true,
311
+ "n_gpu_per_node": 8,
312
+ "n_heads": 16,
313
+ "n_langs": 17,
314
+ "n_layers": 16,
315
+ "n_nodes": 4,
316
+ "node_id": 0,
317
+ "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
318
+ "pad_index": 2,
319
+ "pad_token_id": 2,
320
+ "para_dataset": {},
321
+ "para_list": [],
322
+ "pc_steps": [],
323
+ "ref_paths": {},
324
+ "reload_checkpoint": "",
325
+ "reload_emb": "",
326
+ "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth",
327
+ "sample_alpha": 0.5,
328
+ "save_periodic": 0,
329
+ "share_inout_emb": true,
330
+ "sinusoidal_embeddings": false,
331
+ "split_data": true,
332
+ "start_n_top": 5,
333
+ "stopping_criterion": "_valid_zh_mlm_ppl,25",
334
+ "summary_activation": null,
335
+ "summary_first_dropout": 0.1,
336
+ "summary_proj_to_labels": true,
337
+ "summary_type": "first",
338
+ "summary_use_proj": true,
339
+ "tokens_per_batch": -1,
340
+ "unk_index": 3,
341
+ "use_lang_emb": false,
342
+ "use_memory": false,
343
+ "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
344
+ "vocab_size": 200000,
345
+ "word_blank": 0.0,
346
+ "word_dropout": 0.0,
347
+ "word_keep": 0.1,
348
+ "word_mask": 0.8,
349
+ "word_mask_keep_rand": "0.8,0.1,0.1",
350
+ "word_pred": 0.15,
351
+ "word_rand": 0.1,
352
+ "word_shuffle": 0.0,
353
+ "world_size": 32
354
  }