system HF staff committed on
Commit
37261dd
1 Parent(s): fb879ae

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +1 -27
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_num_labels": 2,
3
  "accumulate_gradients": 4,
4
  "ae_steps": [],
5
  "amp": 2,
@@ -24,10 +23,8 @@
24
  "debug": false,
25
  "debug_slurm": false,
26
  "debug_train": false,
27
- "do_sample": false,
28
  "dropout": 0.1,
29
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
30
- "early_stopping": false,
31
  "emb_dim": 1280,
32
  "embed_init_std": 0.02209708691207961,
33
  "encoder_only": true,
@@ -38,16 +35,11 @@
38
  "eval_only": false,
39
  "exp_id": "16656234",
40
  "exp_name": "xlm_17_100_big.3",
41
- "finetuning_task": null,
42
  "fp16": true,
43
  "gelu_activation": true,
44
  "global_rank": 0,
45
  "group_by_size": true,
46
  "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
47
- "id2label": {
48
- "0": "LABEL_0",
49
- "1": "LABEL_1"
50
- },
51
  "id2lang": {
52
  "0": "af",
53
  "1": "als",
@@ -151,14 +143,9 @@
151
  "99": "zh_yue"
152
  },
153
  "init_std": 0.02,
154
- "is_decoder": false,
155
  "is_encoder": true,
156
  "is_master": true,
157
  "is_slurm_job": true,
158
- "label2id": {
159
- "LABEL_0": 0,
160
- "LABEL_1": 1
161
- },
162
  "lambda_ae": 1.0,
163
  "lambda_ae_config": null,
164
  "lambda_bt": 1.0,
@@ -377,7 +364,6 @@
377
  "am"
378
  ],
379
  "layer_norm_eps": 1e-12,
380
- "length_penalty": 1,
381
  "lg_sampling_factor": 0.7,
382
  "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
383
  "local_rank": 0,
@@ -388,7 +374,6 @@
388
  "max_batch_size": 0,
389
  "max_epoch": 100000,
390
  "max_len": 200,
391
- "max_length": 20,
392
  "max_position_embeddings": 512,
393
  "max_vocab": 200000,
394
  "min_count": 0,
@@ -1408,23 +1393,17 @@
1408
  "n_layers": 16,
1409
  "n_nodes": 4,
1410
  "node_id": 0,
1411
- "num_beams": 1,
1412
- "num_return_sequences": 1,
1413
  "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
1414
- "output_attentions": false,
1415
- "output_hidden_states": false,
1416
  "output_past": true,
1417
  "pad_index": 2,
1418
  "pad_token_id": 2,
1419
  "para_dataset": {},
1420
  "para_list": [],
1421
  "pc_steps": [],
1422
- "pruned_heads": {},
1423
  "ref_paths": {},
1424
  "reload_checkpoint": "",
1425
  "reload_emb": "",
1426
  "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
1427
- "repetition_penalty": 1.0,
1428
  "sample_alpha": 0.5,
1429
  "save_periodic": 0,
1430
  "share_inout_emb": true,
@@ -1437,13 +1416,8 @@
1437
  "summary_proj_to_labels": true,
1438
  "summary_type": "first",
1439
  "summary_use_proj": true,
1440
- "temperature": 1.0,
1441
  "tokens_per_batch": -1,
1442
- "top_k": 50,
1443
- "top_p": 1.0,
1444
- "torchscript": false,
1445
  "unk_index": 3,
1446
- "use_bfloat16": false,
1447
  "use_lang_emb": false,
1448
  "use_memory": false,
1449
  "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
@@ -1457,4 +1431,4 @@
1457
  "word_rand": 0.1,
1458
  "word_shuffle": 0.0,
1459
  "world_size": 32
1460
- }
1
  {
 
2
  "accumulate_gradients": 4,
3
  "ae_steps": [],
4
  "amp": 2,
23
  "debug": false,
24
  "debug_slurm": false,
25
  "debug_train": false,
 
26
  "dropout": 0.1,
27
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
 
28
  "emb_dim": 1280,
29
  "embed_init_std": 0.02209708691207961,
30
  "encoder_only": true,
35
  "eval_only": false,
36
  "exp_id": "16656234",
37
  "exp_name": "xlm_17_100_big.3",
 
38
  "fp16": true,
39
  "gelu_activation": true,
40
  "global_rank": 0,
41
  "group_by_size": true,
42
  "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
 
 
 
 
43
  "id2lang": {
44
  "0": "af",
45
  "1": "als",
143
  "99": "zh_yue"
144
  },
145
  "init_std": 0.02,
 
146
  "is_encoder": true,
147
  "is_master": true,
148
  "is_slurm_job": true,
 
 
 
 
149
  "lambda_ae": 1.0,
150
  "lambda_ae_config": null,
151
  "lambda_bt": 1.0,
364
  "am"
365
  ],
366
  "layer_norm_eps": 1e-12,
 
367
  "lg_sampling_factor": 0.7,
368
  "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
369
  "local_rank": 0,
374
  "max_batch_size": 0,
375
  "max_epoch": 100000,
376
  "max_len": 200,
 
377
  "max_position_embeddings": 512,
378
  "max_vocab": 200000,
379
  "min_count": 0,
1393
  "n_layers": 16,
1394
  "n_nodes": 4,
1395
  "node_id": 0,
 
 
1396
  "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
 
 
1397
  "output_past": true,
1398
  "pad_index": 2,
1399
  "pad_token_id": 2,
1400
  "para_dataset": {},
1401
  "para_list": [],
1402
  "pc_steps": [],
 
1403
  "ref_paths": {},
1404
  "reload_checkpoint": "",
1405
  "reload_emb": "",
1406
  "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
 
1407
  "sample_alpha": 0.5,
1408
  "save_periodic": 0,
1409
  "share_inout_emb": true,
1416
  "summary_proj_to_labels": true,
1417
  "summary_type": "first",
1418
  "summary_use_proj": true,
 
1419
  "tokens_per_batch": -1,
 
 
 
1420
  "unk_index": 3,
 
1421
  "use_lang_emb": false,
1422
  "use_memory": false,
1423
  "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
1431
  "word_rand": 0.1,
1432
  "word_shuffle": 0.0,
1433
  "world_size": 32
1434
+ }