system HF staff committed on
Commit
1eb3dc6
1 Parent(s): d4fecd7

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +153 -0
config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "accumulate_gradients": 1,
3
+ "ae_steps": [],
4
+ "amp": 1,
5
+ "architectures": [
6
+ "XLMModel"
7
+ ],
8
+ "asm": false,
9
+ "attention_dropout": 0.1,
10
+ "batch_size": 64,
11
+ "beam_size": 1,
12
+ "bos_index": 0,
13
+ "bos_token_id": 0,
14
+ "bptt": 256,
15
+ "bt_src_langs": [],
16
+ "bt_steps": [],
17
+ "causal": false,
18
+ "clip_grad_norm": 5,
19
+ "clm_steps": [],
20
+ "command": "python train.py --local_rank=0 --exp_name unihan_zh_ja --dump_path '/mnt/exp/ft_char' --data_path 'data/processed/xlm_zh_ja/new' --lgs 'zh-ja' --clm_steps '' --mlm_steps 'ja,zh' --emb_dim 1024 --n_layers 12 --n_heads 16 --dropout '0.1' --attention_dropout '0.1' --gelu_activation true --batch_size 64 --bptt 256 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --epoch_size 300000 --max_epoch 100000 --validation_metrics _valid_mlm_ppl --stopping_criterion '_valid_mlm_ppl,25' --fp16 true --amp 1 --exp_id epoch169 --reload_model '/mnt/exp/hard_pretrain/unihan_zh_ja/recycled/converted-best-valid_mlm_ppl.pth' --exp_id \"epoch169\"",
21
+ "context_size": 0,
22
+ "data_path": "data/processed/xlm_zh_ja/new",
23
+ "debug": false,
24
+ "debug_slurm": false,
25
+ "debug_train": false,
26
+ "dropout": 0.1,
27
+ "dump_path": "/mnt/exp/ft_char/unihan_zh_ja/epoch169",
28
+ "emb_dim": 1024,
29
+ "embed_init_std": 0.02209708691207961,
30
+ "encoder_only": true,
31
+ "end_n_top": 5,
32
+ "eos_index": 1,
33
+ "epoch_size": 300000,
34
+ "eval_bleu": false,
35
+ "eval_only": false,
36
+ "exp_id": "epoch169",
37
+ "exp_name": "unihan_zh_ja",
38
+ "fp16": true,
39
+ "gelu_activation": true,
40
+ "global_rank": 0,
41
+ "group_by_size": true,
42
+ "hyp_path": "/mnt/exp/ft_char/unihan_zh_ja/epoch169/hypotheses",
43
+ "id2lang": {
44
+ "0": "ja",
45
+ "1": "zh"
46
+ },
47
+ "init_std": 0.02,
48
+ "is_encoder": true,
49
+ "is_master": true,
50
+ "is_slurm_job": false,
51
+ "lambda_ae": 1.0,
52
+ "lambda_ae_config": null,
53
+ "lambda_bt": 1.0,
54
+ "lambda_bt_config": null,
55
+ "lambda_clm": 1.0,
56
+ "lambda_clm_config": null,
57
+ "lambda_mlm": 1.0,
58
+ "lambda_mlm_config": null,
59
+ "lambda_mt": 1.0,
60
+ "lambda_mt_config": null,
61
+ "lambda_pc": 1.0,
62
+ "lambda_pc_config": null,
63
+ "lang2id": {
64
+ "ja": 0,
65
+ "zh": 1
66
+ },
67
+ "lang_id": 0,
68
+ "langs": [
69
+ "zh",
70
+ "ja"
71
+ ],
72
+ "layer_norm_eps": 1e-12,
73
+ "lg_sampling_factor": -1,
74
+ "lgs": "zh-ja",
75
+ "local_rank": 0,
76
+ "mask_index": 5,
77
+ "mask_token_id": 0,
78
+ "master_port": -1,
79
+ "max_batch_size": 0,
80
+ "max_epoch": 100000,
81
+ "max_len": 100,
82
+ "max_position_embeddings": 512,
83
+ "max_vocab": -1,
84
+ "min_count": 0,
85
+ "mlm_steps": [
86
+ [
87
+ "ja",
88
+ null
89
+ ],
90
+ [
91
+ "zh",
92
+ null
93
+ ]
94
+ ],
95
+ "model_type": "xlm",
96
+ "mono_dataset": {
97
+ "ja": {
98
+ "test": "data/processed/xlm_zh_ja/new/test.ja.pth",
99
+ "train": "data/processed/xlm_zh_ja/new/train.ja.pth",
100
+ "valid": "data/processed/xlm_zh_ja/new/valid.ja.pth"
101
+ },
102
+ "zh": {
103
+ "test": "data/processed/xlm_zh_ja/new/test.zh.pth",
104
+ "train": "data/processed/xlm_zh_ja/new/train.zh.pth",
105
+ "valid": "data/processed/xlm_zh_ja/new/valid.zh.pth"
106
+ }
107
+ },
108
+ "mt_steps": [],
109
+ "multi_gpu": true,
110
+ "multi_node": false,
111
+ "n_gpu_per_node": 8,
112
+ "n_heads": 16,
113
+ "n_langs": 2,
114
+ "n_layers": 12,
115
+ "n_nodes": 1,
116
+ "node_id": 0,
117
+ "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
118
+ "pad_index": 2,
119
+ "pad_token_id": 2,
120
+ "para_dataset": {},
121
+ "pc_steps": [],
122
+ "ref_paths": {},
123
+ "reload_checkpoint": "",
124
+ "reload_emb": "",
125
+ "reload_model": "/mnt/exp/hard_pretrain/unihan_zh_ja/recycled/converted-best-valid_mlm_ppl.pth",
126
+ "sample_alpha": 0,
127
+ "save_periodic": 0,
128
+ "share_inout_emb": true,
129
+ "sinusoidal_embeddings": false,
130
+ "split_data": false,
131
+ "start_n_top": 5,
132
+ "stopping_criterion": "_valid_mlm_ppl,25",
133
+ "summary_activation": null,
134
+ "summary_first_dropout": 0.1,
135
+ "summary_proj_to_labels": true,
136
+ "summary_type": "first",
137
+ "summary_use_proj": true,
138
+ "tokens_per_batch": -1,
139
+ "unk_index": 3,
140
+ "use_lang_emb": true,
141
+ "use_memory": false,
142
+ "validation_metrics": "_valid_mlm_ppl",
143
+ "vocab_size": 24044,
144
+ "word_blank": 0,
145
+ "word_dropout": 0,
146
+ "word_keep": 0.1,
147
+ "word_mask": 0.8,
148
+ "word_mask_keep_rand": "0.8,0.1,0.1",
149
+ "word_pred": 0.15,
150
+ "word_rand": 0.1,
151
+ "word_shuffle": 0,
152
+ "world_size": 8
153
+ }