sadanyh committed on
Commit
9118d6a
•
1 Parent(s): bec5669
Files changed (1) hide show
  1. mt_template.json +80 -0
mt_template.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp_name":"mt_enfrde",
3
+ "exp_id":"demo",
4
+ "dump_path":"/content/dump_path",
5
+ "encoder_only":"False",
6
+ "data_path":"/content/processed",
7
+ "lgs":"de-en-fr",
8
+ "clm_steps":"",
9
+ "mlm_steps":"",
10
+ "mt_steps":"...",
11
+ "ae_steps":"",
12
+ "bt_steps":"",
13
+ "pc_steps":"",
14
+ "reload_emb":"",
15
+ "reload_model":"/content/dump_path/mlm_enfrde/demo/best-valid_mlm_ppl.pth,/content/dump_path/mlm_enfrde/demo/best-valid_mlm_ppl.pth",
16
+ "reload_checkpoint":"",
17
+ "epoch_size":"100",
18
+ "max_epoch":"1",
19
+ "stopping_criterion":"valid_en-fr_mt_bleu,10",
20
+ "validation_metrics":"valid_en-fr_mt_bleu",
21
+ "train_n_samples":"-1",
22
+ "valid_n_samples":"-1",
23
+ "test_n_samples":"-1",
24
+ "remove_long_sentences_train":"False",
25
+ "remove_long_sentences_valid":"False",
26
+ "remove_long_sentences_test":"False",
27
+
28
+ "eval_bleu":"True",
29
+ "eval_only":"False",
30
+ "max_len":"100",
31
+ "group_by_size":"True",
32
+ "batch_size":"32",
33
+ "save_periodic":"0",
34
+ "fp16":"False",
35
+ "amp":"-1",
36
+ "emb_dim":"1024",
37
+ "n_layers":"6",
38
+ "n_heads":"8",
39
+ "dropout":"0.1",
40
+ "attention_dropout":"0.1",
41
+ "gelu_activation":"True",
42
+ "share_inout_emb":"True",
43
+ "sinusoidal_embeddings":"False",
44
+ "use_lang_emb":"True",
45
+ "use_memory":"False",
46
+ "asm":"False",
47
+ "context_size":"0",
48
+ "word_pred":"0.15",
49
+ "word_mask_keep_rand":"0.8,0.1,0.1",
50
+ "word_shuffle":"3",
51
+ "word_dropout":"0.1",
52
+ "word_blank":"0.1",
53
+ "max_vocab":"-1",
54
+ "min_count":"0",
55
+ "lg_sampling_factor":"-1",
56
+ "bptt":"256",
57
+ "max_batch_size":"0",
58
+ "tokens_per_batch":"2000",
59
+ "split_data":"False",
60
+ "optimizer":"adam_inverse_sqrt,beta1=0.9,beta2=0.98,lr=0.0001",
61
+ "clip_grad_norm":"5",
62
+ "accumulate_gradients":"1",
63
+ "lambda_mlm":"1",
64
+ "lambda_clm":"1",
65
+ "lambda_pc":"1",
66
+ "lambda_ae":"0:1,100000:0.1,300000:0",
67
+ "lambda_mt":"1",
68
+ "lambda_bt":"1",
69
+ "beam_size":"1",
70
+ "length_penalty":"1",
71
+ "early_stopping":"False",
72
+ "debug_train":"False",
73
+ "debug_slurm":"False",
74
+ "local_rank":"-1",
75
+ "master_port":"-1",
76
+ "mem_enc_positions":"",
77
+ "mem_dec_positions":"",
78
+ "asm_cutoffs":"8000,20000",
79
+ "asm_div_value":"4"
80
+ }