File size: 2,261 Bytes
9118d6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
    "exp_name":"mt_enfrde", 
    "exp_id":"demo", 
    "dump_path":"/content/dump_path",
    "encoder_only":"False",
    "data_path":"/content/processed", 
    "lgs":"de-en-fr", 
    "clm_steps":"",
    "mlm_steps":"", 
    "mt_steps":"...",
    "ae_steps":"", 
    "bt_steps":"", 
    "pc_steps":"", 
    "reload_emb":"", 
    "reload_model":"/content/dump_path/mlm_enfrde/demo/best-valid_mlm_ppl.pth,/content/dump_path/mlm_enfrde/demo/best-valid_mlm_ppl.pth",
    "reload_checkpoint":"",
    "epoch_size":"100", 
    "max_epoch":"1", 
    "stopping_criterion":"valid_en-fr_mt_bleu,10", 
    "validation_metrics":"valid_en-fr_mt_bleu", 
    "train_n_samples":"-1",
    "valid_n_samples":"-1",
    "test_n_samples":"-1",
    "remove_long_sentences_train":"False",
    "remove_long_sentences_valid":"False",
    "remove_long_sentences_test":"False",
    
    "eval_bleu":"True", 
    "eval_only":"False", 
    "max_len":"100", 
    "group_by_size":"True", 
    "batch_size":"32", 
    "save_periodic":"0", 
    "fp16":"False",
    "amp":"-1",
    "emb_dim":"1024", 
    "n_layers":"6", 
    "n_heads":"8",
    "dropout":"0.1", 
    "attention_dropout":"0.1", 
    "gelu_activation":"True", 
    "share_inout_emb":"True", 
    "sinusoidal_embeddings":"False", 
    "use_lang_emb":"True", 
    "use_memory":"False", 
    "asm":"False", 
    "context_size":"0", 
    "word_pred":"0.15", 
    "word_mask_keep_rand":"0.8,0.1,0.1", 
    "word_shuffle":"3", 
    "word_dropout":"0.1", 
    "word_blank":"0.1", 
    "max_vocab":"-1", 
    "min_count":"0", 
    "lg_sampling_factor":"-1", 
    "bptt":"256", 
    "max_batch_size":"0", 
    "tokens_per_batch":"2000", 
    "split_data":"False", 
    "optimizer":"adam_inverse_sqrt,beta1=0.9,beta2=0.98,lr=0.0001",
    "clip_grad_norm":"5", 
    "accumulate_gradients":"1", 
    "lambda_mlm":"1", 
    "lambda_clm":"1",
    "lambda_pc":"1", 
    "lambda_ae":"0:1,100000:0.1,300000:0", 
    "lambda_mt":"1",
    "lambda_bt":"1", 
    "beam_size":"1",
    "length_penalty":"1",
    "early_stopping":"False",
    "debug_train":"False", 
    "debug_slurm":"False",
    "local_rank":"-1",
    "master_port":"-1",
    "mem_enc_positions":"",
    "mem_dec_positions":"",
    "asm_cutoffs":"8000,20000",
    "asm_div_value":"4"
}