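# JoeyNMT 2.0.0 configuration: South Azerbaijani (azb) -> Persian (fa)
# translation with a small Transformer and a shared SentencePiece vocabulary.
# Typical invocation with the standard JoeyNMT CLI would be:
#     python -m joeynmt train <path/to/this/config.yaml>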
name: "data_sp"
joeynmt_version: "2.0.0"

data:
    train: "RESULTS_azb2fa/data/train"
    dev: "RESULTS_azb2fa/data/validation"
    test: "RESULTS_azb2fa/data/test"
    dataset_type: "huggingface"
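    # validate on a 200-sentence subset of the dev set to keep validation fast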
    sample_dev_subset: 200
    src:
        lang: "azb"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"

    trg:
        lang: "fa"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"
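
# Note: src and trg point to the same vocab file and SentencePiece model,
# i.e. a joint source/target vocabulary, which is what tied_embeddings and
# tied_softmax in the model section assume. A joint model like this could be
# trained with SentencePiece itself (hypothetical sketch; the input file is
# assumed to be the concatenated azb+fa training text):
#     spm_train --input=train.azb-fa.txt --model_prefix=sp \
#         --vocab_size=2000 --model_type=bpe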


testing:
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 512
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    #return_prob: "hyp"
    #return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"
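
# Decoding notes: with batch_type "token", batch_size counts tokens rather
# than sentences, and beam_alpha is the length-penalty exponent applied
# during beam search. A trained model is typically evaluated standalone with:
#     python -m joeynmt test <path/to/this/config.yaml>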

training:
    #load_model: "RESULTS_azb2fa/model/latest.ckpt"
    #reset_best_ckpt: False
    #reset_scheduler: False
    #reset_optimizer: False
    #reset_iter_state: False
    random_seed: 42
    optimizer: "adam"
    normalization: "tokens"
    adam_betas: [0.9, 0.999]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 2000
    learning_rate: 0.0002
    learning_rate_min: 0.00000001
    weight_decay: 0.0
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 512
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 500
    updates: 2000000000
    validation_freq: 1000
    logging_freq: 100
    model_dir: "RESULTS_azb2fa/model"
    overwrite: True
    shuffle: True
    use_cuda: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3
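
# Effective batch size per update: 512 tokens x batch_multiplier 4 ~ 2048
# tokens via gradient accumulation. The inverse-square-root schedule warms
# the learning rate up linearly over 2000 steps to 2e-4, then decays it
# proportionally to 1/sqrt(step). "updates" is set effectively unbounded
# (2e9), so training stops after 500 epochs; the 3 best checkpoints by dev
# BLEU are kept.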

model:
    initializer: "xavier"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 2
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 2
        num_heads: 8
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
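
# Rough parameter count (ignoring biases and layer norms): shared embeddings
# ~2000 x 256 = 0.5M; each encoder layer = 4 x 256^2 (attention projections)
# + 2 x 256 x 1024 (feed-forward) = 0.8M; each decoder layer adds one more
# attention block (cross-attention) = 1.0M. With 2 encoder and 2 decoder
# layers that is roughly 4M parameters in total. Note the encoder uses 4
# attention heads and the decoder 8; both divide hidden_size 256 evenly.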