Kleber committed on
Commit feed085
1 Parent(s): 9502375

Upload kin_en_args.yaml

Files changed (1)
  1. kin_en_args.yaml +112 -0
kin_en_args.yaml ADDED
@@ -0,0 +1,112 @@
+ name: "kin_en_transformer"
+
+ data:
+     src:
+         lang: "kin"
+         level: "bpe"
+         lowercase: False
+         tokenizer_type: "subword-nmt"
+         num_merges: 4000
+         tokenizer_cfg:
+             num_merges: 4000
+             codes: "bpe.codes.4000"
+             pretokenizer: "none"
+
+     trg:
+         lang: "en"
+         level: "bpe"
+         lowercase: False
+         tokenizer_type: "subword-nmt"
+         num_merges: 4000
+         tokenizer_cfg:
+             num_merges: 4000
+             codes: "bpe.codes.4000"
+             pretokenizer: "none"
+
+     train: "data/train/kin_en_train"
+     dev: "data/val/kin_en_val"
+     test: "data/test/kin_en_test"
+     level: "bpe"
+     # lowercase: False
+     max_sent_length: 100
+     # src_vocab: "models/kin_en_transformer/src_vocab"
+     # trg_vocab: "models/kin_en_transformer/src_vocab"
+     dataset_type: "tsv"
+
+ testing:
+     beam_size: 15
+     beam_alpha: 1.0
+     eval_metrics: ["bleu"]
+     batch_type: "sentence"
+     sacrebleu_cfg:               # sacrebleu options
+         remove_whitespace: True  # `remove_whitespace` option in sacrebleu.corpus_chrf() (default: True)
+         tokenize: "none"         # `tokenize` option in sacrebleu.corpus_bleu(); "none" for already tokenized test data, "13a" is the default minimal tokenizer, "intl" mostly handles punctuation and unicode, etc.
+
+ training:
+     # load_model: "models/{name}_transformer/1.ckpt"  # if uncommented, load a pre-trained model from this checkpoint
+     random_seed: 42
+     optimizer: "adam"
+     normalization: "tokens"
+     adam_betas: [0.9, 0.999]
+     scheduling: "plateau"        # TODO: try switching from plateau to Noam scheduling
+     patience: 5                  # for plateau: decrease learning rate by decrease_factor if the validation score has not improved for this many validation rounds
+     learning_rate_factor: 0.5    # factor for Noam scheduler (used with Transformer)
+     learning_rate_warmup: 1000   # warmup steps for Noam scheduler (used with Transformer)
+     decrease_factor: 0.7
+     loss: "crossentropy"
+     learning_rate: 0.0003
+     learning_rate_min: 0.00000001
+     weight_decay: 0.0
+     label_smoothing: 0.1
+     batch_size: 256
+     batch_type: "token"
+     eval_batch_size: 3600
+     eval_batch_type: "token"
+     batch_multiplier: 1
+     early_stopping_metric: "ppl"
+     epochs: 30                   # TODO: decrease while experimenting; around 30 epochs is enough to check that training works at all
+     validation_freq: 1000        # TODO: set so validation runs at least once per epoch
+     logging_freq: 100
+     eval_metric: "bleu"
+     model_dir: "models/kin_en_transformer"
+     overwrite: False             # TODO: set to True to overwrite a possibly existing model
+     shuffle: True
+     use_cuda: True
+     max_output_length: 100
+     print_valid_sents: [0, 1, 2, 3]
+     keep_last_ckpts: 3
+
+ model:
+     initializer: "xavier_normal"
+     bias_initializer: "zeros"
+     init_gain: 1.0
+     embed_initializer: "xavier_normal"
+     embed_init_gain: 1.0
+     tied_embeddings: False
+     tied_softmax: True
+     encoder:
+         type: "transformer"
+         num_layers: 6
+         num_heads: 8
+         embeddings:
+             embedding_dim: 256
+             scale: True
+             dropout: 0.
+         # typically ff_size = 4 x hidden_size
+         hidden_size: 256
+         ff_size: 1024
+         dropout: 0.1
+         layer_norm: "post"
+     decoder:
+         type: "transformer"
+         num_layers: 6
+         num_heads: 8
+         embeddings:
+             embedding_dim: 256
+             scale: True
+             dropout: 0.
+         # typically ff_size = 4 x hidden_size
+         hidden_size: 256
+         ff_size: 1024
+         dropout: 0.1
+         layer_norm: "post"
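
The file follows the JoeyNMT configuration layout (top-level data, testing, training, and model sections). As a minimal sketch, assuming the file is saved locally as kin_en_args.yaml and that PyYAML and JoeyNMT are installed, it can be loaded, spot-checked, and handed to the standard `python -m joeynmt train` entry point; the local path and the JoeyNMT invocation are assumptions, not part of this commit:

# Minimal sketch (not part of the commit): load kin_en_args.yaml with PyYAML,
# spot-check a few of the settings defined above, then launch JoeyNMT training.
import subprocess
import sys

import yaml

CFG_PATH = "kin_en_args.yaml"  # assumed local path to the uploaded config

with open(CFG_PATH, encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# These values come straight from the file above.
assert cfg["name"] == "kin_en_transformer"
assert cfg["data"]["src"]["lang"] == "kin" and cfg["data"]["trg"]["lang"] == "en"
assert cfg["model"]["encoder"]["embeddings"]["embedding_dim"] == cfg["model"]["encoder"]["hidden_size"]

print("epochs:", cfg["training"]["epochs"],
      "| validate every", cfg["training"]["validation_freq"], "steps",
      "| model dir:", cfg["training"]["model_dir"])

# Assuming JoeyNMT is installed, training runs directly from the config file.
subprocess.run([sys.executable, "-m", "joeynmt", "train", CFG_PATH], check=True)

Note that with overwrite: False the run expects models/kin_en_transformer not to exist yet; either remove a stale run directory or flip the flag, as the TODO comment in the config suggests.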