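# JoeyNMT v2 configuration: Transformer NMT from South Azerbaijani (azb)
# to Persian (fa) on a SentencePiece-segmented HuggingFace dataset.
# Assuming this file is saved as e.g. config_azb2fa.yaml (the filename is
# hypothetical), training and evaluation run via the JoeyNMT CLI:
#   python -m joeynmt train config_azb2fa.yaml
#   python -m joeynmt test config_azb2fa.yaml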
name: "data_sp"
joeynmt_version: "2.0.0"
data:
    train: "RESULTS_azb2fa/data/train"
    dev: "RESULTS_azb2fa/data/validation"
    test: "RESULTS_azb2fa/data/test"
    dataset_type: "huggingface"
    sample_dev_subset: 200
    src:
        lang: "azb"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"
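    # trg mirrors src: both sides point to the same vocab file and the same
    # SentencePiece model, i.e. a joint azb/fa vocabulary. This is what makes
    # tied_embeddings: True (below, under model) possible.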
    trg:
        lang: "fa"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 2000
        voc_min_freq: 1
        voc_file: "RESULTS_azb2fa/data/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "RESULTS_azb2fa/data/sp.model"
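# Decoding: beam search of width 5 with length penalty alpha 1.0; batch
# sizes here and under training are counted in tokens, not sentences.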
testing:
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 512
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    #return_prob: "hyp"
    #return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"
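# Optimization: Adam with an inverse-square-root schedule (linear warmup for
# 2000 steps, then decay). With batch_size 512 tokens and batch_multiplier 4,
# gradients are accumulated to an effective ~2048 tokens per update. The huge
# "updates" cap means training ends via "epochs" or early stopping on dev
# BLEU. Note that overwrite: True lets a fresh run wipe an existing model_dir.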
training:
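    # To resume from a checkpoint, uncomment load_model (and the reset_*
    # flags as needed):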
    #load_model: "RESULTS_azb2fa/model/latest.ckpt"
    #reset_best_ckpt: False
    #reset_scheduler: False
    #reset_optimizer: False
    #reset_iter_state: False
    random_seed: 42
    optimizer: "adam"
    normalization: "tokens"
    adam_betas: [0.9, 0.999]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 2000
    learning_rate: 0.0002
    learning_rate_min: 0.00000001
    weight_decay: 0.0
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 512
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 500
    updates: 2000000000
    validation_freq: 1000
    logging_freq: 100
    model_dir: "RESULTS_azb2fa/model"
    overwrite: True
    shuffle: True
    use_cuda: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3
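# Model: a small pre-layer-norm Transformer, 2 layers per side, hidden size
# 256 and ff_size 1024 (= 4 x hidden_size). tied_embeddings shares one
# embedding matrix between source and target (requires the joint vocabulary
# configured above); tied_softmax additionally reuses it as the output layer.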
model:
    initializer: "xavier"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 2
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
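    # The decoder mirrors the encoder except for num_heads: 8
    # (32 dimensions per head at hidden_size 256).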
    decoder:
        type: "transformer"
        num_layers: 2
        num_heads: 8
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.2
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
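# After training, interactive decoding from stdin is also available:
#   python -m joeynmt translate config_azb2fa.yaml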