# ru_vep / config.yml
# Hugging Face Hub page header (author avatar: Lynxpda)
# Last commit: "Upload folder using huggingface_hub"
# Revision: b2d9eff (verified)
# Gradient-accumulation, optimizer and batching options.
# NOTE(review): presumably an OpenNMT-py training config (paths elsewhere in
# this file live under run/ru_vep-1.0/opennmt) — confirm option semantics
# against that tool's reference before changing values.
accum_count: 100
accum_steps: 0
adam_beta2: 0.998
attention_dropout: 0.1
# batch_size is paired with batch_type: tokens, so 1500 is presumably a token
# count rather than a sentence count — TODO confirm.
batch_size: 1500
batch_type: tokens
bucket_size: 49304
# Corpora used by the run. Each training corpus is keyed by an opaque
# 32-hex-digit id and carries its own file paths, prefixes, transform list and
# weight; `valid` is the held-out set and has no prefixes or weight.
#
# The per-corpus keys must be nested under their corpus id (and the ids under
# `data`); at top level they would be duplicate keys and only the last
# occurrence of each would survive parsing.
#
# NOTE(review): every training corpus reads path_src from target.txt and
# path_tgt from source.txt. Presumably the vep-ru datasets are deliberately
# reversed to train the ru -> vep direction — confirm.
# NOTE(review): `weight` is presumably a relative sampling weight between
# corpora — confirm against the trainer's documentation.
data:
  27e6308e0faf12cd1b4bed73708fad6f:
    path_src: dataset/vep-ru/et-synt/target.txt
    path_tgt: dataset/vep-ru/et-synt/source.txt
    src_prefix: et_s_Latn
    tgt_prefix: ''
    # Anchor &id001 defines the transform list once; every other entry in
    # this mapping reuses it through the *id001 alias.
    transforms: &id001
    - sentencepiece
    - filtertoolong
    - prefix
    weight: 13
  2caaf1b1e5e15d4a06b5b77c438ba0bc:
    path_src: dataset/vep-ru/fin-original/target.txt
    path_tgt: dataset/vep-ru/fin-original/source.txt
    src_prefix: fi_Latn
    tgt_prefix: ''
    transforms: *id001
    weight: 13
  3def7622ce4622202126c8828c6fae3f:
    path_src: dataset/vep-ru/fin-synt/target.txt
    path_tgt: dataset/vep-ru/fin-synt/source.txt
    src_prefix: fi_s_Latn
    tgt_prefix: ''
    transforms: *id001
    weight: 13
  517216e92cd45eb876c79dc3bafaad18:
    path_src: dataset/vep-ru/var/target.txt
    path_tgt: dataset/vep-ru/var/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *id001
    weight: 6
  815e8e066d03025e82529ef5c5ad232c:
    path_src: dataset/vep-ru/vep/target.txt
    path_tgt: dataset/vep-ru/vep/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *id001
    weight: 5
  8e6d221db9fc70bb3fd1104c24c8f25c:
    path_src: dataset/vep-ru/vep-dic/target.txt
    path_tgt: dataset/vep-ru/vep-dic/source.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *id001
    weight: 1
  f080464d79eb04a44e9947db1b54c17b:
    path_src: dataset/vep-ru/et-original/target.txt
    path_tgt: dataset/vep-ru/et-original/source.txt
    src_prefix: et_Latn
    tgt_prefix: ''
    transforms: *id001
    weight: 13
  # Validation set: same transform list as the training corpora.
  valid:
    path_src: run/ru_vep-1.0/src-val.txt
    path_tgt: run/ru_vep-1.0/tgt-val.txt
    transforms: *id001
# Model architecture and training-loop options (flat top-level keys).
# Encoder and decoder are both `transformer` with 20 layers each.
dec_layers: 20
decay_method: rsqrt
decoder_type: transformer
dropout: 0.1
dropout_steps: 0
early_stopping: 0
enc_layers: 20
encoder_type: transformer
# Two device ranks are listed here; world_size further down is also 2.
gpu_ranks:
- 0
- 1
heads: 8
# hidden_size, rnn_size and word_vec_size are all set to 512 in this file.
hidden_size: 512
keep_checkpoint: 15
label_smoothing: 0.1
learning_rate: 0.0015
max_generator_batches: 2
# NOTE(review): 0 presumably disables gradient-norm clipping — confirm.
max_grad_norm: 0
# NOTE(review): position_encoding is false while max_relative_positions is 20;
# presumably relative position representations are used instead of absolute
# encodings — confirm.
max_relative_positions: 20
model_dtype: fp16
normalization: tokens
num_workers: 6
optim: pagedadamw8bit
param_init: 0
param_init_glorot: true
pos_ffn_activation_fn: gated-gelu
position_encoding: false
queue_size: 10000
reset_optim: all
rnn_size: 512
# Checkpoint interval equals valid_steps (both 100 in this file).
save_checkpoint_steps: 100
save_data: run/ru_vep-1.0/opennmt
save_model: run/ru_vep-1.0/opennmt/openmt.model
# All three sharing flags are enabled; the source and target vocab paths in
# this file point at the same file.
share_decoder_embeddings: true
share_embeddings: true
share_vocab: true
skip_empty_level: silent
# Source-side tokenizer keyword arguments. `lang` and `mode` are entries of
# this mapping, not top-level settings, so they must be indented under it.
# `none` is the plain string "none" (YAML null would be spelled `null`).
src_onmttok_kwargs:
  lang: ru
  mode: none
# Source-side length limit, subword model and vocabulary.
# The values mirror the tgt_* counterparts in this file (same length limit,
# same SentencePiece model path, same vocab path and size).
src_seq_length: 185
# NOTE(review): alpha 0.0 with nbest 1 presumably means deterministic
# (non-sampled) subword segmentation — confirm.
src_subword_alpha: 0.0
src_subword_model: run/ru_vep-1.0/sentencepiece.model
src_subword_nbest: 1
src_subword_type: sentencepiece
src_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
src_vocab_size: 32000
# Target-side tokenizer keyword arguments. `lang` and `mode` are entries of
# this mapping, not top-level settings, so they must be indented under it.
# `none` is the plain string "none" (YAML null would be spelled `null`).
tgt_onmttok_kwargs:
  lang: vep
  mode: none
# Target-side length limit, subword model and vocabulary; the values mirror
# the src_* counterparts in this file.
tgt_seq_length: 185
tgt_subword_alpha: 0.0
tgt_subword_model: run/ru_vep-1.0/sentencepiece.model
tgt_subword_nbest: 1
tgt_subword_type: sentencepiece
tgt_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
tgt_vocab_size: 32000
# Training schedule: 1500 steps in total, with warmup_steps: 200 and
# validation every 100 steps (see valid_steps below).
train_steps: 1500
transformer_ff: 6144
# NOTE(review): this is the quoted string 'False', not a YAML boolean —
# confirm the consumer interprets it as "disabled".
update_vocab: 'False'
valid_batch_size: 64
valid_metrics:
- BLEU
valid_steps: 100
warmup_steps: 200
word_vec_size: 512
# Matches the number of entries in gpu_ranks (two).
world_size: 2