---
# JoeyNMT configuration: Gilaki -> Persian character-level transformer, split 1.
name: "Gilaki-Persian_1"

data:
  train: "datasets/Gilaki-Persian/1/train"
  dev: "datasets/Gilaki-Persian/1/dev"
  test: "datasets/Gilaki-Persian/1/test"
  level: "char"
  lowercase: false
  normalize: false
  max_sent_length: 100
  dataset_type: "plain"
  src:
    lang: "src"
    voc_limit: 100
    voc_min_freq: 5
    level: "char"
  trg:
    lang: "trg"
    voc_limit: 100
    voc_min_freq: 5
    level: "char"

training:
  random_seed: 42
  optimizer: "adam"
  learning_rate: 0.001
  learning_rate_min: 0.0002
  weight_decay: 0.0
  clip_grad_norm: 1.0
  batch_size: 64
  scheduling: "plateau"
  patience: 10
  decrease_factor: 0.5
  early_stopping_metric: "loss"
  epochs: 80
  validation_freq: 1000
  logging_freq: 100
  eval_metric: "bleu"
  model_dir: "models/Gilaki-Persian"
  overwrite: true
  shuffle: true
  use_cuda: true
  max_output_length: 100
  print_valid_sents: [0, 3, 6, 9]
  # -1 keeps every checkpoint rather than pruning to the N best.
  keep_best_ckpts: -1

testing:
  n_best: 1
  beam_size: 4
  beam_alpha: 1.0
  eval_metrics: ["bleu", "chrf", "sequence_accuracy"]
  max_output_length: 1000
  batch_size: 10
  batch_type: "sentence"
  return_prob: "none"

model:
  initializer: "xavier_uniform"
  init_gain: 1.0
  bias_initializer: "zeros"
  embed_initializer: "xavier_uniform"
  embed_init_gain: 1.0
  encoder:
    type: "transformer"
    num_layers: 6
    num_heads: 8
    embeddings:
      embedding_dim: 128
      scale: true
    # typically ff_size = 4 x hidden_size
    hidden_size: 128
    ff_size: 512
    dropout: 0.2
    layer_norm: "pre"
  decoder:
    type: "transformer"
    num_layers: 6
    num_heads: 8
    embeddings:
      embedding_dim: 128
      scale: true
    # typically ff_size = 4 x hidden_size
    hidden_size: 128
    ff_size: 512
    dropout: 0.2
    layer_norm: "pre"