Lynxpda committed on
Commit
bae832d
1 Parent(s): 7a2e755

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ translate-vep_ru-1_0.argosmodel filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1 @@
1
- ---
2
- license: cc-by-sa-4.0
3
- ---
 
1
+ # Veps - Russian version 1.0
 
 
config.yml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_count: 100
2
+ accum_steps: 0
3
+ adam_beta2: 0.998
4
+ attention_dropout: 0.1
5
+ batch_size: 1500
6
+ batch_type: tokens
7
+ bucket_size: 49304
8
+ data:
9
+ 27e6308e0faf12cd1b4bed73708fad6f:
10
+ path_src: dataset/vep-ru/et-synt/source.txt
11
+ path_tgt: dataset/vep-ru/et-synt/target.txt
12
+ src_prefix: et_s_Latn
13
+ tgt_prefix: ''
14
+ transforms: &id001
15
+ - sentencepiece
16
+ - filtertoolong
17
+ - prefix
18
+ weight: 37
19
+ 2caaf1b1e5e15d4a06b5b77c438ba0bc:
20
+ path_src: dataset/vep-ru/fin-original/source.txt
21
+ path_tgt: dataset/vep-ru/fin-original/target.txt
22
+ src_prefix: fi_Latn
23
+ tgt_prefix: ''
24
+ transforms: *id001
25
+ weight: 37
26
+ 3def7622ce4622202126c8828c6fae3f:
27
+ path_src: dataset/vep-ru/fin-synt/source.txt
28
+ path_tgt: dataset/vep-ru/fin-synt/target.txt
29
+ src_prefix: fi_s_Latn
30
+ tgt_prefix: ''
31
+ transforms: *id001
32
+ weight: 37
33
+ 517216e92cd45eb876c79dc3bafaad18:
34
+ path_src: dataset/vep-ru/var/source.txt
35
+ path_tgt: dataset/vep-ru/var/target.txt
36
+ src_prefix: ''
37
+ tgt_prefix: ''
38
+ transforms: *id001
39
+ weight: 167
40
+ 815e8e066d03025e82529ef5c5ad232c:
41
+ path_src: dataset/vep-ru/vep/source.txt
42
+ path_tgt: dataset/vep-ru/vep/target.txt
43
+ src_prefix: ''
44
+ tgt_prefix: ''
45
+ transforms: *id001
46
+ weight: 6
47
+ 8e6d221db9fc70bb3fd1104c24c8f25c:
48
+ path_src: dataset/vep-ru/vep-dic/source.txt
49
+ path_tgt: dataset/vep-ru/vep-dic/target.txt
50
+ src_prefix: ''
51
+ tgt_prefix: ''
52
+ transforms: *id001
53
+ weight: 1
54
+ f080464d79eb04a44e9947db1b54c17b:
55
+ path_src: dataset/vep-ru/et-original/source.txt
56
+ path_tgt: dataset/vep-ru/et-original/target.txt
57
+ src_prefix: et_Latn
58
+ tgt_prefix: ''
59
+ transforms: *id001
60
+ weight: 37
61
+ valid:
62
+ path_src: run/vep_ru-1.0/src-val.txt
63
+ path_tgt: run/vep_ru-1.0/tgt-val.txt
64
+ transforms: *id001
65
+ dec_layers: 20
66
+ decay_method: rsqrt
67
+ decoder_type: transformer
68
+ dropout: 0.1
69
+ dropout_steps: 0
70
+ early_stopping: 0
71
+ enc_layers: 20
72
+ encoder_type: transformer
73
+ gpu_ranks:
74
+ - 0
75
+ - 1
76
+ heads: 8
77
+ hidden_size: 512
78
+ keep_checkpoint: 15
79
+ label_smoothing: 0.1
80
+ learning_rate: 0.005
81
+ max_generator_batches: 2
82
+ max_grad_norm: 0
83
+ max_relative_positions: 20
84
+ model_dtype: fp16
85
+ normalization: tokens
86
+ num_workers: 6
87
+ optim: pagedadamw8bit
88
+ param_init: 0
89
+ param_init_glorot: true
90
+ pos_ffn_activation_fn: gated-gelu
91
+ position_encoding: false
92
+ queue_size: 10000
93
+ reset_optim: none
94
+ rnn_size: 512
95
+ save_checkpoint_steps: 500
96
+ save_data: run/vep_ru-1.0/opennmt
97
+ save_model: run/vep_ru-1.0/opennmt/openmt.model
98
+ share_decoder_embeddings: true
99
+ share_embeddings: true
100
+ share_vocab: true
101
+ skip_empty_level: silent
102
+ src_onmttok_kwargs:
103
+ lang: vep
104
+ mode: none
105
+ src_seq_length: 185
106
+ src_subword_alpha: 0.0
107
+ src_subword_model: run/vep_ru-1.0/sentencepiece.model
108
+ src_subword_nbest: 1
109
+ src_subword_type: sentencepiece
110
+ src_vocab: run/vep_ru-1.0/opennmt/openmt.vocab
111
+ src_vocab_size: 32000
112
+ tgt_onmttok_kwargs:
113
+ lang: ru
114
+ mode: none
115
+ tgt_seq_length: 185
116
+ tgt_subword_alpha: 0.0
117
+ tgt_subword_model: run/vep_ru-1.0/sentencepiece.model
118
+ tgt_subword_nbest: 1
119
+ tgt_subword_type: sentencepiece
120
+ tgt_vocab: run/vep_ru-1.0/opennmt/openmt.vocab
121
+ tgt_vocab_size: 32000
122
+ train_steps: 2000
123
+ transformer_ff: 6144
124
+ update_vocab: 'False'
125
+ valid_batch_size: 64
126
+ valid_metrics:
127
+ - BLEU
128
+ valid_steps: 500
129
+ warmup_steps: 500
130
+ word_vec_size: 512
131
+ world_size: 2
ctranslate2-model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
ctranslate2-model/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9db6dfd61f95f7f1edad42d81dae01e8025a92371b590ee43761d57620c7ac1
3
+ size 460469083
ctranslate2-model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.0", "argos_version": "1.9.0", "from_code": "vep", "from_name": "Veps", "to_code": "ru", "to_name": "Russian"}
opennmt/openmt.model_averaged.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1eb3318714e878a71229cb85ecb075009496b69edb0c53a7a7208712f8ab5c
3
+ size 1960760662
opennmt/openmt.vocab ADDED
The diff for this file is too large to render. See raw diff
 
sentencepiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a602803c84fef522a000aa6eedfb440360cf2ffc9b5996cc57c8376be84eb6c6
3
+ size 883660
sentencepiece.vocab ADDED
The diff for this file is too large to render. See raw diff
 
stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
stanza/vep/tokenize/edt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ae6d383b373632bcf32b82fe726ecdd3aed0b3eb423ce86460f3157eed3db5
3
+ size 633536
translate-vep_ru-1_0.argosmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba118915f5598a7054be3c988747f99b757ddb968458d11b3bad5e5c23f2ddce
3
+ size 463118692