Lynxpda committed on
Commit
b2d9eff
1 Parent(s): c28ef91

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ translate-ru_vep-1_0.argosmodel filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1 @@
1
- ---
2
- license: cc-by-sa-4.0
3
- ---
 
1
+ # Russian - Veps version 1.0
 
 
config.yml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_count: 100
2
+ accum_steps: 0
3
+ adam_beta2: 0.998
4
+ attention_dropout: 0.1
5
+ batch_size: 1500
6
+ batch_type: tokens
7
+ bucket_size: 49304
8
+ data:
9
+ 27e6308e0faf12cd1b4bed73708fad6f:
10
+ path_src: dataset/vep-ru/et-synt/target.txt
11
+ path_tgt: dataset/vep-ru/et-synt/source.txt
12
+ src_prefix: et_s_Latn
13
+ tgt_prefix: ''
14
+ transforms: &id001
15
+ - sentencepiece
16
+ - filtertoolong
17
+ - prefix
18
+ weight: 13
19
+ 2caaf1b1e5e15d4a06b5b77c438ba0bc:
20
+ path_src: dataset/vep-ru/fin-original/target.txt
21
+ path_tgt: dataset/vep-ru/fin-original/source.txt
22
+ src_prefix: fi_Latn
23
+ tgt_prefix: ''
24
+ transforms: *id001
25
+ weight: 13
26
+ 3def7622ce4622202126c8828c6fae3f:
27
+ path_src: dataset/vep-ru/fin-synt/target.txt
28
+ path_tgt: dataset/vep-ru/fin-synt/source.txt
29
+ src_prefix: fi_s_Latn
30
+ tgt_prefix: ''
31
+ transforms: *id001
32
+ weight: 13
33
+ 517216e92cd45eb876c79dc3bafaad18:
34
+ path_src: dataset/vep-ru/var/target.txt
35
+ path_tgt: dataset/vep-ru/var/source.txt
36
+ src_prefix: ''
37
+ tgt_prefix: ''
38
+ transforms: *id001
39
+ weight: 6
40
+ 815e8e066d03025e82529ef5c5ad232c:
41
+ path_src: dataset/vep-ru/vep/target.txt
42
+ path_tgt: dataset/vep-ru/vep/source.txt
43
+ src_prefix: ''
44
+ tgt_prefix: ''
45
+ transforms: *id001
46
+ weight: 5
47
+ 8e6d221db9fc70bb3fd1104c24c8f25c:
48
+ path_src: dataset/vep-ru/vep-dic/target.txt
49
+ path_tgt: dataset/vep-ru/vep-dic/source.txt
50
+ src_prefix: ''
51
+ tgt_prefix: ''
52
+ transforms: *id001
53
+ weight: 1
54
+ f080464d79eb04a44e9947db1b54c17b:
55
+ path_src: dataset/vep-ru/et-original/target.txt
56
+ path_tgt: dataset/vep-ru/et-original/source.txt
57
+ src_prefix: et_Latn
58
+ tgt_prefix: ''
59
+ transforms: *id001
60
+ weight: 13
61
+ valid:
62
+ path_src: run/ru_vep-1.0/src-val.txt
63
+ path_tgt: run/ru_vep-1.0/tgt-val.txt
64
+ transforms: *id001
65
+ dec_layers: 20
66
+ decay_method: rsqrt
67
+ decoder_type: transformer
68
+ dropout: 0.1
69
+ dropout_steps: 0
70
+ early_stopping: 0
71
+ enc_layers: 20
72
+ encoder_type: transformer
73
+ gpu_ranks:
74
+ - 0
75
+ - 1
76
+ heads: 8
77
+ hidden_size: 512
78
+ keep_checkpoint: 15
79
+ label_smoothing: 0.1
80
+ learning_rate: 0.0015
81
+ max_generator_batches: 2
82
+ max_grad_norm: 0
83
+ max_relative_positions: 20
84
+ model_dtype: fp16
85
+ normalization: tokens
86
+ num_workers: 6
87
+ optim: pagedadamw8bit
88
+ param_init: 0
89
+ param_init_glorot: true
90
+ pos_ffn_activation_fn: gated-gelu
91
+ position_encoding: false
92
+ queue_size: 10000
93
+ reset_optim: all
94
+ rnn_size: 512
95
+ save_checkpoint_steps: 100
96
+ save_data: run/ru_vep-1.0/opennmt
97
+ save_model: run/ru_vep-1.0/opennmt/openmt.model
98
+ share_decoder_embeddings: true
99
+ share_embeddings: true
100
+ share_vocab: true
101
+ skip_empty_level: silent
102
+ src_onmttok_kwargs:
103
+ lang: ru
104
+ mode: none
105
+ src_seq_length: 185
106
+ src_subword_alpha: 0.0
107
+ src_subword_model: run/ru_vep-1.0/sentencepiece.model
108
+ src_subword_nbest: 1
109
+ src_subword_type: sentencepiece
110
+ src_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
111
+ src_vocab_size: 32000
112
+ tgt_onmttok_kwargs:
113
+ lang: vep
114
+ mode: none
115
+ tgt_seq_length: 185
116
+ tgt_subword_alpha: 0.0
117
+ tgt_subword_model: run/ru_vep-1.0/sentencepiece.model
118
+ tgt_subword_nbest: 1
119
+ tgt_subword_type: sentencepiece
120
+ tgt_vocab: run/ru_vep-1.0/opennmt/openmt.vocab
121
+ tgt_vocab_size: 32000
122
+ train_steps: 1500
123
+ transformer_ff: 6144
124
+ update_vocab: 'False'
125
+ valid_batch_size: 64
126
+ valid_metrics:
127
+ - BLEU
128
+ valid_steps: 100
129
+ warmup_steps: 200
130
+ word_vec_size: 512
131
+ world_size: 2
ctranslate2-model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
ctranslate2-model/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b83cbd1910ea8adf5cfb164d7e26413996f65af435f8240b5bb5b9ab9ed3abf
3
+ size 460469083
ctranslate2-model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.0", "argos_version": "1.9.0", "from_code": "ru", "from_name": "Russian", "to_code": "vep", "to_name": "Veps"}
opennmt/openmt.model_averaged.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e107a0a87c5f5794f38bb958686dffa1dcfd1f94decf4514b6d5f046e0fbb7
3
+ size 1960760662
opennmt/openmt.vocab ADDED
The diff for this file is too large to render. See raw diff
 
sentencepiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a602803c84fef522a000aa6eedfb440360cf2ffc9b5996cc57c8376be84eb6c6
3
+ size 883660
sentencepiece.vocab ADDED
The diff for this file is too large to render. See raw diff
 
stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
stanza/ru/tokenize/syntagrus.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a28c7770bd33220f61b5ab060c9aaebe62354771f557683875602d1c2a38fefc
3
+ size 638146
translate-ru_vep-1_0.argosmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20738ffc11bb4b3a97710b3758e7979f309c6a6f4b8527c6a590a9c996226ecd
3
+ size 463123318