tiedeman committed on
Commit
8d651fb
1 Parent(s): 8fbfd8b

Initial commit

Browse files
README.md CHANGED
@@ -1,24 +1,96 @@
1
  ---
 
 
 
 
2
  tags:
3
  - translation
4
- ---
5
 
6
- ### opus-mt-es-he
 
 
7
 
8
- * source languages: es
9
- * target languages: he
10
- * OPUS readme: [es-he](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/es-he/README.md)
11
 
12
- * dataset: opus
13
- * model: transformer-align
14
- * pre-processing: normalization + SentencePiece
15
- * download original weights: [opus-2020-01-16.zip](https://object.pouta.csc.fi/OPUS-MT-models/es-he/opus-2020-01-16.zip)
16
- * test set translations: [opus-2020-01-16.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/es-he/opus-2020-01-16.test.txt)
17
- * test set scores: [opus-2020-01-16.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/es-he/opus-2020-01-16.eval.txt)
 
 
18
 
19
  ## Benchmarks
20
 
21
  | testset | BLEU | chr-F |
22
  |-----------------------|-------|-------|
23
- | Tatoeba.es.he | 44.3 | 0.642 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
 
1
  ---
2
+ language:
3
+ - es
4
+ - he
5
+
6
  tags:
7
  - translation
 
8
 
9
+ license: apache-2.0
10
+ ---
11
+ ### es-he
12
 
13
+ * source group: Spanish
14
+ * target group: Hebrew
15
+ * OPUS readme: [spa-heb](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/spa-heb/README.md)
16
 
17
+ * model: transformer
18
+ * source language(s): spa
19
+ * target language(s): heb
20
+ * model: transformer
21
+ * pre-processing: normalization + SentencePiece (spm32k,spm32k)
22
+ * download original weights: [opus-2020-12-10.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/spa-heb/opus-2020-12-10.zip)
23
+ * test set translations: [opus-2020-12-10.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/spa-heb/opus-2020-12-10.test.txt)
24
+ * test set scores: [opus-2020-12-10.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/spa-heb/opus-2020-12-10.eval.txt)
25
 
26
  ## Benchmarks
27
 
28
  | testset | BLEU | chr-F |
29
  |-----------------------|-------|-------|
30
+ | Tatoeba-test.spa.heb | 43.6 | 0.636 |
31
+
32
+
33
+ ### System Info:
34
+ - hf_name: es-he
35
+
36
+ - source_languages: spa
37
+
38
+ - target_languages: heb
39
+
40
+ - opus_readme_url: https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/spa-heb/README.md
41
+
42
+ - original_repo: Tatoeba-Challenge
43
+
44
+ - tags: ['translation']
45
+
46
+ - languages: ['es', 'he']
47
+
48
+ - src_constituents: ('Spanish', {'spa'})
49
+
50
+ - tgt_constituents: ('Hebrew', {'heb'})
51
+
52
+ - src_multilingual: False
53
+
54
+ - tgt_multilingual: False
55
+
56
+ - long_pair: spa-heb
57
+
58
+ - prepro: normalization + SentencePiece (spm32k,spm32k)
59
+
60
+ - url_model: https://object.pouta.csc.fi/Tatoeba-MT-models/spa-heb/opus-2020-12-10.zip
61
+
62
+ - url_test_set: https://object.pouta.csc.fi/Tatoeba-MT-models/spa-heb/opus-2020-12-10.test.txt
63
+
64
+ - src_alpha3: spa
65
+
66
+ - tgt_alpha3: heb
67
+
68
+ - chrF2_score: 0.636
69
+
70
+ - bleu: 43.6
71
+
72
+ - brevity_penalty: 0.992
73
+
74
+ - ref_len: 12112.0
75
+
76
+ - src_name: Spanish
77
+
78
+ - tgt_name: Hebrew
79
+
80
+ - train_date: 2020-12-10 00:00:00
81
+
82
+ - src_alpha2: es
83
+
84
+ - tgt_alpha2: he
85
+
86
+ - prefer_old: False
87
+
88
+ - short_pair: es-he
89
+
90
+ - helsinki_git_sha: b317f78a3ec8a556a481b6a53dc70dc11769ca96
91
+
92
+ - transformers_git_sha: 1310e1a758edc8e89ec363db76863c771fbeb1de
93
+
94
+ - port_machine: LM0-400-22516.local
95
 
96
+ - port_time: 2020-12-11-11:41
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_num_labels": 3,
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -10,23 +9,26 @@
10
  "attention_dropout": 0.0,
11
  "bad_words_ids": [
12
  [
13
- 64683
14
  ]
15
  ],
16
  "bos_token_id": 0,
17
- "classif_dropout": 0.0,
18
  "d_model": 512,
19
  "decoder_attention_heads": 8,
20
  "decoder_ffn_dim": 2048,
21
  "decoder_layerdrop": 0.0,
22
  "decoder_layers": 6,
23
- "decoder_start_token_id": 64683,
 
24
  "dropout": 0.1,
25
  "encoder_attention_heads": 8,
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
  "eos_token_id": 0,
 
 
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1",
@@ -46,8 +48,9 @@
46
  "normalize_embedding": false,
47
  "num_beams": 6,
48
  "num_hidden_layers": 6,
49
- "pad_token_id": 64683,
50
  "scale_embedding": true,
51
  "static_position_embeddings": true,
52
- "vocab_size": 64684
 
53
  }
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "swish",
4
  "add_bias_logits": false,
9
  "attention_dropout": 0.0,
10
  "bad_words_ids": [
11
  [
12
+ 63104
13
  ]
14
  ],
15
  "bos_token_id": 0,
16
+ "classifier_dropout": 0.0,
17
  "d_model": 512,
18
  "decoder_attention_heads": 8,
19
  "decoder_ffn_dim": 2048,
20
  "decoder_layerdrop": 0.0,
21
  "decoder_layers": 6,
22
+ "decoder_start_token_id": 63104,
23
+ "do_blenderbot_90_layernorm": false,
24
  "dropout": 0.1,
25
  "encoder_attention_heads": 8,
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
  "eos_token_id": 0,
30
+ "extra_pos_embeddings": 2,
31
+ "force_bos_token_to_be_generated": false,
32
  "id2label": {
33
  "0": "LABEL_0",
34
  "1": "LABEL_1",
48
  "normalize_embedding": false,
49
  "num_beams": 6,
50
  "num_hidden_layers": 6,
51
+ "pad_token_id": 63104,
52
  "scale_embedding": true,
53
  "static_position_embeddings": true,
54
+ "use_cache": true,
55
+ "vocab_size": 63105
56
  }
metadata.json ADDED
@@ -0,0 +1 @@
 
1
+ {"hf_name":"es-he","source_languages":"spa","target_languages":"heb","opus_readme_url":"https:\/\/github.com\/Helsinki-NLP\/Tatoeba-Challenge\/tree\/master\/models\/spa-heb\/README.md","original_repo":"Tatoeba-Challenge","tags":["translation"],"languages":["es","he"],"src_constituents":["Spanish",["spa"]],"tgt_constituents":["Hebrew",["heb"]],"src_multilingual":false,"tgt_multilingual":false,"long_pair":"spa-heb","prepro":" normalization + SentencePiece (spm32k,spm32k)","url_model":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/spa-heb\/opus-2020-12-10.zip","url_test_set":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/spa-heb\/opus-2020-12-10.test.txt","src_alpha3":"spa","tgt_alpha3":"heb","chrF2_score":0.636,"bleu":43.6,"brevity_penalty":0.992,"ref_len":12112.0,"src_name":"Spanish","tgt_name":"Hebrew","train_date":1607558400000,"src_alpha2":"es","tgt_alpha2":"he","prefer_old":false,"short_pair":"es-he","helsinki_git_sha":"b317f78a3ec8a556a481b6a53dc70dc11769ca96","transformers_git_sha":"1310e1a758edc8e89ec363db76863c771fbeb1de","port_machine":"LM0-400-22516.local","port_time":"2020-12-11-11:41"}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb2be9ef637ae5abe44d66d8d1fb128ee382d9f816447cf3274885d55f54e3c
3
- size 311436525
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b1d807ac7b402df12d461d58128b16e8c68ccd16c17a028663e501fed9b0dd4
3
+ size 153110363
source.spm CHANGED
Binary files a/source.spm and b/source.spm differ
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm CHANGED
Binary files a/target.spm and b/target.spm differ
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"target_lang": "he", "source_lang": "es"}
1
+ {"source_lang": "spa", "target_lang": "heb", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "marian_ckpt/spa-heb"}
vocab.json CHANGED
The diff for this file is too large to render. See raw diff