tiedeman committed on
Commit
39dc4cc
1 Parent(s): 5f9f9ab

Initial commit

Browse files
README.md CHANGED
@@ -1,24 +1,96 @@
1
  ---
 
 
 
 
2
  tags:
3
  - translation
4
- ---
5
 
6
- ### opus-mt-fr-he
 
 
7
 
8
- * source languages: fr
9
- * target languages: he
10
- * OPUS readme: [fr-he](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/fr-he/README.md)
11
 
12
- * dataset: opus
13
- * model: transformer-align
14
- * pre-processing: normalization + SentencePiece
15
- * download original weights: [opus-2020-01-09.zip](https://object.pouta.csc.fi/OPUS-MT-models/fr-he/opus-2020-01-09.zip)
16
- * test set translations: [opus-2020-01-09.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/fr-he/opus-2020-01-09.test.txt)
17
- * test set scores: [opus-2020-01-09.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/fr-he/opus-2020-01-09.eval.txt)
 
 
18
 
19
  ## Benchmarks
20
 
21
  | testset | BLEU | chr-F |
22
  |-----------------------|-------|-------|
23
- | Tatoeba.fr.he | 39.3 | 0.601 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
 
1
  ---
2
+ language:
3
+ - fr
4
+ - he
5
+
6
  tags:
7
  - translation
 
8
 
9
+ license: apache-2.0
10
+ ---
11
+ ### fr-he
12
 
13
+ * source group: French
14
+ * target group: Hebrew
15
+ * OPUS readme: [fra-heb](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/fra-heb/README.md)
16
 
17
+ * model: transformer
18
+ * source language(s): fra
19
+ * target language(s): heb
20
+ * model: transformer
21
+ * pre-processing: normalization + SentencePiece (spm32k,spm32k)
22
+ * download original weights: [opus-2020-12-10.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/fra-heb/opus-2020-12-10.zip)
23
+ * test set translations: [opus-2020-12-10.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/fra-heb/opus-2020-12-10.test.txt)
24
+ * test set scores: [opus-2020-12-10.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/fra-heb/opus-2020-12-10.eval.txt)
25
 
26
  ## Benchmarks
27
 
28
  | testset | BLEU | chr-F |
29
  |-----------------------|-------|-------|
30
+ | Tatoeba-test.fra.heb | 39.2 | 0.598 |
31
+
32
+
33
+ ### System Info:
34
+ - hf_name: fr-he
35
+
36
+ - source_languages: fra
37
+
38
+ - target_languages: heb
39
+
40
+ - opus_readme_url: https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/fra-heb/README.md
41
+
42
+ - original_repo: Tatoeba-Challenge
43
+
44
+ - tags: ['translation']
45
+
46
+ - languages: ['fr', 'he']
47
+
48
+ - src_constituents: ('French', {'fra'})
49
+
50
+ - tgt_constituents: ('Hebrew', {'heb'})
51
+
52
+ - src_multilingual: False
53
+
54
+ - tgt_multilingual: False
55
+
56
+ - long_pair: fra-heb
57
+
58
+ - prepro: normalization + SentencePiece (spm32k,spm32k)
59
+
60
+ - url_model: https://object.pouta.csc.fi/Tatoeba-MT-models/fra-heb/opus-2020-12-10.zip
61
+
62
+ - url_test_set: https://object.pouta.csc.fi/Tatoeba-MT-models/fra-heb/opus-2020-12-10.test.txt
63
+
64
+ - src_alpha3: fra
65
+
66
+ - tgt_alpha3: heb
67
+
68
+ - chrF2_score: 0.598
69
+
70
+ - bleu: 39.2
71
+
72
+ - brevity_penalty: 1.0
73
+
74
+ - ref_len: 20655.0
75
+
76
+ - src_name: French
77
+
78
+ - tgt_name: Hebrew
79
+
80
+ - train_date: 2020-12-10 00:00:00
81
+
82
+ - src_alpha2: fr
83
+
84
+ - tgt_alpha2: he
85
+
86
+ - prefer_old: False
87
+
88
+ - short_pair: fr-he
89
+
90
+ - helsinki_git_sha: b317f78a3ec8a556a481b6a53dc70dc11769ca96
91
+
92
+ - transformers_git_sha: 1310e1a758edc8e89ec363db76863c771fbeb1de
93
+
94
+ - port_machine: LM0-400-22516.local
95
 
96
+ - port_time: 2020-12-11-16:02
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_num_labels": 3,
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -10,23 +9,26 @@
10
  "attention_dropout": 0.0,
11
  "bad_words_ids": [
12
  [
13
- 63186
14
  ]
15
  ],
16
  "bos_token_id": 0,
17
- "classif_dropout": 0.0,
18
  "d_model": 512,
19
  "decoder_attention_heads": 8,
20
  "decoder_ffn_dim": 2048,
21
  "decoder_layerdrop": 0.0,
22
  "decoder_layers": 6,
23
- "decoder_start_token_id": 63186,
 
24
  "dropout": 0.1,
25
  "encoder_attention_heads": 8,
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
  "eos_token_id": 0,
 
 
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1",
@@ -46,8 +48,9 @@
46
  "normalize_embedding": false,
47
  "num_beams": 6,
48
  "num_hidden_layers": 6,
49
- "pad_token_id": 63186,
50
  "scale_embedding": true,
51
  "static_position_embeddings": true,
52
- "vocab_size": 63187
 
53
  }
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "swish",
4
  "add_bias_logits": false,
9
  "attention_dropout": 0.0,
10
  "bad_words_ids": [
11
  [
12
+ 63139
13
  ]
14
  ],
15
  "bos_token_id": 0,
16
+ "classifier_dropout": 0.0,
17
  "d_model": 512,
18
  "decoder_attention_heads": 8,
19
  "decoder_ffn_dim": 2048,
20
  "decoder_layerdrop": 0.0,
21
  "decoder_layers": 6,
22
+ "decoder_start_token_id": 63139,
23
+ "do_blenderbot_90_layernorm": false,
24
  "dropout": 0.1,
25
  "encoder_attention_heads": 8,
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
  "eos_token_id": 0,
30
+ "extra_pos_embeddings": 2,
31
+ "force_bos_token_to_be_generated": false,
32
  "id2label": {
33
  "0": "LABEL_0",
34
  "1": "LABEL_1",
48
  "normalize_embedding": false,
49
  "num_beams": 6,
50
  "num_hidden_layers": 6,
51
+ "pad_token_id": 63139,
52
  "scale_embedding": true,
53
  "static_position_embeddings": true,
54
+ "use_cache": true,
55
+ "vocab_size": 63140
56
  }
metadata.json ADDED
@@ -0,0 +1 @@
 
1
+ {"hf_name":"fr-he","source_languages":"fra","target_languages":"heb","opus_readme_url":"https:\/\/github.com\/Helsinki-NLP\/Tatoeba-Challenge\/tree\/master\/models\/fra-heb\/README.md","original_repo":"Tatoeba-Challenge","tags":["translation"],"languages":["fr","he"],"src_constituents":["French",["fra"]],"tgt_constituents":["Hebrew",["heb"]],"src_multilingual":false,"tgt_multilingual":false,"long_pair":"fra-heb","prepro":" normalization + SentencePiece (spm32k,spm32k)","url_model":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/fra-heb\/opus-2020-12-10.zip","url_test_set":"https:\/\/object.pouta.csc.fi\/Tatoeba-MT-models\/fra-heb\/opus-2020-12-10.test.txt","src_alpha3":"fra","tgt_alpha3":"heb","chrF2_score":0.598,"bleu":39.2,"brevity_penalty":1.0,"ref_len":20655.0,"src_name":"French","tgt_name":"Hebrew","train_date":1607558400000,"src_alpha2":"fr","tgt_alpha2":"he","prefer_old":false,"short_pair":"fr-he","helsinki_git_sha":"b317f78a3ec8a556a481b6a53dc70dc11769ca96","transformers_git_sha":"1310e1a758edc8e89ec363db76863c771fbeb1de","port_machine":"LM0-400-22516.local","port_time":"2020-12-11-16:02"}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:383fe741dd875022f6b3ff61922c6207440628a1c607c08f8b78a3dcd6558608
3
- size 308364681
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e83b86c1a998e84487de2a9d26db337ec99c564324ca2a178d4a59bb1a63557
3
+ size 153146267
source.spm CHANGED
Binary files a/source.spm and b/source.spm differ
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm CHANGED
Binary files a/target.spm and b/target.spm differ
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"target_lang": "he", "source_lang": "fr"}
1
+ {"source_lang": "fra", "target_lang": "heb", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "marian_ckpt/fra-heb"}
vocab.json CHANGED
The diff for this file is too large to render. See raw diff