staka committed
Commit
536b0cc
1 Parent(s): 8f3fc48

Update the model and add an eval section

Files changed (6)
  1. README.md +17 -0
  2. config.json +2 -2
  3. pytorch_model.bin +2 -2
  4. source.spm +2 -2
  5. target.spm +2 -2
  6. vocab.json +0 -0
README.md CHANGED
@@ -41,3 +41,20 @@ from transformers import pipeline
  tako_translator = pipeline('translation', model='staka/takomt')
  tako_translator('This is a cat.')
  ```
+
+ ### Eval results
+
+ The results of the evaluation using [tatoeba](https://tatoeba.org/ja) (500 randomly selected sentences) are as follows:
+
+ |source |target |BLEU(*1)|
+ |-------|-------|--------|
+ |de |ja |27.8 |
+ |en |ja |28.4 |
+ |es |ja |32.0 |
+ |fr |ja |27.9 |
+ |it |ja |24.3 |
+ |ru |ja |27.3 |
+ |uk |ja |29.8 |
+
+
+ (*1) sacrebleu --tokenize ja-mecab
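
The footnote indicates the scores were computed with sacreBLEU's `ja-mecab` tokenizer. The snippet below is a minimal sketch, not the author's actual evaluation script: it assumes `sacrebleu` (with its Japanese/MeCab extras) is installed, and the `sources`/`references` lists are hypothetical placeholders for the 500 Tatoeba sentence pairs.

```python
# Sketch only: score translations in the style of the table above.
# `sources` and `references` stand in for the Tatoeba test pairs.
import sacrebleu
from transformers import pipeline

tako_translator = pipeline('translation', model='staka/takomt')

sources = ['This is a cat.']      # e.g. 500 Tatoeba source sentences
references = ['これは猫です。']    # matching Japanese references

hypotheses = [out['translation_text'] for out in tako_translator(sources)]
bleu = sacrebleu.corpus_bleu(hypotheses, [references],
                             tokenize='ja-mecab')  # matches the CLI flag in (*1)
print(f'BLEU = {bleu.score:.1f}')
```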
config.json CHANGED
@@ -22,7 +22,7 @@
   "encoder_attention_heads": 8,
   "encoder_ffn_dim": 2048,
   "encoder_layerdrop": 0.0,
- "encoder_layers": 8,
+ "encoder_layers": 6,
   "eos_token_id": 0,
   "forced_eos_token_id": 0,
   "gradient_checkpointing": false,
@@ -33,7 +33,7 @@
   "model_type": "marian",
   "normalize_embedding": false,
   "num_beams": 12,
- "num_hidden_layers": 8,
+ "num_hidden_layers": 6,
   "pad_token_id": 96000,
   "scale_embedding": true,
   "static_position_embeddings": true,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e2646f164e2038a98f5a160b10afd54ed7728a6c4f0a165e6c4f12dc85cf2a82
- size 297779907
+ oid sha256:62833e6e935aee7a15a0a5b7cd3c3a4f136d858b8d5b598993062b72f7f9fd2d
+ size 285160131
source.spm CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0e96f456c06bf0e8d8833824c3e931af1973ad83f7954ca6fef3dbbd3672c50
- size 2085213
+ oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
+ size 2100964
target.spm CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0e96f456c06bf0e8d8833824c3e931af1973ad83f7954ca6fef3dbbd3672c50
- size 2085213
+ oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
+ size 2100964
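
The binary files above are tracked as Git LFS pointers, which record only the sha256 oid and byte size of each object. As a small sketch (assuming the repository has been cloned and the LFS objects pulled locally), a downloaded file can be checked against the pointer values shown in the diff:

```python
# Sketch: verify a local file against the Git LFS pointer fields above.
import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid

# Values taken from the pytorch_model.bin pointer in this commit.
print(matches_lfs_pointer(
    'pytorch_model.bin',
    '62833e6e935aee7a15a0a5b7cd3c3a4f136d858b8d5b598993062b72f7f9fd2d',
    285160131,
))
```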
vocab.json CHANGED
The diff for this file is too large to render. See raw diff