staka committed
Commit
536b0cc
1 Parent(s): 8f3fc48

Update the model and add an eval section

Files changed (6)
  1. README.md +17 -0
  2. config.json +2 -2
  3. pytorch_model.bin +2 -2
  4. source.spm +2 -2
  5. target.spm +2 -2
  6. vocab.json +0 -0
README.md CHANGED
@@ -41,3 +41,20 @@ from transformers import pipeline
  tako_translator = pipeline('translation', model='staka/takomt')
  tako_translator('This is a cat.')
  ```
+
+ ### Eval results
+
+ The results of the evaluation using [tatoeba](https://tatoeba.org/ja) (500 randomly selected sentences) are as follows:
+
+ |source |target |BLEU(*1)|
+ |-------|-------|--------|
+ |de |ja |27.8 |
+ |en |ja |28.4 |
+ |es |ja |32.0 |
+ |fr |ja |27.9 |
+ |it |ja |24.3 |
+ |ru |ja |27.3 |
+ |uk |ja |29.8 |
+
+
+ (*1) sacrebleu --tokenize ja-mecab
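
The footnote indicates the scores were computed with sacreBLEU's `ja-mecab` tokenizer. The snippet below is a minimal sketch, not the author's actual evaluation script: it assumes `sacrebleu` (with its Japanese/MeCab extras) is installed, and the `sources`/`references` lists are hypothetical placeholders for the 500 Tatoeba sentence pairs.

```python
# Sketch only: score translations in the style of the table above.
# `sources` and `references` stand in for the Tatoeba test pairs.
import sacrebleu
from transformers import pipeline

tako_translator = pipeline('translation', model='staka/takomt')

sources = ['This is a cat.']      # e.g. 500 Tatoeba source sentences
references = ['これは猫です。']    # matching Japanese references

hypotheses = [out['translation_text'] for out in tako_translator(sources)]
bleu = sacrebleu.corpus_bleu(hypotheses, [references],
                             tokenize='ja-mecab')  # matches the CLI flag in (*1)
print(f'BLEU = {bleu.score:.1f}')
```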
config.json CHANGED
@@ -22,7 +22,7 @@
   "encoder_attention_heads": 8,
   "encoder_ffn_dim": 2048,
   "encoder_layerdrop": 0.0,
- "encoder_layers": 8,
+ "encoder_layers": 6,
   "eos_token_id": 0,
   "forced_eos_token_id": 0,
   "gradient_checkpointing": false,
@@ -33,7 +33,7 @@
   "model_type": "marian",
   "normalize_embedding": false,
   "num_beams": 12,
- "num_hidden_layers": 8,
+ "num_hidden_layers": 6,
   "pad_token_id": 96000,
   "scale_embedding": true,
   "static_position_embeddings": true,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e2646f164e2038a98f5a160b10afd54ed7728a6c4f0a165e6c4f12dc85cf2a82
- size 297779907
+ oid sha256:62833e6e935aee7a15a0a5b7cd3c3a4f136d858b8d5b598993062b72f7f9fd2d
+ size 285160131
source.spm CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0e96f456c06bf0e8d8833824c3e931af1973ad83f7954ca6fef3dbbd3672c50
- size 2085213
+ oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
+ size 2100964
target.spm CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0e96f456c06bf0e8d8833824c3e931af1973ad83f7954ca6fef3dbbd3672c50
- size 2085213
+ oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
+ size 2100964
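
The binary files above are tracked as Git LFS pointers, which record only the sha256 oid and byte size of each object. As a small sketch (assuming the repository has been cloned and the LFS objects pulled locally), a downloaded file can be checked against the pointer values shown in the diff:

```python
# Sketch: verify a local file against the Git LFS pointer fields above.
import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    size = 0
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid

# Values taken from the pytorch_model.bin pointer in this commit.
print(matches_lfs_pointer(
    'pytorch_model.bin',
    '62833e6e935aee7a15a0a5b7cd3c3a4f136d858b8d5b598993062b72f7f9fd2d',
    285160131,
))
```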
vocab.json CHANGED
The diff for this file is too large to render. See raw diff