|
--- |
|
pipeline_tag: translation |
|
language: |
|
- ja |
|
- ko |
|
tags: |
|
- python |
|
- transformer |
|
- pytorch |
|
--- |
|
Project repository: https://github.com/akpe12/JP-KR-ocr-translator-for-travel |
|
|
|
- Usage |
|
```python |
|
from transformers import( |
|
EncoderDecoderModel, |
|
PreTrainedTokenizerFast, |
|
# XLMRobertaTokenizerFast, |
|
BertTokenizerFast, |
|
) |
|
|
|
encoder_model_name = "cl-tohoku/bert-base-japanese-v2" |
|
decoder_model_name = "skt/kogpt2-base-v2" |
|
|
|
src_tokenizer = BertTokenizerFast.from_pretrained(encoder_model_name) |
|
trg_tokenizer = PreTrainedTokenizerFast.from_pretrained(decoder_model_name) |
|
model = EncoderDecoderModel.from_pretrained("figuringoutmine/translator-for-travel-jp-to-kr") |
|
``` |
|
|
|
```python |
|
text = "豚骨ラーメン" |
|
embeddings = src_tokenizer(text, return_attention_mask=False, return_token_type_ids=False, return_tensors='pt') |
|
embeddings = {k: v for k, v in embeddings.items()} |
|
output = model.generate(**embeddings)[0, 1:-1] |
|
|
|
trg_tokenizer.decode(output.cpu()) |
|
``` |
|
|