# Author: sappho192
# Commit 137b040: "Set max_length=500 when calling generate()"
import gradio as gr
import transformers
# Hub identifiers of the checkpoints whose tokenizers are loaded below:
# the encoder name feeds the source-side tokenizer, the decoder name the
# target-side tokenizer.
encoder_model_name = "cl-tohoku/bert-base-japanese-v2"
decoder_model_name = "skt/kogpt2-base-v2"
# Source-text tokenizer (used by translate() to encode the input sentence).
src_tokenizer = transformers.BertJapaneseTokenizer.from_pretrained(encoder_model_name)
# Target-text tokenizer (used by translate() to decode generated token ids).
trg_tokenizer = transformers.PreTrainedTokenizerFast.from_pretrained(decoder_model_name)
# Encoder-decoder model loaded once at import time and shared by every request.
model = transformers.EncoderDecoderModel.from_pretrained("sappho192/aihub-ja-ko-translator")
def translate(text_src: str) -> str:
    """Translate ``text_src`` with the module-level encoder-decoder model.

    Args:
        text_src: Sentence to translate. ``None``, empty, or
            whitespace-only input short-circuits to an empty string.

    Returns:
        The decoded output text, or ``""`` when there is nothing to
        translate.
    """
    # Blank input carries nothing to translate; skip the model call instead
    # of letting the decoder generate text from special tokens alone.
    if not text_src or not text_src.strip():
        return ""

    # truncation=True clips over-long input to the tokenizer's configured
    # maximum so the encoder is not handed more tokens than it accepts.
    encoded = src_tokenizer(
        text_src,
        return_attention_mask=False,
        return_token_type_ids=False,
        return_tensors='pt',
        truncation=True,
    )
    # Default generation method (greedy search, no logits processor),
    # capped at 500 tokens.
    generated = model.generate(**encoded, max_length=500)
    # Keep the first (only) sequence and drop its first and last token
    # before decoding -- presumably the BOS/EOS markers; confirm against
    # the decoder's generation config.
    token_ids = generated[0, 1:-1]
    return trg_tokenizer.decode(token_ids.cpu())
def endpoint(sentence):
    """Hand the submitted sentence to ``translate`` and return its result."""
    translated = translate(sentence)
    return translated
# Build the demo page: a sentence box, a result box, a submit button, a
# disclaimer, and a few clickable example sentences.
with gr.Blocks() as demo:
    # Named *_box so the builtin input() is not shadowed.
    input_box = gr.Textbox(label="Sentence")
    output_box = gr.Textbox(label="Result")
    btn = gr.Button(value="Submit")
    # Clicking the button runs endpoint() on the sentence and shows the result.
    btn.click(endpoint, inputs=[input_box], outputs=[output_box])
    gr.Markdown("## Examples")
    # Disclaimer in English and Korean. The lines are kept unindented inside
    # the string so Markdown does not render them as a code block.
    gr.Markdown(
        "Translated results may be wrong or contain misleading content.\n"
        "번역된 결과는 정확하지 않을 수 있으며, 적절치 않은 표현을 포함할 수 있습니다."
    )
    # Sample source sentences; each inner list is one example row for the
    # single input component.
    gr.Examples(
        examples=[
            ["試験前に緊張したあまり、熱がでてしまった。"],
            ["山田は英語にかけてはクラスの誰にも負けない。"],
            ["この本によれば、最初の人工橋梁は新石器時代にさかのぼるという。"],
        ],
        inputs=[input_box],
        outputs=output_box,
        fn=endpoint,
        # Do not pre-compute example outputs when the app is built.
        cache_examples=False,
    )
# Start the Gradio app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    # share=True additionally requests a public share link from Gradio
    # (per Gradio's launch() documentation).
    demo.launch(share=True)