TwentyNine
committed on
Commit
•
6e13b58
1
Parent(s):
b5efccc
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,30 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
|
4 |
+
# Identifier handed to both from_pretrained() calls below, so the tokenizer
# and the seq2seq model are always loaded from the same checkpoint.
_CHECKPOINT = "TwentyNine/byt5-ain-kana-latin-converter-v2"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
|
6 |
+
|
7 |
+
def transcribe(input_str):
    """Convert text line by line with the module-level tokenizer and model.

    The input is split on newline characters. Each line is stripped of
    surrounding whitespace, encoded with ``tokenizer``, passed through
    ``model.generate`` and decoded again. The per-line results are joined
    with newlines, so output line *i* always corresponds to input line *i*.

    Args:
        input_str: Text to convert; may hold several newline-separated lines.

    Returns:
        The converted lines joined with a newline character. Lines that are
        empty (or whitespace only) come back as empty lines.
    """
    converted = []

    for line in input_str.split('\n'):
        text = line.strip()

        if not text:
            # Nothing to convert: keep the blank line in place without
            # invoking the model, so line positions are preserved.
            converted.append('')
            continue

        input_enc = tokenizer.encode(text, return_tensors='pt')
        # max_length caps the length of the generated sequence.
        output_enc = model.generate(input_enc, max_length=256)
        converted.append(
            tokenizer.decode(output_enc[0], skip_special_tokens=True)
        )

    # Joining (rather than "append a newline only when the accumulated output
    # is non-empty") keeps separators even when leading results are empty.
    return '\n'.join(converted)
|
20 |
+
|
21 |
+
# Sample sentence used both as the initial value and as the placeholder of
# the input box.
# NOTE(review): this literal looks mis-encoded in the copy under review —
# verify it against the original file before relying on it.
_EXAMPLE_KANA = 'γγ₯γ€γγγγγ―γγ¨γ¨γ°γγ―γγγͺγΌγ€γͺγ€γγγΌγ«γγγγ«γ»γγ€γ³οΌ'

# Input widget: editable, focused on page load, pre-filled with the sample.
_kana_input = gr.Textbox(
    label='Input (kana)',
    value=_EXAMPLE_KANA,
    placeholder=_EXAMPLE_KANA,
    info='Ainu text written in Japanese katakana (input).',
    interactive=True,
    autofocus=True,
)

# Output widget that displays the string returned by transcribe().
_latin_output = gr.Textbox(
    label='Output (alphabet)',
    info='Ainu text written in the Latin alphabet (output).',
)

# HTML shown with the interface, crediting the source of the sample sentence.
_ARTICLE_HTML = '<p>Example sentence borrowed from <a href="https://www.hakusuisha.co.jp/book/b584600.html">New Express Ainu-go</a> by <a href="https://researchmap.jp/read0064265/?lang=english">Professor NAKAGAWA Hiroshi</a> of Chiba University.</p>'

# NOTE(review): the title says "V1" while the checkpoint id loaded in this
# file ends in "-v2" — confirm which one is intended.
gradio_app = gr.Interface(
    transcribe,
    inputs=_kana_input,
    outputs=_latin_output,
    title='BYT5 Ainu Kana-Latin Converter (V1)',
    article=_ARTICLE_HTML,
)

# Start the web UI only when this file is executed as a script.
if __name__ == '__main__':
    gradio_app.launch()
|