File size: 903 Bytes
dbd0cd3
f7c5310
59c9f9b
f7c5310
 
 
 
 
 
45fdae9
f7c5310
 
 
6c82ba0
 
f7c5310
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint identifier handed to both from_pretrained() calls, kept in one
# place so the tokenizer and the model cannot drift apart.
MODEL_ID = "TwentyNine/byt5-ain-kana-latin-converter-v1"

# Loaded once at import time and reused by every conversion request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

def transcribe(input_str: str) -> str:
    """Convert Ainu text written in katakana to its Latin-alphabet spelling.

    Args:
        input_str: Text to convert. Empty, whitespace-only or ``None`` input
            short-circuits to an empty string without invoking the model.

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        there is nothing to convert.
    """
    # Nothing to transliterate: skip tokenisation and generation entirely.
    if not input_str or not input_str.strip():
        return ""

    # Encode to a PyTorch tensor ('pt'); this exact tensor must be the one
    # handed to generate(), so it is bound to the name used on the next line.
    input_ids = tokenizer.encode(input_str, return_tensors='pt')
    # max_length=256 bounds the generated sequence, so very long inputs may
    # come back truncated.
    output_ids = model.generate(input_ids, max_length=256)
    # generate() returns a batch; a single sequence was submitted, so take
    # the first (only) row.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Input widget: editable box for the katakana text; autofocus=True asks
# Gradio to focus it when the page loads.
kana_box = gr.Textbox(
    label='Input (kana)',
    value='',
    placeholder='',
    info='Ainu text written in Japanese katakana (input).',
    interactive=True,
    autofocus=True,
)

# Output widget: shows the string returned by transcribe().
latin_box = gr.Textbox(
    label='Output (alphabet)',
    info='Ainu text written in the Latin alphabet (output).',
)

# Single-input, single-output web UI around the converter function.
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=kana_box,
    outputs=latin_box,
    title="BYT5 Ainu Kana-Latin Converter (V1)",
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    gradio_app.launch()