"""Gradio demo: convert Ainu text written in katakana to the Latin alphabet.

Three ByT5 seq2seq checkpoints are loaded at import time; the UI labels them
by training scheduler type (cosine with restarts, polynomial, linear) and lets
the user pick which one performs the conversion.
"""

import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# A single tokenizer (loaded from the cosine-with-restarts checkpoint) is
# used for encoding and decoding with all three models.
tokenizer = AutoTokenizer.from_pretrained("TwentyNine/byt5-small-ainu-latinizer-cos_w_restarts")
model1 = AutoModelForSeq2SeqLM.from_pretrained("TwentyNine/byt5-small-ainu-latinizer-cos_w_restarts")
model2 = AutoModelForSeq2SeqLM.from_pretrained("TwentyNine/byt5-small-ainu-latinizer-polynomial")
model3 = AutoModelForSeq2SeqLM.from_pretrained("TwentyNine/byt5-small-ainu-latinizer-linear")

# Maps the value emitted by the radio button to the model it selects.
_MODELS_BY_INDEX = {1: model1, 2: model2, 3: model3}

# Upper bound passed to ``generate`` as ``max_length`` for each line.
_MAX_OUTPUT_LENGTH = 256

# Example sentence shown both as the default value and as the placeholder.
_EXAMPLE_KANA = 'トゥイマ ヒ ワ エエㇰ ワ ヒオーイオイ。ピㇼカノ ヌカㇻ ヤン!'


def transcribe(input_str, model_index):
    """Convert katakana Ainu text to Latin script, one line at a time.

    Args:
        input_str: Text to convert. It is split on ``'\\n'`` and every line is
            stripped of surrounding whitespace before being sent to the model.
            ``None`` is treated as empty text.
        model_index: ``1``, ``2`` or ``3`` to select ``model1``, ``model2`` or
            ``model3``. Any other value (including ``None``, which is what the
            radio button yields when nothing is selected) falls back to
            ``model1``.

    Returns:
        The converted text with exactly one output line per input line, joined
        by ``'\\n'``. Blank input lines are kept as blank output lines.
    """
    model = _MODELS_BY_INDEX.get(model_index, model1)

    converted_lines = []
    for raw_line in (input_str or '').split('\n'):
        line = raw_line.strip()
        if not line:
            # Nothing to convert; keep the blank line so the output stays
            # aligned with the input, and skip a pointless generation pass.
            converted_lines.append('')
            continue
        input_enc = tokenizer.encode(line, return_tensors='pt')
        output_enc = model.generate(input_enc, max_length=_MAX_OUTPUT_LENGTH)
        converted_lines.append(tokenizer.decode(output_enc[0], skip_special_tokens=True))

    # Joining (rather than conditionally appending '\n') guarantees that a
    # line decoding to an empty string still occupies its own output line.
    return '\n'.join(converted_lines)


gradio_app = gr.Interface(
    transcribe,
    inputs=[
        gr.Textbox(
            label='Input (kana)',
            value=_EXAMPLE_KANA,
            placeholder=_EXAMPLE_KANA,
            info='Ainu text written in Japanese katakana (input).',
            interactive=True,
            autofocus=True,
        ),
        gr.Radio(
            label="Training scheduler type",
            choices=[("Cosine with Restarts", 1), ("Polynomial", 2), ("Linear", 3)],
        ),
    ],
    outputs=gr.Textbox(
        label='Output (alphabet)',
        info='Ainu text written in the Latin alphabet (output).',
    ),
    title='KIT/TIP ByT5 Ainu Kana-Latin Converter',
    # Line breaks are written as escapes: a single-quoted literal cannot span
    # physical lines.
    article=(
        '\n\n'
        'Example sentence borrowed from New Express Ainu-go by Professor NAKAGAWA Hiroshi of Chiba University.'
        '\n\n'
    ),
)

if __name__ == '__main__':
    gradio_app.launch()