import gradio as gr
from googletrans import Translator
from transformers import pipeline

# Fine-tuned tiny Whisper model for Japanese speech recognition.
pipe = pipeline(model="Nikolajvestergaard/Japanese_Fine_Tuned_Whisper_Model")


def translate_and_transcribe(audio):
    """Transcribe Japanese audio, then derive its pronunciation and English translation.

    Parameters
    ----------
    audio : str
        Filepath to the recorded audio clip (Gradio supplies this because the
        Audio component below uses type="filepath").

    Returns
    -------
    tuple[str, str, str]
        (transcription, pronunciation, translation). Pronunciation is an empty
        string when googletrans does not provide one.
    """
    translator = Translator()

    # Transcribe Japanese audio to text with the Whisper pipeline.
    transcription = pipe(audio)["text"]

    # Translate the transcription to English.
    translation = translator.translate(transcription, dest='en').text

    # Ask googletrans for the romanized pronunciation by "translating" the
    # Japanese text to Japanese. googletrans may return None here, which
    # would otherwise render as the literal string "None" in the UI.
    pronunciation = translator.translate(transcription, dest='ja').pronunciation or ""

    return transcription, pronunciation, translation


# NOTE: gr.inputs / gr.outputs and the `source=` kwarg were deprecated in
# Gradio 3.x and removed in 4.x; the top-level components are the supported API.
input_audio = gr.Audio(
    label="Upload your Japanese speech here. Try to say 'Kon'nichiwa', 'Arigatō' or perhaps 'Sayōnara'",
    sources=["microphone"],
    type="filepath",
)
output_textbox1 = gr.Textbox(label="Transcription")
output_textbox2 = gr.Textbox(label="Pronunciation")
output_textbox3 = gr.Textbox(label="Translation")

iface = gr.Interface(
    fn=translate_and_transcribe,
    inputs=input_audio,
    outputs=[output_textbox1, output_textbox2, output_textbox3],
    title="Japanese Automatic Speech Recognition, Pronunciation and Translation",
    description="Record Japanese speech to get its pronunciation and translate it to English. All done by using a fine-tuned version of the tiny Whisper model which is connected to a Google Translate API",
)

iface.launch()