# pip install transformers
from transformers import pipeline
import gradio as gr

model = pipeline(model="openai/whisper-base")
en_jp_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-jap")

# Task is "automatic-speech-recognition"; pipeline() infers it from the model.
# transcriber = pipeline(model="openai/whisper-base")
# transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac")


def transcribe_audio(mic=None, file=None):
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"
    transcription = model(audio)["text"]
    return transcription


def translate_text(transcription):
    return en_jp_translator(transcription)[0]["translation_text"]


def combined_function(audio):
    # Transcribe the audio, then translate the transcription in a single step.
    transcription = transcribe_audio(mic=audio)
    return transcription, translate_text(transcription)


demo = gr.Blocks()

with demo:
    audio_file = gr.Audio(type="filepath")
    text = gr.Textbox()
    translate = gr.Textbox()

    # b1 = gr.Button("Recognize Speech & Translate")
    b1 = gr.Button("Recognize Speech")
    b2 = gr.Button("Translate")

    # b1.click(combined_function, inputs=audio_file, outputs=[text, translate])
    b1.click(transcribe_audio, inputs=audio_file, outputs=text)
    b2.click(translate_text, inputs=text, outputs=translate)

demo.launch()