"""Gradio demo: transcribe Swedish speech with a fine-tuned Whisper model,
translate the transcript, and synthesize the translation as speech."""

import gradio as gr
from deep_translator import GoogleTranslator
from transformers import pipeline
import os
from gtts import gTTS

# ASR pipeline backed by a Whisper-small model fine-tuned on Swedish.
pipe = pipeline(model="freeja/lab2-whisper-sv")

# Target languages supported by both GoogleTranslator and gTTS.
# Single source of truth shared by translation, TTS, and the UI dropdown.
LANGUAGE_CODES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
}


def transcribe_audio(audio, language):
    """Transcribe Swedish audio, translate it, and speak the translation.

    Parameters
    ----------
    audio : str
        Filepath to the recorded audio (Gradio `type="filepath"`).
    language : str
        Target language name; must be a key of LANGUAGE_CODES.

    Returns
    -------
    str
        The transcription followed by its translation, with headers.
    """
    # BUG FIX: pipe(audio, "text") passed "text" as a pipeline argument;
    # the transcript is the "text" field of the returned dict.
    transcribed = pipe(audio)["text"]
    trans_text = translate_audio(transcribed, language)
    # Side effect only: writes the spoken translation to an mp3 file.
    # BUG FIX: the original did `result += text_to_speech`, concatenating
    # the function object itself onto the string (TypeError at runtime).
    text_to_speech(trans_text, language)
    result = "Transcribed text\n"
    result += transcribed + "\n"
    result += "Translated text\n"
    result += trans_text
    return result


def translate_audio(text, language):
    """Translate Swedish `text` into the target `language`.

    Raises KeyError if `language` is not in LANGUAGE_CODES.
    """
    lang = LANGUAGE_CODES[language]
    translated_text = GoogleTranslator(source='sv', target=lang).translate(text)
    return translated_text


def text_to_speech(text, language):
    """Synthesize `text` in the target `language` and save it as mp3.

    Returns the path of the written audio file.
    """
    lang = LANGUAGE_CODES[language]
    # BUG FIX: gTTS's second positional parameter is `tld`, not `lang`;
    # the language code must be passed by keyword.
    tts = gTTS(text, lang=lang, slow=False)
    # BUG FIX: the original built the gTTS object and discarded it,
    # so no audio was ever produced.
    out_path = "translated_speech.mp3"
    tts.save(out_path)
    return out_path


face = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(source="microphone", type="filepath",
                 label="Transcribe from Microphone"),
        # BUG FIX: the dropdown offered "Dutch", which is absent from the
        # language dictionaries and raised KeyError when selected; the
        # supported language is "German".
        gr.Dropdown(list(LANGUAGE_CODES), value="English",
                    label="Translate to "),
    ],
    outputs="text",
    title="Whisper Small Swedish",
    description=("Realtime demo for Swedish speech recognition with "
                 "translation using a fine-tuned Whisper small model"),
)