# Alternative one-liner: serve the hosted model directly instead of running
# inference locally:
# gr.Interface.load("models/openai/whisper-large-v2").launch()

import gradio as gr
import torch
import whisper
from whisper.tokenizer import LANGUAGES

# Use fp16 inference when a GPU is available; fall back to fp32 on CPU.
gpu = torch.cuda.is_available()

# Populated by interface() before the app launches.
model = None
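# For reference, a minimal sketch (not part of the app) of the bare Whisper
# call that transcribe() below wraps, assuming a hypothetical local file
# "sample.wav":
#
#     model = whisper.load_model("small")
#     result = model.transcribe("sample.wav", task="transcribe", fp16=gpu)
#     print(result["text"].strip())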
# """ def transcribe(recording, file, language, task): if recording and file: text = "Please only use one field." elif not recording and not file: text = "Please use one field." else: language = None if language == "Detect" else language filepath = file if file else recording text = model.transcribe( filepath, task=task.lower(), language=language, fp16=gpu )["text"].strip() return text def interface(model_name="small"): global model model = whisper.load_model(model_name) return gr.Interface( fn=transcribe, inputs=[ gr.Audio(label="Record", source="microphone", type="filepath"), gr.Audio(label="Upload", source="upload", type="filepath"), gr.Dropdown( label="Language", choices=["Detect"] + sorted([i.title() for i in LANGUAGES.values()]), value="Detect", ), gr.Dropdown( label="Task", choices=["Transcribe", "Translate"], value="Transcribe", info="Whether to perform X->X speech recognition or X->English translation", ), ], outputs=gr.Textbox(label="Transcription", lines=26), #theme=gr.themes.Default(), theme = gr.themes.Glass(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.purple), title="Whisper is listening to you", #description=DESCRIPTION, allow_flagging="never", ) if __name__ == "__main__": demo = interface() demo.queue().launch(debug=True)