|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import torch.cuda |
|
import whisper |
|
from whisper.tokenizer import LANGUAGES |
|
|
|
# True when a CUDA device is usable; passed as ``fp16`` to the model so half
# precision is only requested when running on a GPU.
gpu = torch.cuda.is_available()

# Whisper model shared by the request handler; populated by ``interface()``.
model = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def transcribe(recording, file, language, task):
    """Transcribe or translate a single audio input with the loaded model.

    Exactly one of ``recording`` and ``file`` must be provided; otherwise a
    short usage hint is returned instead of a transcription.

    Args:
        recording: Path of the microphone recording, or a falsy value when
            nothing was recorded.
        file: Path of the uploaded audio file, or a falsy value when nothing
            was uploaded.
        language: Language name chosen in the UI. The special value
            ``"Detect"`` is converted to ``None`` before calling the model.
        task: ``"Transcribe"`` or ``"Translate"``; lower-cased before being
            passed to the model.

    Returns:
        The stripped ``"text"`` entry of the model result, or a usage hint
        when both or neither of the audio inputs are supplied.

    Raises:
        RuntimeError: If the module-level ``model`` has not been loaded yet.
    """
    # Input validation first, so the hints work even without a loaded model.
    if recording and file:
        return "Please only use one field."
    if not recording and not file:
        return "Please use one field."

    if model is None:
        # Fail with an explicit message rather than an AttributeError on None.
        raise RuntimeError("No model loaded; call interface() first.")

    language = None if language == "Detect" else language
    filepath = file if file else recording

    # fp16 follows the module-level ``gpu`` flag (CUDA availability).
    result = model.transcribe(
        filepath, task=task.lower(), language=language, fp16=gpu
    )
    return result["text"].strip()
|
|
|
|
|
def interface(model_name="small"):
    """Load a Whisper model and build the Gradio interface around it.

    As a side effect, the module-level ``model`` is rebound to the freshly
    loaded model so that ``transcribe`` can use it.

    Args:
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        A ``gr.Interface`` wired to ``transcribe`` with a microphone input,
        an upload input, language and task dropdowns, and a text output.
    """
    global model
    model = whisper.load_model(model_name)

    # "Detect" comes first, followed by the title-cased language names in
    # alphabetical order.
    language_choices = ["Detect"] + sorted(
        name.title() for name in LANGUAGES.values()
    )

    return gr.Interface(
        fn=transcribe,
        inputs=[
            gr.Audio(label="Record", source="microphone", type="filepath"),
            gr.Audio(label="Upload", source="upload", type="filepath"),
            gr.Dropdown(
                label="Language",
                choices=language_choices,
                value="Detect",
            ),
            gr.Dropdown(
                label="Task",
                choices=["Transcribe", "Translate"],
                value="Transcribe",
                info="Whether to perform X->X speech recognition or X->English translation",
            ),
        ],
        outputs=gr.Textbox(label="Transcription", lines=26),
        theme=gr.themes.Glass(
            primary_hue=gr.themes.colors.orange,
            secondary_hue=gr.themes.colors.purple,
        ),
        title="Whisper is listening to you",
        allow_flagging="never",
    )
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI with the default model, enable the request queue, and
    # launch it with debug=True.
    demo = interface()
    demo.queue().launch(debug=True)