ASR / app.py
drclab's picture
Orange and Purple
72a34c4
raw
history blame contribute delete
No virus
2.75 kB
# import gradio as gr
# gr.Interface.load("models/facebook/fastspeech2-en-ljspeech").launch()
# import gradio as gr
# gr.Interface.load("models/openai/whisper-large-v2").launch()
import gradio as gr
import torch.cuda
import whisper
from whisper.tokenizer import LANGUAGES
# True when a CUDA device is available; forwarded to Whisper as the fp16 flag.
gpu = torch.cuda.is_available()
# Loaded Whisper model; populated by interface() before the UI launches.
model = None
# DESCRIPTION = """
# <div style="display:flex; gap: 1em; justify-content: center; align-items: center;">
# <a target="_blank" href="https://github.com/dsymbol">
# <img alt="GitHub" src="https://img.shields.io/github/followers/dsymbol?style=social">
# </a>
# <a target="_blank" href="https://colab.research.google.com/#fileId=https://huggingface.co/spaces/dsymbol/whisper-webui/blob/main/notebook.ipynb">
# <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
# </a>
# <a target="_blank" href="https://huggingface.co/spaces/dsymbol/whisper-webui" rel="noopener noreferrer"><img
# src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces">
# </a>
# </div>
# """
def transcribe(recording, file, language, task):
    """Transcribe (or translate) audio from the microphone or an upload.

    Exactly one of *recording*/*file* must be set; otherwise a short
    human-readable error string is returned instead of a transcription.
    Uses the module-level ``model`` loaded by interface().
    """
    # Guard clauses: reject zero or two audio sources up front.
    if recording and file:
        return "Please only use one field."
    if not (recording or file):
        return "Please use one field."

    # "Detect" means let Whisper auto-detect the spoken language.
    chosen_language = None if language == "Detect" else language
    audio_path = file or recording
    result = model.transcribe(
        audio_path, task=task.lower(), language=chosen_language, fp16=gpu
    )
    return result["text"].strip()
def interface(model_name="small"):
    """Load the requested Whisper checkpoint and build the Gradio UI.

    Loading assigns the module-level ``model`` so transcribe() can use it.
    Returns the constructed gr.Interface (not yet launched).
    """
    global model
    model = whisper.load_model(model_name)

    # Two mutually exclusive audio sources; transcribe() enforces exclusivity.
    mic_input = gr.Audio(label="Record", source="microphone", type="filepath")
    upload_input = gr.Audio(label="Upload", source="upload", type="filepath")
    language_input = gr.Dropdown(
        label="Language",
        choices=["Detect"] + sorted(name.title() for name in LANGUAGES.values()),
        value="Detect",
    )
    task_input = gr.Dropdown(
        label="Task",
        choices=["Transcribe", "Translate"],
        value="Transcribe",
        info="Whether to perform X->X speech recognition or X->English translation",
    )

    return gr.Interface(
        fn=transcribe,
        inputs=[mic_input, upload_input, language_input, task_input],
        outputs=gr.Textbox(label="Transcription", lines=26),
        theme=gr.themes.Glass(
            primary_hue=gr.themes.colors.orange,
            secondary_hue=gr.themes.colors.purple,
        ),
        title="Whisper is listening to you",
        allow_flagging="never",
    )
if __name__ == "__main__":
    # Queue requests so long transcriptions don't hit request timeouts.
    app = interface()
    app.queue().launch(debug=True)