# import gradio as gr
# gr.Interface.load("models/facebook/fastspeech2-en-ljspeech").launch()
# import gradio as gr
# gr.Interface.load("models/openai/whisper-large-v2").launch()
import gradio as gr
import torch.cuda
import whisper
from whisper.tokenizer import LANGUAGES
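# whisper.tokenizer.LANGUAGES maps ISO 639-1 codes to lowercase language names
# (e.g. "en" -> "english"); the Language dropdown below title-cases and sorts them.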
gpu = torch.cuda.is_available()  # use fp16 inference when a CUDA GPU is available
model = None  # set by interface() once a Whisper checkpoint is loaded
# DESCRIPTION = """
# <div style="display:flex; gap: 1em; justify-content: center; align-items: center;">
#     <a target="_blank" href="https://github.com/dsymbol">
#         <img alt="GitHub" src="https://img.shields.io/github/followers/dsymbol?style=social">
#     </a>
#     <a target="_blank" href="https://colab.research.google.com/#fileId=https://huggingface.co/spaces/dsymbol/whisper-webui/blob/main/notebook.ipynb">
#         <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
#     </a>
#     <a target="_blank" href="https://huggingface.co/spaces/dsymbol/whisper-webui" rel="noopener noreferrer">
#         <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces">
#     </a>
# </div>
# """

def transcribe(recording, file, language, task):
    """Run Whisper on the microphone recording or the uploaded file."""
    if recording and file:
        text = "Please only use one field."
    elif not recording and not file:
        text = "Please use one field."
    else:
        # "Detect" lets Whisper infer the spoken language on its own
        language = None if language == "Detect" else language
        filepath = file if file else recording
        text = model.transcribe(
            filepath, task=task.lower(), language=language, fp16=gpu
        )["text"].strip()
    return text
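# A minimal sketch of calling transcribe() outside the UI (hypothetical file
# name; assumes a checkpoint has been loaded into the module-level `model`):
#
#   model = whisper.load_model("small")
#   print(transcribe(None, "clip.wav", "French", "Translate"))
#
# This would translate French speech in clip.wav into English text, since
# Whisper's "translate" task always targets English.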

def interface(model_name="small"):
    """Load the requested Whisper checkpoint and build the Gradio interface."""
    global model
    model = whisper.load_model(model_name)
    return gr.Interface(
        fn=transcribe,
        inputs=[
            gr.Audio(label="Record", source="microphone", type="filepath"),
            gr.Audio(label="Upload", source="upload", type="filepath"),
            gr.Dropdown(
                label="Language",
                choices=["Detect"] + sorted([i.title() for i in LANGUAGES.values()]),
                value="Detect",
            ),
            gr.Dropdown(
                label="Task",
                choices=["Transcribe", "Translate"],
                value="Transcribe",
                info="Whether to perform X->X speech recognition or X->English translation",
            ),
        ],
        outputs=gr.Textbox(label="Transcription", lines=26),
        # theme=gr.themes.Default(),
        theme=gr.themes.Glass(
            primary_hue=gr.themes.colors.orange,
            secondary_hue=gr.themes.colors.purple,
        ),
        title="Whisper is listening to you",
        # description=DESCRIPTION,
        allow_flagging="never",
    )
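# interface() accepts any installed Whisper checkpoint name ("tiny", "base",
# "small", "medium", "large", ...). A larger model trades memory and latency
# for accuracy; e.g., assuming enough VRAM:
#
#   demo = interface("medium")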

if __name__ == "__main__":
    demo = interface()
    demo.queue().launch(debug=True)