Spaces:

wjbmattingly
/

whisper-app

Sleeping

File size: 1,470 Bytes

ab3a2d3
70091ec
 
17abae8
ab3a2d3
 
17abae8
70091ec
17abae8
ab3a2d3
17abae8
ab3a2d3
17abae8
 
 
 
 
 
ab3a2d3
 
17abae8
e9f9f9c
17abae8
 
ab3a2d3
17abae8
 
ab3a2d3
e9f9f9c
 
17abae8
 
ab3a2d3
17abae8
 
ab3a2d3
 
17abae8
 
 
 
 
 
 
 
ab3a2d3
 
e9f9f9c
 
70091ec
e9f9f9c

import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
from transformers import pipeline
import spaces

# Checkpoint served by this demo, and the batch size handed to the pipeline
# on every transcription call.
MODEL_NAME = "TheirStory/whisper-small-xhosa"
BATCH_SIZE = 8

# Run on CUDA device 0 when torch reports CUDA as available; otherwise use CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model. Audio is chunked with chunk_length_s=30.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)


@spaces.GPU
def transcribe(inputs):
    """Transcribe an audio input and return the recognized text.

    Args:
        inputs: The audio to transcribe, as passed in by the Gradio audio
            component (configured with ``type="filepath"``), or ``None``
            when the user submitted nothing.

    Returns:
        The ``"text"`` field of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    # Timestamps are requested from the pipeline, but only the plain text
    # is handed back to the UI.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    return result["text"]

# Markdown blurb shown under the title; links to the model card of the
# checkpoint in use.
APP_DESCRIPTION = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

# The audio component is the sole input and hands `transcribe` a file path.
# No transcribe/translate task selector is exposed in the UI.
audio_input = gr.Audio(type="filepath", label="Audio file")

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[audio_input],
    outputs="text",
    title="Whisper App",
    description=APP_DESCRIPTION,
    theme="huggingface",
    allow_flagging="never",
)

# Wrap the single interface in a tabbed layout.
# NOTE(review): the tab is labelled "Microphone" while the input is labelled
# "Audio file" - confirm which wording is intended.
with gr.Blocks() as demo:
    gr.TabbedInterface([file_transcribe], ["Microphone"])

# Launch the app
demo.launch()