import gradio as gr
from transformers import pipeline
import torch
import spaces

# Load the Whisper ASR pipeline once at startup, in fp16 on the first CUDA device.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.float16,
    device="cuda:0",
)

@spaces.GPU  # request a ZeroGPU device for the duration of this call
def transcribe(audio, task):
    gr.Info("Starting transcription task")
    outputs = pipe(
        audio,
        chunk_length_s=30,               # split long audio into 30-second chunks
        batch_size=128,                  # number of chunks processed per forward pass
        generate_kwargs={"task": task},  # "transcribe" or "translate"
        return_timestamps="word",
    )
    gr.Info("Finished transcription task")
    # outputs["chunks"] is a list of {"text", "timestamp"} dicts (word-level);
    # the Textbox displays its string representation.
    return outputs["chunks"]

# Minimal UI: audio upload, task selector, transcribe button, read-only output.
with gr.Blocks() as demo:
    audio = gr.Audio(label="Audio", type="filepath", interactive=True)
    task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe", interactive=True)
    btn = gr.Button("Transcribe", variant="primary")
    output = gr.Textbox(label="Transcription", interactive=False)
    btn.click(transcribe, inputs=[audio, task], outputs=output)

# Enable request queuing and start the app.
demo.queue().launch()