|
import os |
|
import tempfile |
|
|
|
import gradio as gr |
|
import torch |
|
from transformers import pipeline |
|
from transformers.pipelines.audio_utils import ffmpeg_read |
|
|
|
# Hugging Face Hub id of the fine-tuned Whisper checkpoint used for ASR.
MODEL_NAME = "jensenlwt/whisper-small-singlish-122k"

# Upload size cap in MB. NOTE(review): never referenced in this file —
# presumably intended for a file-size guard before transcription; confirm
# against the rest of the project or remove.
FILE_LIMIT_MB = 1000

# Run inference on the first GPU when one is available, else fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline; long audio is split into 30-second chunks by
# the pipeline's internal chunking logic (chunk_length_s=30).
pipe = pipeline(

    task="automatic-speech-recognition",

    model=MODEL_NAME,

    chunk_length_s=30,

    device=device,

)
|
|
|
|
|
def transcribe(inputs, task=None):
    """Transcribe an audio file with the Singlish Whisper pipeline.

    Parameters
    ----------
    inputs : str | None
        Filepath of the audio to transcribe (as produced by a Gradio
        ``Audio(type="filepath")`` component). ``None`` when the user
        submitted without providing audio.
    task : str | None, optional
        Unused; kept for signature compatibility. Defaults to ``None``
        because both Gradio interfaces in this file supply only the audio
        input — without a default every UI call raised
        ``TypeError: transcribe() missing 1 required positional argument``.

    Returns
    -------
    list[dict]
        The ``"chunks"`` list from the pipeline output: one dict per chunk
        with ``"text"`` and ``"timestamp"`` entries.

    Raises
    ------
    gr.Error
        If no audio file was submitted.
    """
    if inputs is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file before submitting your request."
        )

    # Force English decoding; return_timestamps=True makes the pipeline
    # emit the per-chunk "chunks" structure we return below.
    text = pipe(
        inputs,
        generate_kwargs={"language": "english"},
        return_timestamps=True,
    )["chunks"]
    return text
|
|
|
|
|
# Top-level Blocks container that hosts the tabbed interfaces defined below.
demo = gr.Blocks()

# Tab 1: transcribe audio recorded live from the user's microphone.
# NOTE(review): gr.inputs.Audio / source / optional / layout / string theme /
# allow_flagging are the legacy (pre-3.x) Gradio Interface API; Gradio >= 3.x
# replaces them with gr.Audio(sources=[...]) and new Interface kwargs —
# confirm the pinned gradio version before migrating.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    # Fixed mis-encoded title: the original bytes rendered as mojibake
    # ("๐ธ๐ฌ") where the Singapore flag emoji was intended.
    title="Whisper Small: Singlish Edition 🇸🇬",
    description=(""),
    allow_flagging="never",
)
|
|
|
# Tab 2: transcribe an uploaded audio file.
# NOTE(review): gr.inputs.Audio / source / optional / layout / string theme /
# allow_flagging are the legacy (pre-3.x) Gradio Interface API; Gradio >= 3.x
# replaces them with gr.Audio(sources=[...]) and new Interface kwargs —
# confirm the pinned gradio version before migrating.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(
            source="upload", type="filepath", optional=True, label="Audio file"
        ),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    # Fixed mis-encoded title: the original bytes rendered as mojibake
    # ("๐ธ๐ฌ") where the Singapore flag emoji was intended.
    title="Whisper Small: Singlish Edition 🇸🇬",
    description=(
        "NOTE: Current space seems to cut off the last few seconds of the recording. For exploration, I would recommend sticking to audio <10s long."
    ),
    allow_flagging="never",
)
|
|
|
|
|
# Mount both interfaces as tabs inside the Blocks container.
with demo:

    gr.TabbedInterface(

        [mf_transcribe, file_transcribe],

        ["Microphone", "Audio file"],

    )

# Start the app with request queuing enabled.
# NOTE(review): enable_queue is the legacy launch flag; newer Gradio versions
# use demo.queue().launch() instead — confirm the pinned gradio version.
demo.launch(enable_queue=True)
|
|