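"""Gradio demo: Singlish speech-to-text with a fine-tuned Whisper-small checkpoint."""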
import os
import tempfile
import gradio as gr
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
MODEL_NAME = "jensenlwt/whisper-small-singlish-122k"
FILE_LIMIT_MB = 1000
device = "cuda:0" if torch.cuda.is_available() else "cpu"
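# Build the ASR pipeline once at startup; chunk_length_s=30 lets the pipeline
# split long recordings into 30-second windows for chunked long-form decoding.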
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
def transcribe(inputs, task="transcribe"):
    # `task` is given a default because the interfaces below only pass the audio input.
    if inputs is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file before submitting your request."
        )
    # Run chunked inference; each returned chunk carries a "timestamp" and "text" field.
    text = pipe(
        inputs,
        generate_kwargs={"language": "english"},
        return_timestamps=True,
    )["chunks"]
    return text
demo = gr.Blocks()
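# `demo` is a Blocks container; the two Interfaces below are rendered inside it as tabs.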
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Whisper Small: Singlish Edition 🇸🇬",
    description="",
    allow_flagging="never",
)
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(
            source="upload", type="filepath", optional=True, label="Audio file"
        ),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Whisper Small: Singlish Edition 🇸🇬",
    description=(
        "NOTE: The current Space seems to cut off the last few seconds of the recording. "
        "For exploration, I recommend sticking to audio clips shorter than 10 seconds."
    ),
    allow_flagging="never",
)
with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe],
        ["Microphone", "Audio file"],
    )
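# enable_queue=True turns on the launch-time request queue (older Gradio API;
# newer releases configure this via demo.queue() instead).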
demo.launch(enable_queue=True)