s2t / app.py
azamat's picture
tiny to base
80fa84e
import gradio as gr
from faster_whisper import WhisperModel
# Inference settings for the speech-recognition model: which checkpoint to
# load, which device to run on, and the numeric precision to compute with.
model_size = "base"
device = "cpu"
compute_type = "int8"

# Instantiated once at module load; `transcribe` below reuses this instance.
model = WhisperModel(
    model_size,
    device=device,
    compute_type=compute_type,
)
def transcribe(audio):
    """Transcribe a recording to text using the module-level Whisper model.

    Args:
        audio: The audio to transcribe. The interface in this file wires the
            input with ``type="filepath"``, so this is expected to be a path
            to the recorded file. Presumably ``None`` when the user submits
            without recording anything -- confirm against the Gradio version
            in use.

    Returns:
        The text of every recognized segment joined into a single string,
        with leading/trailing whitespace removed. Returns an empty string
        when no audio was provided.
    """
    # Nothing recorded: return an empty transcript instead of handing a
    # missing/empty path to the model and surfacing an opaque error in the UI.
    if audio is None or (isinstance(audio, str) and not audio):
        return ""

    segments, _ = model.transcribe(audio, beam_size=5)
    # Segment texts are concatenated as-is; strip so the textbox does not
    # start or end with stray whitespace.
    return "".join(segment.text for segment in segments).strip()
# Microphone recorder that hands the recording to `transcribe` as a file path.
# NOTE(review): `gr.inputs` appears to be a legacy namespace in newer Gradio
# releases -- confirm against the pinned Gradio version before upgrading.
audio_input = gr.inputs.Audio(source="microphone", type="filepath")

# Single-function UI: microphone audio in, transcript text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=[audio_input],
    outputs=["textbox"],
    title="Fast Whisper for Speech Recognition",
    description=(
        "This is a base version running on CPU with int8 compute type "
        "due to limited resources. These choices can slightly reduce accuracy."
    ),
)

# Start the web server for the interface.
demo.launch()