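# app.py: Gradio demo for Spanish speech recognition with a fine-tuned Whisper small model.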
from transformers import (
    WhisperFeatureExtractor,
    WhisperForConditionalGeneration,
    WhisperTokenizer,
    pipeline,
)
import gradio as gr
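# Tokenizer and feature extractor come from the base openai/whisper-small checkpoint;
# the model weights are the Spanish fine-tune mirari/whisper-small-es.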
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="Spanish", task="transcribe")
model = WhisperForConditionalGeneration.from_pretrained("mirari/whisper-small-es")
feature_extractor = WhisperFeatureExtractor.from_pretrained("openai/whisper-small")
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
)
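# Run the pipeline on an audio file path and return only the transcribed text.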
def transcribe(audio):
    text = pipe(audio)["text"]
    return text
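# Gradio interface: record Spanish speech from the microphone, pass the recording
# to the pipeline as a file path, and display the transcription.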
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Spanish",
    description="Realtime demo for Spanish speech recognition using a fine-tuned Whisper small model.",
)
iface.launch()