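# Spaces: Sleeping
# NOTE(review): the line above looks like web-page status text captured with the source, not program code.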
import io
import logging
import time

import gradio as gr
import librosa
import soundfile as sf
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Checkpoint, device and precision are decided once so the model, processor
# and pipeline below cannot disagree. Half precision is only used on a GPU;
# on a CPU-only host everything falls back to float32 on "cpu" instead of
# crashing on an unconditional move to "cuda".
MODEL_ID = "openai/whisper-large-v3"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# Instantiating the model object.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    pretrained_model_name_or_path=MODEL_ID,
    torch_dtype=TORCH_DTYPE,
    use_safetensors=True,
)
model = model.to(DEVICE)

# Instantiating the processor object (supplies the tokenizer and the
# feature extractor handed to the pipeline).
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path=MODEL_ID)

# Instantiating the transformers pipeline object. The already-loaded `model`
# is passed in (rather than the checkpoint name) so the weights are not
# loaded a second time.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=TORCH_DTYPE,
    device=DEVICE,
)
#Defining speech-to-text function.
def convert(audio, state=""):
    """Transcribe one recording and append it to the running transcript.

    Used as the ``fn`` of the Gradio interface, so it returns one value per
    output component: the text to display and the updated state.

    Parameters:
    - audio: the recording handed over by Gradio. The interface in this file
      configures its audio input with ``type="filepath"``, so this is
      expected to be a path to an audio file, or ``None``/empty when nothing
      has been recorded yet.
    - state: a string with the text accumulated from previous conversions;
      ``None`` is treated as an empty transcript.

    Returns:
    A ``(display_text, state)`` tuple. On success both items are the updated
    transcript. When there is no audio, or transcription fails,
    ``display_text`` is an error message and ``state`` is returned without
    anything appended.
    """
    if state is None:
        state = ""

    # Nothing recorded: answer immediately instead of sleeping and then
    # letting the pipeline raise on an empty input.
    if not audio:
        return "Error processing audio: Please start recording!", state

    # NOTE(review): fixed 3 s pause kept from the original; presumably it
    # paces live updates -- confirm it is still needed.
    time.sleep(3)

    try:
        result = pipe(audio)
        transcribed_text = result["text"]
    except Exception:
        # UI boundary: keep the app responsive, but record the real cause
        # instead of silently discarding it.
        logging.getLogger(__name__).exception("Transcription failed for %r", audio)
        return "Error processing audio: transcription failed, please try again.", state

    state += transcribed_text + " "
    return state, state
# Building the Gradio UI: a microphone recorder whose recording is passed to
# `convert` as a file path, paired with a "state" value that feeds the
# transcript returned by `convert` back into its next call.
microphone_input = gr.Audio(
    label="Please Record Your Speech Here!",
    sources="microphone",
    type="filepath",
)

gr_interface = gr.Interface(
    fn=convert,
    inputs=[microphone_input, "state"],
    outputs=["textbox", "state"],
    title="Automatic Speech-to-Text",
    description="### Record your speech and watch it get converted to text!",
    live=True,
)

# Launching the app with default launch options (no share link is requested).
gr_interface.launch()