Spaces:
Runtime error
import gradio as gr
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Set up device for torch based on GPU availability.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the distilled Whisper checkpoint and its matching processor once at
# module import, then move the model to the selected device.
model_id = "distil-whisper/distil-large-v3"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
model.to(device)
print(f"Model and processor loaded successfully: {model_id}")
def transcribe_speech(file_info):
    """Transcribe an audio clip to text with the Whisper model.

    Args:
        file_info: Value delivered by a ``gr.Audio`` component — with the
            component's default ``type="numpy"`` this is a
            ``(sample_rate, samples)`` tuple, or ``None`` when the input
            is cleared.

    Returns:
        The transcription string (empty string when no audio is given).
    """
    if file_info is None:
        return ""
    # A default gr.Audio value is a (sample_rate, ndarray) tuple, not a
    # dict with a "content" key — indexing by "content" raised at runtime.
    sample_rate, audio_input = file_info
    audio_input = np.asarray(audio_input, dtype=np.float32)
    # Downmix multi-channel recordings to mono.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)
    # Browser recordings arrive as int16 PCM in [-32768, 32767]; the
    # feature extractor expects float waveforms in roughly [-1, 1].
    if audio_input.size and np.abs(audio_input).max() > 1.0:
        audio_input = audio_input / 32768.0
    # NOTE(review): the recording's sample_rate may differ from the 16 kHz
    # the Whisper feature extractor expects — resample upstream if needed.
    inputs = processor(
        audio_input,
        return_tensors="pt",
        sampling_rate=processor.feature_extractor.sampling_rate,
    )
    inputs = inputs.to(device)
    # Whisper generates from log-mel "input_features"; "input_values" is
    # the Wav2Vec2-family key and raises KeyError here.
    output = model.generate(inputs["input_features"])
    # Decode the token ids back to plain text.
    transcription = processor.batch_decode(output, skip_special_tokens=True)[0]
    return transcription
# Set up the Gradio UI: one tab with an audio input and a transcription box.
with gr.Blocks() as demo:
    with gr.Tab("Transcribe Audio"):
        with gr.Row():
            audio_input = gr.Audio(label="Upload audio file or record")
        with gr.Row():
            audio_output = gr.Textbox(label="Transcription")
        # Setup the interaction - When audio is provided, transcribe it
        audio_input.change(transcribe_speech, inputs=audio_input, outputs=audio_output)

demo.launch(share=True)