# Ai-Transcript / app666.py
# (Hugging Face Spaces page residue kept as comments so the file parses:
#  uploader "Eldermind"; commit 333ac25 "Rename app.py to app666.py".)
import gradio as gr
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Choose the compute device: use the GPU when CUDA is available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Distil-Whisper checkpoint used for speech-to-text.
model_id = "distil-whisper/distil-large-v3"

# The processor bundles the feature extractor (audio -> log-mel) and the
# tokenizer (ids -> text); the model performs the actual generation.
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
print(f"Model and processor loaded successfully: {model_id}")
def transcribe_speech(file_info):
    """Transcribe an audio clip to text with Distil-Whisper.

    Parameters
    ----------
    file_info : tuple[int, numpy.ndarray] | None
        The ``(sample_rate, samples)`` pair that ``gr.Audio`` (numpy type)
        passes to event handlers, or ``None`` when the input is cleared.
        Samples may be int16 PCM and may have a channel dimension.

    Returns
    -------
    str
        The decoded transcription, or "" when no audio was provided.
    """
    # gr.Audio fires .change with None when the component is cleared.
    if file_info is None:
        return ""

    # BUG FIX: the Gradio audio payload is a (sample_rate, array) tuple,
    # not a dict with a "content" key.
    sample_rate, audio = file_info
    audio = np.asarray(audio)

    # Gradio typically delivers int16 PCM; the feature extractor expects
    # float32 waveforms in [-1, 1].
    if np.issubdtype(audio.dtype, np.integer):
        audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    else:
        audio = audio.astype(np.float32)

    # Down-mix stereo (samples, channels) to mono by averaging channels.
    if audio.ndim > 1:
        audio = audio.mean(axis=-1)

    # Whisper's feature extractor assumes a fixed rate (16 kHz); resample
    # linearly when the recording used a different rate. Linear
    # interpolation is crude but adequate for speech transcription.
    target_sr = processor.feature_extractor.sampling_rate
    if sample_rate != target_sr and audio.size > 0:
        new_len = int(round(audio.shape[0] * target_sr / sample_rate))
        audio = np.interp(
            np.linspace(0.0, audio.shape[0] - 1, new_len),
            np.arange(audio.shape[0]),
            audio,
        ).astype(np.float32)

    inputs = processor(audio, sampling_rate=target_sr, return_tensors="pt")
    inputs = inputs.to(device)

    # BUG FIX: WhisperProcessor emits log-mel spectrograms under
    # "input_features"; "input_values" is the Wav2Vec2-style key and
    # raised a KeyError here. no_grad avoids building autograd state.
    with torch.no_grad():
        generated_ids = model.generate(inputs["input_features"])

    # Decode token ids back to text; take the single batch element.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Set up the Gradio UI: one tab with an audio input and a transcription box.
with gr.Blocks() as demo:
    with gr.Tab("Transcribe Audio"):
        with gr.Row():
            # type="numpy" makes the handler contract explicit: the event
            # callback receives a (sample_rate, samples) tuple (this is the
            # gradio default, but transcribe_speech depends on it).
            audio_input = gr.Audio(label="Upload audio file or record", type="numpy")
        with gr.Row():
            audio_output = gr.Textbox(label="Transcription")
        # Re-run transcription whenever the audio value changes
        # (upload, recording finished, or input cleared).
        audio_input.change(transcribe_speech, inputs=audio_input, outputs=audio_output)

# share=True publishes a temporary public gradio.live URL.
demo.launch(share=True)