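# NOTE: assumed runtime dependencies for this Space (typically pinned in
# requirements.txt): gradio, transformers, torch, librosa, numpy.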
import gradio as gr
import os
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import numpy as np
import librosa
# Initialize Whisper model
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
# Set light green theme
theme = gr.themes.Base(
    primary_hue="emerald",
    secondary_hue="emerald",
    neutral_hue="gray",
)
def validate_file(file):
    # Check that a file was actually provided
    if file is None:
        return False, "Please upload an audio file."
    # Check file size (25 MB limit)
    file_size_mb = os.path.getsize(file) / (1024 * 1024)
    if file_size_mb > 25:
        return False, f"File size is {file_size_mb:.2f} MB. Please upload a file smaller than 25 MB."
    # Check file extension
    file_extension = os.path.splitext(file)[1].lower()
    if file_extension not in ['.mp3', '.wav']:
        return False, "Only .mp3 and .wav formats are supported."
    return True, "File is valid."
def transcribe_audio(audio_file):
    # Validate the file first
    is_valid, message = validate_file(audio_file)
    if not is_valid:
        return message
    try:
        # Load the audio and resample to 16 kHz, the rate Whisper expects
        speech_array, sampling_rate = librosa.load(audio_file, sr=16000)
        # Convert the waveform into model input features
        input_features = processor(speech_array, sampling_rate=16000, return_tensors="pt").input_features
        # Generate token ids
        predicted_ids = model.generate(input_features)
        # Decode token ids to text
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        return transcription
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"
# Create Gradio interface
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# Audio Transcription with Whisper")
    gr.Markdown("Upload an audio file (.mp3 or .wav) of maximum 25 MB to get the transcription.")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_btn = gr.Button("Transcribe", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Transcription Result", lines=10)
    submit_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=output)
    gr.Markdown("### Limitations")
    gr.Markdown("- Maximum file size: 25 MB")
    gr.Markdown("- Supported formats: .mp3 and .wav")
    gr.Markdown("- Uses the Whisper base model, which works best with clear audio")
# Launch the app
demo.launch()