# Source: Hugging Face Space by Brightsun10 — "Update app.py" (commit ad8fa51, verified).
import gradio as gr
import whisper
import torch
import time
# --- MODEL INITIALIZATION ---
# Prefer a CUDA GPU when one is present; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the Whisper model once at startup so every request reuses it.
# "base" balances speed and accuracy; "medium"/"large" are more accurate
# but need considerably more memory and compute.
print("Loading Whisper model...")
model = whisper.load_model("base", device=device)
print("Whisper model loaded successfully.")
# --- TRANSCRIPTION FUNCTION ---
def transcribe_audio(microphone_input, file_input):
    """
    Transcribe speech to text from whichever audio source the user supplied.

    The microphone recording takes precedence when both inputs are present.
    Both Gradio Audio components use type="filepath", so each input is a
    path string (or None when that source was not used).

    Args:
        microphone_input (str or None): Filepath of the microphone recording.
        file_input (str or None): Filepath of the uploaded audio file.

    Returns:
        str: The transcribed text, or a human-readable error/help message.
    """
    # Pick the first available source; the microphone wins over the upload.
    audio_source = microphone_input if microphone_input is not None else file_input
    if audio_source is None:
        return "No audio source provided. Please record or upload an audio file."

    try:
        # model.transcribe returns a dict; the full text lives under "text".
        return model.transcribe(audio_source)["text"]
    except Exception as e:
        # Surface the failure in the UI textbox instead of crashing the app.
        return f"An error occurred during transcription: {e}"
# --- GRADIO INTERFACE ---
# Use gr.Blocks (rather than gr.Interface) for a custom layout and styling;
# the stylesheet is loaded from assets/style.css and the Soft theme is applied.
with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Voice Recognition")
    gr.Markdown(
        "This application uses OpenAI's Whisper model to transcribe speech to text. "
        "You can either record audio directly from your microphone or upload an audio file."
    )
    with gr.Row(elem_classes="audio-container"):
        with gr.Column():
            # Microphone input — type="filepath" hands transcribe_audio a path string.
            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
            # File upload input — same filepath contract as the microphone component.
            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
    # Button that triggers transcription of whichever source was provided.
    transcribe_button = gr.Button("Transcribe Audio")
    # Read-only textbox where the transcription (or an error message) appears.
    output_text = gr.Textbox(
        lines=10,
        label="Transcription Result",
        placeholder="Your transcribed text will appear here...",
        elem_id="transcription_output"
    )
    # Wire the button: both audio components feed transcribe_audio; the
    # function itself decides which source to use (microphone first).
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[mic_input, file_upload],
        outputs=output_text
    )
# Launch the application when run as a script (debug=True surfaces
# tracebacks and verbose logs in the console while developing).
if __name__ == "__main__":
    demo.launch(debug=True)