# HuggingFace Space: Whisper speech-to-text demo
# (The "Spaces: Sleeping" lines above the code were web-page scrape residue.)
import gradio as gr
import whisper
import torch
import time

# --- MODEL INITIALIZATION ---
# Prefer the GPU when PyTorch can see one; Whisper also runs on CPU, just slower.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Whisper model.
# "base" is a good starting point. For higher accuracy, you can use "medium" or "large",
# but they require more resources.
print("Loading Whisper model...")
model = whisper.load_model("base", device=device)
print("Whisper model loaded successfully.")
# --- TRANSCRIPTION FUNCTION ---
def transcribe_audio(microphone_input, file_input):
    """
    Transcribe audio from either a microphone recording or an uploaded file.

    The microphone input takes precedence when both sources are provided.

    Args:
        microphone_input (str or None): Path to the recorded audio. The
            Gradio component uses ``type="filepath"``, so this is a file
            path string, not a (sample_rate, data) tuple.
        file_input (str or None): Path to the uploaded audio file.

    Returns:
        str: The transcribed text, or a human-readable message when no
        audio was supplied or transcription failed.
    """
    # Pick the first available source; microphone wins over upload.
    if microphone_input is not None:
        audio_source = microphone_input
    elif file_input is not None:
        audio_source = file_input
    else:
        return "No audio source provided. Please record or upload an audio file."

    try:
        # model.transcribe returns a dict; the full transcript is under "text".
        result = model.transcribe(audio_source)
        return result["text"]
    except Exception as e:
        # Report the failure in the UI textbox instead of crashing the app.
        return f"An error occurred during transcription: {e}"
# --- GRADIO INTERFACE ---
# Use gr.Blocks for more complex layouts and custom styling
with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Voice Recognition")
    gr.Markdown(
        "This application uses OpenAI's Whisper model to transcribe speech to text. "
        "You can either record audio directly from your microphone or upload an audio file."
    )
    with gr.Row(elem_classes="audio-container"):
        with gr.Column():
            # Microphone input
            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
            # File upload input
            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")
    # Transcribe Button
    transcribe_button = gr.Button("Transcribe Audio")
    # Transcription Output
    output_text = gr.Textbox(
        lines=10,
        label="Transcription Result",
        placeholder="Your transcribed text will appear here...",
        elem_id="transcription_output"
    )
    # Define the action for the button click
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[mic_input, file_upload],
        outputs=output_text
    )
# Launch the application only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)