Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
import transformers | |
import librosa | |
import numpy as np | |
import os | |
class UltravoxInterface:
    """Gradio front end that transcribes recorded audio with a Whisper pipeline.

    The speech-recognition pipeline is created once in ``__init__`` and
    reused for every request handled by ``process_audio``.
    """

    # Sample rate (Hz) the audio is resampled to when it is loaded.
    SAMPLE_RATE = 16000
    # Longest span of audio, in seconds, sent to the pipeline in one call.
    CHUNK_SECONDS = 30

    def __init__(self):
        """Load the ``openai/whisper-small`` ASR pipeline on the CPU."""
        print("Initializing voice interface...")
        # Use smaller whisper model
        self.model_name = "openai/whisper-small"
        self.pipe = transformers.pipeline(
            "automatic-speech-recognition",
            model=self.model_name,
            # float32 rather than float16: the pipeline is pinned to the CPU,
            # where half-precision kernels are missing or very slow for
            # several operators.
            torch_dtype=torch.float32,
            device="cpu",  # Explicitly set to CPU
        )
        print("Model loaded successfully!")

    def process_audio(self, audio_path, custom_prompt=None):
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path of the recorded audio file, or ``None`` when
                nothing was recorded.
            custom_prompt: Accepted for call compatibility; not used by the
                transcription.

        Returns:
            The transcribed text. If there is no audio to transcribe, a
            message asking for input. If anything fails while loading or
            transcribing, a string starting with ``"Error processing audio: "``
            (errors are reported to the UI instead of being raised).
        """
        if audio_path is None:
            return "Please provide an audio input."
        try:
            # librosa decodes the whole file into memory (resampled, mono);
            # only the transcription below is done chunk by chunk.
            audio, _ = librosa.load(audio_path, sr=self.SAMPLE_RATE, mono=True)
            if len(audio) == 0:
                # An empty recording has nothing to transcribe.
                return "Please provide an audio input."
            return self._transcribe_samples(audio)
        except Exception as e:
            # UI boundary: show the failure in the textbox rather than crash.
            return f"Error processing audio: {e}"

    def _transcribe_samples(self, audio):
        """Transcribe a sample array in fixed-size chunks and join the texts.

        Audio no longer than one chunk results in a single pipeline call.
        """
        chunk_len = self.CHUNK_SECONDS * self.SAMPLE_RATE
        texts = [
            self.pipe(audio[start:start + chunk_len], batch_size=1)["text"]
            for start in range(0, len(audio), chunk_len)
        ]
        return " ".join(texts)

    def create_interface(self):
        """Build the Gradio interface wired to ``process_audio``.

        Returns:
            A ``gr.Interface`` with one microphone audio input (delivered as
            a file path) and one text box for the transcription.
        """
        # No ``examples`` are registered: the only input is a live microphone
        # recording, and an example row holding ``None`` has nothing to show.
        interface = gr.Interface(
            fn=self.process_audio,
            inputs=[
                gr.Audio(
                    label="Speak here",
                    sources=["microphone"],
                    type="filepath",
                )
            ],
            outputs=[
                gr.Textbox(
                    label="Transcription",
                    lines=5,
                    placeholder="Transcription will appear here...",
                )
            ],
            title="Voice Assistant",
            description="Speak into the microphone and get text transcription!",
            theme=gr.themes.Soft(primary_hue="orange"),
        )
        return interface
# Instantiate the app (its constructor loads the speech-recognition model)
# and build the Gradio UI from it.
app = UltravoxInterface()
interface = app.create_interface()

# Start serving at module level; the original author notes this launch call
# is required for the app to run on Hugging Face Spaces.
interface.launch()