# Voice_Assistant/app.py
import gradio as gr
import torch
import transformers
import librosa


class UltravoxInterface:
    def __init__(self):
        """Initialize the ASR pipeline with a small model footprint."""
        print("Initializing voice interface...")
        # Use the smaller Whisper checkpoint to fit CPU-only Spaces hardware
        self.model_name = "openai/whisper-small"
        self.pipe = transformers.pipeline(
            "automatic-speech-recognition",
            model=self.model_name,
            torch_dtype=torch.float32,  # float16 is poorly supported on CPU
            device="cpu",  # explicitly run on CPU
        )
        print("Model loaded successfully!")
    def process_audio(self, audio_path):
        """Transcribe a recorded audio file with modest memory usage."""
        try:
            if audio_path is None:
                return "Please provide an audio input."
            # Load and resample to 16 kHz mono, the rate Whisper expects
            audio, sr = librosa.load(audio_path, sr=16000, mono=True)
            # Transcribe long recordings in 30-second segments to bound memory
            max_length = 30 * sr
            if len(audio) > max_length:
                segments = []
                for i in range(0, len(audio), max_length):
                    segment = audio[i:i + max_length]
                    result = self.pipe(
                        {"raw": segment, "sampling_rate": sr}, batch_size=1
                    )
                    segments.append(result["text"])
                return " ".join(segments)
            # Shorter audio is transcribed in a single pass
            result = self.pipe({"raw": audio, "sampling_rate": sr}, batch_size=1)
            return result["text"]
        except Exception as e:
            return f"Error processing audio: {str(e)}"
    def create_interface(self):
        """Create and configure the Gradio interface."""
        interface = gr.Interface(
            fn=self.process_audio,
            inputs=[
                gr.Audio(
                    label="Speak here",
                    sources=["microphone"],
                    type="filepath",
                )
            ],
            outputs=[
                gr.Textbox(
                    label="Transcription",
                    lines=5,
                    placeholder="Transcription will appear here...",
                )
            ],
            title="Voice Assistant",
            description="Speak into the microphone and get a text transcription!",
            theme=gr.themes.Soft(primary_hue="orange"),
        )
        return interface
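# A minimal sketch of exercising the transcriber without the UI, assuming a
# local recording at the hypothetical path "sample.wav":
#
#   demo = UltravoxInterface()
#   print(demo.process_audio("sample.wav"))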
# Create the interface
app = UltravoxInterface()
interface = app.create_interface()
# Launch the interface - this is crucial for Hugging Face Spaces
interface.launch()
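# On CPU-only hardware, concurrent requests can exhaust memory; Gradio's
# request queue can serialize them instead. A hedged sketch using the
# Blocks.queue() API:
#
#   interface.queue(max_size=8).launch()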