import whisper
import gradio as gr
from gtts import gTTS
import os
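# Dependencies (assumed setup): pip install openai-whisper gradio gTTS;
# whisper.load_audio also requires ffmpeg to be available on PATH.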
# Load Whisper model
model = whisper.load_model("base")
# Function to transcribe audio to text
def transcribe_audio(audio_file):
    try:
        # Load the audio and pad/trim it to the 30-second window Whisper expects
        audio = whisper.load_audio(audio_file)
        audio = whisper.pad_or_trim(audio)
        # Compute the log-Mel spectrogram on the same device as the model
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        # Decode the spectrogram directly; model.transcribe() expects raw audio,
        # not a mel spectrogram (fp16=False keeps CPU inference warning-free)
        result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
        return result.text
    except Exception as e:
        return f"Error in transcription: {e}"
# Function to generate text-to-speech
def generate_speech(text):
    try:
        tts = gTTS(text)
        output_file = "response.mp3"
        tts.save(output_file)
        return output_file
    except Exception as e:
        return f"Error in TTS: {e}"
# Voice-to-Voice chatbot function
def voice_to_voice(audio_file):
    try:
        # Transcribe the audio input
        transcribed_text = transcribe_audio(audio_file)
        if "Error" in transcribed_text:
            return transcribed_text, None
        # Generate a response (mock response for now)
        response_text = f"You said: {transcribed_text}"
        # Convert the response text to speech
        audio_response = generate_speech(response_text)
        if "Error" in audio_response:
            return response_text, None
        return response_text, audio_response
    except Exception as e:
        return f"Error in processing: {e}", None
# Gradio Interface
iface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath"),  # Accepts audio input (microphone or file)
    outputs=[
        gr.Textbox(label="Transcription"),  # Displays the transcribed text
        gr.Audio(type="filepath")  # Returns the audio response
    ],
    title="Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with speech."
)
# Launch the app
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)