Spaces:

Waseem7711
/

Voice_to_Voice_ChatBot

Sleeping

App Files Files

xet

Community

Voice_to_Voice_ChatBot / app.py

Waseem771

Update app.py

b4f17d1 verified 9 months ago

raw

history blame

5.04 kB

	import gradio as gr
	import whisper
	from gtts import gTTS
	from groq import Groq
	import os
	import numpy as np
	import soundfile as sf
	import logging

	# Initialize the Groq API key
	GROQ_API_KEY = "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"

	# Configure logging
	logging.basicConfig(level=logging.DEBUG)

	# Initialize Whisper model (No API key required)
	try:
	whisper_model = whisper.load_model("base")
	logging.info("Whisper model loaded successfully.")
	except Exception as e:
	raise RuntimeError(f"Error loading Whisper model: {e}")

	# Initialize Groq client (API key required for Groq API)
	try:
	client = Groq(
	api_key=GROQ_API_KEY # Directly use the API key from the variable
	)
	logging.info("Groq client initialized successfully.")
	except Exception as e:
	raise RuntimeError(f"Error initializing Groq client: {e}")

	# Function to transcribe audio using Whisper
	def transcribe_audio(audio):
	try:
	# Load audio file with soundfile
	logging.debug(f"Loading audio file: {audio}")
	audio_data, sample_rate = sf.read(audio, dtype='float32') # Ensure dtype is float32
	logging.debug(f"Audio loaded with sample rate: {sample_rate}, data shape: {audio_data.shape}")

	# Whisper expects a specific sample rate
	if sample_rate != 16000:
	logging.debug(f"Resampling audio from {sample_rate} to 16000 Hz")
	# Resample audio data to 16000 Hz
	num_samples = int(len(audio_data) * (16000 / sample_rate))
	audio_data_resampled = np.interp(np.linspace(0, len(audio_data), num_samples),
	np.arange(len(audio_data)),
	audio_data)
	audio_data = audio_data_resampled.astype(np.float32) # Ensure dtype is float32
	sample_rate = 16000

	# Perform the transcription
	result = whisper_model.transcribe(audio_data)
	logging.debug(f"Transcription result: {result['text']}")
	return result['text']
	except Exception as e:
	logging.error(f"Error during transcription: {e}")
	return f"Error during transcription: {e}"

	# Function to get response from LLaMA model using Groq API
	def get_response(text):
	try:
	logging.debug(f"Sending request to Groq API with text: {text}")
	chat_completion = client.chat.completions.create(
	messages=[
	{
	"role": "user",
	"content": text, # Using the transcribed text as input
	}
	],
	model="llama3-8b-8192", # Ensure the correct model is used
	)

	# Extract and return the model's response
	response_text = chat_completion.choices[0].message.content
	logging.debug(f"Received response from Groq API: {response_text}")
	return response_text
	except Exception as e:
	logging.error(f"Error during model response generation: {e}")
	return f"Error during model response generation: {e}"

	# Function to convert text to speech using gTTS
	def text_to_speech(text):
	try:
	tts = gTTS(text)
	tts.save("response.mp3")
	logging.debug("Text-to-speech conversion completed successfully.")
	return "response.mp3"
	except Exception as e:
	logging.error(f"Error during text-to-speech conversion: {e}")
	return f"Error during text-to-speech conversion: {e}"

	# Combined function for Gradio
	def chatbot(audio):
	try:
	# Step 1: Transcribe the audio input using Whisper
	user_input = transcribe_audio(audio)

	# Check if transcription returned an error
	if "Error" in user_input:
	return user_input, None

	logging.debug(f"Transcribed text: {user_input}")

	# Step 2: Get response from the LLaMA model using Groq API
	response_text = get_response(user_input)

	# Check if the response generation returned an error
	if "Error" in response_text:
	return response_text, None

	logging.debug(f"Response text: {response_text}")

	# Step 3: Convert the response text to speech using gTTS
	response_audio = text_to_speech(response_text)

	# Check if the text-to-speech conversion returned an error
	if "Error" in response_audio:
	return response_audio, None

	# Step 4: Return the response text and response audio file
	return response_text, response_audio

	except Exception as e:
	logging.error(f"Unexpected error occurred: {e}")
	return f"Unexpected error occurred: {e}", None

	# Gradio Interface
	iface = gr.Interface(
	fn=chatbot,
	inputs=gr.Audio(type="filepath"),
	outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
	live=True,
	title="Voice-to-Voice Chatbot",
	description="Speak to the bot, and it will respond with voice.",
	)

	try:
	iface.launch()
	except Exception as e:
	logging.error(f"Error launching Gradio interface: {e}")