# speech-chatbot / app.py
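"""Voice-to-voice chatbot built with Gradio.

Pipeline: microphone audio is transcribed with the Google Web Speech API (via
SpeechRecognition), the transcript is sent to Mixtral-8x7B-Instruct-v0.1 through
the Hugging Face Inference API, and the reply is spoken back with gTTS.
Requires the HF_TOKEN environment variable to be set.
"""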
import gradio as gr
from huggingface_hub import InferenceClient
import speech_recognition as sr
from gtts import gTTS
import tempfile
import os
import sys
# Check for Hugging Face API token
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
print("Error: HF_TOKEN environment variable not set. Please set it with your Hugging Face API token.")
sys.exit(1)
# Initialize Hugging Face client
try:
    client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
    print("Successfully initialized InferenceClient with Mixtral-8x7B-Instruct-v0.1")
except Exception as e:
    print(f"Failed to initialize InferenceClient: {e}")
    sys.exit(1)
# Speech-to-Text Function
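# Note: sr.AudioFile accepts WAV/AIFF/FLAC input; Gradio's microphone component
# records to WAV by default, so the filepath passed in here should be compatible.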
def speech_to_text(audio_path):
    # Guard against a missing recording: Gradio passes None when nothing was recorded
    if not audio_path or not os.path.exists(audio_path):
        return "Error: Audio file not found."
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        print(f"Speech-to-Text Output: {text}")
        return text
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError as e:
        return f"Speech recognition service error: {e}"
# Text-to-Speech Function
def text_to_speech(text):
    try:
        print(f"Text-to-Speech Input: {text}")
        tts = gTTS(text)
        # Write the synthesized speech to a temporary MP3 file that Gradio can serve
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_file.close()
        tts.save(temp_file.name)
        print(f"Audio file generated at: {temp_file.name}")
        return temp_file.name
    except Exception as e:
        print(f"Text-to-Speech Error: {e}")
        return None
# Chatbot Response Function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    # Build the message history in chat-completion format
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    response = ""
    try:
        # Stream tokens from the model and accumulate them into a single response.
        # Use a separate loop variable so the user's `message` is not shadowed.
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip empty deltas to avoid concatenating None (TypeError)
            token_content = chunk.choices[0].delta.content
            if token_content is not None:
                response += token_content
        if not response:
            return "Error: Empty response from the model."
        print(f"Chatbot Response: {response}")
        return response
    except Exception as e:
        print(f"Chatbot Error: {e}")
        return f"Error generating response from the chatbot: {e}"
# Voice-to-Voice Functionality
def voice_to_voice(audio, history, system_message, max_tokens, temperature, top_p):
    # Convert the user's voice input to text
    user_message = speech_to_text(audio)
    if user_message.startswith("Error") or user_message.startswith("Could not understand"):
        return user_message, history, None
    # Get the chatbot's text response
    response_text = respond(user_message, history, system_message, max_tokens, temperature, top_p)
    if response_text.startswith("Error"):
        return response_text, history, None
    # Update the chat history, keeping only the last 5 exchanges
    history.append((user_message, response_text))
    history = history[-5:]
    # Convert the chatbot response to audio
    audio_file = text_to_speech(response_text)
    if not audio_file:
        return "Failed to generate audio response.", history, None
    return response_text, history, audio_file
# Gradio Interface
def main_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Voice-to-Voice Chatbot")
        system_message = gr.Textbox(
            value="You are a friendly and helpful chatbot.",
            label="System Message",
            lines=2,
        )
        max_tokens = gr.Slider(
            minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"
        )
        temperature = gr.Slider(
            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"
        )
        audio_input = gr.Audio(
            sources=["microphone"], type="filepath", label="Speak Your Question"
        )
        response_output = gr.Textbox(label="Chatbot Response")
        audio_output = gr.Audio(label="Response Audio", type="filepath")
        # Session-scoped chat history: a list of (user, assistant) tuples
        history_state = gr.State([])
        gr.Button("Submit").click(
            fn=voice_to_voice,
            inputs=[audio_input, history_state, system_message, max_tokens, temperature, top_p],
            outputs=[response_output, history_state, audio_output],
        )
    return demo
if __name__ == "__main__":
    demo = main_interface()
    demo.launch()
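# To run locally (assuming the standard PyPI packages behind the imports above):
#   pip install gradio huggingface_hub SpeechRecognition gTTS
#   export HF_TOKEN=<your Hugging Face API token>
#   python app.py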