Spaces:

Razzaqi3143
/

VoicetoVoiceChatbot

Sleeping

App Files Files Community

VoicetoVoiceChatbot / app.py

Razzaqi3143

Create app.py

69c2a91 verified 6 months ago

raw

history blame contribute delete

3.71 kB

	import os
	from groq import Groq, GroqError
	import gradio as gr
	import torch
	from parler_tts import ParlerTTSForConditionalGeneration
	from transformers import AutoTokenizer
	import soundfile as sf

	# Initialize the Groq client. The API key is read from the environment so the
	# secret never lives in source control; set GROQ_API_KEY before launching
	# (for example as a Space secret).
	# NOTE: a key was previously committed on this line -- it must be revoked/rotated.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	# Shared client used by transcribe_audio() and generate_response().
	client = Groq(api_key=GROQ_API_KEY)

	# Device setup for Parler-TTS: use the first CUDA device when one is available.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"
	parler_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
	parler_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")

	# Function to transcribe audio using Whisper through Groq, with error handling
	def transcribe_audio(audio):
	    """Transcribe recorded speech to text with Whisper through the Groq API.

	    Args:
	        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
	            pair such as the one ``gr.Audio(type="numpy")`` hands to its
	            callback. ``None`` means nothing was recorded.

	    Returns:
	        The transcribed text on success, otherwise a message that starts
	        with ``"Error:"`` (the convention chatbot_pipeline checks for).
	    """
	    import io  # local import: only needed to build the in-memory WAV upload

	    if audio is None:
	        return "Error: No audio provided."

	    try:
	        if isinstance(audio, (str, os.PathLike)):
	            # Already a file on disk: upload its bytes as-is.
	            with open(audio, "rb") as audio_file:
	                upload = (os.path.basename(audio), audio_file.read())
	        else:
	            # Raw samples: the API expects an encoded audio file, so encode
	            # the samples as WAV in memory before uploading.
	            sample_rate, samples = audio
	            wav_buffer = io.BytesIO()
	            sf.write(wav_buffer, samples, sample_rate, format="WAV")
	            upload = ("audio.wav", wav_buffer.getvalue())

	        transcription_response = client.audio.transcriptions.create(
	            file=upload,
	            model="whisper-large-v3",
	        )
	        # The SDK returns an object; the transcript is its ``text`` attribute.
	        return transcription_response.text
	    except GroqError as e:
	        print(f"Groq transcription error: {e}")
	        return "Error: Failed to transcribe audio."
	    except (OSError, RuntimeError, ValueError) as e:
	        # Unreadable file or samples that could not be encoded as WAV.
	        print(f"Audio preparation error: {e}")
	        return "Error: Failed to transcribe audio."

	# Function to generate a response using LLaMA through Groq, with error handling
	def generate_response(text):
	    """Ask the LLaMA chat model (through Groq) to reply to *text*.

	    Args:
	        text: The user's message, sent as a single ``user`` turn.

	    Returns:
	        The model's reply on success, otherwise a message that starts with
	        ``"Error:"`` (the convention chatbot_pipeline checks for).
	    """
	    try:
	        chat_completion = client.chat.completions.create(
	            messages=[{"role": "user", "content": text}],
	            model="llama3-70b-8192",  # Modify based on the model you're using
	        )
	    except GroqError as e:
	        print(f"Groq response generation error: {e}")
	        return "Error: Failed to generate a response."

	    # The message is an SDK object, not a dict: read ``content`` as an attribute.
	    reply = chat_completion.choices[0].message.content
	    if reply is None:
	        # No text came back; report it through the same error convention so
	        # callers always receive a string.
	        print("Groq response generation error: empty response content")
	        return "Error: Failed to generate a response."
	    return reply

	# Function to convert text to speech using Parler-TTS
	def text_to_speech(text):
	    """Synthesize *text* with Parler-TTS and save it as a WAV file.

	    Returns:
	        ``"parler_tts_out.wav"`` once the file has been written, or the
	        string ``"Error: Failed to convert text to speech."`` if any step
	        raised.
	    """
	    output_path = "parler_tts_out.wav"
	    # Natural-language description of the voice the model should produce.
	    voice_description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch."

	    try:
	        description_ids = parler_tokenizer(voice_description, return_tensors="pt").input_ids
	        description_ids = description_ids.to(device)
	        prompt_ids = parler_tokenizer(text, return_tensors="pt").input_ids
	        prompt_ids = prompt_ids.to(device)

	        generated = parler_model.generate(
	            input_ids=description_ids,
	            prompt_input_ids=prompt_ids,
	        )
	        # Move the result to the CPU and drop singleton dimensions before writing.
	        waveform = generated.cpu().numpy().squeeze()
	        sf.write(output_path, waveform, parler_model.config.sampling_rate)
	    except Exception as err:
	        print(f"Parler-TTS error: {err}")
	        return "Error: Failed to convert text to speech."
	    else:
	        return output_path

	# Gradio interface combining all the components, with error handling in each step
	def _is_error(result):
	    """Return True when *result* is one of the ``"Error: ..."`` sentinel strings."""
	    # Match the prefix only: a genuine transcript or reply may contain the
	    # word "Error" somewhere in the middle and must not be treated as a failure.
	    return isinstance(result, str) and result.startswith("Error:")


	def chatbot_pipeline(audio):
	    """Run the voice-to-voice pipeline: speech -> text -> reply -> speech.

	    Args:
	        audio: The recorded audio, passed straight to transcribe_audio().
	            ``None`` means there is nothing to process.

	    Returns:
	        A ``(text, audio_path)`` pair. ``text`` is the chatbot reply, or an
	        ``"Error: ..."`` message if transcription or generation failed.
	        ``audio_path`` is the synthesized WAV path, or ``None`` whenever any
	        step failed.
	    """
	    if audio is None:
	        return "Error: No audio provided.", None

	    # Step 1: Convert speech to text using Whisper through Groq
	    transcribed_text = transcribe_audio(audio)
	    if _is_error(transcribed_text):
	        return transcribed_text, None

	    # Step 2: Generate a response using LLaMA through Groq
	    response_text = generate_response(transcribed_text)
	    if _is_error(response_text):
	        return response_text, None

	    # Step 3: Convert response text to speech using Parler-TTS
	    response_audio_path = text_to_speech(response_text)
	    if _is_error(response_audio_path):
	        # TTS failed: still show the text reply, just without audio.
	        return response_text, None

	    # Return both text and audio for output
	    return response_text, response_audio_path

	# Gradio interface setup
	# Input component: audio handed to chatbot_pipeline in "numpy" form.
	_audio_input = gr.Audio(type="numpy")  # Removed 'source' and 'streaming'

	# Output components, in the order chatbot_pipeline returns them: text, then audio.
	_response_outputs = [
	    gr.Textbox(label="Chatbot Response"),
	    gr.Audio(label="Chatbot Voice Response"),
	]

	ui = gr.Interface(
	    fn=chatbot_pipeline,
	    inputs=_audio_input,
	    outputs=_response_outputs,
	    live=True,
	)

	# Start the web app.
	ui.launch()