import os
import io

import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq

# Access the API key from the environment variable we set
groq_api_key = os.getenv("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)

# Load the Whisper model
model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"

def process_audio(file_path):
    try:
        # Load the audio file
        audio = whisper.load_audio(file_path)

        # Transcribe the audio using Whisper
        result = model.transcribe(audio)
        text = result["text"]

        # Generate a response using Groq
        chat_completion = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # Access the response using dot notation
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
        response_audio_io.seek(0)

        # Save the audio to a file to ensure it was generated correctly
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        # Return the response text and the path to the saved audio file
        return response_message, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Pass the recording/upload to process_audio as a file path
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
    live=True,
)

iface.launch(share=True)
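
# Note (assumption, not stated in the original file): to run this app, the environment
# needs the packages behind the imports above (roughly gradio, openai-whisper, gTTS,
# and groq in requirements.txt), ffmpeg for whisper.load_audio, and the GROQ_API_KEY
# environment variable (e.g. set as a Hugging Face Space secret).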