# ----- Earlier version of this Space using Groq (kept for reference) -----
# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq
#
# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key)
#
# # Load the Whisper model
# model = whisper.load_model("base")  # Other sizes: "small", "medium", "large"
#
# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)
#
#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]
#
#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )
#
#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()
#
#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)
#
#         # Save the audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())
#
#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"
#     except Exception as e:
#         return f"An error occurred: {e}", None
#
# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True,
# )
# iface.launch()
import os
import gradio as gr
import whisper
from gtts import gTTS
from anthropic import Anthropic  # Import the Anthropic client
import io  # Import io for BytesIO

# Get the Anthropic API key from environment variables
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")

# Initialize the Anthropic client
client = Anthropic(api_key=ANTHROPIC_API_KEY)
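# Optional sanity check for the key and model before wiring up the UI,
# left commented out ("claude-3-haiku-20240307" is one current model choice):
#   msg = client.messages.create(
#       model="claude-3-haiku-20240307",
#       max_tokens=16,
#       messages=[{"role": "user", "content": "Say hello."}],
#   )
#   print(msg.content[0].text)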
# Load the Whisper model
model = whisper.load_model("base")  # You can also use "small", "medium", "large"
def chatbot(audio=None):
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper (transcribe accepts the
        # file path that Gradio passes in directly)
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response with the Anthropic Messages API. The original
        # legacy Completions call targeted "claude-v1", which is retired;
        # the Messages API with a current model is used instead.
        message = client.messages.create(
            model="claude-3-haiku-20240307",  # Swap in any current Claude model
            max_tokens=100,
            messages=[{"role": "user", "content": user_input}],
        )
        # The SDK returns an object, not a dict; the reply text is in content[0].text
        response_text = message.content[0].text.strip()

        # Convert the response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()  # Create a BytesIO object
        # gTTS.save() expects a file path; write_to_fp() is the file-object API
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)  # Rewind the BytesIO object

        # gr.Audio outputs expect a file path (or a sample-rate/array tuple),
        # not a BytesIO, so persist the MP3 and return its path
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None
def clear_inputs():
    # Returning None for each component resets the audio input and both outputs
    return None, None, None
# Create a custom interface
def build_interface():
    with gr.Blocks(css="""
        .block-title {
            text-align: center;
            color: white;
            background-color: #4CAF50;
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as demo:
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )

        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                chatbot_output_text = gr.Textbox(label="Chatbot Response")
                chatbot_output_audio = gr.Audio(label="Audio Response")
                clear_button = gr.Button("Clear", elem_classes="gradio-button")

        clear_button.click(
            fn=clear_inputs,
            outputs=[audio_input, chatbot_output_text, chatbot_output_audio]
        )

        audio_input.change(
            fn=chatbot,
            inputs=[audio_input],
            outputs=[chatbot_output_text, chatbot_output_audio]
        )

    return demo
# Launch the interface
if __name__ == "__main__":
    interface = build_interface()
    interface.launch()
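# Setup sketch (package names assumed from the imports above):
#   pip install gradio openai-whisper gTTS anthropic
#   export ANTHROPIC_API_KEY=...   # required; the script raises ValueError otherwise
#   python app.py                  # or whatever this file is named
# Whisper decodes audio through ffmpeg, so the ffmpeg binary must also be installed.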