# TTS_Anto / app.py

import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.schema import SystemMessage, HumanMessage
import requests
import tempfile
import time

# Configuration of the Groq model. The API key is read from the environment
# (e.g. a Hugging Face Space secret) rather than hard-coded in the source.
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(api_key=groq_api_key, model_name="llama3-70b-8192")

# ElevenLabs API key (also read from the environment) and voice ID
XI_API_KEY = os.getenv("XI_API_KEY")
VOICE_ID = "iYwRDEf2D1WyqRRecXPA"  # Replace with your own voice ID if desired
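
# Optional sanity check: the ElevenLabs key is only used at request time inside
# translate_and_speak, so failing fast here gives a clearer error when the
# secret is missing (assumes the key is supplied via the environment as above).
if not XI_API_KEY:
    raise RuntimeError("XI_API_KEY is not set; add it as an environment variable or Space secret.")
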
def translate_and_speak(user_input, target_language):
    try:
        start_time = time.time()  # Start total processing time

        # Generate translation using Groq model
        translation_start = time.time()
        system_prompt = f"You are expected to translate the user input exclusively into {target_language} without adding anything else."
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_input)
        ]
        response = llm.invoke(messages)
        translation_end = time.time()

        # Check if the response is valid
        if not response or not hasattr(response, 'content'):
            raise ValueError("Invalid response from the translation model.")
        generated_text = response.content.strip()
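
        # Optional guard (not in the original flow): stop early if the model
        # returned an empty string, since there is nothing to synthesize.
        if not generated_text:
            raise ValueError("The translation model returned an empty translation.")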

        # Use ElevenLabs API to generate speech
        tts_start = time.time()
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": XI_API_KEY
        }
        data = {
            "text": generated_text,
            "model_id": "eleven_multilingual_v2",
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
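        # In voice_settings above, "stability" trades consistency against
        # expressiveness and "similarity_boost" controls how closely the output
        # follows the selected voice; both range from 0 to 1, and 0.75 is simply
        # a middle-of-the-road choice rather than a value required by the API.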

        tts_response = requests.post(url, json=data, headers=headers)
        tts_end = time.time()

        if tts_response.status_code == 200:
            # Save audio to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                fp.write(tts_response.content)
                audio_file = fp.name
            end_time = time.time()

            # Calculate processing times
            translation_time = translation_end - translation_start
            tts_time = tts_end - tts_start
            total_time = end_time - start_time

            # Prepare timings information
            timings_info = f"Translation time: {translation_time:.2f} seconds\n"
            timings_info += f"Text-to-Speech time: {tts_time:.2f} seconds\n"
            timings_info += f"Total processing time: {total_time:.2f} seconds"

            return generated_text, audio_file, timings_info
        else:
            error_message = f"Text-to-Speech API Error: {tts_response.status_code} - {tts_response.text}"
            return error_message, None, None
    except Exception as e:
        # Return the exception message
        error_details = f"An error occurred: {str(e)}"
        return error_details, None, None
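
# Usage sketch (no UI required): the function can also be called directly, e.g.
#   text, audio_path, timings = translate_and_speak("Good morning", "French")
# It returns the translated text, the path of a temporary MP3 file, and a short
# timing summary, or an error message and two None values on failure.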

# Create Gradio interface
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text to translate...", label="Input Text"),
        gr.Dropdown(
            choices=["Spanish", "French", "German", "Italian", "Chinese", "Japanese"],
            value="Spanish",
            label="Target Language"
        )
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Audio(label="Spoken Audio", autoplay=True),
        gr.Textbox(label="Processing Times")
    ],
    title="Multilingual Text Translator and Speech Synthesizer",
    description="Translate text into the selected language and listen to the spoken audio.",
    allow_flagging="never"
)

# Launch the app
iface.launch()
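
# Note: when running locally, iface.launch(share=True) would additionally create a
# temporary public link; on Hugging Face Spaces the plain launch() above is enough.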