# TTS_Anto / app.py

import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.schema import SystemMessage, HumanMessage
import requests
import tempfile
import time

# Configuration of the Groq model. The API key is read from the environment
# (e.g. a Hugging Face Space secret) rather than hard-coded in the source.
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(api_key=groq_api_key, model_name="llama3-70b-8192")

# ElevenLabs API key (also read from the environment) and voice ID
XI_API_KEY = os.getenv("XI_API_KEY")
VOICE_ID = "iYwRDEf2D1WyqRRecXPA"  # Replace with your own voice ID if desired
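
# Optional sanity check: the ElevenLabs key is only used at request time inside
# translate_and_speak, so failing fast here gives a clearer error when the
# secret is missing (assumes the key is supplied via the environment as above).
if not XI_API_KEY:
    raise RuntimeError("XI_API_KEY is not set; add it as an environment variable or Space secret.")
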
def translate_and_speak(user_input, target_language):
    try:
        start_time = time.time()  # Start total processing time

        # Generate translation using Groq model
        translation_start = time.time()
        system_prompt = f"You are expected to translate the user input exclusively into {target_language} without adding anything else."
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_input)
        ]
        response = llm.invoke(messages)
        translation_end = time.time()

        # Check if the response is valid
        if not response or not hasattr(response, 'content'):
            raise ValueError("Invalid response from the translation model.")
        generated_text = response.content.strip()
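
        # Optional guard (not in the original flow): stop early if the model
        # returned an empty string, since there is nothing to synthesize.
        if not generated_text:
            raise ValueError("The translation model returned an empty translation.")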

        # Use ElevenLabs API to generate speech
        tts_start = time.time()
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": XI_API_KEY
        }
        data = {
            "text": generated_text,
            "model_id": "eleven_multilingual_v2",
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
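        # In voice_settings above, "stability" trades consistency against
        # expressiveness and "similarity_boost" controls how closely the output
        # follows the selected voice; both range from 0 to 1, and 0.75 is simply
        # a middle-of-the-road choice rather than a value required by the API.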

        tts_response = requests.post(url, json=data, headers=headers)
        tts_end = time.time()

        if tts_response.status_code == 200:
            # Save audio to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                fp.write(tts_response.content)
                audio_file = fp.name
            end_time = time.time()

            # Calculate processing times
            translation_time = translation_end - translation_start
            tts_time = tts_end - tts_start
            total_time = end_time - start_time

            # Prepare timings information
            timings_info = f"Translation time: {translation_time:.2f} seconds\n"
            timings_info += f"Text-to-Speech time: {tts_time:.2f} seconds\n"
            timings_info += f"Total processing time: {total_time:.2f} seconds"

            return generated_text, audio_file, timings_info
        else:
            error_message = f"Text-to-Speech API Error: {tts_response.status_code} - {tts_response.text}"
            return error_message, None, None
    except Exception as e:
        # Return the exception message
        error_details = f"An error occurred: {str(e)}"
        return error_details, None, None
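
# Usage sketch (no UI required): the function can also be called directly, e.g.
#   text, audio_path, timings = translate_and_speak("Good morning", "French")
# It returns the translated text, the path of a temporary MP3 file, and a short
# timing summary, or an error message and two None values on failure.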

# Create Gradio interface
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text to translate...", label="Input Text"),
        gr.Dropdown(
            choices=["Spanish", "French", "German", "Italian", "Chinese", "Japanese"],
            value="Spanish",
            label="Target Language"
        )
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Audio(label="Spoken Audio", autoplay=True),
        gr.Textbox(label="Processing Times")
    ],
    title="Multilingual Text Translator and Speech Synthesizer",
    description="Translate text into the selected language and listen to the spoken audio.",
    allow_flagging="never"
)

# Launch the app
iface.launch()
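
# Note: when running locally, iface.launch(share=True) would additionally create a
# temporary public link; on Hugging Face Spaces the plain launch() above is enough.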