|
import gradio as gr |
|
from langchain_groq import ChatGroq |
|
from langchain.schema import SystemMessage, HumanMessage |
|
import requests |
|
import tempfile |
|
import time |
|
|
|
|
|
import os


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: If the variable is unset or blank, so a missing
            credential is reported at startup instead of on the first request.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; export it before starting the app."
        )
    return value


# Credentials are read from the environment instead of being committed to source.
# NOTE(review): the keys that were previously hard-coded here should be treated
# as leaked and rotated with the respective providers.
groq_api_key = _require_env("GROQ_API_KEY")

# Chat model used for the translation step.
llm = ChatGroq(api_key=groq_api_key, model_name="llama3-70b-8192")

# API key sent in the "xi-api-key" header of text-to-speech requests.
XI_API_KEY = _require_env("ELEVENLABS_API_KEY")

# The voice id is not a secret, so the original value stays as the default;
# set ELEVENLABS_VOICE_ID to use a different voice.
VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "iYwRDEf2D1WyqRRecXPA")
|
|
|
def _translate_text(user_input, target_language):
    """Translate ``user_input`` into ``target_language`` using the module-level ``llm``.

    Returns:
        The model's reply with surrounding whitespace stripped.

    Raises:
        ValueError: If the model returns a falsy response or one without a
            ``content`` attribute.
    """
    system_prompt = (
        "You are expected to translate the user input exclusively into "
        f"{target_language} without adding anything else."
    )
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_input),
    ]
    response = llm.invoke(messages)
    if not response or not hasattr(response, "content"):
        raise ValueError("Invalid response from the translation model.")
    return response.content.strip()


def _request_speech(text, timeout=(5, 60)):
    """POST ``text`` to the ElevenLabs text-to-speech endpoint for ``VOICE_ID``.

    Args:
        text: The text to synthesize.
        timeout: ``(connect, read)`` timeout in seconds passed to ``requests``;
            without one a stalled connection would block the caller forever.

    Returns:
        The ``requests`` response object; the caller checks the status code.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": XI_API_KEY,
    }
    payload = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }
    return requests.post(url, json=payload, headers=headers, timeout=timeout)


def translate_and_speak(user_input, target_language):
    """Translate text, synthesize it as speech, and report how long each step took.

    Args:
        user_input: The text to translate.
        target_language: Name of the language to translate into.

    Returns:
        A 3-tuple ``(text, audio_path, timings)``. On success ``text`` is the
        translation, ``audio_path`` is the path of a temporary ``.mp3`` file and
        ``timings`` is a multi-line summary. On any failure (blank input, model
        error, non-200 text-to-speech response, network error) ``text`` holds a
        human-readable message and the other two items are ``None``.
    """
    # Guard clause: do not spend API calls on blank input.
    if not user_input or not user_input.strip():
        return "Please enter some text to translate.", None, None

    try:
        # perf_counter is monotonic, so durations cannot be skewed by clock changes.
        start_time = time.perf_counter()

        translation_start = time.perf_counter()
        generated_text = _translate_text(user_input, target_language)
        translation_end = time.perf_counter()

        if not generated_text:
            raise ValueError("The translation model returned an empty translation.")

        tts_start = time.perf_counter()
        tts_response = _request_speech(generated_text)
        tts_end = time.perf_counter()

        if tts_response.status_code != 200:
            error_message = (
                f"Text-to-Speech API Error: {tts_response.status_code} - {tts_response.text}"
            )
            return error_message, None, None

        # delete=False keeps the file on disk after the handle closes, because
        # its path is handed back to the caller. Nothing here removes it later.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            fp.write(tts_response.content)
            audio_file = fp.name
        end_time = time.perf_counter()

        translation_time = translation_end - translation_start
        tts_time = tts_end - tts_start
        total_time = end_time - start_time

        timings_info = "\n".join(
            [
                f"Translation time: {translation_time:.2f} seconds",
                f"Text-to-Speech time: {tts_time:.2f} seconds",
                f"Total processing time: {total_time:.2f} seconds",
            ]
        )
        return generated_text, audio_file, timings_info
    except Exception as e:
        # UI boundary: report any failure (including request timeouts) as text
        # in the first output instead of letting the handler crash.
        return f"An error occurred: {e}", None, None
|
|
|
|
|
# Input widgets, listed in the positional order of translate_and_speak's parameters.
_input_components = [
    gr.Textbox(
        lines=2,
        placeholder="Enter text to translate...",
        label="Input Text",
    ),
    gr.Dropdown(
        choices=["Spanish", "French", "German", "Italian", "Chinese", "Japanese"],
        value="Spanish",
        label="Target Language",
    ),
]

# Output widgets, one per item of the 3-tuple returned by translate_and_speak.
_output_components = [
    gr.Textbox(label="Translated Text"),
    gr.Audio(label="Spoken Audio", autoplay=True),
    gr.Textbox(label="Processing Times"),
]

# Wire the handler and widgets into a single-page Gradio interface.
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=_input_components,
    outputs=_output_components,
    title="Multilingual Text Translator and Speech Synthesizer",
    description="Translate text into the selected language and listen to the spoken audio.",
    allow_flagging="never",
)

# Start the web UI as soon as the module is executed.
iface.launch()