DocChat_n_Talk / openai_tts_tool.py
capradeepgujaran's picture
Update openai_tts_tool.py
49c3721 verified
raw
history blame
3.03 kB
import hashlib
import os

from openai import OpenAI
def translate_text(client, text, target_language, model_name):
    """
    Translate the input text to the target language using the specified OpenAI GPT model.

    A single user message asking for a translation is sent through
    ``client.chat.completions.create`` and the first choice's content is
    returned with surrounding whitespace removed.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``
            (an ``openai.OpenAI`` client in this file).
        text (str): Text to translate.
        target_language (str): Target language, interpolated verbatim into the prompt.
        model_name (str): Chat model used for the translation.

    Returns:
        str: The translated text, stripped of leading/trailing whitespace.

    Raises:
        ValueError: If the response has no choices or the first choice carries
            no text content.
    """
    prompt = f"Translate the following text to {target_language}:\n\n{text}\n\nTranslation:"
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
    )

    # Guard the two response shapes that would otherwise surface as an opaque
    # IndexError / "'NoneType' object has no attribute 'strip'".
    if not response.choices:
        raise ValueError("Translation failed: the model returned no choices")
    content = response.choices[0].message.content
    if content is None:
        raise ValueError("Translation failed: the model returned no text content")
    return content.strip()
def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
    """
    Translate text and save it as speech audio and/or a script file.

    The input is always passed through ``translate_text`` first; the source
    language is not detected, so a translation request is made even when the
    text is already in the target language. Output files are written to a
    ``temp`` directory under the current working directory.

    Args:
        api_key (str): OpenAI API key
        input_text (str): Text to translate and convert to speech
        model_name (str): OpenAI chat model name used for translation
        voice_type (str): Voice passed to the TTS request
        voice_speed (float): Speed of speech (converted with ``float()``)
        language (str): Target language for the translation; also embedded
            (sanitised) in the output file names
        output_option (str): Output type ('audio', 'script_text', or 'both')

    Returns:
        tuple: (audio_file_path, script_file_path, status_message). A path is
        None when that output was not requested. On any failure both paths
        are None and status_message describes the problem.
    """
    # Whitespace-only text is treated the same as missing text.
    if not input_text or (isinstance(input_text, str) and not input_text.strip()):
        return None, None, "No input text provided"
    if not api_key:
        return None, None, "No API key provided"
    # Reject unknown options before any API call: otherwise a translation
    # request is spent, nothing is written, and success is still reported.
    if output_option not in ("audio", "script_text", "both"):
        return None, None, (
            f"Invalid output option: {output_option!r}. "
            "Expected 'audio', 'script_text', or 'both'"
        )

    try:
        client = OpenAI(api_key=api_key)

        # Always translate; see the docstring for why this is unconditional.
        translated_text = translate_text(client, input_text, language, model_name)

        # exist_ok avoids the check-then-create race between concurrent calls.
        temp_dir = os.path.join(os.getcwd(), 'temp')
        os.makedirs(temp_dir, exist_ok=True)

        # The builtin hash() of a str is salted per process and can be negative,
        # so file names would differ between runs; use a stable content digest.
        digest = hashlib.sha256(translated_text.encode("utf-8")).hexdigest()[:16]
        # Keep the language label from introducing path separators or other
        # characters that are unsafe in a file name.
        language_tag = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in str(language)
        )

        # Generate audio file
        audio_file = None
        if output_option in ("audio", "both"):
            speech_response = client.audio.speech.create(
                model="tts-1-hd",
                voice=voice_type,
                input=translated_text,
                speed=float(voice_speed),
            )
            audio_path = os.path.join(temp_dir, f"output_{digest}_{language_tag}.mp3")
            with open(audio_path, "wb") as f:
                for chunk in speech_response.iter_bytes():
                    f.write(chunk)
            audio_file = audio_path

        # Save the translated text as a script file
        script_file = None
        if output_option in ("script_text", "both"):
            script_path = os.path.join(temp_dir, f"script_{digest}_{language_tag}.txt")
            with open(script_path, "w", encoding='utf-8') as f:
                f.write(translated_text)
            script_file = script_path

        status_message = f"Generation completed successfully in {language}!"
        return audio_file, script_file, status_message
    except Exception as e:
        # Deliberate catch-all: failures are reported through the status
        # message instead of propagating to the caller.
        return None, None, f"Error: {str(e)}"