# TextToSpeech / app.py
# di-mitris's picture
# Update app.py
# 4b2dc3b verified
import io
import os
import time
import uuid

import gradio as gr
from gtts import gTTS
from gtts.lang import _main_langs
# Relative directory in which generated audio files are stored.
AUDIO_DIR = "audio_files"
# Maximum age of a generated audio file, in seconds (24 hours), before it is
# eligible for deletion.
MAX_FILE_AGE = 60 * 60 * 24
def text_to_speech(text, lang, tld):
    """Synthesize *text* with gTTS and save the audio under ``AUDIO_DIR``.

    Args:
        text: The text to speak.
        lang: Human-readable language name, i.e. one of the values returned
            by ``_main_langs()``.
        tld: Top-level domain handed to gTTS. ``"com"`` is used when no
            TLD is supplied.

    Returns:
        A 2-tuple holding the path of the saved audio file twice, one entry
        per output component of the interface.

    Raises:
        gr.Error: If *text* is empty/blank or *lang* is not a known
            language name.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # map the language name to its corresponding code
    lang_codes = {lang_name: lang_code for lang_code, lang_name in _main_langs().items()}
    lang_code = lang_codes.get(lang)
    if lang_code is None:
        raise gr.Error("Please select a supported language.")

    # Synthesize into memory first so that a failed request does not leave
    # an empty or truncated file behind in AUDIO_DIR.
    tts = gTTS(text, lang=lang_code, tld=tld or "com")
    audio = io.BytesIO()
    tts.write_to_fp(audio)

    # create the audio directory if it does not exist
    os.makedirs(AUDIO_DIR, exist_ok=True)

    # gTTS emits MP3 data, so use the matching extension; a random UUID keeps
    # names unique even when several requests arrive at the same instant.
    file_name = uuid.uuid4().hex + ".mp3"
    file_path = os.path.join(AUDIO_DIR, file_name)
    with open(file_path, "wb") as f:
        f.write(audio.getvalue())

    # delete old audio files
    delete_old_audio_files()

    return file_path, file_path
def delete_old_audio_files(directory=None, max_age=None):
    """Delete regular files in *directory* whose mtime is older than *max_age*.

    Args:
        directory: Directory to clean. Defaults to ``AUDIO_DIR``.
        max_age: Maximum allowed file age in seconds. Defaults to
            ``MAX_FILE_AGE``.

    A missing directory is treated as "nothing to delete". Entries that are
    not regular files are left untouched, and files that vanish while the
    scan is running are skipped.
    """
    if directory is None:
        directory = AUDIO_DIR
    if max_age is None:
        max_age = MAX_FILE_AGE

    now = time.time()
    try:
        file_names = os.listdir(directory)
    except FileNotFoundError:
        # Nothing has been generated yet.
        return

    for file_name in file_names:
        file_path = os.path.join(directory, file_name)
        # os.remove cannot delete directories; only plain files are cleaned.
        if not os.path.isfile(file_path):
            continue
        try:
            if now - os.path.getmtime(file_path) > max_age:
                os.remove(file_path)
        except FileNotFoundError:
            # Another request removed the file between listing and deletion.
            continue
# TLDs offered in the "Select TLD:" dropdown
tlds = ["com", "gr", "fr"]
# Build the Gradio interface: a text box plus language and TLD dropdowns as
# inputs, an audio component and a file component as outputs.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=10, label="Enter your text here:"),
        gr.Dropdown(
            choices=[lang_name for lang_name in _main_langs().values()],
            label="Select language:",
        ),
        gr.Dropdown(choices=list(tlds), label="Select TLD:"),
    ],
    outputs=[
        gr.Audio(label="Audio"),
        gr.File(label="Audio File"),
    ],
    allow_flagging="never",
)
# Start the interface.
iface.launch()