gTTS / app.py
Nick088's picture
Update app.py
1cdadae verified
raw
history blame contribute delete
No virus
1.8 kB
import io
import os
import tempfile

import gradio as gr
import librosa
import soundfile as sf
from gtts import gTTS
def text_to_speech(text, language_accent, pitch):
    """Synthesize *text* with gTTS and optionally pitch-shift the result.

    Args:
        text: The text to speak.
        language_accent: A key of ``language_tld_map`` selecting the gTTS
            language code and Google top-level domain (accent).
        pitch: Number of semitones to shift the voice by. ``0`` or ``None``
            (an emptied number field) leaves the audio unshifted.

    Returns:
        Path to a WAV file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty or only whitespace.
        KeyError: If *language_accent* is not a key of ``language_tld_map``.
    """
    if not text or not text.strip():
        # gTTS would otherwise fail with a bare AssertionError; surface a
        # readable message in the UI instead.
        raise gr.Error("Please enter some text to convert to speech.")

    lang, tld = language_tld_map[language_accent].split(',')

    # Work in a per-call scratch directory: fixed file names in the working
    # directory would be overwritten by concurrent requests.
    with tempfile.TemporaryDirectory() as workdir:
        # gTTS writes MP3 data, so give the intermediate file an honest name.
        mp3_path = os.path.join(workdir, 'gtts.mp3')
        gTTS(text, lang=lang, tld=tld).save(mp3_path)
        # Decode fully into memory before the scratch directory is removed.
        y, sr = librosa.load(mp3_path)

    # Only run the pitch shifter when a non-zero shift was requested, so that
    # "0 = no variations" really returns the untouched synthesis.
    if pitch:
        y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=pitch)

    # The result must outlive this call so the caller can read it; a unique
    # name keeps simultaneous requests from clobbering each other's output.
    fd, out_path = tempfile.mkstemp(prefix='generated_gtts_', suffix='.wav')
    os.close(fd)
    sf.write(out_path, y, sr)

    return out_path
# Maps each accent label to "<gTTS language code>,<Google top-level domain>".
# text_to_speech splits the value on the comma to obtain the two parts.
language_tld_map = {
    label: f"{lang_code},{domain}"
    for label, lang_code, domain in (
        ("English_Australia", "en", "com.au"),
        ("English_United_Kingdom", "en", "co.uk"),
        ("English_United_States", "en", "com"),
        ("English_Canada", "en", "ca"),
        ("English_Nigerian", "en", "com.ng"),
        ("English_Ireland", "en", "ie"),
        ("English_South Africa", "en", "co.za"),
        ("French_Canada", "fr", "ca"),
        ("French_France", "fr", "fr"),
        ("Mandarin_China_Mainland", "zh-CN", "com"),
        ("Mandarin_Taiwan", "zh-TW", "com"),
        ("Portuguese_Brazil", "pt", "com.br"),
        ("Portuguese_Portugal", "pt", "pt"),
        ("Spanish_Mexico", "es", "com.mx"),
        ("Spanish_Spain", "es", "es"),
        ("Spanish_United_States", "es", "com"),
    )
}
# Assemble the Gradio UI: a text box, an accent picker and a pitch field are
# passed to text_to_speech, and its return value is shown in an audio player.
text_box = gr.Textbox(lines=10, label="Enter your text here:")
accent_dropdown = gr.Dropdown(
    choices=list(language_tld_map),
    label="Select Language & Accent:",
    value="English_United_Kingdom",
    type="value",
)
pitch_number = gr.Number(
    label="Pitch (0 = no variations, negative pitch makes it more masculine, + pitch makes it more feminine):",
    value=0,
)
audio_player = gr.Audio(label="Audio")

iface = gr.Interface(
    fn=text_to_speech,
    inputs=[text_box, accent_dropdown, pitch_number],
    outputs=[audio_player],
    allow_flagging="never",
)

# Start serving the app as soon as the module is executed.
iface.launch()