"""Set up vakyansh-tts, load the Hindi female-voice models, and synthesize speech.

Running this script clones the vakyansh-tts repository into the current
working directory, installs it, downloads the Hindi transliteration, Glow-TTS
and HiFi-GAN archives, and finally synthesizes one demo sentence to
``temp.wav``.
"""
import functools
import os
import re

# --- One-time environment setup -------------------------------------------
# The shell commands are run best-effort (exit codes are not checked), so a
# re-run in an already prepared directory does not abort.
os.system('git clone https://github.com/Open-Speech-EkStep/vakyansh-tts')

# Resolve the checkout location once so later paths do not depend on the
# script having been started from /content (the Colab default).
REPO_DIR = os.path.abspath('vakyansh-tts')
os.chdir(REPO_DIR)
os.system('bash install.sh')
os.system('python setup.py bdist_wheel')
os.system('pip install -e .')

# Transliteration models.
os.chdir('tts_infer')
os.system('mkdir translit_models')
os.chdir('translit_models')
os.system('wget https://storage.googleapis.com/vakyaansh-open-models/translit_models/default_lineup.json')
os.system('mkdir hindi')
os.chdir('hindi')
os.system('wget https://storage.googleapis.com/vakyaansh-open-models/translit_models/hindi/hindi_transliteration.zip')
os.system('unzip hindi_transliteration')

# Acoustic model (glow) and vocoder (hifi) archives; they are unpacked next
# to the transliteration model because the working directory is still
# translit_models/hindi.
os.system('wget https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/glow.zip')
os.system('unzip glow.zip')
os.system('wget https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/hifi.zip')
os.system('unzip hifi.zip')
os.system('rm glow.zip')
os.system('rm hifi.zip')

# Return to the repository root before importing from the checkout.
os.chdir(REPO_DIR)

# These imports must come after the setup above: they are provided by the
# freshly cloned/installed repository.
from tts_infer.tts import TextToMel, MelToWav  # noqa: E402
from tts_infer.transliterate import XlitEngine  # noqa: E402
from tts_infer.num_to_word_on_sent import normalize_nums  # noqa: E402
from scipy.io.wavfile import write  # noqa: E402

device = 'cpu'
_MODEL_DIR = os.path.join(REPO_DIR, 'tts_infer', 'translit_models', 'hindi')
text_to_mel = TextToMel(glow_model_dir=os.path.join(_MODEL_DIR, 'glow_ckp'), device=device)
mel_to_wav = MelToWav(hifi_model_dir=os.path.join(_MODEL_DIR, 'hifi_ckp'), device=device)

# A word is sent to the transliterator when it starts with an ASCII letter.
_LATIN_START = re.compile(r'[a-zA-Z]')


@functools.lru_cache(maxsize=8)
def _get_xlit_engine(lang):
    """Return a transliteration engine for ``lang``, built once per language."""
    return XlitEngine(lang)


def translit(text, lang):
    """Transliterate the Latin-script words of ``text`` into ``lang``.

    The text is split on whitespace. Every word that begins with an ASCII
    letter is replaced by the engine's top-1 transliteration; all other words
    are kept unchanged. The words are re-joined with single spaces.

    Args:
        text: Input sentence.
        lang: Language code passed to ``XlitEngine`` (e.g. ``'hi'``).

    Returns:
        The sentence with Latin-script words transliterated.
    """
    engine = _get_xlit_engine(lang)
    words = [
        engine.translit_word(word, topk=1)[lang][0] if _LATIN_START.match(word) else word
        for word in text.split()
    ]
    return ' '.join(words)


def run_tts(text, lang):
    """Synthesize ``text`` and write the audio to ``temp.wav``.

    Args:
        text: Sentence to speak; may mix native script, digits and English.
        lang: Language code used for number expansion and transliteration.

    Returns:
        A ``(sr, audio)`` tuple: the sample rate and the waveform returned by
        the vocoder.
    """
    # Replace the Devanagari danda (U+0964) with a full stop. Written as an
    # escape so the literal survives re-encoding of this file.
    text = text.replace('\u0964', '.')  # only for hindi models
    text_num_to_word = normalize_nums(text, lang)  # converting numbers to words in lang
    text_num_to_word_and_transliterated = translit(text_num_to_word, lang)  # transliterating english words to lang

    mel = text_to_mel.generate_mel(text_num_to_word_and_transliterated)
    audio, sr = mel_to_wav.generate_wav(mel)
    write(filename='temp.wav', rate=sr, data=audio)  # for saving wav file, if needed
    return (sr, audio)


_, audio = run_tts('hello my name is harveen', 'hi')