File size: 2,354 Bytes
69e8afe
699482f
06c276c
69e8afe
 
 
 
 
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
d6363d7
69e8afe
 
 
d6363d7
06c276c
5a42dce
69e8afe
5a42dce
06c276c
 
69e8afe
 
 
 
 
316fec1
 
69e8afe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
# Import-time bootstrap.  The steps below run strictly in order, each one
# relying on the working directory left behind by the previous one:
#   1. enter the vendored `vakyansh_tts` checkout, run its install script,
#      build a wheel and install the package in editable mode;
#   2. create tts_infer/translit_models and download the transliteration
#      line-up file plus the Hindi transliteration archive;
#   3. download and unpack the hi-IN female_voice_0 glow and hifi archives
#      into the same `hindi` directory, then delete the zip files;
#   4. switch to /home/user/app/ before the package imports that follow.
# The exit status of every shell command is discarded, so a failed download
# or unzip does not stop the script here.
# The repository used to be cloned at start-up; the call is kept for reference:
#os.system('git clone https://github.com/Open-Speech-EkStep/vakyansh-tts')
_SETUP_STEPS = (
    (os.chdir, 'vakyansh_tts'),
    (os.system, 'bash install.sh'),
    (os.system, 'python setup.py bdist_wheel'),
    (os.system, 'pip install -e .'),
    (os.chdir, 'tts_infer'),
    (os.system, 'mkdir translit_models'),
    (os.chdir, 'translit_models'),
    (os.system, 'wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/default_lineup.json'),
    (os.system, 'mkdir hindi'),
    (os.chdir, 'hindi'),
    (os.system, 'wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/hindi/hindi_transliteration.zip'),
    (os.system, 'unzip hindi_transliteration'),
    (os.system, 'wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/glow.zip'),
    (os.system, 'unzip glow.zip'),
    (os.system, 'wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/hifi.zip'),
    (os.system, 'unzip hifi.zip'),
    (os.system, 'pwd'),
    (os.system, 'rm glow.zip'),
    (os.system, 'rm hifi.zip'),
    (os.chdir, '/home/user/app/'),
    (os.system, 'pwd'),
)
for _step, _arg in _SETUP_STEPS:
    _step(_arg)

from vakyansh_tts.tts_infer.tts import TextToMel, MelToWav
from vakyansh_tts.tts_infer.transliterate import XlitEngine
from vakyansh_tts.tts_infer.num_to_word_on_sent import normalize_nums

import re
from scipy.io.wavfile import write
# Inference device handed to both model wrappers.
device = 'cpu'

# Model wrappers used by run_tts: text_to_mel.generate_mel() output is fed to
# mel_to_wav.generate_wav().
# The setup steps earlier in this file download and unpack glow.zip / hifi.zip
# inside vakyansh_tts/tts_infer/translit_models/hindi -- the underscore
# directory, which is also the package the imports use.  The checkpoint
# directories therefore have to be looked up under `vakyansh_tts`; the old
# `vakyansh-tts` (hyphen) path is only created by the git clone that is now
# commented out.
# NOTE(review): assumes the archives unpack to `glow_ckp` / `hifi_ckp` -- confirm.
text_to_mel = TextToMel(glow_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/glow_ckp', device=device)
mel_to_wav = MelToWav(hifi_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/hifi_ckp', device=device)

def translit(text, lang):
    """Transliterate the Latin-script words of a sentence.

    The sentence is split on whitespace.  Every token whose first character
    is an ASCII letter is sent to an ``XlitEngine`` built for ``lang`` (with
    ``topk=1``) and replaced by the first entry stored under the ``lang`` key
    of the engine's result.  All other tokens are kept unchanged.

    Args:
        text: Sentence to process.
        lang: Language code passed to ``XlitEngine`` and used as the key
            into its result.

    Returns:
        The processed tokens joined with single spaces.
    """
    starts_with_latin = re.compile(r'[a-zA-Z]')
    # A fresh engine is created on every call.
    xlit = XlitEngine(lang)

    converted = []
    for token in text.split():
        if starts_with_latin.match(token):
            token = xlit.translit_word(token, topk=1)[lang][0]
        converted.append(token)
    return ' '.join(converted)
    
def run_tts(text, lang):
    """Synthesize speech for ``text`` and return it with its sample rate.

    Pipeline: replace the danda with a full stop, spell out numbers via
    ``normalize_nums``, transliterate English words via ``translit``, generate
    a mel representation with ``text_to_mel`` and turn it into a waveform
    with ``mel_to_wav``.

    Side effect: the waveform is also written to ``temp.wav`` (relative to
    the current working directory) on every call.

    Args:
        text: Input sentence.
        lang: Language code forwarded to ``normalize_nums`` and ``translit``.

    Returns:
        ``(sr, audio)`` exactly as produced by ``mel_to_wav.generate_wav``,
        with the sample rate first.
    """
    # Danda -> full stop; only for hindi models.
    spoken_numbers = normalize_nums(text.replace('।', '.'), lang)
    # English words are transliterated into `lang` before synthesis.
    model_input = translit(spoken_numbers, lang)

    audio, sr = mel_to_wav.generate_wav(text_to_mel.generate_mel(model_input))
    # Keep a copy on disk, if needed.
    write(filename='temp.wav', rate=sr, data=audio)
    return (sr, audio)
    
# Runs one synthesis at import time on a sample English sentence with lang 'hi'.
# The sample rate is discarded and the waveform is kept in `audio`; run_tts
# also writes the result to temp.wav as a side effect.
_, audio = run_tts('hello my name is harveen', 'hi')