File size: 4,630 Bytes
69e8afe
fe316fd
699482f
06c276c
69e8afe
 
 
 
 
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
67eacd3
 
 
fcbcfb1
 
67eacd3
 
 
 
 
 
 
d6363d7
69e8afe
 
195b9fe
 
c28a518
 
d6363d7
06c276c
5a42dce
69e8afe
5a42dce
06c276c
 
69e8afe
 
 
88fa597
67eacd3
 
 
 
 
69e8afe
 
 
 
 
 
 
 
 
67eacd3
e393e3c
69e8afe
 
 
 
67eacd3
 
 
 
 
 
765679f
69e8afe
 
88fa597
765679f
fe316fd
765679f
 
 
 
67eacd3
 
 
765679f
d361dba
 
f079036
c68edef
f079036
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import gradio as gr
# ---------------------------------------------------------------------------
# One-time environment setup, executed at import time on every start.
#
# The steps below install the vakyansh TTS package from a local checkout and
# download the Hindi transliteration and voice models. Every step relies on the
# current working directory left behind by the previous one, so the order of
# the os.chdir / os.system calls matters.
#
# NOTE(review): the exit status of each os.system call is ignored, so a failed
# download or install is not detected here; it would only surface later when
# the models are loaded.
# ---------------------------------------------------------------------------

# The clone step is disabled; a `vakyansh_tts` directory must already exist in
# the working directory, otherwise the os.chdir below raises. Note that the
# repository name uses a hyphen while the local directory uses an underscore.
#os.system('git clone https://github.com/Open-Speech-EkStep/vakyansh-tts')
os.chdir('vakyansh_tts') 
# Run the checkout's install script, build a wheel, then install the package
# in editable mode.
os.system('bash install.sh')
os.system('python setup.py bdist_wheel')
os.system('pip install -e .')

# Transliteration assets go under vakyansh_tts/tts_infer/translit_models.
os.chdir('tts_infer')
os.system('mkdir translit_models')
os.chdir('translit_models')
os.system('wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/default_lineup.json')
os.system('mkdir hindi')
os.chdir('hindi')
os.system('wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/hindi/hindi_transliteration.zip')
# The archive name is given without its `.zip` suffix.
# NOTE(review): this relies on unzip locating `hindi_transliteration.zip` itself - confirm.
os.system('unzip hindi_transliteration')

# Female voice: the glow and hifi archives are unpacked into the same `hindi`
# directory as the transliteration model.
os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/glow.zip')
os.system('unzip glow.zip')

os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/hifi.zip')
os.system('unzip hifi.zip')
# Remove the archives before the male voice downloads files with the same names.
os.system('rm glow.zip')
os.system('rm hifi.zip')

# Male voice: same archive names, unpacked into the `hindi/male` subdirectory.
os.system('mkdir male')
os.chdir('male')
os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/male_voice_1/glow.zip')
os.system('unzip glow.zip')

os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/male_voice_1/hifi.zip')
os.system('unzip hifi.zip')


# Print the current directory to the process output (debugging aid).
os.system('pwd')
os.system('rm glow.zip')
os.system('rm hifi.zip')
# Replace whatever numpy/numba versions were installed above with pinned ones.
# NOTE(review): presumably required for compatibility with the TTS
# dependencies - confirm before changing these pins.
os.system('pip uninstall -y numpy')
os.system('pip install numpy==1.19.5')
os.system('pip uninstall -y numba')
os.system('pip install numba==0.53')

# Return to the absolute application directory so that the `vakyansh_tts`
# package imports that follow resolve, then print it for confirmation.
os.chdir('/home/user/app/')
os.system('pwd')

from vakyansh_tts.tts_infer.tts import TextToMel, MelToWav
from vakyansh_tts.tts_infer.transliterate import XlitEngine
from vakyansh_tts.tts_infer.num_to_word_on_sent import normalize_nums

import re
from scipy.io.wavfile import write
# Device string handed to every model constructor below.
device = 'cpu'

# Checkpoint directories, one glow/hifi pair per voice. The female checkpoints
# sit directly in the `hindi` directory; the male ones in `hindi/male`.
_FEMALE_GLOW_DIR = '/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/glow_ckp'
_FEMALE_HIFI_DIR = '/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/hifi_ckp'
_MALE_GLOW_DIR = '/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/male/glow_ckp'
_MALE_HIFI_DIR = '/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/male/hifi_ckp'

# Models are created once at import time and shared by every request.
# `_f` suffix = female voice, `_m` suffix = male voice.
text_to_mel_f = TextToMel(glow_model_dir=_FEMALE_GLOW_DIR, device=device)
mel_to_wav_f = MelToWav(hifi_model_dir=_FEMALE_HIFI_DIR, device=device)
text_to_mel_m = TextToMel(glow_model_dir=_MALE_GLOW_DIR, device=device)
mel_to_wav_m = MelToWav(hifi_model_dir=_MALE_HIFI_DIR, device=device)


# Matches a word that begins with an ASCII letter. Compiled once at import
# time rather than on every call.
_LATIN_START = re.compile(r'[a-zA-Z]')

# Transliteration engines keyed by language code, created on first use and
# then reused by later calls.
_XLIT_ENGINES = {}


def _get_xlit_engine(lang):
    """Return the cached XlitEngine for *lang*, building it on first use."""
    try:
        return _XLIT_ENGINES[lang]
    except KeyError:
        engine = _XLIT_ENGINES[lang] = XlitEngine(lang)
        return engine


def translit(text, lang):
    """Transliterate the Latin-script words of *text* into *lang*.

    The text is split on whitespace. A word that starts with an ASCII letter
    is replaced by the first candidate the transliteration engine returns for
    *lang*; every other word is kept unchanged. The words are then re-joined
    with single spaces, so runs of whitespace in the input are collapsed.

    Args:
        text: Input sentence.
        lang: Language code passed to ``XlitEngine`` and used as the key into
            the result of ``translit_word`` (``'hi'`` in this file).

    Returns:
        The sentence with Latin-script words transliterated.
    """
    converted = []
    for word in text.split():
        if _LATIN_START.match(word):
            # The engine is only looked up (and, the first time, built) when a
            # word actually needs transliteration, so text without any
            # Latin-script word never pays for engine construction.
            word = _get_xlit_engine(lang).translit_word(word, topk=1)[lang][0]
        converted.append(word)
    return ' '.join(converted)
    
def run_tts(text, gender):
    """Synthesize Hindi speech for *text* using the voice chosen by *gender*.

    The text has every danda ('।') replaced by a full stop, its numbers
    converted to words by ``normalize_nums``, and its Latin-script words
    transliterated by ``translit`` before it is fed to the models.

    Args:
        text: Sentence to speak.
        gender: ``'female'`` selects the female models; any other value
            selects the male models.

    Returns:
        A ``(sr, audio)`` tuple built from the two values returned by
        ``generate_wav``, in the order the audio output component receives it.
    """
    language = 'hi'

    # Danda -> full stop; only needed for the Hindi models.
    with_periods = text.replace('।', '.')
    # Numbers become words first, then Latin-script words are transliterated.
    spelled_out = normalize_nums(with_periods, language)
    model_input = translit(spelled_out, language)

    # Select the model pair once so that a single synthesis path follows.
    if gender == 'female':
        mel_model, wav_model = text_to_mel_f, mel_to_wav_f
    else:
        mel_model, wav_model = text_to_mel_m, mel_to_wav_m

    mel = mel_model.generate_mel(model_input)
    audio, sr = wav_model.generate_wav(mel)
    # To keep a copy on disk: write(filename='temp.wav', rate=sr, data=audio)
    return (sr, audio)
    
# Example (not executed): _, audio = run_tts('hello my name is harveen', 'male')


# Free-text input holding the sentence to synthesize; first argument of run_tts.
textbox = gr.inputs.Textbox(
    label="TTS",
    default="",
    placeholder="Enter Hindi text here",
)

# Voice selector; the chosen string is the second argument of run_tts.
choices = ['male', 'female']
radioBtns = gr.inputs.Radio(
    choices,
    type="value",
    default='male',
    label=None,
)

# run_tts returns a (sr, audio) tuple, hence the "numpy" audio output type.
op = gr.outputs.Audio(
    type="numpy",
    label=None,
)

# Each example row is [text, voice]: Devanagari text, romanized Hindi that
# needs transliteration, and a sentence containing digits.
examples = [
    [
        'क्रिप्टो करेंसी दरअसल, वित्तीय लेन-देन का एक जरिया है। बिल्कुल भारतीय रुपये और अमेरिकी डॉलर के समान, अंतर सिर्फ इतना है कि यह आभाषी है और दिखाई नहीं देती, न ही आप इसे छू सकते हैं।',
        'male',
    ],
    [
        'mujhe abhi bhi yakeen nai aa raha ki yeh aise bhi chal sakta hai',
        'male',
    ],
    [
        'मुझे 26 रुपए दे दो, फिर मेरे पास 50 रुपए हो जाएंगे',
        'male',
    ],
]

iface = gr.Interface(
    fn=run_tts,
    inputs=[textbox, radioBtns],
    outputs=op,
    examples=examples,
    title='Vakyansh Text To Speech (TTS): Hindi Demo',
    description='Glow TTS + hifi gan. Training Code: https://github.com/Open-Speech-EkStep/vakyansh-tts',
)

# NOTE(review): `enable_queue` / `cache_examples` as launch() keywords depend on
# the installed Gradio version - confirm against the pinned release.
iface.launch(enable_queue=True, cache_examples=True)