File size: 4,982 Bytes
69e8afe
a74bf57
 
fe316fd
f848ab9
699482f
06c276c
69e8afe
 
 
 
 
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
 
d6363d7
69e8afe
67eacd3
 
 
fcbcfb1
 
67eacd3
 
 
 
 
 
 
d6363d7
69e8afe
 
195b9fe
 
c28a518
 
d6363d7
06c276c
5a42dce
d6d0dc5
69e8afe
5a42dce
06c276c
 
69e8afe
 
 
ea303e4
67eacd3
 
 
 
 
69e8afe
 
 
 
 
 
 
 
 
67eacd3
2a2dd20
e393e3c
69e8afe
 
 
2a2dd20
67eacd3
 
 
 
 
 
765679f
69e8afe
 
88fa597
765679f
fe316fd
765679f
 
 
 
67eacd3
 
 
765679f
d361dba
 
f079036
c68edef
6b21d52
f079036
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# ---------------------------------------------------------------------------
# Environment bootstrap.
#
# Everything below runs at import time and shells out via os.system(). The
# return codes are not checked, so a failed install or download does not stop
# the script here. The steps are order-dependent because they rely on the
# process working directory changed by os.chdir().
# ---------------------------------------------------------------------------
import os
# Replace whatever gradio is installed with the pinned 2.7 release before
# importing it.
os.system("pip uninstall -y gradio")
os.system("pip install gradio==2.7")
import gradio as gr

# The clone step is disabled; the chdir below requires a 'vakyansh_tts'
# directory to already exist relative to the current working directory
# (presumably checked in next to this script -- confirm).
#os.system('git clone https://github.com/Open-Speech-EkStep/vakyansh-tts')
os.chdir('vakyansh_tts') 
# Run the project's install script, build a wheel and install the package in
# editable mode.
os.system('bash install.sh')
os.system('python setup.py bdist_wheel')
os.system('pip install -e .')

# Transliteration assets: vakyansh_tts/tts_infer/translit_models/
os.chdir('tts_infer')
os.system('mkdir translit_models')
os.chdir('translit_models')
# NOTE(review): this bucket is spelled 'vakyaansh-open-models' while the TTS
# downloads below use 'vakyansh-open-models' -- confirm both are intended.
os.system('wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/default_lineup.json')
os.system('mkdir hindi')
os.chdir('hindi')
os.system('wget -q https://storage.googleapis.com/vakyaansh-open-models/translit_models/hindi/hindi_transliteration.zip')
os.system('unzip hindi_transliteration')

# Female voice (female_voice_0): the glow and hifi archives are unpacked into
# the same translit_models/hindi/ directory, then the zip files are removed.
os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/glow.zip')
os.system('unzip glow.zip')

os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/female_voice_0/hifi.zip')
os.system('unzip hifi.zip')
os.system('rm glow.zip')
os.system('rm hifi.zip')

# Male voice (male_voice_1): unpacked into translit_models/hindi/male/.
os.system('mkdir male')
os.chdir('male')
os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/male_voice_1/glow.zip')
os.system('unzip glow.zip')

os.system('wget -q https://storage.googleapis.com/vakyansh-open-models/tts/hindi/hi-IN/male_voice_1/hifi.zip')
os.system('unzip hifi.zip')


# Print the current directory to the log, then remove the downloaded archives.
os.system('pwd')
os.system('rm glow.zip')
os.system('rm hifi.zip')
# Pin numpy and numba to specific versions.
# NOTE(review): gradio was imported above, so numpy may already be loaded in
# this process -- confirm the pinned version is the one actually used.
os.system('pip uninstall -y numpy')
os.system('pip install numpy==1.19.5')
os.system('pip uninstall -y numba')
os.system('pip install numba==0.53')

# Return to the application root (hard-coded absolute path) so that the
# 'vakyansh_tts' package imports that follow resolve from there.
os.chdir('/home/user/app/')
os.system('pwd')
#print('hello')

from vakyansh_tts.tts_infer.tts import TextToMel, MelToWav
from vakyansh_tts.tts_infer.transliterate import XlitEngine
from vakyansh_tts.tts_infer.num_to_word_on_sent import normalize_nums

import re
from scipy.io.wavfile import write
# All models are loaded once at start-up and run on the CPU.
device = 'cpu'

# Female voice: text -> mel model and mel -> waveform model, read from the
# checkpoints under translit_models/hindi/.
text_to_mel_f = TextToMel(
    glow_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/glow_ckp',
    device=device,
)
mel_to_wav_f = MelToWav(
    hifi_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/hifi_ckp',
    device=device,
)

# Male voice: same pair of models, read from translit_models/hindi/male/.
text_to_mel_m = TextToMel(
    glow_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/male/glow_ckp',
    device=device,
)
mel_to_wav_m = MelToWav(
    hifi_model_dir='/home/user/app/vakyansh_tts/tts_infer/translit_models/hindi/male/hifi_ckp',
    device=device,
)


from functools import lru_cache

# Matches a token whose first character is an ASCII letter. Compiled once at
# import time instead of on every request.
_LATIN_INITIAL = re.compile(r'[a-zA-Z]')


@lru_cache(maxsize=None)
def _get_xlit_engine(lang):
    """Return the transliteration engine for ``lang``, building it only once."""
    return XlitEngine(lang)


def translit(text, lang):
    """Transliterate the Latin-script words of ``text`` into ``lang``.

    The text is split on whitespace. Every token whose first character is an
    ASCII letter is replaced by the top-1 result of
    ``XlitEngine.translit_word``; all other tokens are kept unchanged.

    The engine is created lazily and cached per language, so it is neither
    rebuilt on every call nor built at all when no token needs it.

    Args:
        text: Input sentence.
        lang: Target language code (e.g. ``'hi'``); also the key used to read
            the result returned by the engine.

    Returns:
        The processed tokens joined by single spaces (runs of whitespace in
        the input are therefore collapsed).
    """
    words = []
    for word in text.split():
        if _LATIN_INITIAL.match(word):
            # Only now is the engine needed; the cached instance is reused.
            engine = _get_xlit_engine(lang)
            word = engine.translit_word(word, topk=1)[lang][0]
        words.append(word)
    return ' '.join(words)
    
def run_tts(text, gender):
    """Synthesise speech for ``text`` with the selected voice.

    The text is preprocessed in three steps before synthesis: the danda
    ('।') is replaced by '.', numbers are converted to words with
    ``normalize_nums``, and Latin-script words are transliterated with
    ``translit``. Both the raw and the preprocessed text are printed.

    Args:
        text: Sentence entered by the user.
        gender: ``'female'`` selects the female models; any other value
            selects the male models.

    Returns:
        A ``(sr, audio)`` tuple, i.e. the two values returned by
        ``generate_wav`` in swapped order.
    """
    print("Original Text from user: ", text)
    lang = 'hi'

    # The danda replacement applies to the Hindi models only.
    with_full_stops = text.replace('।', '.')
    numbers_spelled_out = normalize_nums(with_full_stops, lang)
    prepared = translit(numbers_spelled_out, lang)
    print("Text after preprocessing: ", prepared)

    # Pick the model pair once, then run a single synthesis path.
    if gender == 'female':
        mel_model, vocoder = text_to_mel_f, mel_to_wav_f
    else:
        mel_model, vocoder = text_to_mel_m, mel_to_wav_m

    mel = mel_model.generate_mel(prepared)
    audio, sr = vocoder.generate_wav(mel)
    return (sr, audio)
    
#_, audio = run_tts('hello my name is harveen')


# --- Gradio UI -------------------------------------------------------------

# Text input for the sentence to synthesise.
textbox = gr.inputs.Textbox(placeholder="Enter Hindi text here", default="", label="TTS")

# Voice selector; its value is passed to run_tts as `gender`.
choices = ['male', 'female']
radioBtns = gr.inputs.Radio(choices, type="value", default='male', label=None)

# Audio output fed by the (sr, audio) tuple that run_tts returns.
op = gr.outputs.Audio(type="numpy", label=None)

# Sample inputs shown in the UI: Devanagari text, romanised Hindi, and a
# sentence containing digits.
examples = [
    [
        'क्रिप्टो करेंसी दरअसल, वित्तीय लेन-देन का एक जरिया है। बिल्कुल भारतीय रुपये और अमेरिकी डॉलर के समान, अंतर सिर्फ इतना है कि यह आभाषी है और दिखाई नहीं देती, न ही आप इसे छू सकते हैं।',
        'male',
    ],
    [
        'mujhe abhi bhi yakeen nai aa raha ki yeh aise bhi chal sakta hai',
        'male',
    ],
    [
        'मुझे 26 रुपए दे दो, फिर मेरे पास 50 रुपए हो जाएंगे',
        'male',
    ],
]

iface = gr.Interface(
    fn=run_tts,
    examples=examples,
    inputs=[textbox, radioBtns],
    outputs=op,
    title='Vakyansh Text To Speech (TTS): Hindi Demo',
    description='Glow TTS + hifi gan. Training Code: https://github.com/Open-Speech-EkStep/vakyansh-tts ',
    article='<br> <b>Note: This space is running on CPU, inference times will be higher. Please report issues to @harveenchadha twitter. </b>',
)

# Start the app with request queueing and example caching enabled.
iface.launch(enable_queue=True, cache_examples=True)