File size: 2,685 Bytes
d6b40b1
 
 
 
 
752aa18
d6b40b1
eb13ebc
 
 
 
f90bdf9
eb13ebc
 
 
0b6d3e5
 
e5afc71
 
f90bdf9
 
c87b7a4
72d900d
c87b7a4
 
2f9a388
972f7a1
 
c87b7a4
2f9a388
c87b7a4
972f7a1
60444aa
 
86b2f53
eb13ebc
 
 
 
 
 
0b6d3e5
 
 
 
e5afc71
 
 
 
f90bdf9
 
c87b7a4
 
d6b40b1
c87b7a4
f90bdf9
c87b7a4
752aa18
d6b40b1
752aa18
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
import os
from TTS.api import TTS
import time

# Number of requests handled since the last /tmp cleanup; audio_tts increments
# it on every call and resets it to 0 once it exceeds 150.
count = 0

"""
For Fairseq models, use the following name format: tts_models/<lang-iso_code>/fairseq/vits. You can find the language ISO codes here and learn about the Fairseq models here.
https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html
"""

def _load_fairseq_vits(lang_code):
    """Return the CPU-only Fairseq VITS model for the given language ISO code."""
    return TTS(f"tts_models/{lang_code}/fairseq/vits", gpu=False)


# One model per language, all loaded eagerly when the module is imported.
api_rus = _load_fairseq_vits("rus")  # Russian
api_eng = _load_fairseq_vits("eng")  # English
api_spa = _load_fairseq_vits("spa")  # Spanish
api_fas = _load_fairseq_vits("fas")  # Persian
api_tur = _load_fairseq_vits("tur")  # Turkish
api_deu = _load_fairseq_vits("deu")  # German, Standard
api_ara = _load_fairseq_vits("ara")  # Arabic
api_por = _load_fairseq_vits("por")  # Portuguese


def audio_tts(txt, language, audio_file):
    """Speak ``txt`` in ``language`` using the voice from ``audio_file``.

    Every call bumps the module-level ``count``. Once it exceeds 150 the
    function sleeps for 30 seconds, wipes ``/tmp``, resets the counter and
    aborts the current request with a ``gr.Error`` so the user can retry.

    Args:
        txt: Text to synthesize.
        language: Code selecting one of the models loaded at module level
            ("rus", "eng", "spa", "fas", "tur", "deu", "ara" or "por").
        audio_file: Reference speaker recording passed as ``speaker_wav``
            for voice conversion.

    Returns:
        The path of the generated file, ``"ouptut.wav"``.

    Raises:
        gr.Error: After the periodic cleanup ran in this call, or when
            ``language`` has no loaded model.
    """
    global count
    count += 1
    print(f"Count: {count}")
    if count > 150:
        time.sleep(30)
        os.system("rm -R /tmp/*")
        print(f"Reset count: {count}")
        count = 0
        # Gradio only surfaces gr.Error when it is raised; merely constructing
        # it (as before) did nothing.
        # NOTE(review): the wipe above presumably also removes this request's
        # uploaded file if Gradio keeps uploads under /tmp, so aborting here
        # is the safe outcome -- confirm.
        raise gr.Error("Reset counter")

    # TTS with on-the-fly voice conversion.
    print(f"Language: {language}")
    models_by_language = {
        "rus": api_rus,
        "eng": api_eng,
        "spa": api_spa,
        "fas": api_fas,
        "tur": api_tur,
        "deu": api_deu,
        "ara": api_ara,
        "por": api_por,
    }
    api = models_by_language.get(language)
    if api is None:
        # Previously an unknown code fell through and returned whatever
        # "ouptut.wav" was left over from an earlier request (or nothing).
        raise gr.Error(f"Unsupported language: {language}")

    output_path = "ouptut.wav"
    api.tts_with_vc_to_file(txt, speaker_wav=audio_file, file_path=output_path)
    return output_path
    

# UI components, created in the same order as they appear in the interface.
text_box = gr.Textbox(label="Input text TTS", value="Привет! Я Макс.")
language_picker = gr.Dropdown(
    choices=["rus", "eng", "spa", "fas", "tur", "deu", "ara", "por"],
    label="Language",
    value="rus",
)
speaker_audio = gr.Audio(source="upload", type="filepath", label="Input audio")
result_audio = gr.Audio(source="upload", type="filepath", label="Output audio")

# Text, language and a reference recording go in; the synthesized audio comes out.
demo = gr.Interface(
    fn=audio_tts,
    inputs=[text_box, language_picker, speaker_audio],
    outputs=result_audio,
)

# Enable the request queue with concurrency_count=1, then start the app with
# show_error=True.
queued_demo = demo.queue(concurrency_count=1)
queued_demo.launch(show_error=True)