Update app.py
app.py
CHANGED
@@ -0,0 +1,86 @@
+from tts import MaqsamTTS
+import soundfile as sf
+import gradio as gr
+import subprocess
+import random
+import torch
+import os
+import re
+
+# Preparing paths
+BASE_PATH = os.path.dirname(__file__)
+MODEL_PATH_v1 = os.path.join(BASE_PATH, "xtts_v1.1")
+MODEL_PATH_v2 = os.path.join(BASE_PATH, "xtts_v2")
+OUTPUTS_PATH = os.path.join(BASE_PATH, "outputs")
+REFERENCES_PATH = os.path.join(BASE_PATH, "references")
+
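+# HTML for a button that, when clicked, pops a JS alert revealing which
+# model produced each of the shuffled audio outputs.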
+HTML_BUTTON = """
+<br/>
+<div style="text-align: center;">
+    <button type="button" onclick="alert('{}')" style="padding: 10px 20px;
+            font-size: 16px; background-color: #4CAF50; color: white;
+            border: none; cursor: pointer; border-radius: 4px;">
+        Expose model names
+    </button>
+</div>"""
+
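+# Load each model once at startup; the dict key also serves as the
+# output .wav file name for that model.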
+models = {
+    "xtts_v2_wajeez": MaqsamTTS(MODEL_PATH_v2 + "_wajeez", torch.device('cuda:0'))
+}
+
+MODELS_COUNT = len(models)
+
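+# Synthesize `text` with every loaded model, using the chosen speaker's
+# reference recording as the voice to clone.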
+def predict(text, speaker):
+    reference_file_path = os.path.join(REFERENCES_PATH, speaker + ".wav")
+
+    output_paths = []
+    for model_name, model in models.items():
+        wav = model.inference(text, reference_file_path)
+        path = os.path.join(OUTPUTS_PATH, model_name + ".wav")
+        sf.write(path, wav, 24000)
+        output_paths.append(path)
+
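+    # Shuffle the outputs so listeners can't tell which model produced which clip.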
+    random.shuffle(output_paths)
+    actual_models = '\\n'.join([f"- Model {i + 1} is {path.split('/')[-1][:-4]}" for i, path in enumerate(output_paths)])
+    return (text, *output_paths, HTML_BUTTON.format(actual_models))
+
+
+# Get speakers from the references path to build the speakers list
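+# (assumes every file in references/ is a .wav; [:-4] strips the extension)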
+speakers = [speaker[:-4] for speaker in os.listdir(REFERENCES_PATH)]
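+# Each example row must match the Interface inputs below: [text, speaker].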
+examples = [
+    [
+        "اِكْتَشِفْ أَكْثَرَ عَنْ مَجْمُوعَتِنَا الْوَاسِعَةِ مِنْ وَاجِهَاتِ التَّطْبِيقَاتِ الْبَرْمَجِيَّةِ.",
+        "Nour"
+    ],
+    [
+        "بِاسْتِخْدَامِ الأَرْقَامِ الْمَحَلِّيَّةِ، وَسِّعْ نِطَاقَ وُصُولِكَ بِأَرْقَامٍ مِنْ جَمِيعِ أَنْحَاءِ الْعَالَمِ دُونَ الاضْطِرَارِ لِفَتْحِ فُرُوعٍ بِتِلْكَ الأَمَاكِنِ.",
+        "Nour"
+    ]
+]
+
+
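+# Blind-test UI: a text box and a speaker dropdown in; the (possibly
+# preprocessed) text, one audio player per model, and the reveal button out.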
+playground = gr.Interface(
+    fn = predict,
+    inputs = [
+        gr.Textbox(
+            value = "مرحبا كيف حالك؟",
+            label = "Input text",
+            info = "One or two sentences at a time works best. Up to 200 text characters."
+        ),
+        gr.Dropdown(
+            speakers,
+            value = "Nour",
+            label = "Speaker / Reference source",
+            info = "Choose your speaker, or upload / record a new one."
+        ),
+    ],
+    outputs = [gr.Textbox(
+        label = "Synthesized text",
+        info = "The text used as input after preprocessing (if any) is done."
+    )] + [gr.components.Audio(label = f'Model {i + 1}', type = 'filepath') for i in range(MODELS_COUNT)] + [gr.HTML()],
+    examples = examples,
+    cache_examples = False,
+    allow_flagging = 'never'
+)
+
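+# share=True asks Gradio to also serve the app through a temporary public link.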
+playground.launch(share = True)