muhyzatar committed on
Commit
39a0c0a
•
1 Parent(s): f8bbc39

Update app.py

Files changed (1)
  1. app.py +86 -0
app.py CHANGED
@@ -0,0 +1,86 @@
+ from tts import MaqsamTTS
+ import soundfile as sf
+ import gradio as gr
+ import random
+ import torch
+ import os
+
+ # Preparing paths
+ BASE_PATH = os.path.dirname(__file__)
+ MODEL_PATH_v1 = os.path.join(BASE_PATH, "xtts_v1.1")
+ MODEL_PATH_v2 = os.path.join(BASE_PATH, "xtts_v2")
+ OUTPUTS_PATH = os.path.join(BASE_PATH, "outputs")
+ REFERENCES_PATH = os.path.join(BASE_PATH, "references")
+
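+ # HTML for a button that pops up a JavaScript alert revealing which model
+ # produced which clip; the placeholder is filled via str.format in predict().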
+ HTML_BUTTON = """
+ <br/>
+ <div style="text-align: center;">
+     <button type="button" onclick="alert('{}')" style="padding: 10px 20px;
+         font-size: 16px; background-color: #4CAF50; color: white;
+         border: none; cursor: pointer; border-radius: 4px;">
+         Expose model names
+     </button>
+ </div>"""
+
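+ # Display name -> loaded TTS model; the UI scales with the number of entries here.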
+ models = {
+     "xtts_v2_wajeez": MaqsamTTS(MODEL_PATH_v2 + "_wajeez", torch.device('cuda:0'))
+ }
+
+ MODELS_COUNT = len(models)
+
+ def predict(text, speaker):
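+     """Synthesize `text` once per loaded model using `speaker`'s reference
+     audio, and return the clips in shuffled order for blind comparison."""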
+     reference_file_path = os.path.join(REFERENCES_PATH, speaker + ".wav")
+
+     output_paths = []
+     for model_name, model in models.items():
+         wav = model.inference(text, reference_file_path)
+         path = os.path.join(OUTPUTS_PATH, model_name + ".wav")
+         sf.write(path, wav, 24000)  # XTTS models generate audio at 24 kHz
+         output_paths.append(path)
+
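+     # Shuffle so the listener cannot tell which model produced which clip;
+     # the HTML button below reveals the mapping on demand.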
+     random.shuffle(output_paths)
+     # '\\n' is a literal backslash-n, which the JavaScript alert renders as a line break
+     actual_models = '\\n'.join([f"- Model {i + 1} is {os.path.basename(path)[:-4]}" for i, path in enumerate(output_paths)])
+     return (text, *output_paths, HTML_BUTTON.format(actual_models))
+
+
+ # Build the speakers list from the reference WAV files (strip the ".wav" extension)
+ speakers = [speaker[:-4] for speaker in os.listdir(REFERENCES_PATH)]
+ examples = [
+     # Each row supplies values for the two inputs: text and speaker.
+     [
+         # "Discover more about our wide range of application programming interfaces."
+         "اكْتَشِفْ أكْثَرْ عَنْ مَجْمُوعَتِنَا الوَاسِعَةِ مِنْ وَاجِهَاتِ التَّطْبِيقَاتِ البَرْمَجِيَّةِ.",
+         "Nour"
+     ],
+     [
+         # "Using local numbers, expand your reach with numbers from all over the world without having to open branches in those places."
+         "بِاسْتِخْدَامِ الأَرْقَامِ المَحَلِّيَّةِ، وَسِّعْ نِطَاقَ وُصُولِكَ بِأَرْقَامٍ مِنْ جَمِيعِ أنْحَاءِ العَالَمِ دُونَ الاضْطِرَارِ لِفَتْحِ فُرُوعٍ بِتِلْكَ الأَمَاكِنْ.",
+         "Nour"
+     ]
+ ]
+
+
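+ # Blind-comparison UI: two inputs (text + speaker), and as outputs the
+ # preprocessed text, one audio player per model, and the reveal button.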
+ playground = gr.Interface(
+     fn = predict,
+     inputs = [
+         gr.Textbox(
+             value = "مرحبا كيف حالك؟",
+             label = "Input text",
+             info = "One or two sentences at a time works best. Up to 200 characters."
+         ),
+         gr.Dropdown(
+             speakers,
+             value = "Nour",
+             label = "Speaker / Reference source",
+             info = "Choose the reference speaker."
+         ),
+     ],
+     outputs = [gr.Textbox(
+         label = "Synthesized text",
+         info = "The input text after preprocessing (if any)."
+     )] + [gr.components.Audio(label = f'Model {i + 1}', type = 'filepath') for i in range(MODELS_COUNT)] + [gr.HTML()],
+     examples = examples,
+     cache_examples = False,
+     allow_flagging = 'never'
+ )
+
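+ # share=True additionally serves the app through a temporary public gradio.live link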
+ playground.launch(share = True)