muhyzatar committed on
Commit
39a0c0a
•
1 Parent(s): f8bbc39

Update app.py

Files changed (1)
  1. app.py +86 -0
app.py CHANGED
@@ -0,0 +1,86 @@
+ from tts import MaqsamTTS
+ import soundfile as sf
+ import gradio as gr
+ import random
+ import torch
+ import os
+
+ # Preparing paths
+ BASE_PATH = os.path.dirname(__file__)
+ MODEL_PATH_v1 = os.path.join(BASE_PATH, "xtts_v1.1")
+ MODEL_PATH_v2 = os.path.join(BASE_PATH, "xtts_v2")
+ OUTPUTS_PATH = os.path.join(BASE_PATH, "outputs")
+ REFERENCES_PATH = os.path.join(BASE_PATH, "references")
+
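+ # HTML for a button that pops up a JavaScript alert revealing which model
+ # produced which clip; the placeholder is filled via str.format in predict().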
+ HTML_BUTTON = """
+ <br/>
+ <div style="text-align: center;">
+     <button type="button" onclick="alert('{}')" style="padding: 10px 20px;
+         font-size: 16px; background-color: #4CAF50; color: white;
+         border: none; cursor: pointer; border-radius: 4px;">
+         Expose model names
+     </button>
+ </div>"""
+
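+ # Display name -> loaded TTS model; the UI scales with the number of entries here.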
+ models = {
+     "xtts_v2_wajeez": MaqsamTTS(MODEL_PATH_v2 + "_wajeez", torch.device('cuda:0'))
+ }
+
+ MODELS_COUNT = len(models)
+
+ def predict(text, speaker):
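+     """Synthesize `text` once per loaded model using `speaker`'s reference
+     audio, and return the clips in shuffled order for blind comparison."""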
+     reference_file_path = os.path.join(REFERENCES_PATH, speaker + ".wav")
+
+     output_paths = []
+     for model_name, model in models.items():
+         wav = model.inference(text, reference_file_path)
+         path = os.path.join(OUTPUTS_PATH, model_name + ".wav")
+         sf.write(path, wav, 24000)  # XTTS models generate audio at 24 kHz
+         output_paths.append(path)
+
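+     # Shuffle so the listener cannot tell which model produced which clip;
+     # the HTML button below reveals the mapping on demand.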
+     random.shuffle(output_paths)
+     # '\\n' is a literal backslash-n, which the JavaScript alert renders as a line break
+     actual_models = '\\n'.join([f"- Model {i + 1} is {os.path.basename(path)[:-4]}" for i, path in enumerate(output_paths)])
+     return (text, *output_paths, HTML_BUTTON.format(actual_models))
+
+
+ # Build the speakers list from the reference WAV files (strip the ".wav" extension)
+ speakers = [speaker[:-4] for speaker in os.listdir(REFERENCES_PATH)]
+ examples = [
+     # Each row supplies values for the two inputs: text and speaker.
+     [
+         # "Discover more about our wide range of application programming interfaces."
+         "اكْتَشِفْ أكْثَرْ عَنْ مَجْمُوعَتِنَا الوَاسِعَةِ مِنْ وَاجِهَاتِ التَّطْبِيقَاتِ البَرْمَجِيَّةِ.",
+         "Nour"
+     ],
+     [
+         # "Using local numbers, expand your reach with numbers from all over the world without having to open branches in those places."
+         "بِاسْتِخْدَامِ الأَرْقَامِ المَحَلِّيَّةِ، وَسِّعْ نِطَاقَ وُصُولِكَ بِأَرْقَامٍ مِنْ جَمِيعِ أنْحَاءِ العَالَمِ دُونَ الاضْطِرَارِ لِفَتْحِ فُرُوعٍ بِتِلْكَ الأَمَاكِنْ.",
+         "Nour"
+     ]
+ ]
+
+
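+ # Blind-comparison UI: two inputs (text + speaker), and as outputs the
+ # preprocessed text, one audio player per model, and the reveal button.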
+ playground = gr.Interface(
+     fn = predict,
+     inputs = [
+         gr.Textbox(
+             value = "مرحبا كيف حالك؟",
+             label = "Input text",
+             info = "One or two sentences at a time works best. Up to 200 characters."
+         ),
+         gr.Dropdown(
+             speakers,
+             value = "Nour",
+             label = "Speaker / Reference source",
+             info = "Choose the reference speaker."
+         ),
+     ],
+     outputs = [gr.Textbox(
+         label = "Synthesized text",
+         info = "The input text after preprocessing (if any)."
+     )] + [gr.components.Audio(label = f'Model {i + 1}', type = 'filepath') for i in range(MODELS_COUNT)] + [gr.HTML()],
+     examples = examples,
+     cache_examples = False,
+     allow_flagging = 'never'
+ )
+
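+ # share=True additionally serves the app through a temporary public gradio.live link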
+ playground.launch(share = True)