parler-tts-expresso

Runtime error

App Files Files Community

DHEIVER committed on Jun 13

Commit

197bc3e

•

1 Parent(s): 76a0ee2

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -47

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import spaces
 import gradio as gr
 import torch
 from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
@@ -46,27 +45,19 @@ examples = [
 number_normalizer = EnglishNumberNormalizer()
 def preprocess(text):
     text = number_normalizer(text).strip()
-    if text[-1] not in punctuation:
-        text = f"{text}."
     abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
-    def separate_abb(chunk):
-        chunk = chunk.replace(".", "")
-        print(chunk)
-        return " ".join(chunk)
-    abbreviations = re.findall(abbreviations_pattern, text)
-    for abv in abbreviations:
-        if abv in text:
-            text = text.replace(abv, separate_abb(abv))
     return text
-@spaces.GPU
 def gen_tts(text, description):
     inputs = tokenizer(description, return_tensors="pt").to(device)
     prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
@@ -77,7 +68,6 @@ def gen_tts(text, description):
     return SAMPLE_RATE, audio_arr
 css = """
         #share-btn-container {
             display: flex;
@@ -114,38 +104,40 @@ css = """
             display: none !important;
         }
 """
 with gr.Blocks(css=css) as block:
-    gr.HTML(
-        """
-            <div style="text-align: center; max-width: 700px; margin: 0 auto;">
-              <div
-                style="
-                  display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
-                "
-              >
-                <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
-                  Parler-TTS: Expresso ☕️️
-                </h1>
-              </div>
-            </div>
-        """
-    )
-    gr.HTML(
-        f"""
-        <p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
-        is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
-        It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
-        <p>Tips for ensuring good generation:
-        <ul>
-            <li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
-            <li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
-            <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
-            <li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
-        </ul>
-        </p>
-        """
-    )
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
@@ -168,4 +160,4 @@ with gr.Blocks(css=css) as block:
     )
 block.queue()
-block.launch(share=True)

 import gradio as gr
 import torch
 from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
 number_normalizer = EnglishNumberNormalizer()
 def preprocess(text):
     text = number_normalizer(text).strip()
+    if not text.endswith(punctuation):
+        text += "."
     abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
+    def separate_abb(match):
+        return match.group(0).replace(".", " ")
+    text = re.sub(abbreviations_pattern, separate_abb, text)
     return text
 def gen_tts(text, description):
     inputs = tokenizer(description, return_tensors="pt").to(device)
     prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
     return SAMPLE_RATE, audio_arr
 css = """
         #share-btn-container {
             display: flex;
             display: none !important;
         }
 """
+html_blocks = [
+    """
+    <div style="text-align: center; max-width: 700px; margin: 0 auto;">
+      <div
+        style="
+          display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
+        "
+      >
+        <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
+          Parler-TTS: Expresso ☕️️
+        </h1>
+      </div>
+    </div>
+    """,
+    f"""
+    <p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
+    is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
+    It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
+    <p>Tips for ensuring good generation:
+    <ul>
+        <li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
+        <li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
+        <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
+        <li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
+    </ul>
+    </p>
+    """
+]
 with gr.Blocks(css=css) as block:
+    for html_block in html_blocks:
+        gr.HTML(html_block)
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
     )
 block.queue()
+block.launch(share=True)