Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import spaces
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
|
@@ -46,27 +45,19 @@ examples = [
|
|
46 |
|
47 |
number_normalizer = EnglishNumberNormalizer()
|
48 |
|
49 |
-
|
50 |
def preprocess(text):
|
51 |
text = number_normalizer(text).strip()
|
52 |
-
if
|
53 |
-
text
|
54 |
|
55 |
abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
|
56 |
|
57 |
-
def separate_abb(
|
58 |
-
|
59 |
-
print(chunk)
|
60 |
-
return " ".join(chunk)
|
61 |
|
62 |
-
|
63 |
-
for abv in abbreviations:
|
64 |
-
if abv in text:
|
65 |
-
text = text.replace(abv, separate_abb(abv))
|
66 |
return text
|
67 |
|
68 |
-
|
69 |
-
@spaces.GPU
|
70 |
def gen_tts(text, description):
|
71 |
inputs = tokenizer(description, return_tensors="pt").to(device)
|
72 |
prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
|
@@ -77,7 +68,6 @@ def gen_tts(text, description):
|
|
77 |
|
78 |
return SAMPLE_RATE, audio_arr
|
79 |
|
80 |
-
|
81 |
css = """
|
82 |
#share-btn-container {
|
83 |
display: flex;
|
@@ -114,38 +104,40 @@ css = """
|
|
114 |
display: none !important;
|
115 |
}
|
116 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
with gr.Blocks(css=css) as block:
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
<div
|
122 |
-
style="
|
123 |
-
display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
|
124 |
-
"
|
125 |
-
>
|
126 |
-
<h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
|
127 |
-
Parler-TTS: Expresso ☕️️
|
128 |
-
</h1>
|
129 |
-
</div>
|
130 |
-
</div>
|
131 |
-
"""
|
132 |
-
)
|
133 |
-
gr.HTML(
|
134 |
-
f"""
|
135 |
-
<p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
|
136 |
-
is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
|
137 |
-
It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
|
138 |
-
|
139 |
-
<p>Tips for ensuring good generation:
|
140 |
-
<ul>
|
141 |
-
<li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
|
142 |
-
<li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
|
143 |
-
<li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
|
144 |
-
<li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
|
145 |
-
</ul>
|
146 |
-
</p>
|
147 |
-
"""
|
148 |
-
)
|
149 |
with gr.Row():
|
150 |
with gr.Column():
|
151 |
input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
|
@@ -168,4 +160,4 @@ with gr.Blocks(css=css) as block:
|
|
168 |
)
|
169 |
|
170 |
block.queue()
|
171 |
-
block.launch(share=True)
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
|
|
|
45 |
|
46 |
number_normalizer = EnglishNumberNormalizer()
|
47 |
|
|
|
48 |
def preprocess(text):
|
49 |
text = number_normalizer(text).strip()
|
50 |
+
if not text.endswith(punctuation):
|
51 |
+
text += "."
|
52 |
|
53 |
abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
|
54 |
|
55 |
+
def separate_abb(match):
|
56 |
+
return match.group(0).replace(".", " ")
|
|
|
|
|
57 |
|
58 |
+
text = re.sub(abbreviations_pattern, separate_abb, text)
|
|
|
|
|
|
|
59 |
return text
|
60 |
|
|
|
|
|
61 |
def gen_tts(text, description):
|
62 |
inputs = tokenizer(description, return_tensors="pt").to(device)
|
63 |
prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
|
|
|
68 |
|
69 |
return SAMPLE_RATE, audio_arr
|
70 |
|
|
|
71 |
css = """
|
72 |
#share-btn-container {
|
73 |
display: flex;
|
|
|
104 |
display: none !important;
|
105 |
}
|
106 |
"""
|
107 |
+
|
108 |
+
html_blocks = [
|
109 |
+
"""
|
110 |
+
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
|
111 |
+
<div
|
112 |
+
style="
|
113 |
+
display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
|
114 |
+
"
|
115 |
+
>
|
116 |
+
<h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
|
117 |
+
Parler-TTS: Expresso ☕️️
|
118 |
+
</h1>
|
119 |
+
</div>
|
120 |
+
</div>
|
121 |
+
""",
|
122 |
+
f"""
|
123 |
+
<p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
|
124 |
+
is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
|
125 |
+
It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
|
126 |
+
<p>Tips for ensuring good generation:
|
127 |
+
<ul>
|
128 |
+
<li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
|
129 |
+
<li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
|
130 |
+
<li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
|
131 |
+
<li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
|
132 |
+
</ul>
|
133 |
+
</p>
|
134 |
+
"""
|
135 |
+
]
|
136 |
+
|
137 |
with gr.Blocks(css=css) as block:
|
138 |
+
for html_block in html_blocks:
|
139 |
+
gr.HTML(html_block)
|
140 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
with gr.Row():
|
142 |
with gr.Column():
|
143 |
input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
|
|
|
160 |
)
|
161 |
|
162 |
block.queue()
|
163 |
+
block.launch(share=True)
|