Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -25,26 +25,26 @@ SEED = 42
|
|
25 |
default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
|
26 |
default_description = "The voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
|
27 |
examples = [
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
]
|
49 |
number_normalizer = EnglishNumberNormalizer()
|
50 |
|
@@ -138,11 +138,12 @@ with gr.Blocks(css=css) as block:
|
|
138 |
<p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
|
139 |
high-fidelity text-to-speech (TTS) models.</p>
|
140 |
<p>The model demonstrated here, French Parler-TTS <a href="https://huggingface.co/PHBJT/french_parler_tts_mini_v0.1">Mini v0.1 French</a>,
|
141 |
-
has been fine-tuned on a French dataset. It generates high-quality
|
142 |
-
|
143 |
|
144 |
<p>By default, Parler-TTS generates 🎲 random male voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
|
145 |
-
<p
|
|
|
146 |
"""
|
147 |
)
|
148 |
with gr.Row():
|
|
|
25 |
default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
|
26 |
default_description = "The voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
|
27 |
examples = [
|
28 |
+
[
|
29 |
+
"La voix humaine est un instrument de musique au-dessus de tous les autres.",
|
30 |
+
"A male voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
|
31 |
+
None,
|
32 |
+
],
|
33 |
+
[
|
34 |
+
"Tout ce qu'un homme est capable d'imaginer, d'autres hommes seront capables de le réaliser.",
|
35 |
+
"A female voice delivers a slightly expressive and animated speech with a moderate speed. The recording features a low-pitch voice and slight background noise, creating a close-sounding audio experience.",
|
36 |
+
None,
|
37 |
+
],
|
38 |
+
[
|
39 |
+
"La machine elle-même, si perfectionnée qu'on la suppose, n'est qu'un outil.",
|
40 |
+
"A male voice provides a monotone yet slightly fast delivery, with a very close recording that almost has no background noise.",
|
41 |
+
None,
|
42 |
+
],
|
43 |
+
[
|
44 |
+
"Le progrès fait naître plus de besoins qu'il n'en satisfait.",
|
45 |
+
"A female voice, in a very poor recording quality, delivers slightly expressive and animated words with a fast pace. There's a high level of background noise and a very distant-sounding reverberation. The voice is slightly higher pitched than average.",
|
46 |
+
None,
|
47 |
+
],
|
48 |
]
|
49 |
number_normalizer = EnglishNumberNormalizer()
|
50 |
|
|
|
138 |
<p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
|
139 |
high-fidelity text-to-speech (TTS) models.</p>
|
140 |
<p>The model demonstrated here, French Parler-TTS <a href="https://huggingface.co/PHBJT/french_parler_tts_mini_v0.1">Mini v0.1 French</a>,
|
141 |
+
has been fine-tuned on a French dataset. It generates high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).
|
142 |
+
Due to limitations of the dataset, this model might underperform for female voices.</p>
|
143 |
|
144 |
<p>By default, Parler-TTS generates 🎲 random male voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
|
145 |
+
<p><b>Note:</b> do NOT specify the nationality of the speaker, as it will cause inconsistent audio generation (do: "a male speaker", don't: "a French male speaker").</p>
|
146 |
+
<p><b>Important note:</b> this model does NOT work in English; it will generate incoherent audio. You can still use the original Parler-TTS model for that.</p>
|
147 |
"""
|
148 |
)
|
149 |
with gr.Row():
|