french_parler_tts

Running on Zero

App Files Files Community

PHBJT committed on Sep 15, 2024

Commit

7163182

verified ·

1 Parent(s): 26997dc

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -23

app.py CHANGED Viewed

@@ -25,26 +25,26 @@ SEED = 42
 default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
 default_description = "The voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
 examples = [
-    [
-        "La voix humaine est un instrument de musique au-dessus de tous les autres.",
-        "The voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
-        None,
-    ],
-    [
-        "Tout ce qu'un homme est capable d'imaginer, d'autres hommes seront capables de le réaliser.",
-        "A slightly expressive and animated speech with a moderate speed. The recording features a low-pitch voice and slight background noise, creating a close-sounding audio experience.",
-        None,
-    ],
-    [
-        "La machine elle-même, si perfectionnée qu'on la suppose, n'est qu'un outil.",
-        "A monotone yet slightly fast delivery, with a very close recording that almost has no background noise.",
-        None,
-    ],
-    [
-        "Le progrès fait naître plus de besoins qu'il n'en satisfait.",
-        "In a very poor recording quality, the voice delivers slightly expressive and animated words with a fast pace. There's a high level of background noise and a very distant-sounding reverberation. The voice is slightly higher pitched than average.",
-        None,
-    ],
 ]
 number_normalizer = EnglishNumberNormalizer()
@@ -138,11 +138,12 @@ with gr.Blocks(css=css) as block:
        <p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
 high-fidelity text-to-speech (TTS) models.</p>
 <p>The model demonstrated here, French Parler-TTS <a href="https://huggingface.co/PHBJT/french_parler_tts_mini_v0.1">Mini v0.1 French</a>,
-has been fine-tuned on a French dataset. It generates high-quality male speech
-with features that can be controlled using a simple text prompt (e.g. background noise, speaking rate, pitch and reverberation). Please note that this model currently supports only male voices (due to limitations on the dataset).</p>
 <p>By default, Parler-TTS generates 🎲 random male voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
-<p> <b>Important note:</b> this model does NOT work in english, it will generate incoherent audios. But you can still use the original Parler TTS model for that. </p>
         """
     )
     with gr.Row():

 default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
 default_description = "The voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
 examples = [
+[
+"La voix humaine est un instrument de musique au-dessus de tous les autres.",
+"A male voice speaks slowly with a very noisy background, carrying a low-pitch tone and displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
+None,
+],
+[
+"Tout ce qu'un homme est capable d'imaginer, d'autres hommes seront capables de le réaliser.",
+"A female voice delivers a slightly expressive and animated speech with a moderate speed. The recording features a low-pitch voice and slight background noise, creating a close-sounding audio experience.",
+None,
+],
+[
+"La machine elle-même, si perfectionnée qu'on la suppose, n'est qu'un outil.",
+"A male voice provides a monotone yet slightly fast delivery, with a very close recording that almost has no background noise.",
+None,
+],
+[
+"Le progrès fait naître plus de besoins qu'il n'en satisfait.",
+"A female voice, in a very poor recording quality, delivers slightly expressive and animated words with a fast pace. There's a high level of background noise and a very distant-sounding reverberation. The voice is slightly higher pitched than average.",
+None,
+],
 ]
 number_normalizer = EnglishNumberNormalizer()
        <p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
 high-fidelity text-to-speech (TTS) models.</p>
 <p>The model demonstrated here, French Parler-TTS <a href="https://huggingface.co/PHBJT/french_parler_tts_mini_v0.1">Mini v0.1 French</a>,
+has been fine-tuned on a French dataset. It generates high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).
+Due to limitations on the dataset, this model might underperform for female voices.</p>
 <p>By default, Parler-TTS generates 🎲 random male voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
+<p><b>Note:</b> do NOT specify the nationality of the speaker, as it will cause inconsistent audio generation (do: "a male speaker", don't: "a french male speaker").</p>
+<p><b>Important note:</b> this model does NOT work in English; it will generate incoherent audio. You can still use the original Parler-TTS model for that.</p>
         """
     )
     with gr.Row():