|
import gradio as gr |
|
from TTS.api import TTS |
|
import os |
|
import time |
|
import torch |
|
from torch.serialization import add_safe_globals |
|
from TTS.tts.configs.xtts_config import XttsConfig |
|
|
|
|
|
# Register XttsConfig with torch's safe-globals list before the model is
# loaded, so the checkpoint's config class is accepted during deserialization.
add_safe_globals([XttsConfig])

# Identifier of the multilingual XTTS v2 model, loaded once at import time.
XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(XTTS_MODEL_NAME, gpu=False)

# Root directory under which every generated audio file is stored.
output_folder = "output_audio"
os.makedirs(output_folder, exist_ok=True)
|
|
|
def _is_within(root, candidate):
    """Return True when *candidate* resolves to *root* or to a path below it."""
    root_real = os.path.realpath(root)
    candidate_real = os.path.realpath(candidate)
    try:
        return os.path.commonpath([root_real, candidate_real]) == root_real
    except ValueError:
        # Raised on Windows when the two paths live on different drives.
        return False


def predict(prompt, speaker, agree, subfolder_name, file_name):
    """Synthesize *prompt* with the selected voice and save it as a WAV file.

    Args:
        prompt: Text to synthesize.
        speaker: Speaker key; reference clips are the ``.wav`` files in
            ``examples`` whose name before the first underscore equals it.
        agree: Whether the user accepted the terms of use.
        subfolder_name: Optional sub-folder of ``output_folder``; defaults to
            ``session_<unix timestamp>`` when blank.
        file_name: Optional output file name; defaults to ``output.wav`` and
            gets a ``.wav`` suffix appended when it lacks one.

    Returns:
        A ``(100, waveform, output_path)`` tuple: a completion percentage,
        the value returned by ``gr.make_waveform`` for the generated audio,
        and the path of the generated WAV file.

    Raises:
        gr.Error: If the terms are not accepted, the prompt or speaker is
            empty, the requested output location falls outside
            ``output_folder``, or no reference clip exists for the speaker.
    """
    if not agree:
        raise gr.Error("Veuillez accepter les conditions d'utilisation.")

    if not prompt or not prompt.strip():
        raise gr.Error("Veuillez saisir un texte à synthétiser.")

    # The dropdown yields None when no speaker samples are available.
    if not speaker:
        raise gr.Error("Veuillez sélectionner une voix.")

    subfolder_name = (subfolder_name or "").strip() or f"session_{int(time.time())}"

    file_name = (file_name or "").strip() or "output.wav"
    if not file_name.lower().endswith(".wav"):
        file_name += ".wav"

    # Both names come from free-text fields: refuse anything ("..", absolute
    # paths, ...) that would place the file outside the output folder, and
    # check BEFORE creating any directory.
    output_path = os.path.normpath(
        os.path.join(output_folder, subfolder_name, file_name)
    )
    if not _is_within(output_folder, output_path):
        raise gr.Error("Nom de sous-dossier ou de fichier invalide.")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    try:
        # Sorted so the reference clips are passed in a stable order.
        example_files = sorted(os.listdir("examples"))
    except FileNotFoundError:
        example_files = []

    # Match on the same key the speaker list is built from (text before the
    # first underscore) so that "ann" does not also select "anna_*.wav".
    speaker_wav_paths = [
        os.path.join("examples", f)
        for f in example_files
        if f.endswith(".wav") and f.split("_")[0] == speaker
    ]
    if not speaker_wav_paths:
        raise gr.Error(f"Aucun fichier audio trouvé pour le speaker : {speaker}")

    tts.tts_to_file(
        text=prompt,
        file_path=output_path,
        speaker_wav=speaker_wav_paths,
        language="fr",
        split_sentences=False,
    )

    waveform = gr.make_waveform(audio=output_path)

    return 100, waveform, output_path
|
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...): page font/background, form card,
# and primary button colours (normal and hover).
custom_css = """
.gradio-container {
font-family: 'Arial', sans-serif;
background-color: #f0f4f8;
}
.gr-form {
background-color: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-button {
background-color: #4a90e2;
border: none;
}
.gr-button:hover {
background-color: #3a7bc8;
}
"""

# Page heading, rendered inside an <h1> by the layout.
title = "Synthèse Vocale XTTS 🎙️"

# Introductory HTML shown under the heading.
description = """
<h3>Bienvenue sur notre outil de synthèse vocale XTTS !</h3>
<p>Cet outil vous permet de générer une voix naturelle à partir d'un texte en français.
Choisissez une voix, entrez votre texte, et écoutez le résultat !</p>
"""

# Footer HTML linking to the model's terms of use.
article = """
<div style='margin:20px auto; text-align: center;'>
<p>En utilisant cette démo, vous acceptez les conditions d'utilisation du modèle Coqui Public disponibles sur
<a href='https://coqui.ai/cpml' target='_blank'>https://coqui.ai/cpml</a></p>
</div>
"""
|
|
|
|
|
def list_available_speakers(directory="examples"):
    """Return the sorted, de-duplicated speaker names found in *directory*.

    A speaker name is the part of a ``.wav`` file name that precedes the
    first underscore. Sorting keeps the dropdown order (and therefore the
    default voice) identical from one run to the next. A missing directory
    yields an empty list instead of crashing at start-up.
    """
    try:
        file_names = os.listdir(directory)
    except FileNotFoundError:
        return []
    return sorted(
        {name.split("_")[0] for name in file_names if name.endswith(".wav")}
    )


available_speakers = list_available_speakers()
|
|
|
# Build the page: heading, input form, generate button, outputs and footer.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    with gr.Row():
        # Wide column: the text to synthesize.
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Texte pour la synthèse vocale",
                info="Une ou deux phrases à la fois sont préférables* (max : 10)",
                placeholder="Bonjour ! Comment allez-vous aujourd'hui ?",
                lines=10,
            )
        # Narrow column: voice choice, consent and output naming.
        with gr.Column(scale=1):
            speaker = gr.Dropdown(
                label="Voix",
                choices=available_speakers,
                value=available_speakers[0] if available_speakers else None,
            )
            agree = gr.Checkbox(
                label="J'accepte les conditions d'utilisation",
                value=True,
            )
            subfolder_name = gr.Textbox(
                label="Nom du sous-dossier (facultatif)",
                placeholder="Nom du sous-dossier pour stocker l'audio",
            )
            file_name = gr.Textbox(
                label="Nom du fichier (facultatif)",
                placeholder="Nom du fichier audio généré",
            )

    generate_btn = gr.Button("Générer la voix", variant="primary")

    # gr.Progress is a progress tracker, not a component, so it cannot be
    # listed in `outputs`. A read-only 0-100 slider receives the completion
    # percentage that predict returns as its first value.
    progress = gr.Slider(
        minimum=0,
        maximum=100,
        value=0,
        step=1,
        label="Progression (%)",
        interactive=False,
    )

    with gr.Row():
        audio_output = gr.Audio(label="Audio généré")
        waveform_output = gr.Video(label="Forme d'onde")

    # predict returns (percentage, waveform, audio path), in that order.
    generate_btn.click(
        predict,
        inputs=[prompt, speaker, agree, subfolder_name, file_name],
        outputs=[progress, waveform_output, audio_output],
    )

    gr.Markdown(article)

demo.launch(debug=True)