import spaces
import gradio as gr
import torch
from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
from string import punctuation
import re
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# The model, tokenizer and feature extractor are all loaded from this one
# checkpoint repository.
repo_id = "PHBJT/french_parler_tts_mini_v0.1"
model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)

# Sampling rate that gen_tts() returns together with the generated waveform.
SAMPLE_RATE = feature_extractor.sampling_rate

# NOTE(review): SEED is defined here but not referenced in the visible part
# of this file -- confirm whether generation was meant to be seeded.
SEED = 42

default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
default_description = "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."

# Sample inputs. The strings alternate between a French sentence to speak and
# an English voice description, so they are grouped as [text, description]
# pairs. NOTE(review): the list literal was unterminated in the received
# source; the pair nesting is reconstructed -- confirm against any consumer
# of `examples` (none is visible in this file).
examples = [
    [
        "La voix humaine est un instrument de musique au-dessus de tous les autres.",
        "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
    ],
    [
        "Tout ce qu'un homme est capable d'imaginer, d'autres hommes seront capables de le réaliser.",
        "A male voice delivers a slightly expressive and animated speech with a moderate speed. The recording features a low-pitch voice, creating a close-sounding audio experience.",
    ],
    [
        "La machine elle-même, si perfectionnée qu'on la suppose, n'est qu'un outil.",
        "A male voice provides a monotone yet slightly fast delivery, with a very close recording that almost has no background noise.",
    ],
    [
        "Le progrès fait naître plus de besoins qu'il n'en satisfait.",
        "A female voice, in a very poor recording quality, delivers slightly expressive and animated words with a fast pace. There's a high level of background noise and a very distant-sounding reverberation. The voice is slightly higher pitched than average.",
    ],
]

# Applied to the input text as the first step of preprocess().
number_normalizer = EnglishNumberNormalizer()
def preprocess(text):
    """Clean up raw text before it is tokenized as the TTS prompt.

    Steps, in order:
      1. Run the module-level ``number_normalizer`` over the text and strip
         surrounding whitespace.
      2. Replace every hyphen with a space.
      3. Append a full stop when the text does not already end with a
         punctuation character.
      4. Spell out upper-case abbreviations letter by letter, dropping any
         dots inside them (e.g. ``"TTS"`` -> ``"T T S"``).

    NOTE(review): the normalizer in use is ``EnglishNumberNormalizer`` while
    the prompts of this app are French -- confirm this is intended.

    Args:
        text: The raw text to prepare.

    Returns:
        The cleaned text. An input that is empty after normalisation is
        returned as an empty string instead of raising ``IndexError``.
    """
    text = number_normalizer(text).strip()
    text = text.replace("-", " ")

    # Nothing left to punctuate or expand; indexing text[-1] would fail here.
    if not text:
        return text

    if text[-1] not in punctuation:
        text = f"{text}."

    def separate_abb(match):
        # "U.S.A" -> "USA" -> "U S A"
        return " ".join(match.group(0).replace(".", ""))

    # A single substitution pass rewrites exactly the matched spans. The
    # earlier findall + str.replace approach also rewrote occurrences of an
    # abbreviation embedded in longer tokens (e.g. "US" inside "USA").
    return re.sub(r'\b[A-Z][A-Z\.]+\b', separate_abb, text)
def gen_tts(text, description):
    """Synthesize speech for ``text`` in the voice described by ``description``.

    Args:
        text: The sentence(s) to speak; passed through ``preprocess`` before
            tokenization.
        description: Free-form description of the desired voice; surrounding
            whitespace is stripped before tokenization.

    Returns:
        A ``(SAMPLE_RATE, audio_arr)`` tuple, where ``audio_arr`` is the
        generated output moved to the CPU, converted to a NumPy array and
        squeezed.

    NOTE(review): sampling is enabled (``do_sample=True``) and no seed is set
    inside this function, although ``SEED`` and ``set_seed`` exist at module
    level -- confirm whether reproducible output is expected.
    """
    inputs = tokenizer(description.strip(), return_tensors="pt").to(device)
    prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)

    # The description conditions the model (input_ids); the text to speak is
    # supplied separately as the prompt.
    generation = model.generate(
        input_ids=inputs.input_ids,
        prompt_input_ids=prompt.input_ids,
        attention_mask=inputs.attention_mask,
        prompt_attention_mask=prompt.attention_mask,
        do_sample=True,
        temperature=1.0,
    )

    audio_arr = generation.cpu().numpy().squeeze()
    return SAMPLE_RATE, audio_arr
def extract_text(file):
    """Return the text of the first 10 pages of a PDF as a single string.

    Args:
        file: The PDF to read, in any form accepted by ``pypdf.PdfReader``,
            or ``None`` when no document has been provided.

    Returns:
        The per-page extracted text concatenated without a separator, or an
        empty string when ``file`` is ``None``.
    """
    # Without a document there is nothing to read; return early rather than
    # letting PdfReader fail on None.
    if file is None:
        return ''

    # Imported lazily so pypdf is only required when a PDF is actually read.
    from pypdf import PdfReader

    reader = PdfReader(file)
    return ''.join(page.extract_text() for page in reader.pages[:10])
# Gradio UI: upload a PDF, extract its text, then synthesize the text.
with gr.Blocks() as demo:
    # NOTE(review): the intro names MeloTTS while this file loads a
    # Parler-TTS checkpoint -- confirm the wording. The link target was
    # missing in the received source, so the name is rendered as plain text.
    gr.Markdown("""# PDF reader
Un lecteur pdf construit avec MeloTTS
### Comment l'utiliser ?
1. Téléversez le document pdf à lire.
2. Cliquez sur "Extraire le texte" pour extraire les 10 premières pages.
3. Cliquez sur "Réciter le texte" pour générer l'audio.""")
    with gr.Group():
        speaker_description = gr.Textbox(value='A male voice delivers a slightly expressive and animated speech with a quick speed. The recording features a low-pitch voice, creating a close-sounding audio experience.', label='Description de la voix')
        file = gr.File(label="Document à lire")
        btn_extract = gr.Button('Extraire le texte', variant='primary')
        text = gr.Textbox(label="Texte extrait")
        btn = gr.Button('Réciter le texte', variant='primary')
        audio_out = gr.Audio(label="Parler-TTS generation", type="numpy", elem_id="audio_out")

    # Event wiring, reconstructed from the surviving argument lists and the
    # handler signatures: extract_text(file) fills the text box, and
    # gen_tts(text, description) fills the audio player.
    btn_extract.click(extract_text, inputs=[file], outputs=[text])
    btn.click(gen_tts, inputs=[text, speaker_description], outputs=[audio_out])

demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True, share=True)