mafoaurelie committed on
Commit
be0bf55
1 Parent(s): 8c93416

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -1,17 +1,42 @@
1
- import transformers
2
- from transformers import pipeline
3
- text_speech=pipeline(model="suno/bark")
 
 
 
4
  import gradio as gr
5
- def text_to_speech(message):
6
- texte = text_speech(message)
7
 
8
- return texte
 
 
 
 
 
9
 
10
- iface = gr.Interface(text_to_speech, inputs = 'text',
11
- outputs = 'audio',
12
- title = 'text to Audio Application',
13
- description = 'A simple application to convert PDF files in audio speech. Upload your own file, or click one of the examples to load them.',
14
- )
15
 
16
- iface.launch()
 
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datasets
2
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
3
+ from datasets import load_dataset
4
+ import torch
5
+ import soundfile as sf
6
+ import numpy as np
7
  import gradio as gr
8
+ import io
9
+ import sentencepiece
10
 
11
+ # Load the processor, TTS model, vocoder and speaker embedding once at import time so they are not reloaded on every call
12
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
13
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
14
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
15
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
16
+ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)  # unsqueeze(0) adds a leading axis; index 7306 is hard-coded (presumably it selects the speaker voice -- confirm against the dataset)
17
 
 
 
 
 
 
18
 
19
+ def text_to_speech(text):  # Convert `text` to speech and return the audio as WAV-encoded bytes
20
+ # Preprocess the text with the SpeechT5 processor, requesting PyTorch tensors (return_tensors="pt")
21
+ inputs = processor(text=text, return_tensors="pt")
22
 
23
+ # Generate the speech from the input ids, passing the module-level speaker embedding and vocoder
24
+ speech = model.generate_speech(
25
+ inputs["input_ids"], speaker_embeddings, vocoder=vocoder
26
+ )
27
+
28
+ # Write the audio as WAV into an in-memory buffer, declaring a 16000 Hz sample rate
29
+ buffer = io.BytesIO()
30
+ sf.write(buffer, speech.numpy(), samplerate=16000, format="WAV")
31
+
32
+ return buffer.getvalue()  # NOTE(review): raw bytes are handed to gr.Audio; confirm the installed Gradio version accepts bytes (a (sample_rate, ndarray) tuple is the other documented form)
33
+
34
+
35
+ # Build the Gradio interface (text input, audio output) around text_to_speech and launch it immediately
36
+ interface = gr.Interface(
37
+ fn=text_to_speech,
38
+ inputs="text",
39
+ outputs=gr.Audio(label="Processed Audio"),
40
+ title="Application du type Text to speech",
41
+ description="Entrez un texte en anglais et l'application va la traduire en audio"
42
+ ).launch()  # NOTE: `interface` is bound to the return value of launch(), not to the gr.Interface object itself