te-ch committed
Commit 23ce701 · Parent(s): f64d86f
removed junk

Files changed:
- Dockerfile +2 -12
- app.py +6 -92
- requirements.txt +0 -2
Dockerfile CHANGED

@@ -5,9 +5,9 @@ RUN apt-get update && apt-get install -y gnupg && \
     echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
     echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
     apt-get update && \
-    apt-get -y install
+    apt-get -y install lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev libatlas-base-dev gfortran
 
-RUN git clone -b ca-
+RUN git clone -b ca-pr https://github.com/projecte-aina/espeak-ng
 
 RUN cd espeak-ng && \
     ./autogen.sh && \
@@ -19,7 +19,6 @@ RUN useradd -m -u 1000 user
 
 USER user
 
-
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
@@ -31,15 +30,6 @@ COPY --chown=user models models
 
 RUN pip install -r requirements.txt
 
-RUN git clone https://github.com/jaywalnut310/vits.git && \
-    cd vits && sed s/torch==1.6.0/torch==1.7.0/ requirements.txt > requirements.txt && pip install -r requirements.txt && cd monotonic_align && \
-    python setup.py build_ext --inplace && cd /home/user
-
-ENV PYTHONPATH=$PYTHONPATH:/home/user/app/vits
-
-COPY --chown=user engine.py .
-COPY --chown=user mms.py .
-COPY --chown=user festival.py .
 COPY --chown=user app.py .
 
 RUN mkdir -p cache && chmod 777 cache
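After this commit the image only builds the projecte-aina fork of espeak-ng (branch ca-pr) and installs the app's Python requirements; the VITS, Festival and MMS build steps are gone. A minimal sketch of how the remaining app exercises that espeak-ng build through espeak_phonemizer, the same calls app.py makes (the sample sentence here is arbitrary):

from espeak_phonemizer import Phonemizer

# Catalan phonemizer backed by the espeak-ng build installed in the image
# (the projecte-aina fork, branch ca-pr).
fonemitzador = Phonemizer("ca")

# Same call app.py uses; prints the phoneme string for an arbitrary sample sentence.
print(fonemitzador.phonemize("Bon dia, com estàs?", keep_clause_breakers=True))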
app.py CHANGED

@@ -1,111 +1,30 @@
 import tempfile
 import gradio as gr
 import os
-from TTS.utils.synthesizer import Synthesizer
 from espeak_phonemizer import Phonemizer
-from engine import Piper
-from festival import festival_synthesize
-from mms import MMS
 
 MAX_TXT_LEN = 325
 
 fonemitzador = Phonemizer("ca")
 
-def carrega_bsc():
-    model_path = os.getcwd() + "/models/bsc/best_model.pth"
-    config_path = os.getcwd() + "/models/bsc/config.json"
-    speakers_file_path = os.getcwd() + "/models/bsc/speakers.pth"
-    vocoder_path = None
-    vocoder_config_path = None
-
-    synthesizer = Synthesizer(
-        model_path, config_path, speakers_file_path, None, vocoder_path, vocoder_config_path,
-    )
-
-    return synthesizer
-
-def carrega_collectivat():
-    model_path = os.getcwd() + "/models/collectivat/fast-speech_best_model.pth"
-    config_path = os.getcwd() + "/models/collectivat/fast-speech_config.json"
-    vocoder_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
-    vocoder_config_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_config.json"
-    synthesizer = Synthesizer(
-        model_path, config_path, None, None, vocoder_path, vocoder_config_path
-    )
-
-    return synthesizer
-
-def carrega_piper():
-    return Piper(os.getcwd() + "/models/piper/ca-upc_ona-x-low.onnx")
-
-def carrega_mms():
-    return MMS(os.getcwd() + "/models/mms")
-
-
-model_bsc = carrega_bsc()
-SPEAKERS = model_bsc.tts_model.speaker_manager.speaker_names
-
-model_collectivat = carrega_collectivat()
-
-model_piper = carrega_piper()
-
-model_mms = carrega_mms()
-
 request_count = 0
 
-def
+def phonemiser(text):
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
     print(text)
 
     # synthesize
-    wav_bsc = model_bsc.tts(text, speaker_idx)
-    wav_coll = model_collectivat.tts(text)
-    wav_piper = model_piper.synthesize(text)
-
-    fp_bsc = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_bsc.save_wav(wav_bsc, fp)
-        fp_bsc = fp.name
-
-    fp_coll = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_collectivat.save_wav(wav_coll, fp)
-        fp_coll = fp.name
-
-    fp_piper = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        fp.write(wav_piper)
-        fp_piper = fp.name
-
-    fp_mms = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_mms.synthesize(fp.name, text)
-        fp_mms = fp.name
 
     fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
 
-    fp_festival = festival_synthesize(text, festival_voice)
-
     global request_count
     request_count += 1
     print(f"Requests: {request_count}")
-    return fonemes
-
+    return fonemes
 
 description="""
-Amb aquesta aplicació podeu sintetitzar text a veu amb els últims models neuronals lliures pel català i amb el motor Festival.
-
-1. Model multi-parlant VITS entrenat pel BSC (Projecte Aina) [enllaç](https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker)
-2. Model Fastspeech entrenat per Col·lectivat [enllaç](https://github.com/CollectivaT-dev/TTS-API)
-3. Model VITS entrenat per Piper/Home Assistant [enllaç](https://github.com/rhasspy/piper)
-3. Model VITS entrenat per Meta (llicència CC-BY-NC) [enllaç](https://github.com/facebookresearch/fairseq/tree/main/examples/mms)
-
-El primer model ha estat entrenat amb totes les veus de FestCAT, els talls de Common Voice 8 i un altre corpus pel que conté moltes veus de qualitat variable. La veu d'Ona està seleccionada per defecte per la comparativa però podeu provar les altres.
-Els models 2 i 3 han estat entrenats amb la veu d'Ona de FestCAT.
-El model 4, anomenat MMS, de Meta (Facebook) ha estat entrenat a partir de dades d'un [audiollibre](http://live.bible.is/bible/CATBSS/LUK/1) de la Bíblia
-
 Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
 https://github.com/projecte-aina/espeak-ng
 
@@ -114,23 +33,18 @@ NOTA: El model de col·lectivat treballa amb grafemes pel que no fa servir espeak
 article= ""
 
 iface = gr.Interface(
-    fn=tts,
     inputs=[
         gr.Textbox(
             label="Text",
             value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
         ),
-        gr.Dropdown(label="
-
+        gr.Dropdown(label="dialect", choices="")
+
     ],
     outputs=[
-        gr.Markdown(label="Fonemes")
-        gr.Audio(label="Festival",type="filepath"),
-        gr.Audio(label="BSC VITS",type="filepath"),
-        gr.Audio(label="Collectivat Fastspeech",type="filepath"),
-        gr.Audio(label="Piper VITS",type="filepath"),
-        gr.Audio(label="Meta MMS VITS",type="filepath")
+        gr.Markdown(label="Fonemes")
     ],
+
     title="Comparativa de síntesi lliure en català️",
     description=description,
     article=article,
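For reference, the slimmed-down app.py reassembled from the kept and added lines above looks roughly like this. It is a sketch, not the exact file: the diff drops fn=tts without showing a replacement, so wiring fn=phonemiser into gr.Interface is an assumption, the description string is abridged, and the trailing launch() call is assumed to sit outside the changed hunks.

import tempfile  # still imported in the diff, though no longer used after the cleanup
import gradio as gr
import os
from espeak_phonemizer import Phonemizer

MAX_TXT_LEN = 325

fonemitzador = Phonemizer("ca")

request_count = 0

def phonemiser(text):
    # Truncate over-long inputs, as in the original tts() function.
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Only the espeak-based phonemization survives the cleanup.
    fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)

    global request_count
    request_count += 1
    print(f"Requests: {request_count}")
    return fonemes

# Abridged: the real string keeps the espeak/Carme Armentano note and the NOTA line.
description = """
Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
https://github.com/projecte-aina/espeak-ng
"""
article = ""

iface = gr.Interface(
    fn=phonemiser,  # assumption: the diff removes fn=tts without showing its replacement
    inputs=[
        gr.Textbox(
            label="Text",
            value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
        ),
        gr.Dropdown(label="dialect", choices=""),  # as added in the diff; empty choices for now
    ],
    outputs=[
        gr.Markdown(label="Fonemes"),
    ],
    title="Comparativa de síntesi lliure en català️",
    description=description,
    article=article,
)

iface.launch()  # assumption: the launch call lives outside the changed hunks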
requirements.txt CHANGED

@@ -1,4 +1,2 @@
-git+https://github.com/coqui-ai/TTS@dev#egg=TTS
 gradio
 espeak-phonemizer>=1.1.0,<2
-onnxruntime~=1.11.0