te-ch committed
Commit 23ce701 · Parent(s): f64d86f
removed junk

Files changed:
- Dockerfile +2 -12
- app.py +6 -92
- requirements.txt +0 -2
Dockerfile CHANGED

@@ -5,9 +5,9 @@ RUN apt-get update && apt-get install -y gnupg && \
     echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
     echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
     apt-get update && \
-    apt-get -y install
+    apt-get -y install lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev libatlas-base-dev gfortran
 
-RUN git clone -b ca-
+RUN git clone -b ca-pr https://github.com/projecte-aina/espeak-ng
 
 RUN cd espeak-ng && \
     ./autogen.sh && \
@@ -19,7 +19,6 @@ RUN useradd -m -u 1000 user
 
 USER user
 
-
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
@@ -31,15 +30,6 @@ COPY --chown=user models models
 
 RUN pip install -r requirements.txt
 
-RUN git clone https://github.com/jaywalnut310/vits.git && \
-    cd vits && sed s/torch==1.6.0/torch==1.7.0/ requirements.txt > requirements.txt && pip install -r requirements.txt && cd monotonic_align && \
-    python setup.py build_ext --inplace && cd /home/user
-
-ENV PYTHONPATH=$PYTHONPATH:/home/user/app/vits
-
-COPY --chown=user engine.py .
-COPY --chown=user mms.py .
-COPY --chown=user festival.py .
 COPY --chown=user app.py .
 
 RUN mkdir -p cache && chmod 777 cache
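After this commit the image only builds the projecte-aina fork of espeak-ng (branch ca-pr) and installs the app's Python requirements; the VITS, Festival and MMS build steps are gone. A minimal sketch of how the remaining app exercises that espeak-ng build through espeak_phonemizer, the same calls app.py makes (the sample sentence here is arbitrary):

from espeak_phonemizer import Phonemizer

# Catalan phonemizer backed by the espeak-ng build installed in the image
# (the projecte-aina fork, branch ca-pr).
fonemitzador = Phonemizer("ca")

# Same call app.py uses; prints the phoneme string for an arbitrary sample sentence.
print(fonemitzador.phonemize("Bon dia, com estàs?", keep_clause_breakers=True))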
app.py CHANGED

@@ -1,111 +1,30 @@
 import tempfile
 import gradio as gr
 import os
-from TTS.utils.synthesizer import Synthesizer
 from espeak_phonemizer import Phonemizer
-from engine import Piper
-from festival import festival_synthesize
-from mms import MMS
 
 MAX_TXT_LEN = 325
 
 fonemitzador = Phonemizer("ca")
 
-def carrega_bsc():
-    model_path = os.getcwd() + "/models/bsc/best_model.pth"
-    config_path = os.getcwd() + "/models/bsc/config.json"
-    speakers_file_path = os.getcwd() + "/models/bsc/speakers.pth"
-    vocoder_path = None
-    vocoder_config_path = None
-
-    synthesizer = Synthesizer(
-        model_path, config_path, speakers_file_path, None, vocoder_path, vocoder_config_path,
-    )
-
-    return synthesizer
-
-def carrega_collectivat():
-    model_path = os.getcwd() + "/models/collectivat/fast-speech_best_model.pth"
-    config_path = os.getcwd() + "/models/collectivat/fast-speech_config.json"
-    vocoder_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
-    vocoder_config_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_config.json"
-    synthesizer = Synthesizer(
-        model_path, config_path, None, None, vocoder_path, vocoder_config_path
-    )
-
-    return synthesizer
-
-def carrega_piper():
-    return Piper(os.getcwd() + "/models/piper/ca-upc_ona-x-low.onnx")
-
-def carrega_mms():
-    return MMS(os.getcwd() + "/models/mms")
-
-
-model_bsc = carrega_bsc()
-SPEAKERS = model_bsc.tts_model.speaker_manager.speaker_names
-
-model_collectivat = carrega_collectivat()
-
-model_piper = carrega_piper()
-
-model_mms = carrega_mms()
-
 request_count = 0
 
-def
+def phonemiser(text):
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
     print(text)
 
     # synthesize
-    wav_bsc = model_bsc.tts(text, speaker_idx)
-    wav_coll = model_collectivat.tts(text)
-    wav_piper = model_piper.synthesize(text)
-
-    fp_bsc = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_bsc.save_wav(wav_bsc, fp)
-        fp_bsc = fp.name
-
-    fp_coll = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_collectivat.save_wav(wav_coll, fp)
-        fp_coll = fp.name
-
-    fp_piper = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        fp.write(wav_piper)
-        fp_piper = fp.name
-
-    fp_mms = ""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        model_mms.synthesize(fp.name, text)
-        fp_mms = fp.name
 
     fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
 
-    fp_festival = festival_synthesize(text, festival_voice)
-
     global request_count
     request_count += 1
     print(f"Requests: {request_count}")
-    return fonemes
-
+    return fonemes
 
 description="""
-Amb aquesta aplicació podeu sintetitzar text a veu amb els últims models neuronals lliures pel català i amb el motor Festival.
-
-1. Model multi-parlant VITS entrenat pel BSC (Projecte Aina) [enllaç](https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker)
-2. Model Fastspeech entrenat per Col·lectivat [enllaç](https://github.com/CollectivaT-dev/TTS-API)
-3. Model VITS entrenat per Piper/Home Assistant [enllaç](https://github.com/rhasspy/piper)
-3. Model VITS entrenat per Meta (llicència CC-BY-NC) [enllaç](https://github.com/facebookresearch/fairseq/tree/main/examples/mms)
-
-El primer model ha estat entrenat amb totes les veus de FestCAT, els talls de Common Voice 8 i un altre corpus pel que conté moltes veus de qualitat variable. La veu d'Ona està seleccionada per defecte per la comparativa però podeu provar les altres.
-Els models 2 i 3 han estat entrenats amb la veu d'Ona de FestCAT.
-El model 4, anomenat MMS, de Meta (Facebook) ha estat entrenat a partir de dades d'un [audiollibre](http://live.bible.is/bible/CATBSS/LUK/1) de la Bíblia
-
 Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
 https://github.com/projecte-aina/espeak-ng
 
@@ -114,23 +33,18 @@ NOTA: El model de col·lectivat treballa amb grafemes pel que no fa servir espeak
 article= ""
 
 iface = gr.Interface(
-    fn=tts,
     inputs=[
         gr.Textbox(
             label="Text",
             value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
         ),
-        gr.Dropdown(label="
-
+        gr.Dropdown(label="dialect", choices="")
+
     ],
     outputs=[
-        gr.Markdown(label="Fonemes")
-        gr.Audio(label="Festival",type="filepath"),
-        gr.Audio(label="BSC VITS",type="filepath"),
-        gr.Audio(label="Collectivat Fastspeech",type="filepath"),
-        gr.Audio(label="Piper VITS",type="filepath"),
-        gr.Audio(label="Meta MMS VITS",type="filepath")
+        gr.Markdown(label="Fonemes")
     ],
+
     title="Comparativa de síntesi lliure en català️",
     description=description,
     article=article,
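For reference, the slimmed-down app.py reassembled from the kept and added lines above looks roughly like this. It is a sketch, not the exact file: the diff drops fn=tts without showing a replacement, so wiring fn=phonemiser into gr.Interface is an assumption, the description string is abridged, and the trailing launch() call is assumed to sit outside the changed hunks.

import tempfile  # still imported in the diff, though no longer used after the cleanup
import gradio as gr
import os
from espeak_phonemizer import Phonemizer

MAX_TXT_LEN = 325

fonemitzador = Phonemizer("ca")

request_count = 0

def phonemiser(text):
    # Truncate over-long inputs, as in the original tts() function.
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Only the espeak-based phonemization survives the cleanup.
    fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)

    global request_count
    request_count += 1
    print(f"Requests: {request_count}")
    return fonemes

# Abridged: the real string keeps the espeak/Carme Armentano note and the NOTA line.
description = """
Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
https://github.com/projecte-aina/espeak-ng
"""
article = ""

iface = gr.Interface(
    fn=phonemiser,  # assumption: the diff removes fn=tts without showing its replacement
    inputs=[
        gr.Textbox(
            label="Text",
            value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
        ),
        gr.Dropdown(label="dialect", choices=""),  # as added in the diff; empty choices for now
    ],
    outputs=[
        gr.Markdown(label="Fonemes"),
    ],
    title="Comparativa de síntesi lliure en català️",
    description=description,
    article=article,
)

iface.launch()  # assumption: the launch call lives outside the changed hunks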
requirements.txt CHANGED

@@ -1,4 +1,2 @@
-git+https://github.com/coqui-ai/TTS@dev#egg=TTS
 gradio
 espeak-phonemizer>=1.1.0,<2
-onnxruntime~=1.11.0