djkesu committed on
Commit
ffdeba9
1 Parent(s): d89e354

updated app, and added caching in docker container

Browse files
Files changed (2) hide show
  1. Dockerfile +18 -12
  2. app.py +14 -5
Dockerfile CHANGED
@@ -21,19 +21,25 @@ EXPOSE 80
21
  # Create the directory for pretrained models
22
  # RUN mkdir -p $TORTOISE_MODELS_DIR
23
 
 
 
24
  # Download all the models
25
- # RUN wget -O $TORTOISE_MODELS_DIR/autoregressive.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/autoregressive.pth && \
26
- # wget -O $TORTOISE_MODELS_DIR/classifier.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/classifier.pth && \
27
- # wget -O $TORTOISE_MODELS_DIR/clvp2.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/clvp2.pth && \
28
- # wget -O $TORTOISE_MODELS_DIR/cvvp.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/cvvp.pth && \
29
- # wget -O $TORTOISE_MODELS_DIR/diffusion_decoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/diffusion_decoder.pth && \
30
- # wget -O $TORTOISE_MODELS_DIR/vocoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/vocoder.pth && \
31
- # wget -O $TORTOISE_MODELS_DIR/rlg_auto.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_auto.pth && \
32
- # wget -O $TORTOISE_MODELS_DIR/rlg_diffuser.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth && \
33
- # wget -O $TORTOISE_MODELS_DIR/bigvgan_base_24khz_100band_g.pth https://drive.google.com/uc?id=1_cKskUDuvxQJUEBwdgjAxKuDTUW6kPdY && \
34
- # wget -O $TORTOISE_MODELS_DIR/bigvgan_24khz_100band_g.pth https://drive.google.com/uc?id=1wmP_mAs7d00KHVfVEl8B5Gb72Kzpcavp
35
-
36
- RUN ls -la /app
 
 
 
 
37
 
38
  # Run app.py when the container launches
39
  CMD ["streamlit","run", "app.py"]
 
21
  # Create the directory for pretrained models
22
  # RUN mkdir -p $TORTOISE_MODELS_DIR
23
 
24
+ RUN echo "Downloading models through docker container..."
25
+
26
  # Download all the models
27
+ RUN wget -O $TORTOISE_MODELS_DIR/autoregressive.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/autoregressive.pth && \
28
+ wget -O $TORTOISE_MODELS_DIR/classifier.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/classifier.pth && \
29
+ wget -O $TORTOISE_MODELS_DIR/clvp2.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/clvp2.pth && \
30
+ wget -O $TORTOISE_MODELS_DIR/cvvp.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/cvvp.pth && \
31
+ wget -O $TORTOISE_MODELS_DIR/diffusion_decoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/diffusion_decoder.pth && \
32
+ wget -O $TORTOISE_MODELS_DIR/vocoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/vocoder.pth && \
33
+ wget -O $TORTOISE_MODELS_DIR/rlg_auto.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_auto.pth && \
34
+ wget -O $TORTOISE_MODELS_DIR/rlg_diffuser.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth && \
35
+ wget -O $TORTOISE_MODELS_DIR/bigvgan_base_24khz_100band_g.pth https://drive.google.com/uc?id=1_cKskUDuvxQJUEBwdgjAxKuDTUW6kPdY && \
36
+ wget -O $TORTOISE_MODELS_DIR/bigvgan_24khz_100band_g.pth https://drive.google.com/uc?id=1wmP_mAs7d00KHVfVEl8B5Gb72Kzpcavp
37
+
38
+ RUN echo "Finished downloading models through docker container..."
39
+
40
+ RUN echo "Current directory contents:"
41
+ RUN ls -la
42
+
43
 
44
  # Run app.py when the container launches
45
  CMD ["streamlit","run", "app.py"]
app.py CHANGED
@@ -3,9 +3,10 @@ import shutil
3
  import streamlit as st
4
  import torchaudio
5
  import IPython
 
6
 
7
  from tortoise.api import TextToSpeech
8
- from tortoise.utils.audio import load_voice
9
 
10
  # Initialize TextToSpeech model
11
  tts = TextToSpeech()
@@ -23,7 +24,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
23
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
24
 
25
  # Streamlit UI elements
26
- st.title("Tortoise Text-to-Speech App")
27
 
28
  # Upload .wav files
29
  st.sidebar.header("Upload Audio Samples")
@@ -95,9 +96,17 @@ if st.button("Generate Speech"):
95
 
96
  # Display the generated audio
97
  st.subheader("Generated Output")
98
- st.audio(output_path, format="audio/wav")
99
- if st.checkbox("Play Audio"):
100
- IPython.display.Audio(output_path)
 
 
 
 
 
 
 
 
101
 
102
  st.success("Speech generated successfully!")
103
 
 
3
  import streamlit as st
4
  import torchaudio
5
  import IPython
6
+ import base64
7
 
8
  from tortoise.api import TextToSpeech
9
+ from tortoise.utils.audio import load_voice, load
10
 
11
  # Initialize TextToSpeech model
12
  tts = TextToSpeech()
 
24
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
25
 
26
  # Streamlit UI elements
27
+ st.title("Tortoise Cloning App")
28
 
29
  # Upload .wav files
30
  st.sidebar.header("Upload Audio Samples")
 
96
 
97
  # Display the generated audio
98
  st.subheader("Generated Output")
99
+
100
+ # Create a download link for the generated audio
101
+ audio_base64 = base64.b64encode(open(output_path, 'rb').read()).decode('utf-8')
102
+ href = f'<a href="data:audio/wav;base64,{audio_base64}" download="generated.wav">Download Audio</a>'
103
+ st.markdown(href, unsafe_allow_html=True)
104
+
105
+ # Display the generated audio
106
+ st.audio(output_path, format="audio/wav", start_time=0)
107
+
108
+ # Autoplay the audio
109
+ st.audio(output_path, format="audio/wav", start_time=0, autoplay=True)
110
 
111
  st.success("Speech generated successfully!")
112