Spaces:

djkesu
/

tortoise5c

Running

App Files Community

djkesu committed on Sep 27, 2023

Commit

ffdeba9

1 Parent(s): d89e354

Updated app and added caching in Docker container

Browse files

Files changed (2) hide show

Dockerfile +18 -12
app.py +14 -5

Dockerfile CHANGED Viewed

@@ -21,19 +21,25 @@ EXPOSE 80
 # Create the directory for pretrained models
 # RUN mkdir -p $TORTOISE_MODELS_DIR
 # Download all the models
-# RUN wget -O $TORTOISE_MODELS_DIR/autoregressive.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/autoregressive.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/classifier.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/classifier.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/clvp2.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/clvp2.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/cvvp.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/cvvp.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/diffusion_decoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/diffusion_decoder.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/vocoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/vocoder.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/rlg_auto.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_auto.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/rlg_diffuser.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth && \
-#     wget -O $TORTOISE_MODELS_DIR/bigvgan_base_24khz_100band_g.pth https://drive.google.com/uc?id=1_cKskUDuvxQJUEBwdgjAxKuDTUW6kPdY && \
-#     wget -O $TORTOISE_MODELS_DIR/bigvgan_24khz_100band_g.pth https://drive.google.com/uc?id=1wmP_mAs7d00KHVfVEl8B5Gb72Kzpcavp
-RUN ls -la /app
 # Run app.py when the container launches
 CMD ["streamlit","run", "app.py"]

 # Create the directory for pretrained models
 # RUN mkdir -p $TORTOISE_MODELS_DIR
+RUN echo "Downloading models through docker container..."
 # Download all the models
+RUN wget -O $TORTOISE_MODELS_DIR/autoregressive.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/autoregressive.pth && \
+    wget -O $TORTOISE_MODELS_DIR/classifier.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/classifier.pth && \
+    wget -O $TORTOISE_MODELS_DIR/clvp2.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/clvp2.pth && \
+    wget -O $TORTOISE_MODELS_DIR/cvvp.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/cvvp.pth && \
+    wget -O $TORTOISE_MODELS_DIR/diffusion_decoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/diffusion_decoder.pth && \
+    wget -O $TORTOISE_MODELS_DIR/vocoder.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/vocoder.pth && \
+    wget -O $TORTOISE_MODELS_DIR/rlg_auto.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_auto.pth && \
+    wget -O $TORTOISE_MODELS_DIR/rlg_diffuser.pth https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth && \
+    wget -O $TORTOISE_MODELS_DIR/bigvgan_base_24khz_100band_g.pth https://drive.google.com/uc?id=1_cKskUDuvxQJUEBwdgjAxKuDTUW6kPdY && \
+    wget -O $TORTOISE_MODELS_DIR/bigvgan_24khz_100band_g.pth https://drive.google.com/uc?id=1wmP_mAs7d00KHVfVEl8B5Gb72Kzpcavp
+RUN echo "Finished downloading models through docker container..."
+RUN echo "Current directory contents:"
+RUN ls -la
 # Run app.py when the container launches
 CMD ["streamlit","run", "app.py"]

app.py CHANGED Viewed

@@ -3,9 +3,10 @@ import shutil
 import streamlit as st
 import torchaudio
 import IPython
 from tortoise.api import TextToSpeech
-from tortoise.utils.audio import load_voice
 # Initialize TextToSpeech model
 tts = TextToSpeech()
@@ -23,7 +24,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 # Streamlit UI elements
-st.title("Tortoise Text-to-Speech App")
 # Upload .wav files
 st.sidebar.header("Upload Audio Samples")
@@ -95,9 +96,17 @@ if st.button("Generate Speech"):
         # Display the generated audio
         st.subheader("Generated Output")
-        st.audio(output_path, format="audio/wav")
-        if st.checkbox("Play Audio"):
-            IPython.display.Audio(output_path)
         st.success("Speech generated successfully!")

 import streamlit as st
 import torchaudio
 import IPython
+import base64
 from tortoise.api import TextToSpeech
+from tortoise.utils.audio import load_voice, load
 # Initialize TextToSpeech model
 tts = TextToSpeech()
 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 # Streamlit UI elements
+st.title("Tortoise Cloning App")
 # Upload .wav files
 st.sidebar.header("Upload Audio Samples")
         # Display the generated audio
         st.subheader("Generated Output")
+        # Create a download link for the generated audio
+        audio_base64 = base64.b64encode(open(output_path, 'rb').read()).decode('utf-8')
+        href = f'<a href="data:audio/wav;base64,{audio_base64}" download="generated.wav">Download Audio</a>'
+        st.markdown(href, unsafe_allow_html=True)
+        # Display the generated audio
+        st.audio(output_path, format="audio/wav", start_time=0)
+        # Autoplay the audio
+        st.audio(output_path, format="audio/wav", start_time=0, autoplay=True)
         st.success("Speech generated successfully!")