Spaces:

sandeshb
/

VidTune-Gradio

Sleeping

App Files Files Community

Sandesh Bharadwaj committed on Aug 12

Commit

b81e951

•

2 Parent(s): 47c18e5 e957316

Merge pull request #3 from animikhaich/web-app-dev

Browse files

Files changed (11) hide show

.streamlit/config.toml +4 -1
Dockerfile +20 -0
assets/VidTune-Logo-With-BG.png +0 -0
assets/VidTune-Logo-Without-BG.png +0 -0
assets/favicon.png +0 -0
assets/homepage.png +0 -0
engine/video_descriptor.py +2 -2
environment.yml +252 -0
id_cleaner.py +66 -0
main.py +154 -79
requirements.txt +221 -2

.streamlit/config.toml CHANGED Viewed

@@ -1,2 +1,5 @@
 [browser]
-gatherUsageStats = false

 [browser]
+gatherUsageStats = false
+[theme]
+base = "light"

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Use the python:3.9.19 image as the base image
+FROM python:3.9.19
+# Set the working directory in the container
+WORKDIR /src
+# Copy Requirements file
+COPY requirements.txt /src
+# Install the required packages
+RUN pip install -r requirements.txt
+# Expose port 8003 for Streamlit
+EXPOSE 8003
+# Copy the current directory contents into the container at /src
+COPY . /src
+# Run id_cleaner.py as a background process and then start Streamlit
+CMD ["sh", "-c", "python id_cleaner.py & streamlit run main.py --server.port 8003"]

assets/VidTune-Logo-With-BG.png ADDED Viewed

assets/VidTune-Logo-Without-BG.png ADDED Viewed

assets/favicon.png ADDED Viewed

assets/homepage.png ADDED Viewed

engine/video_descriptor.py CHANGED Viewed

@@ -37,9 +37,9 @@ You must return your response using this JSON schema: {json_schema}
 class DescribeVideo:
-    def __init__(self, model="flash"):
         self.model = self.get_model_name(model)
-        __api_key = self.load_api_key()
         self.is_safety_set = False
         self.safety_settings = self.get_safety_settings()

 class DescribeVideo:
+    def __init__(self, model="flash", google_api_key=None):
         self.model = self.get_model_name(model)
+        __api_key = google_api_key # self.load_api_key()
         self.is_safety_set = False
         self.safety_settings = self.get_safety_settings()

environment.yml ADDED Viewed

	@@ -0,0 +1,252 @@

+name: vidtune
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1
+  - _openmp_mutex=5.1
+  - ca-certificates=2024.3.11
+  - ld_impl_linux-64=2.38
+  - libffi=3.4.4
+  - libgcc-ng=11.2.0
+  - libgomp=11.2.0
+  - libstdcxx-ng=11.2.0
+  - ncurses=6.4
+  - openssl=3.0.14
+  - pip=24.0
+  - python=3.9.19
+  - readline=8.2
+  - setuptools=69.5.1
+  - sqlite=3.45.3
+  - tk=8.6.14
+  - wheel=0.43.0
+  - xz=5.4.6
+  - zlib=1.2.13
+  - pip:
+      - aiofiles==23.2.1
+      - altair==5.3.0
+      - annotated-types==0.7.0
+      - antlr4-python3-runtime==4.9.3
+      - anyio==4.4.0
+      - asttokens==2.4.1
+      - attrs==23.2.0
+      - audiocraft==1.3.0
+      - audioread==3.0.1
+      - av==11.0.0
+      - backcall==0.2.0
+      - beautifulsoup4==4.12.3
+      - bleach==6.1.0
+      - blinker==1.8.2
+      - blis==0.7.11
+      - cachetools==5.3.3
+      - catalogue==2.0.10
+      - certifi==2024.7.4
+      - cffi==1.16.0
+      - charset-normalizer==3.3.2
+      - click==8.1.7
+      - cloudpathlib==0.18.1
+      - cloudpickle==3.0.0
+      - colorlog==6.8.2
+      - confection==0.1.5
+      - contourpy==1.2.1
+      - cycler==0.12.1
+      - cymem==2.0.8
+      - decorator==4.4.2
+      - defusedxml==0.7.1
+      - demucs==4.0.1
+      - dnspython==2.6.1
+      - docopt==0.6.2
+      - dora-search==0.1.12
+      - einops==0.8.0
+      - email-validator==2.2.0
+      - encodec==0.1.1
+      - exceptiongroup==1.2.2
+      - executing==2.0.1
+      - fastapi==0.111.0
+      - fastapi-cli==0.0.4
+      - fastjsonschema==2.20.0
+      - ffmpy==0.3.2
+      - filelock==3.15.4
+      - flashy==0.0.2
+      - fonttools==4.53.1
+      - fsspec==2024.6.1
+      - gitdb==4.0.11
+      - gitpython==3.1.43
+      - google-ai-generativelanguage==0.6.6
+      - google-api-core==2.19.1
+      - google-api-python-client==2.137.0
+      - google-auth==2.32.0
+      - google-auth-httplib2==0.2.0
+      - google-generativeai==0.7.2
+      - googleapis-common-protos==1.63.2
+      - gradio==4.38.1
+      - gradio-client==1.1.0
+      - grpcio==1.64.1
+      - grpcio-status==1.62.2
+      - h11==0.14.0
+      - httpcore==1.0.5
+      - httplib2==0.22.0
+      - httptools==0.6.1
+      - httpx==0.27.0
+      - huggingface-hub==0.23.4
+      - hydra-colorlog==1.2.0
+      - hydra-core==1.3.2
+      - idna==3.7
+      - imageio==2.34.2
+      - imageio-ffmpeg==0.5.1
+      - importlib-metadata==8.2.0
+      - importlib-resources==6.4.0
+      - ipython==8.12.3
+      - jedi==0.19.1
+      - jinja2==3.1.4
+      - joblib==1.4.2
+      - jsonschema==4.23.0
+      - jsonschema-specifications==2023.12.1
+      - julius==0.2.7
+      - jupyter-client==8.6.2
+      - jupyter-core==5.7.2
+      - jupyterlab-pygments==0.3.0
+      - kiwisolver==1.4.5
+      - lameenc==1.7.0
+      - langcodes==3.4.0
+      - language-data==1.2.0
+      - lazy-loader==0.4
+      - librosa==0.10.2.post1
+      - lightning-utilities==0.11.5
+      - llvmlite==0.43.0
+      - marisa-trie==1.2.0
+      - markdown-it-py==3.0.0
+      - markupsafe==2.1.5
+      - matplotlib==3.9.1
+      - matplotlib-inline==0.1.7
+      - mdurl==0.1.2
+      - mistune==3.0.2
+      - moviepy==1.0.3
+      - mpmath==1.3.0
+      - msgpack==1.0.8
+      - murmurhash==1.0.10
+      - nbclient==0.10.0
+      - nbconvert==7.16.4
+      - nbformat==5.10.4
+      - networkx==3.2.1
+      - num2words==0.5.13
+      - numba==0.60.0
+      - numpy==1.26.4
+      - nvidia-cublas-cu12==12.1.3.1
+      - nvidia-cuda-cupti-cu12==12.1.105
+      - nvidia-cuda-nvrtc-cu12==12.1.105
+      - nvidia-cuda-runtime-cu12==12.1.105
+      - nvidia-cudnn-cu12==8.9.2.26
+      - nvidia-cufft-cu12==11.0.2.54
+      - nvidia-curand-cu12==10.3.2.106
+      - nvidia-cusolver-cu12==11.4.5.107
+      - nvidia-cusparse-cu12==12.1.0.106
+      - nvidia-nccl-cu12==2.18.1
+      - nvidia-nvjitlink-cu12==12.5.82
+      - nvidia-nvtx-cu12==12.1.105
+      - omegaconf==2.3.0
+      - openunmix==1.3.0
+      - orjson==3.10.6
+      - packaging==24.1
+      - pandas==2.2.2
+      - pandocfilters==1.5.1
+      - parso==0.8.4
+      - pexpect==4.9.0
+      - pickleshare==0.7.5
+      - pillow==10.4.0
+      - pipreqs==0.5.0
+      - platformdirs==4.2.2
+      - pooch==1.8.2
+      - preshed==3.0.9
+      - proglog==0.1.10
+      - prompt-toolkit==3.0.47
+      - proto-plus==1.24.0
+      - protobuf==4.25.3
+      - psutil==6.0.0
+      - ptyprocess==0.7.0
+      - pure-eval==0.2.3
+      - pyarrow==16.1.0
+      - pyasn1==0.6.0
+      - pyasn1-modules==0.4.0
+      - pycparser==2.22
+      - pydantic==2.7.3
+      - pydantic-core==2.18.4
+      - pydeck==0.9.1
+      - pydub==0.25.1
+      - pygments==2.18.0
+      - pyparsing==3.1.2
+      - python-dateutil==2.9.0.post0
+      - python-dotenv==1.0.1
+      - python-multipart==0.0.9
+      - pytz==2024.1
+      - pyyaml==6.0.1
+      - pyzmq==26.1.0
+      - referencing==0.35.1
+      - regex==2024.5.15
+      - requests==2.32.3
+      - retrying==1.3.4
+      - rich==13.7.1
+      - rpds-py==0.19.0
+      - rsa==4.9
+      - ruff==0.5.2
+      - safetensors==0.4.3
+      - scikit-learn==1.5.1
+      - scipy==1.13.1
+      - semantic-version==2.10.0
+      - sentencepiece==0.2.0
+      - shellingham==1.5.4
+      - six==1.16.0
+      - smart-open==7.0.4
+      - smmap==5.0.1
+      - sniffio==1.3.1
+      - soundfile==0.12.1
+      - soupsieve==2.5
+      - soxr==0.3.7
+      - spacy==3.7.5
+      - spacy-legacy==3.0.12
+      - spacy-loggers==1.0.5
+      - srsly==2.4.8
+      - stack-data==0.6.3
+      - starlette==0.37.2
+      - streamlit==1.36.0
+      - submitit==1.5.1
+      - sympy==1.13.0
+      - tenacity==8.5.0
+      - thinc==8.2.5
+      - threadpoolctl==3.5.0
+      - tinycss2==1.3.0
+      - tokenizers==0.19.1
+      - toml==0.10.2
+      - tomlkit==0.12.0
+      - toolz==0.12.1
+      - --extra-index-url https://download.pytorch.org/whl/cu121
+      - torch==2.1.0
+      - torchaudio==2.1.0
+      - torchdata==0.7.0
+      - torchmetrics==1.4.0.post0
+      - torchtext==0.16.0
+      - torchvision==0.16.0
+      - tornado==6.4.1
+      - tqdm==4.66.4
+      - traitlets==5.14.3
+      - transformers==4.42.4
+      - treetable==0.2.5
+      - triton==2.1.0
+      - typer==0.12.3
+      - typing-extensions==4.12.2
+      - tzdata==2024.1
+      - ujson==5.10.0
+      - uritemplate==4.1.1
+      - urllib3==2.2.2
+      - uvicorn==0.30.1
+      - uvloop==0.19.0
+      - wasabi==1.1.3
+      - watchdog==4.0.1
+      - watchfiles==0.22.0
+      - wcwidth==0.2.13
+      - weasel==0.4.1
+      - webencodings==0.5.1
+      - websockets==11.0.3
+      - wrapt==1.16.0
+      - xformers==0.0.22.post7
+      - yarg==0.1.9
+      - zipp==3.19.2

id_cleaner.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import os
+import time
+import shutil
+import logging
+from datetime import datetime, timedelta
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+# Configure logging
+FORMAT = "%(asctime)s: %(levelname)s: %(message)s"
+logging.basicConfig(filename="logs.log", level=logging.INFO, format=FORMAT)
+STDERRLOGGER = logging.StreamHandler()
+STDERRLOGGER.setFormatter(logging.Formatter(FORMAT))
+logging.getLogger().addHandler(STDERRLOGGER)
+class DirectoryCleanupHandler(FileSystemEventHandler):
+    def __init__(self, threshold_minutes=60, check_interval_minutes=5):
+        self.threshold = timedelta(minutes=threshold_minutes)
+        self.check_interval = check_interval_minutes * 60
+        self.last_check_time = datetime.now()
+    def on_modified(self, event):
+        if event.is_directory and event.src_path.startswith("_id_"):
+            logging.info(f"Detected modification in directory: {event.src_path}")
+            self.cleanup_directories()
+    def cleanup_directories(self):
+        now = datetime.now()
+        for dirpath, _, _ in os.walk("."):
+            if os.path.basename(dirpath).startswith("_id_"):
+                try:
+                    mtime = os.path.getmtime(dirpath)
+                    mtime_dt = datetime.fromtimestamp(mtime)
+                    if now - mtime_dt > self.threshold:
+                        logging.info(f"Deleting directory: {dirpath}")
+                        shutil.rmtree(dirpath)
+                except Exception as e:
+                    logging.error(f"Error deleting {dirpath}: {e}")
+    def start_cleanup_loop(self):
+        while True:
+            current_time = datetime.now()
+            if (
+                current_time - self.last_check_time
+            ).total_seconds() >= self.check_interval:
+                logging.info("Woke up to check directories")
+                self.cleanup_directories()
+                self.last_check_time = current_time
+            time.sleep(self.check_interval)
+if __name__ == "__main__":
+    logging.info("Starting directory cleanup script")
+    event_handler = DirectoryCleanupHandler(
+        threshold_minutes=60, check_interval_minutes=30
+    )
+    observer = Observer()
+    observer.schedule(event_handler, path=".", recursive=True)
+    observer.start()
+    try:
+        event_handler.start_cleanup_loop()
+    except KeyboardInterrupt:
+        logging.info("Stopping directory cleanup script due to keyboard interrupt")
+        observer.stop()
+    observer.join()

main.py CHANGED Viewed

@@ -5,13 +5,16 @@ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
 from moviepy.audio.fx.volumex import volumex
 from streamlit.runtime.scriptrunner import get_script_run_ctx
 def get_session_id():
     session_id = get_script_run_ctx().session_id
-    session_id = session_id.replace('-','_')
-    session_id = '_id_' + session_id
     return session_id
-print(get_session_id())
 # Define model maps
 video_model_map = {
     "Fast": "flash",
@@ -46,13 +49,27 @@ genre_map = {
 # Streamlit page configuration
 st.set_page_config(
-    page_title="VidTune: Where Videos Find Their Melody", layout="centered"
 )
 # Title and Description
-st.title("VidTune: Where Videos Find Their Melody")
-st.write(
-    "VidTune is a web application that allows users to upload videos and generate melodies matching the mood of the video."
 )
 # Initialize session state for advanced settings and other inputs
@@ -80,9 +97,30 @@ if "orig_audio_vol" not in st.session_state:
     st.session_state.orig_audio_vol = 100
 if "generated_audio_vol" not in st.session_state:
     st.session_state.generated_audio_vol = 100
 # Sidebar
-st.sidebar.title("Settings")
 # Basic Settings
 st.session_state.video_model = st.sidebar.selectbox(
@@ -138,26 +176,34 @@ generate_button = st.sidebar.button("Generate Music")
 # Cache the model loading
 @st.cache_resource
-def load_models(video_model_key, music_model_key):
-    video_descriptor = DescribeVideo(model=video_model_map[video_model_key])
     audio_generator = GenerateAudio(model=music_model_map[music_model_key])
     return video_descriptor, audio_generator
 # Load models
 video_descriptor, audio_generator = load_models(
-    st.session_state.video_model, st.session_state.music_model
 )
 # Video Uploader
 uploaded_video = st.file_uploader("Upload Video", type=["mp4"])
 if uploaded_video is not None:
     st.session_state.uploaded_video = uploaded_video
-    with open("temp.mp4", mode="wb") as w:
         w.write(uploaded_video.getvalue())
 # Video Player
-if os.path.exists("temp.mp4") and uploaded_video is not None:
     st.video(uploaded_video)
 # Submit button if video is not uploaded
@@ -168,101 +214,130 @@ if generate_button:
     with st.spinner("Analyzing video..."):
         video_description = video_descriptor.describe_video(
-            "temp.mp4",
             genre=st.session_state.music_genre,
             bpm=st.session_state.music_bpm,
             user_keywords=st.session_state.user_keywords,
         )
-        video_duration = VideoFileClip("temp.mp4").duration
-        music_prompt = video_description["Music Prompt"]
         st.success("Video description generated successfully.")
-        # Display Video Description and Music Prompt
-        st.text_area(
-            "Video Description",
-            video_description["Content Description"],
-            disabled=True,
-            height=120,
-        )
-        music_prompt = st.text_area(
-            "Music Prompt",
-            music_prompt,
-            disabled=False,
-            height=120,
-        )
     # Generate Music
     with st.spinner("Generating music..."):
         if video_duration > 30:
             st.warning(
                 "Due to hardware limitations, the maximum music length is capped at 30 seconds."
             )
-        music_prompt = [music_prompt] * st.session_state.num_samples
         audio_generator.generate_audio(music_prompt, duration=video_duration)
         st.session_state.audio_paths = audio_generator.save_audio()
         st.success("Music generated successfully.")
         st.balloons()
 # Callback function for radio button selection change
 def on_audio_selection_change():
-    selected_index = audio_options.index(st.session_state.selected_audio) - 1
-    if selected_index >= 0:
-        st.session_state.selected_audio_path = st.session_state.audio_paths[selected_index]
     else:
         st.session_state.selected_audio_path = None
-# Display radio buttons and handle audio selections
 if st.session_state.audio_paths:
     for i, audio_path in enumerate(st.session_state.audio_paths):
         st.audio(audio_path, format="audio/wav")
-    audio_options = ["None"]+[f"Sample {i+1}" for i in range(len(st.session_state.audio_paths))]
-    st.radio(
         "Select one of the generated audio files for further processing:",
-        audio_options,
         index=0,
         key="selected_audio",
-        on_change=on_audio_selection_change
     )
-    if st.session_state.selected_audio_path:
-        st.write(f"**Selected Audio:** {st.session_state.selected_audio_path}")
 # Handle Audio Mixing and Export
-if st.session_state.selected_audio_path is not None:
-    orig_clip = VideoFileClip("temp.mp4")
-    orig_clip_audio = orig_clip.audio
-    generated_audio = AudioFileClip(st.session_state.selected_audio_path)
-    st.session_state.orig_audio_vol = st.slider(
-        "Original Audio Volume", 0, 200, st.session_state.orig_audio_vol
-    )
-    st.session_state.generated_audio_vol = st.slider(
-        "Selected Sample Volume", 0, 200, st.session_state.generated_audio_vol
-    )
-    orig_clip_audio = volumex(orig_clip_audio, float(st.session_state.orig_audio_vol/100))
-    generated_audio = volumex(generated_audio, float(st.session_state.generated_audio_vol/100))
-    orig_clip.audio = CompositeAudioClip([orig_clip_audio, generated_audio])
-    final_video_path="out_tmp.mp4"
-    orig_clip.write_videofile(final_video_path)
-    orig_clip.close()
-    generated_audio.close()
-    st.session_state.final_video_path = final_video_path
-    st.video(final_video_path)
-    if st.session_state.final_video_path:
-        with open(st.session_state.final_video_path, "rb") as video_file:
-            st.download_button(
-                label="Download final video",
-                data=video_file,
-                file_name="final_video.mp4",
-                mime="video/mp4",
-            )

 from moviepy.audio.fx.volumex import volumex
 from streamlit.runtime.scriptrunner import get_script_run_ctx
 def get_session_id():
     session_id = get_script_run_ctx().session_id
+    session_id = session_id.replace("-", "_")
+    session_id = "_id_" + session_id
     return session_id
+user_session_id = get_session_id()
+os.makedirs(user_session_id, exist_ok=True)
 # Define model maps
 video_model_map = {
     "Fast": "flash",
 # Streamlit page configuration
 st.set_page_config(
+    page_title="VidTune: Where Videos Find Their Melody",
+    layout="centered",
+    page_icon="assets/favicon.png",
 )
+left_co, cent_co, last_co = st.columns(3)
+with cent_co:
+    st.image("assets/VidTune-Logo-Without-BG.png", use_column_width=False, width=200)
 # Title and Description
+st.markdown(
+    """
+    <style>
+    h2, p, div, img {
+        text-align: center;
+    }
+    </style>
+    <div style="font-size: 35px; font-weight: bold;">VidTune: Where Videos Find Their Melody</div>
+    <p>VidTune is a web application to effortlessly tailor perfect soundtracks for your videos with AI.</p>
+    """,
+    unsafe_allow_html=True,
 )
 # Initialize session state for advanced settings and other inputs
     st.session_state.orig_audio_vol = 100
 if "generated_audio_vol" not in st.session_state:
     st.session_state.generated_audio_vol = 100
+if "generate_button_flag" not in st.session_state:
+    st.session_state.generate_button_flag = False
+if "video_description_content" not in st.session_state:
+    st.session_state.video_description_content = ""
+if "music_prompt" not in st.session_state:
+    st.session_state.music_prompt = ""
+if "audio_mix_flag" not in st.session_state:
+    st.session_state.audio_mix_flag = False
+if "google_api_key" not in st.session_state:
+    st.session_state.google_api_key = ""
 # Sidebar
+st.sidebar.title("Configuration")
+# Google API Key
+st.session_state.google_api_key = st.sidebar.text_input(
+    "Enter your [Google API Key](https://ai.google.dev/gemini-api/docs/api-key) to get started :",
+    st.session_state.google_api_key,
+    type="password",
+)
+if not st.session_state.google_api_key:
+    st.warning("Please enter your Google API Key to proceed.")
+    st.stop()
 # Basic Settings
 st.session_state.video_model = st.sidebar.selectbox(
 # Cache the model loading
 @st.cache_resource
+def load_models(video_model_key, music_model_key, google_api_key):
+    video_descriptor = DescribeVideo(
+        model=video_model_map[video_model_key], google_api_key=google_api_key
+    )
     audio_generator = GenerateAudio(model=music_model_map[music_model_key])
+    if audio_generator.device == "cpu":
+        st.warning(
+            "The music generator model is running on CPU. For faster results, consider using a GPU."
+        )
     return video_descriptor, audio_generator
 # Load models
 video_descriptor, audio_generator = load_models(
+    st.session_state.video_model,
+    st.session_state.music_model,
+    st.session_state.google_api_key,
 )
 # Video Uploader
 uploaded_video = st.file_uploader("Upload Video", type=["mp4"])
 if uploaded_video is not None:
     st.session_state.uploaded_video = uploaded_video
+    with open(f"{user_session_id}/temp.mp4", mode="wb") as w:
         w.write(uploaded_video.getvalue())
 # Video Player
+if os.path.exists(f"{user_session_id}/temp.mp4") and uploaded_video is not None:
     st.video(uploaded_video)
 # Submit button if video is not uploaded
     with st.spinner("Analyzing video..."):
         video_description = video_descriptor.describe_video(
+            f"{user_session_id}/temp.mp4",
             genre=st.session_state.music_genre,
             bpm=st.session_state.music_bpm,
             user_keywords=st.session_state.user_keywords,
         )
+        video_duration = VideoFileClip(f"{user_session_id}/temp.mp4").duration
+        st.session_state.video_description_content = video_description[
+            "Content Description"
+        ]
+        st.session_state.music_prompt = video_description["Music Prompt"]
         st.success("Video description generated successfully.")
+        st.session_state.generate_button_flag = True
+# Display Video Description and Music Prompt
+if st.session_state.generate_button_flag:
+    st.text_area(
+        "Video Description",
+        st.session_state.video_description_content,
+        disabled=True,
+        height=120,
+    )
+    music_prompt = st.text_area(
+        "Music Prompt",
+        st.session_state.music_prompt,
+        disabled=True,
+        height=120,
+    )
+if generate_button:
     # Generate Music
     with st.spinner("Generating music..."):
         if video_duration > 30:
             st.warning(
                 "Due to hardware limitations, the maximum music length is capped at 30 seconds."
             )
+        music_prompt = [st.session_state.music_prompt] * st.session_state.num_samples
         audio_generator.generate_audio(music_prompt, duration=video_duration)
         st.session_state.audio_paths = audio_generator.save_audio()
         st.success("Music generated successfully.")
         st.balloons()
 # Callback function for radio button selection change
 def on_audio_selection_change():
+    st.session_state.audio_mix_flag = False
+    selected_audio_index = st.session_state.selected_audio
+    if selected_audio_index > 0:
+        st.session_state.selected_audio_path = st.session_state.audio_paths[
+            selected_audio_index - 1
+        ]
     else:
         st.session_state.selected_audio_path = None
 if st.session_state.audio_paths:
+    # Dropdown to select one of the generated audio files
+    audio_options = ["None"] + [
+        f"Generated Music {i+1}" for i in range(len(st.session_state.audio_paths))
+    ]
+    # Display the audio files
     for i, audio_path in enumerate(st.session_state.audio_paths):
         st.audio(audio_path, format="audio/wav")
+    selected_audio_index = st.selectbox(
         "Select one of the generated audio files for further processing:",
+        range(len(audio_options)),
+        format_func=lambda x: audio_options[x],
         index=0,
         key="selected_audio",
+        on_change=on_audio_selection_change,
     )
+    # Button to confirm the selection
+    if st.button("Add Generated Music to Video"):
+        st.session_state.audio_mix_flag = True
 # Handle Audio Mixing and Export
+if st.session_state.selected_audio_path is not None and st.session_state.audio_mix_flag:
+    with st.spinner("Mixing Audio..."):
+        orig_clip = VideoFileClip(f"{user_session_id}/temp.mp4")
+        orig_clip_audio = orig_clip.audio
+        generated_audio = AudioFileClip(st.session_state.selected_audio_path)
+        st.session_state.orig_audio_vol = st.slider(
+            "Original Audio Volume",
+            0,
+            200,
+            st.session_state.orig_audio_vol,
+            format="%d%%",
+        )
+        st.session_state.generated_audio_vol = st.slider(
+            "Generated Music Volume",
+            0,
+            200,
+            st.session_state.generated_audio_vol,
+            format="%d%%",
+        )
+        orig_clip_audio = volumex(
+            orig_clip_audio, float(st.session_state.orig_audio_vol / 100)
+        )
+        generated_audio = volumex(
+            generated_audio, float(st.session_state.generated_audio_vol / 100)
+        )
+        orig_clip.audio = CompositeAudioClip([orig_clip_audio, generated_audio])
+        final_video_path = f"{user_session_id}/out_tmp.mp4"
+        orig_clip.write_videofile(final_video_path)
+        orig_clip.close()
+        generated_audio.close()
+        st.session_state.final_video_path = final_video_path
+        st.video(final_video_path)
+        if st.session_state.final_video_path:
+            with open(st.session_state.final_video_path, "rb") as video_file:
+                st.download_button(
+                    label="Download final video",
+                    data=video_file,
+                    file_name="final_video.mp4",
+                    mime="video/mp4",
+                )

requirements.txt CHANGED Viewed

@@ -1,9 +1,228 @@
 audiocraft==1.3.0
 fastapi==0.111.0
 numpy==1.26.4
 pydantic==2.7.3
-Requests==2.32.3
 scipy==1.13.1
 torch==2.1.0
 uvicorn==0.30.1
-psutil==6.0.0

+aiofiles==23.2.1
+altair==5.3.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.4.0
+asttokens==2.4.1
+attrs==23.2.0
 audiocraft==1.3.0
+audioread==3.0.1
+av==11.0.0
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.8.2
+blis==0.7.11
+cachetools==5.3.3
+catalogue==2.0.10
+certifi==2024.7.4
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpathlib==0.18.1
+cloudpickle==3.0.0
+colorlog==6.8.2
+confection==0.1.5
+contourpy==1.2.1
+cycler==0.12.1
+cymem==2.0.8
+decorator==4.4.2
+defusedxml==0.7.1
+demucs==4.0.1
+dnspython==2.6.1
+docopt==0.6.2
+dora_search==0.1.12
+einops==0.8.0
+email_validator==2.2.0
+encodec==0.1.1
+exceptiongroup==1.2.2
+executing==2.0.1
 fastapi==0.111.0
+fastapi-cli==0.0.4
+fastjsonschema==2.20.0
+ffmpy==0.3.2
+filelock==3.15.4
+flashy==0.0.2
+fonttools==4.53.1
+fsspec==2024.6.1
+gitdb==4.0.11
+GitPython==3.1.43
+google-ai-generativelanguage==0.6.6
+google-api-core==2.19.1
+google-api-python-client==2.137.0
+google-auth==2.32.0
+google-auth-httplib2==0.2.0
+google-generativeai==0.7.2
+googleapis-common-protos==1.63.2
+gradio==4.38.1
+gradio_client==1.1.0
+grpcio==1.64.1
+grpcio-status==1.62.2
+h11==0.14.0
+httpcore==1.0.5
+httplib2==0.22.0
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.4
+hydra-colorlog==1.2.0
+hydra-core==1.3.2
+idna==3.7
+imageio==2.34.2
+imageio-ffmpeg==0.5.1
+importlib_metadata==8.2.0
+importlib_resources==6.4.0
+ipython==8.12.3
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+julius==0.2.7
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyterlab_pygments==0.3.0
+kiwisolver==1.4.5
+lameenc==1.7.0
+langcodes==3.4.0
+language_data==1.2.0
+lazy_loader==0.4
+librosa==0.10.2.post1
+lightning-utilities==0.11.5
+llvmlite==0.43.0
+marisa-trie==1.2.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.1
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+murmurhash==1.0.10
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+networkx==3.2.1
+num2words==0.5.13
+numba==0.60.0
 numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.18.1
+nvidia-nvjitlink-cu12==12.5.82
+nvidia-nvtx-cu12==12.1.105
+omegaconf==2.3.0
+openunmix==1.3.0
+orjson==3.10.6
+packaging==24.1
+pandas==2.2.2
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==10.4.0
+pipreqs==0.5.0
+platformdirs==4.2.2
+pooch==1.8.2
+preshed==3.0.9
+proglog==0.1.10
+prompt_toolkit==3.0.47
+proto-plus==1.24.0
+protobuf==4.25.3
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==16.1.0
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycparser==2.22
 pydantic==2.7.3
+pydantic_core==2.18.4
+pydeck==0.9.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq==26.1.0
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+retrying==1.3.4
+rich==13.7.1
+rpds-py==0.19.0
+rsa==4.9
+ruff==0.5.2
+safetensors==0.4.3
+scikit-learn==1.5.1
 scipy==1.13.1
+semantic-version==2.10.0
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.16.0
+smart-open==7.0.4
+smmap==5.0.1
+sniffio==1.3.1
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+spacy==3.7.5
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.4.8
+stack-data==0.6.3
+starlette==0.37.2
+streamlit==1.36.0
+submitit==1.5.1
+sympy==1.13.0
+tenacity==8.5.0
+thinc==8.2.5
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+tokenizers==0.19.1
+toml==0.10.2
+tomlkit==0.12.0
+toolz==0.12.1
+--extra-index-url https://download.pytorch.org/whl/cu121
 torch==2.1.0
+torchaudio==2.1.0
+torchdata==0.7.0
+torchmetrics==1.4.0.post0
+torchtext==0.16.0
+torchvision==0.16.0
+tornado==6.4.1
+tqdm==4.66.4
+traitlets==5.14.3
+transformers==4.42.4
+treetable==0.2.5
+triton==2.1.0
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+uritemplate==4.1.1
+urllib3==2.2.2
 uvicorn==0.30.1
+uvloop==0.19.0
+wasabi==1.1.3
+watchdog==4.0.1
+watchfiles==0.22.0
+wcwidth==0.2.13
+weasel==0.4.1
+webencodings==0.5.1
+websockets==11.0.3
+wrapt==1.16.0
+xformers==0.0.22.post7
+yarg==0.1.9
+zipp==3.19.2