import os
import shutil
import subprocess
import uuid
from pathlib import Path

import gradio as gr
from moviepy.editor import AudioFileClip, ImageClip

output_dir = Path("temp/").absolute()
output_dir.mkdir(exist_ok=True, parents=True)

# Change the working directory to output_dir because the hf spaces model has
# no option to specify an output directory ¯\_(ツ)_/¯
os.chdir(output_dir)

# TODO: Add an if statement that checks if a gpu is available, if one is then
# do weird stable diffusion stuff, if one isn't, then just use the regular
# hugging face api


class SpotifyApi:
    """Download podcast episodes by shelling out to the ``spodcast`` CLI."""

    # Relative path: it is resolved against the current working directory at
    # the time it is used (the module changes into output_dir on import).
    spotify_directory = Path("spotify")
    # Directory the extracted mp3 is copied into.
    final_directory = output_dir

    def __init__(self):
        self.setup_spotify()

    def setup_spotify(self) -> None:
        """Create ``spotify.rc`` and log in with spodcast if it is missing.

        Raises:
            KeyError: If ``SPOTIFY_USERNAME`` or ``SPOTIFY_PASSWORD`` is not
                set in the environment.
        """
        # Check if the credentials file exists
        if not os.path.exists("spotify.rc"):
            # Read the environment *before* opening the file: otherwise a
            # missing variable leaves an empty spotify.rc behind and every
            # later run skips the login.
            credentials = (
                f"{os.environ['SPOTIFY_USERNAME']} {os.environ['SPOTIFY_PASSWORD']}"
            )
            with open("spotify.rc", "w") as f:
                f.write(credentials)
            subprocess.call(["spodcast", "-l", "spotify.rc"])

    def download_episode(self, episode_url) -> str:
        """Download ``episode_url`` with spodcast and return the mp3 path.

        Args:
            episode_url: Episode URL handed to ``spodcast`` unchanged.

        Returns:
            POSIX-style absolute path of the mp3 copied into
            ``final_directory``.

        Raises:
            FileNotFoundError: If no ``.mp3`` file is found in the download
                folder after spodcast has run.
        """
        # Generate a 8 character random string
        foldername = str(uuid.uuid4())[:8]
        out_path = (self.spotify_directory / foldername).resolve()
        subprocess.call(["spodcast", "--root-path", out_path, episode_url])

        # get_final_mp3 reads the folder name from the instance.
        self.foldername = foldername
        mp3_path = self.get_final_mp3()
        if mp3_path is None:
            # Explicit raise instead of `assert`, which is stripped under -O.
            raise FileNotFoundError(
                f"spodcast produced no mp3 file for {episode_url!r} in {out_path}"
            )
        return mp3_path

    def get_final_mp3(self) -> str | None:
        """Copy the first downloaded mp3 into ``final_directory``.

        Walks ``spotify_directory / self.foldername``; on the first file
        ending in ``.mp3`` it copies the file to
        ``final_directory / <foldername>.mp3``, deletes the download folder
        and returns the copy's POSIX path.

        Returns:
            The path of the copied mp3, or ``None`` if no mp3 was found (the
            download folder is left in place in that case).
        """
        download_dir = (self.spotify_directory / self.foldername).resolve()

        # Look in all the subdirectories of spotify for the mp3 file
        for root, _dirs, files in os.walk(download_dir):
            for file in files:
                if file.endswith(".mp3"):
                    final_mp3 = (
                        (self.final_directory / self.foldername)
                        .with_suffix(".mp3")
                        .absolute()
                    )
                    shutil.copy(os.path.join(root, file), final_mp3)
                    shutil.rmtree(download_dir)
                    return final_mp3.as_posix()
        return None


class AudioInput:
    """Audio source plus the clip window to cut from it."""

    def __init__(self, path: str, start_time: int, run_for: int):
        # Path of the audio file to read.
        self.path = path
        # Offset into the audio at which the clip starts (seconds).
        self.start_time = start_time
        # Length of the clip (seconds).
        self.run_for = run_for


def process_inputs(
    prompt: str, audio_path: str, spotify_url: str, start_time: int, run_for: int
) -> str:
    """Build a video clip from a prompt and an audio source.

    Args:
        prompt: Text passed to the image generator.
        audio_path: Path of an uploaded audio file; ignored when
            ``spotify_url`` is given.
        spotify_url: Spotify episode link; when non-empty the episode is
            downloaded and used as the audio source.
        start_time: Clip start, in seconds.
        run_for: Clip length, in seconds.

    Returns:
        Path of the rendered video file.

    Raises:
        ValueError: If neither ``audio_path`` nor ``spotify_url`` is given.
    """
    # Fail fast with a readable message before doing any expensive work
    # (image generation, downloads) on a request that cannot succeed.
    if not spotify_url and not audio_path:
        raise ValueError(
            "Provide either an uploaded audio file or a Spotify episode link."
        )

    audio_input = AudioInput(audio_path, start_time, run_for)
    if spotify_url:
        spotify = SpotifyApi()
        audio_input.path = spotify.download_episode(spotify_url)

    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio_input)
    return video


def add_static_image_to_audio(image, audio_input) -> str:
    """Combine a static image with a slice of an audio file into a video.

    Args:
        image: Image handed to ``ImageClip`` (a file path in this module).
        audio_input: ``AudioInput`` describing the audio file and the window
            to cut. A ``start_time`` of ``None`` is treated as 0 and a
            ``run_for`` of ``None`` as "until the end of the audio".

    Returns:
        POSIX path of the written ``output.mp4``, inside a freshly created,
        randomly named sub-folder of ``output_dir``.
    """
    # Generate a random folder name so each render gets its own directory
    foldername = str(uuid.uuid4())[:8]
    vid_output_dir = output_dir / foldername
    vid_output_dir.mkdir(exist_ok=True, parents=True)

    source_clip = AudioFileClip(audio_input.path)
    try:
        start = 0 if audio_input.start_time is None else audio_input.start_time
        end = None if audio_input.run_for is None else start + audio_input.run_for

        # Make the audio clip start at the specified time and set the
        # duration to the specified duration
        audio_clip = source_clip.subclip(start, end)

        image_clip = ImageClip(image)
        video_clip = image_clip.set_audio(audio_clip)
        # The duration here is the cut duration from above
        video_clip.duration = audio_clip.duration
        video_clip.fps = 1

        path = (vid_output_dir / "output.mp4").as_posix()
        video_clip.write_videofile(path)
    finally:
        # Release the audio reader even when rendering fails.
        source_clip.close()
    return path


def get_stable_diffusion_image(prompt) -> str:
    """Generate an image for ``prompt`` and return the path of one result.

    Loads the ``spaces/stabilityai/stable-diffusion`` space, calls it with
    ``fn_index=2`` and treats the return value as a directory of images.

    Raises:
        RuntimeError: If that directory contains no files.
    """
    stable_diffusion = gr.Blocks.load(name="spaces/stabilityai/stable-diffusion")
    gallery_dir = stable_diffusion(prompt, fn_index=2)

    images = os.listdir(gallery_dir)
    if not images:
        raise RuntimeError(
            f"Stable Diffusion returned no images for prompt {prompt!r}"
        )
    return os.path.join(gallery_dir, images[0])


iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(label="Describe your podcast clip"),
        gr.Audio(type="filepath", label="Upload an mp3"),
        gr.Textbox(label="Or Paste a spotify episode link"),
        gr.Number(label="Start time (in seconds)"),
        gr.Number(label="Run for (in seconds)"),
    ],
    outputs="video",
)
iface.launch()