#!/bin/bash

# Ensure we are in the correct directory
cd ~/ai-dreams-x || exit 1

# Install required Python packages.
# transformers is required by diffusers' Stable Diffusion pipeline;
# moviepy is pinned below 2.0 because the moviepy.editor module was removed in 2.x.
pip install gradio torch diffusers transformers "moviepy<2.0" sounddevice soundfile numpy librosa matplotlib

# Create the Python script for the Gradio interface.
# The heredoc delimiter is quoted so the shell does not expand anything inside it.
cat << 'EOF' > gradio_interface_extended.py
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
import moviepy.editor as mp
import sounddevice as sd
import soundfile as sf
import numpy as np
import librosa
import librosa.display
import matplotlib
matplotlib.use("Agg")  # headless backend: the spectrogram is rendered to a file, not a window
import matplotlib.pyplot as plt
import time

# Load the model once at startup
model_id = "runwayml/stable-diffusion-v1-5"
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionPipeline.from_pretrained(model_id)
pipe = pipe.to(device)

# Function to generate video from text
def generate_video(prompt, duration, frame_rate, audio_file, mic_input):
    # Resolve the audio source: microphone recording, uploaded file, or none
    if mic_input:
        fs = 44100    # sample rate (Hz)
        seconds = 10  # fixed recording length
        print("Recording audio...")
        audio_data = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
        sd.wait()  # block until recording is finished
        audio_path = "mic_audio.wav"
        sf.write(audio_path, audio_data, fs)
    elif audio_file is not None:
        # Depending on the Gradio version, gr.File passes either a file path
        # string or a tempfile object with a .name attribute.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
    else:
        audio_path = None

    # If audio is present, its length overrides the requested duration
    if audio_path:
        y, sr = librosa.load(audio_path)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)  # tempo is currently unused
        duration = librosa.get_duration(y=y, sr=sr)
    else:
        sr = 22050
        duration = float(duration)

    # Generate one diffusion image per frame. Each frame is sampled
    # independently, so the output will flicker; this is also by far the
    # slowest step (one full diffusion run per frame).
    frames = []
    start_time = time.time()
    for _ in range(int(duration * frame_rate)):
        frame = pipe(prompt).images[0]
        frames.append(frame)

    clip = mp.ImageSequenceClip([np.array(f) for f in frames], fps=frame_rate)
    if audio_path:
        audio_clip = mp.AudioFileClip(audio_path)
        video = clip.set_audio(audio_clip)
    else:
        video = clip

    # Render a spectrogram visualizer. Note: the image is saved to disk
    # but is not composited into the video.
    if audio_path:
        waveform = np.abs(librosa.stft(y))
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(
            librosa.amplitude_to_db(waveform, ref=np.max),
            sr=sr, x_axis='time', y_axis='log',
        )
        plt.colorbar(format='%+2.0f dB')
        plt.title('Power spectrogram')
        plt.tight_layout()
        visualizer_path = "/Users/unseenseven/Desktop/AI_DREAMS & VISIONS/visualizer.png"
        plt.savefig(visualizer_path)
        plt.close()

    output_path = "/Users/unseenseven/Desktop/AI_DREAMS & VISIONS/generated_video.mp4"
    video.write_videofile(output_path, codec="libx264")

    # This is the measured elapsed time, not an up-front estimate
    elapsed_time = time.time() - start_time

    # Return the path twice: once for the preview player, once for the download link
    return output_path, f"Generation took {elapsed_time:.2f} seconds", output_path

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# AI DREAMS & VISIONS Video Generator")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Text Prompt")
            duration = gr.Slider(minimum=1, maximum=60, label="Duration (seconds)")
            frame_rate = gr.Slider(minimum=1, maximum=60, label="Frame Rate (fps)")
            audio_file = gr.File(label="Upload Audio File (optional)")
            mic_input = gr.Checkbox(label="Use Microphone Input")
            submit = gr.Button("Generate Video")
            email = gr.Markdown("Contact us: [aidreams@aidreams.company](mailto:aidreams@aidreams.company)")
        with gr.Column():
            video_preview = gr.Video(label="Generated Video Preview")
            generation_time_output = gr.Textbox(label="Generation Time", interactive=False)
            download_link = gr.File(label="Download Video")

    submit.click(
        generate_video,
        inputs=[prompt, duration, frame_rate, audio_file, mic_input],
        outputs=[video_preview, generation_time_output, download_link],
    )

demo.launch(share=True)
EOF

# Run the Python script
python gradio_interface_extended.py
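
# Optional sanity check (a minimal sketch, not part of the pipeline above):
# before waiting on a full video, you can confirm the model downloads, loads,
# and renders a single image. The output file name smoke_test.png is arbitrary.
# Uncomment to run:
#
# python - << 'PYEOF'
# import torch
# from diffusers import StableDiffusionPipeline
#
# pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
# pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
# # Fewer inference steps keeps this check fast; quality does not matter here
# pipe("a test image", num_inference_steps=20).images[0].save("smoke_test.png")
# PYEOF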