Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from pathlib import Path | |
| import torch | |
| from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
| from diffusers import StableDiffusionPipeline | |
| from TTS.api import TTS | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| from moviepy.editor import * | |
| import base64 | |
class VideoGenerator:
    """Turn a text prompt into a narrated slideshow video.

    The pipeline has four stages: a causal language model writes a script,
    Stable Diffusion renders one image per scene description, a TTS model
    narrates the script, and moviepy stitches images and narration into an
    MP4. Every intermediate and final file is written into a per-instance
    temporary directory (``self.temp_dir``).
    """

    def __init__(self):
        """Load the text, image and speech models and create the temp directory."""
        # Use the GPU when one is present; otherwise run on CPU instead of
        # crashing on a hard-coded "cuda" device. float16 is only requested
        # for GPU runs, CPU runs use float32.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.float16 if self.device == "cuda" else torch.float32

        # Initialize text generation model
        self.text_model = AutoModelForCausalLM.from_pretrained(
            "facebook/opt-1.3b",
            torch_dtype=dtype,
            device_map="auto",
        )
        self.text_tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

        # Initialize image generation model
        self.image_generator = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            torch_dtype=dtype,
        ).to(self.device)

        # Initialize TTS model
        self.tts = TTS(
            model_name="tts_models/en/ljspeech/tacotron2-DDC",
            progress_bar=False,
        )

        # Create temp directory that holds scene images, narration and video
        self.temp_dir = Path(tempfile.mkdtemp())

    def generate_script(self, prompt):
        """Generate a detailed script with facts and scenes for ``prompt``.

        Args:
            prompt: Topic the video should be about.

        Returns:
            The newly generated text only; the instruction prompt fed to the
            model is not included.
        """
        encoded = self.text_tokenizer(
            f"Generate a detailed video script with facts about: {prompt}. Include scene descriptions.",
            return_tensors="pt",
        ).to(self.text_model.device)  # follow the model instead of assuming "cuda"
        prompt_length = encoded.input_ids.shape[-1]

        outputs = self.text_model.generate(
            **encoded,  # passes the attention mask along with the input ids
            # Budget applies to generated tokens only, so a long topic does
            # not eat into the script length.
            max_new_tokens=500,
            # temperature only takes effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,
            num_return_sequences=1,
        )

        # A causal LM returns prompt + continuation; keep the continuation so
        # the instruction text is neither narrated nor used as an image prompt.
        generated_tokens = outputs[0][prompt_length:]
        script = self.text_tokenizer.decode(generated_tokens, skip_special_tokens=True)
        return script.strip()

    def generate_scene_images(self, scene_descriptions):
        """Generate one image per scene description using Stable Diffusion.

        Args:
            scene_descriptions: Iterable of text prompts, one per scene.

        Returns:
            List of paths to the saved PNG files, in scene order.
        """
        image_paths = []
        for i, desc in enumerate(scene_descriptions):
            image = self.image_generator(desc).images[0]
            path = self.temp_dir / f"scene_{i}.png"
            image.save(path)
            image_paths.append(path)
        return image_paths

    def generate_voiceover(self, script):
        """Generate voice narration for ``script`` using TTS.

        Args:
            script: Text to narrate.

        Returns:
            Path to the generated WAV file.

        Raises:
            ValueError: If ``script`` is empty or only whitespace.
        """
        if not script or not script.strip():
            raise ValueError("Cannot generate a voiceover from an empty script")
        audio_path = self.temp_dir / "voiceover.wav"
        self.tts.tts_to_file(script, file_path=str(audio_path))
        return audio_path

    @staticmethod
    def _clip_durations(count, duration_per_image, audio_duration):
        """Return per-image durations whose total covers ``audio_duration``.

        Every image gets ``duration_per_image`` seconds; if that leaves the
        narration longer than the slideshow, the last image is held for the
        remaining time.

        Raises:
            ValueError: If ``count`` is not positive.
        """
        if count <= 0:
            raise ValueError("At least one image is required to create a video")
        durations = [duration_per_image] * count
        shortfall = audio_duration - duration_per_image * count
        if shortfall > 0:
            durations[-1] += shortfall
        return durations

    def create_video(self, image_paths, audio_path, duration_per_image=5):
        """Combine images and audio into a video.

        Args:
            image_paths: Paths of the scene images, in display order.
            audio_path: Path of the narration audio file.
            duration_per_image: Seconds each image is shown. The last image
                is held longer when the narration outlasts the slideshow.

        Returns:
            Path to the written MP4 file; its length matches the audio.

        Raises:
            ValueError: If ``image_paths`` is empty.
        """
        image_paths = list(image_paths)
        audio = AudioFileClip(str(audio_path))
        clips = []
        try:
            durations = self._clip_durations(
                len(image_paths), duration_per_image, audio.duration
            )
            clips = [
                ImageClip(str(img_path)).set_duration(seconds)
                for img_path, seconds in zip(image_paths, durations)
            ]
            video = concatenate_videoclips(clips)
            # Adjust video duration to match audio. The slideshow is already
            # at least as long as the audio, so this can only trim it.
            video = video.set_duration(audio.duration)
            final_video = video.set_audio(audio)
            output_path = self.temp_dir / "output_video.mp4"
            final_video.write_videofile(str(output_path), fps=24)
            return output_path
        finally:
            # Release the readers opened for the audio file and image clips.
            audio.close()
            for clip in clips:
                clip.close()
def main():
    """Render the Streamlit UI and run the text-to-video pipeline on demand.

    The page collects a topic and settings; pressing the generate button runs
    script generation, image generation, narration and video composition in
    sequence, then shows the script, the video and a download button. Any
    exception raised by the pipeline is reported on the page via ``st.error``.
    """
    st.set_page_config(page_title="AI Video Generator", layout="wide")
    st.title("🎬 AI Text-to-Video Generator")

    # Initialize session state: build the generator once per session and
    # reuse it across Streamlit reruns.
    if 'video_generator' not in st.session_state:
        st.session_state.video_generator = VideoGenerator()
    generator = st.session_state.video_generator

    # Input section
    st.header("Enter Your Topic")
    text_input = st.text_area(
        "What would you like to create a video about?",
        height=100,
        placeholder="Example: Explain the process of photosynthesis in plants..."
    )

    # Generation settings
    st.header("Video Settings")
    col1, col2 = st.columns(2)
    with col1:
        video_length = st.slider("Approximate video length (seconds)", 30, 300, 60)
    with col2:
        # NOTE(review): the selected style is collected but not passed to any
        # generation step in this function - confirm whether that is intended.
        style = st.selectbox(
            "Video style",
            ["Educational", "Documentary", "Engaging", "Professional"]
        )

    # Generate button
    if not st.button("🎥 Generate Video"):
        return
    # Treat whitespace-only input the same as no input.
    if not text_input or not text_input.strip():
        st.warning("Please enter some text to generate a video!")
        return

    with st.spinner("🤖 Generating your video..."):
        try:
            # Progress bar
            progress_bar = st.progress(0)
            progress_text = st.empty()

            # Generate script
            progress_text.text("Generating script...")
            script = generator.generate_script(text_input)
            progress_bar.progress(25)

            # Extract scene descriptions
            progress_text.text("Processing scenes...")
            scenes = [s.strip() for s in script.split("Scene:") if s.strip()]
            if not scenes:
                # Without this guard the per-image duration below would
                # divide by zero.
                st.error("The generated script was empty. Please try again or rephrase your topic.")
                return
            progress_bar.progress(40)

            # Generate images
            progress_text.text("Creating visuals...")
            image_paths = generator.generate_scene_images(scenes)
            progress_bar.progress(60)

            # Generate voiceover
            progress_text.text("Generating voiceover...")
            audio_path = generator.generate_voiceover(script)
            progress_bar.progress(80)

            # Create video: spread the requested length evenly over scenes
            progress_text.text("Composing final video...")
            video_path = generator.create_video(
                image_paths,
                audio_path,
                duration_per_image=video_length / len(scenes)
            )
            progress_bar.progress(100)
            progress_text.text("Video generation complete!")

            # Display results
            st.header("Generated Content")

            # Show script
            with st.expander("📝 Generated Script"):
                st.write(script)

            # Show video; read_bytes() opens and closes the file itself so
            # no handle is left open.
            st.header("🎥 Your Video")
            video_bytes = Path(video_path).read_bytes()
            st.video(video_bytes)

            # Download button
            st.download_button(
                label="Download Video",
                data=video_bytes,
                file_name="generated_video.mp4",
                mime="video/mp4"
            )
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"An error occurred: {str(e)}")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()