"""Prompt-to-animated-story demo.

A text prompt is expanded into a short story with GPT-2, the first few
sentences are illustrated with Stable Diffusion, the story is narrated with
Coqui TTS, and the pieces are assembled into a video served through Gradio.
"""

import contextlib
import os
import random
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
from transformers import pipeline
from TTS.api import TTS

_USE_CUDA = torch.cuda.is_available()
_DEVICE = "cuda" if _USE_CUDA else "cpu"

# Load story generator
story_gen = pipeline("text-generation", model="gpt2")

# Load image generator
# float16 weights are only requested when a GPU is present; on CPU the
# pipeline is loaded in float32 because half-precision inference is not
# reliably supported there.
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _USE_CUDA else torch.float32,
)
sd_pipe = sd_pipe.to(_DEVICE)

# Load TTS
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=_USE_CUDA,
)


def generate_story(prompt):
    """Return the text GPT-2 generates for *prompt*.

    A single sequence is requested with ``max_length=150`` and its
    ``"generated_text"`` field is returned.
    """
    results = story_gen(prompt, max_length=150, num_return_sequences=1)
    return results[0]["generated_text"]


def generate_images(story_text, num_images=4):
    """Illustrate the first *num_images* sentences of *story_text*.

    The story is split on ``"."``; blank fragments are dropped.

    Returns:
        A list of ``(sentence, image)`` tuples, one per illustrated sentence,
        where ``image`` is the first image returned by the diffusion pipeline.
        The list is empty when the story contains no non-blank sentence.
    """
    sentences = [part.strip() for part in story_text.split(".") if part.strip()]
    return [
        (sentence, sd_pipe(sentence).images[0])
        for sentence in sentences[:num_images]
    ]


def create_narration(story_text):
    """Synthesize *story_text* to a temporary WAV file and return its path.

    The caller owns the returned file and is responsible for deleting it.
    """
    # mkstemp + close releases the OS handle before the TTS engine writes to
    # the path; the original kept a NamedTemporaryFile handle open forever.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(text=story_text, file_path=audio_path)
    except BaseException:
        # Do not leave an empty/partial file behind when synthesis fails.
        with contextlib.suppress(OSError):
            os.remove(audio_path)
        raise
    return audio_path


def create_video(images_with_text, audio_path, min_duration_per_image=4):
    """Assemble a narrated slideshow and return the path of the MP4 file.

    Args:
        images_with_text: Sequence of ``(caption, image)`` tuples; each image
            must provide a ``save(path)`` method. Captions are not rendered.
        audio_path: Path of the narration audio file.
        min_duration_per_image: Minimum number of seconds each image is
            shown. Images are shown longer when the narration would
            otherwise outlast the slideshow.

    Returns:
        Path of the written video, a uniquely named file in the system
        temporary directory.

    Raises:
        ValueError: If *images_with_text* is empty.
    """
    if not images_with_text:
        raise ValueError("create_video() needs at least one image")

    audio = AudioFileClip(audio_path)
    video = None
    output_path = None
    try:
        # Stretch the slides so the narration is not cut off at the end of a
        # fixed-length slideshow.
        duration_per_image = max(
            min_duration_per_image, audio.duration / len(images_with_text)
        )

        # A unique output name keeps concurrent requests from overwriting
        # each other's video.
        fd, output_path = tempfile.mkstemp(prefix="story_video_", suffix=".mp4")
        os.close(fd)

        # Frames live in a private scratch directory that is removed once the
        # video has been written, instead of fixed names in the working dir.
        with tempfile.TemporaryDirectory() as frame_dir:
            clips = []
            for index, (_caption, image) in enumerate(images_with_text):
                frame_path = os.path.join(frame_dir, f"frame_{index}.png")
                image.save(frame_path)
                clip = ImageClip(frame_path).set_duration(duration_per_image)
                clips.append(clip.set_position("center").resize(height=720))

            video = concatenate_videoclips(clips, method="compose")
            video = video.set_audio(audio)
            video.write_videofile(output_path, fps=24)
    except BaseException:
        # Remove the placeholder/partial output when rendering fails.
        if output_path is not None:
            with contextlib.suppress(OSError):
                os.remove(output_path)
        raise
    finally:
        # Release the readers/subprocesses held by the clips.
        if video is not None:
            video.close()
        audio.close()
    return output_path


def full_pipeline(prompt):
    """Run story, image, narration and video generation for *prompt*.

    Returns:
        A ``(video_path, story)`` tuple matching the two Gradio outputs.

    Raises:
        gr.Error: If the prompt is blank or the generated story contains no
            sentence that can be illustrated.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a story prompt.")

    story = generate_story(prompt)
    images = generate_images(story)
    if not images:
        raise gr.Error("The generated story had no sentences to illustrate.")

    audio_path = create_narration(story)
    try:
        video_path = create_video(images, audio_path)
    finally:
        # The narration is embedded in the video; the WAV is no longer needed.
        with contextlib.suppress(OSError):
            os.remove(audio_path)
    return video_path, story


# Gradio UI
iface = gr.Interface(
    fn=full_pipeline,
    inputs=gr.Textbox(lines=3, placeholder="Enter your story prompt here..."),
    outputs=[
        gr.Video(label="Generated Story Animation"),
        gr.Textbox(label="Generated Story"),
    ],
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

iface.launch()