| import gradio as gr |
| from transformers import pipeline |
| from diffusers import StableDiffusionPipeline |
| import torch |
| import os |
| from moviepy.editor import * |
| from TTS.api import TTS |
| import tempfile |
| import random |
|
|
| |
# Text-generation pipeline used to expand the user's prompt into a story.
story_gen = pipeline("text-generation", model="gpt2")

# Probe CUDA once so that the device and the dtype always agree.
_CUDA_AVAILABLE = torch.cuda.is_available()

# Half precision is only requested on CUDA: diffusers warns that pipelines
# loaded with float16 cannot run on the "cpu" device, so the CPU fallback
# loads the weights in float32 instead of failing at inference time.
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _CUDA_AVAILABLE else torch.float32,
)
sd_pipe = sd_pipe.to("cuda" if _CUDA_AVAILABLE else "cpu")

# Speech synthesiser for the narration track; runs on the GPU when available.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=_CUDA_AVAILABLE,
)
|
|
def generate_story(prompt):
    """Generate story text from *prompt* with the module-level ``story_gen``.

    A single sequence is requested with ``max_length=150``; the value of its
    ``"generated_text"`` field is returned.
    """
    candidates = story_gen(prompt, max_length=150, num_return_sequences=1)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]
|
|
def generate_images(story_text, num_images=4):
    """Render one image for each of the leading sentences of *story_text*.

    The text is split on ``'.'``; fragments that are empty after stripping
    are discarded and at most ``num_images`` of the remaining ones are passed,
    one at a time, to the module-level ``sd_pipe``.

    Returns a list of ``(sentence, image)`` tuples in story order, where
    ``image`` is the first entry of the pipeline result's ``images``.
    """
    fragments = (piece.strip() for piece in story_text.split('.'))
    prompts = [piece for piece in fragments if piece][:num_images]
    return [(text, sd_pipe(text).images[0]) for text in prompts]
|
|
def create_narration(story_text):
    """Synthesise *story_text* into a WAV file and return the file's path.

    The audio is written by the module-level ``tts`` object into a uniquely
    named temporary ``.wav`` file.  The file is created with ``delete=False``,
    so it is not removed automatically; the caller owns the returned path.
    """
    # Reserve a unique path and close the handle immediately.  Leaving it open
    # leaks one file descriptor per call and, on Windows, can prevent another
    # writer from opening the same file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        audio_path = tmp_audio.name
    tts.tts_to_file(text=story_text, file_path=audio_path)
    return audio_path
|
|
def create_video(images_with_text, audio_path, duration_per_image=4,
                 output_path="output_story_video.mp4"):
    """Build a narrated slideshow video from generated images.

    Args:
        images_with_text: Iterable of ``(caption, image)`` pairs.  Only the
            image is used; it must provide a ``save(path)`` method.
        audio_path: Path of the narration audio file to attach.
        duration_per_image: Seconds each image stays on screen.
        output_path: Where the finished video is written.

    Returns:
        ``output_path``, after the video has been written at 24 fps.

    Raises:
        ValueError: If ``images_with_text`` is empty, since a video cannot be
            assembled from zero clips.
    """
    frames = list(images_with_text)
    if not frames:
        raise ValueError("create_video needs at least one image")

    # Frames go into a private scratch directory instead of fixed names in the
    # working directory, so they are cleaned up and cannot clash between runs.
    with tempfile.TemporaryDirectory(prefix="story_frames_") as frame_dir:
        slides = []
        for index, (_caption, image) in enumerate(frames):
            frame_path = os.path.join(frame_dir, f"frame_{index}.png")
            image.save(frame_path)
            slide = ImageClip(frame_path).set_duration(duration_per_image)
            slides.append(slide.set_position("center").resize(height=720))

        video = concatenate_videoclips(slides, method="compose")
        narration = AudioFileClip(audio_path)
        try:
            # NOTE(review): the slideshow length is fixed by the image count,
            # not by the narration length - confirm how a longer or shorter
            # audio track should be handled.
            video.set_audio(narration).write_videofile(output_path, fps=24)
        finally:
            # Release the audio reader even when encoding fails.
            narration.close()
    return output_path
|
|
def full_pipeline(prompt):
    """Run the whole chain for one prompt: story, images, narration, video.

    Returns a ``(video_path, story)`` tuple, matching the order of the two
    output components of the Gradio interface.
    """
    story = generate_story(prompt)
    illustrated = generate_images(story)
    narration_path = create_narration(story)
    return create_video(illustrated, narration_path), story
|
|
| |
# Gradio front end: one prompt box in; the rendered video and the story out.
_prompt_input = gr.Textbox(
    lines=3,
    placeholder="Enter your story prompt here...",
)
_result_outputs = [
    gr.Video(label="Generated Story Animation"),
    gr.Textbox(label="Generated Story"),
]

iface = gr.Interface(
    fn=full_pipeline,
    inputs=_prompt_input,
    outputs=_result_outputs,
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

# Start the web server as soon as the module is executed.
iface.launch()