File size: 2,493 Bytes
57bdc6d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
import os
from moviepy.editor import *
from TTS.api import TTS
import tempfile
import random
# --- Model loading (runs once, at import time) ---

# Half precision is only chosen when a CUDA device is present. The original
# always requested float16 and then moved the pipeline to CPU when no GPU was
# available; Stable Diffusion in float16 generally cannot run on CPU, so fall
# back to float32 there.
_USE_CUDA = torch.cuda.is_available()
_DEVICE = "cuda" if _USE_CUDA else "cpu"
_SD_DTYPE = torch.float16 if _USE_CUDA else torch.float32

# Load story generator (GPT-2 text-generation pipeline).
story_gen = pipeline("text-generation", model="gpt2")

# Load image generator (Stable Diffusion v1.5) and move it to the chosen device.
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=_SD_DTYPE,
)
sd_pipe = sd_pipe.to(_DEVICE)

# Load TTS; run it on the GPU only when one is available.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=_USE_CUDA,
)
def generate_story(prompt):
    """Generate a short story that continues *prompt*.

    Args:
        prompt: Seed text for the story.

    Returns:
        The ``"generated_text"`` field of the first (and only) sequence
        returned by the ``story_gen`` pipeline.
    """
    # Bound only the continuation. ``max_length=150`` also counted the
    # prompt's own tokens, so a long prompt left little or no room for any
    # newly generated text.
    results = story_gen(prompt, max_new_tokens=150, num_return_sequences=1)
    return results[0]["generated_text"]
def generate_images(story_text, num_images=4):
    """Render one image per leading sentence of the story.

    Args:
        story_text: Story to illustrate.
        num_images: Maximum number of sentences (and therefore images) to
            use. Zero or a negative value yields no images.

    Returns:
        A list of ``(sentence, image)`` tuples in story order, where
        ``image`` is the first image ``sd_pipe`` returns for that sentence.
        The list is empty when the story contains no non-blank sentence.
    """
    import re  # function-scope import so the block does not depend on file-level imports

    # A negative slice bound would silently mean "all but the last k
    # sentences"; treat it as "no images" instead.
    limit = max(num_images, 0)

    # Split on any run of sentence-ending punctuation, not only ".", so that
    # questions and exclamations become their own image prompts.
    fragments = re.split(r"[.!?]+", story_text)
    selected = [part.strip() for part in fragments if part.strip()][:limit]

    return [(sentence, sd_pipe(sentence).images[0]) for sentence in selected]
def create_narration(story_text):
    """Synthesize spoken narration for *story_text* into a WAV file.

    Args:
        story_text: Text to read aloud.

    Returns:
        Path of a newly created ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If *story_text* is empty or only whitespace.
    """
    if not story_text or not story_text.strip():
        raise ValueError("story_text must contain text to narrate")

    # Reserve a unique path, then close our own handle before the TTS engine
    # writes to it. The original kept the handle open while the engine wrote
    # to the same path and never closed it explicitly.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        audio_path = tmp_audio.name

    tts.tts_to_file(text=story_text, file_path=audio_path)
    return audio_path
def create_video(images_with_text, audio_path):
    """Assemble a narrated slideshow video from images and an audio file.

    Args:
        images_with_text: Iterable of ``(caption, image)`` pairs. Each
            ``image`` must provide ``save(path)``. Captions are not drawn
            onto the video.
        audio_path: Path to the narration audio file.

    Returns:
        Path of the written video file (``"output_story_video.mp4"``).

    Raises:
        ValueError: If *images_with_text* is empty.
    """
    frames = list(images_with_text)
    if not frames:
        # Fail early with a clear message instead of an obscure error from
        # concatenating zero clips.
        raise ValueError("images_with_text must contain at least one image")

    min_seconds_per_image = 4
    output_path = "output_story_video.mp4"
    audio = None
    video = None

    # Frames go to a private temporary directory so repeated or concurrent
    # runs do not overwrite each other's PNGs and nothing is left behind in
    # the working directory.
    with tempfile.TemporaryDirectory() as frame_dir:
        try:
            audio = AudioFileClip(audio_path)

            # Stretch the slideshow so the whole narration fits. With a fixed
            # 4 s per image, any narration longer than 4 * len(frames)
            # seconds was cut off. Never go below the original 4 s.
            narration_seconds = audio.duration or 0
            duration_per_image = max(
                min_seconds_per_image, narration_seconds / len(frames)
            )

            clips = []
            for i, (_caption, image) in enumerate(frames):
                img_path = os.path.join(frame_dir, f"frame_{i}.png")
                image.save(img_path)
                clip = ImageClip(img_path).set_duration(duration_per_image)
                clips.append(clip.set_position("center").resize(height=720))

            video = concatenate_videoclips(clips, method="compose")
            video = video.set_audio(audio)
            video.write_videofile(output_path, fps=24)
        finally:
            # Release the readers moviepy holds open for these clips.
            for resource in (video, audio):
                if resource is not None:
                    resource.close()

    return output_path
def full_pipeline(prompt):
    """Run the whole prompt -> story -> images + narration -> video chain.

    Args:
        prompt: Story prompt entered by the user.

    Returns:
        A ``(video_path, story)`` tuple: the path returned by
        ``create_video`` and the text returned by ``generate_story``.
    """
    story = generate_story(prompt)
    # Arguments are evaluated left to right, so images are still generated
    # before the narration is synthesized.
    video_path = create_video(generate_images(story), create_narration(story))
    return video_path, story
# Gradio UI: one prompt box in; the rendered video and the story text out.
_prompt_input = gr.Textbox(lines=3, placeholder="Enter your story prompt here...")
_result_outputs = [
    gr.Video(label="Generated Story Animation"),
    gr.Textbox(label="Generated Story"),
]

iface = gr.Interface(
    fn=full_pipeline,
    inputs=_prompt_input,
    outputs=_result_outputs,
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

# Start the web server as soon as the module is executed.
iface.launch()