File size: 2,493 Bytes
57bdc6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
import os
from moviepy.editor import *
from TTS.api import TTS
import tempfile
import random

# Models are loaded once at import time and shared by every request.

# Story generator: GPT-2 text-generation pipeline.
story_gen = pipeline("text-generation", model="gpt2")

# Image generator: Stable Diffusion v1.5.
# Half precision is only requested when a CUDA device is present; on CPU the
# pipeline is loaded in float32, because float16 inference is not reliably
# supported by CPU kernels.
_use_cuda = torch.cuda.is_available()
_sd_device = "cuda" if _use_cuda else "cpu"
_sd_dtype = torch.float16 if _use_cuda else torch.float32
sd_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=_sd_dtype)
sd_pipe = sd_pipe.to(_sd_device)

# Text-to-speech: Tacotron2-DDC (LJSpeech), on the GPU when one is available.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=_use_cuda)

def generate_story(prompt):
    """Return the text generated for *prompt* by the module-level ``story_gen``.

    The pipeline is asked for a single sequence with ``max_length=150``; the
    ``"generated_text"`` field of that sequence is returned.
    """
    candidates = story_gen(prompt, max_length=150, num_return_sequences=1)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]

def generate_images(story_text, num_images=4):
    """Render one image per sentence for the leading sentences of a story.

    The story is split on ``'.'``; fragments that are empty after stripping
    whitespace are dropped, and the first ``num_images`` remaining fragments
    are each passed to the module-level ``sd_pipe``.

    Returns a list of ``(sentence, image)`` tuples, where ``image`` is the
    first entry of the pipeline result's ``images``.
    """
    stripped = (fragment.strip() for fragment in story_text.split('.'))
    prompts = [fragment for fragment in stripped if fragment][:num_images]
    return [(text, sd_pipe(text).images[0]) for text in prompts]

def create_narration(story_text):
    """Synthesize *story_text* to a ``.wav`` file and return the file's path.

    A named temporary file is created only to reserve a unique path. Its
    handle is closed before the module-level ``tts`` writes to that path, so
    no file descriptor is leaked and the file is not held open while it is
    being written. The file is created with ``delete=False``, so it is not
    removed automatically; the caller owns the returned path.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        audio_path = tmp_audio.name
    tts.tts_to_file(text=story_text, file_path=audio_path)
    return audio_path

def create_video(images_with_text, audio_path):
    """Build a narrated slideshow video and return the path it was written to.

    Args:
        images_with_text: sequence of ``(caption, image)`` pairs. Only the
            image is used; each must provide a ``save(path)`` method.
        audio_path: path of the narration audio file to attach.

    Returns:
        ``"output_story_video.mp4"``, the path of the written video.

    Raises:
        ValueError: if ``images_with_text`` is empty, since there would be
            nothing to show.
    """
    if not images_with_text:
        raise ValueError("create_video requires at least one image")

    duration_per_image = 4  # seconds each frame stays on screen
    output_path = "output_story_video.mp4"

    # Frames go into a throwaway directory so that runs do not overwrite each
    # other's frame_*.png files and nothing is left behind in the working
    # directory afterwards.
    with tempfile.TemporaryDirectory() as frame_dir:
        clips = []
        for i, (_caption, image) in enumerate(images_with_text):
            img_path = os.path.join(frame_dir, f"frame_{i}.png")
            image.save(img_path)
            clip = ImageClip(img_path).set_duration(duration_per_image)
            clip = clip.set_position("center").resize(height=720)
            clips.append(clip)

        video = concatenate_videoclips(clips, method="compose")
        audio = AudioFileClip(audio_path)
        # NOTE(review): the slideshow length is len(images) * 4 seconds and is
        # not matched to the narration length - confirm how a mismatch should
        # be handled.
        final = video.set_audio(audio)
        try:
            final.write_videofile(output_path, fps=24)
        finally:
            # Release the clips' underlying resources even if encoding fails.
            final.close()
            audio.close()
    return output_path

def full_pipeline(prompt):
    """Run the whole prompt -> story -> images + narration -> video chain.

    Returns a ``(video_path, story)`` tuple, in the order the Gradio
    interface lists its outputs.
    """
    story = generate_story(prompt)
    # Images are generated before the narration; both are derived from the story.
    frames = generate_images(story)
    narration_path = create_narration(story)
    return create_video(frames, narration_path), story

# Gradio UI: one prompt textbox in; the rendered video and the story text out.
_prompt_input = gr.Textbox(lines=3, placeholder="Enter your story prompt here...")
_video_output = gr.Video(label="Generated Story Animation")
_story_output = gr.Textbox(label="Generated Story")

iface = gr.Interface(
    fn=full_pipeline,
    inputs=_prompt_input,
    outputs=[_video_output, _story_output],
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

# Start the web server as soon as the module is executed.
iface.launch()