VR / app.py
Prometheus799's picture
Create app.py
213a958 verified
Raw
History Blame Contribute Delete
2.49 kB
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
import os
from moviepy.editor import *
from TTS.api import TTS
import tempfile
import random
# Decide the compute device once so every model below agrees on it.
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load story generator
story_gen = pipeline("text-generation", model="gpt2")

# Load image generator
# Half precision is only requested when a GPU is present: float16 weights on
# the CPU fallback are not reliably supported by PyTorch's CPU kernels, so the
# CPU path loads the pipeline in float32 instead.
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _DEVICE == "cuda" else torch.float32,
)
sd_pipe = sd_pipe.to(_DEVICE)

# Load TTS
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=_DEVICE == "cuda",
)
def generate_story(prompt):
    """Return the text produced by the story generator for ``prompt``.

    The module-level ``story_gen`` pipeline is asked for a single sequence
    with ``max_length=150``; the ``"generated_text"`` field of that one
    result is returned.
    """
    candidates = story_gen(prompt, max_length=150, num_return_sequences=1)
    # Only one sequence was requested, so the first entry is the story.
    first_candidate = candidates[0]
    return first_candidate["generated_text"]
def generate_images(story_text, num_images=4):
    """Generate one image per leading sentence of ``story_text``.

    The story is split on ``'.'``; blank fragments are dropped and the first
    ``num_images`` remaining fragments (whitespace-stripped) are each passed
    to the module-level ``sd_pipe`` as a prompt.

    Returns:
        A list of ``(sentence, image)`` tuples in story order, where ``image``
        is the first entry of the pipeline result's ``images``.
    """
    prompts = []
    for fragment in story_text.split('.'):
        cleaned = fragment.strip()
        if cleaned:
            prompts.append(cleaned)
    prompts = prompts[:num_images]

    # One pipeline call per selected sentence, keeping the sentence alongside
    # the picture it produced.
    return [(text, sd_pipe(text).images[0]) for text in prompts]
def create_narration(story_text):
    """Synthesize narration for ``story_text`` into a temporary WAV file.

    Args:
        story_text: The text handed to the module-level ``tts`` engine.

    Returns:
        The path of the ``.wav`` file that was written. The file is created
        with ``delete=False``, so the caller owns it and is responsible for
        removing it when it is no longer needed.
    """
    # The temporary file is used only to reserve a unique path. Close our own
    # handle deterministically before the TTS engine writes to that path by
    # name, instead of leaving an open handle for the garbage collector.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        audio_path = tmp_audio.name
    tts.tts_to_file(text=story_text, file_path=audio_path)
    return audio_path
def create_video(images_with_text, audio_path):
    """Render a narrated slideshow from generated images.

    Each image is shown for four seconds, centered and resized to a height of
    720 pixels; the clips are concatenated, the audio at ``audio_path`` is
    attached, and the result is written as an MP4 at 24 fps.

    Args:
        images_with_text: Iterable of ``(caption, image)`` pairs. Each image
            must provide a ``save(path)`` method. The caption is not rendered.
        audio_path: Path of the narration audio file to attach.

    Returns:
        Path of the written ``.mp4`` file. A unique temporary file is used per
        call so concurrent requests cannot overwrite each other's output.

    Raises:
        ValueError: If ``images_with_text`` is empty (there is nothing to
            concatenate).
    """
    images_with_text = list(images_with_text)
    if not images_with_text:
        raise ValueError("create_video requires at least one image")

    duration_per_image = 4

    # Reserve a unique output path; the handle is closed right away so the
    # video writer can create the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        output_path = tmp_video.name

    # Frames go into a private scratch directory that is removed on exit,
    # rather than fixed names in the working directory that would collide
    # between requests and pile up on disk.
    with tempfile.TemporaryDirectory() as frame_dir:
        clips = []
        for i, (_caption, image) in enumerate(images_with_text):
            img_path = os.path.join(frame_dir, f"frame_{i}.png")
            image.save(img_path)
            clip = ImageClip(img_path).set_duration(duration_per_image)
            clips.append(clip.set_position("center").resize(height=720))

        video = concatenate_videoclips(clips, method="compose")
        audio = AudioFileClip(audio_path)
        try:
            video.set_audio(audio).write_videofile(output_path, fps=24)
        finally:
            # Release the audio reader even when encoding fails.
            audio.close()

    return output_path
def full_pipeline(prompt):
    """Turn a text prompt into a narrated story video.

    Runs the stages in order: story generation, image generation, narration
    synthesis, and video assembly.

    Args:
        prompt: The user's story prompt.

    Returns:
        A ``(video_path, story)`` tuple: the path of the rendered video and
        the generated story text.
    """
    story = generate_story(prompt)
    images = generate_images(story)
    audio_path = create_narration(story)
    try:
        video_path = create_video(images, audio_path)
    finally:
        # The narration file is only an intermediate: once the video has been
        # written (or has failed) nothing reads it again, so remove it instead
        # of leaving one temporary file behind per request.
        try:
            os.remove(audio_path)
        except OSError:
            # Best-effort cleanup: a file that is already gone or still held
            # open must not mask the pipeline's real result or error.
            pass
    return video_path, story
# Gradio UI
# A single prompt box feeds full_pipeline, whose two return values map onto
# the video player and the story text box, in that order.
_prompt_input = gr.Textbox(lines=3, placeholder="Enter your story prompt here...")
_result_outputs = [
    gr.Video(label="Generated Story Animation"),
    gr.Textbox(label="Generated Story"),
]

iface = gr.Interface(
    fn=full_pipeline,
    inputs=_prompt_input,
    outputs=_result_outputs,
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

# Start the web app.
iface.launch()