# Hugging Face Space (status shown when captured: runtime error)
| import gradio as gr | |
| from transformers import pipeline | |
| from diffusers import StableDiffusionPipeline | |
| import torch | |
| import os | |
| from moviepy.editor import * | |
| from TTS.api import TTS | |
| import tempfile | |
| import random | |
# Decide once whether a GPU is available; every model below follows this.
_use_cuda = torch.cuda.is_available()

# Load story generator
story_gen = pipeline("text-generation", model="gpt2")

# Load image generator
# float16 weights are only requested when the pipeline will run on CUDA;
# on CPU the pipeline is loaded in float32 instead of half precision.
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _use_cuda else torch.float32,
)
sd_pipe = sd_pipe.to("cuda" if _use_cuda else "cpu")

# Load TTS
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=_use_cuda,
)
def generate_story(prompt):
    """Generate story text from ``prompt`` using the module-level ``story_gen``.

    A single sequence is requested with ``max_length=150``; the value of its
    ``"generated_text"`` entry is returned.
    """
    candidates = story_gen(prompt, max_length=150, num_return_sequences=1)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]
def generate_images(story_text, num_images=4):
    """Render one image per sentence for the first ``num_images`` sentences.

    ``story_text`` is split on ``'.'``; fragments that are empty after
    stripping whitespace are skipped. Each remaining fragment is passed to the
    module-level ``sd_pipe`` and the first image of its result is kept.

    Returns a list of ``(sentence, image)`` tuples in story order.
    """
    stripped = (fragment.strip() for fragment in story_text.split('.'))
    prompts = [fragment for fragment in stripped if fragment][:num_images]
    return [(text, sd_pipe(text).images[0]) for text in prompts]
def create_narration(story_text):
    """Synthesize ``story_text`` to a WAV file and return the file's path.

    The file is created with ``delete=False``, so it is not removed
    automatically; the caller owns the returned path.
    """
    # Only the unique file name is needed. Close the handle before the TTS
    # engine writes to the path so the descriptor is not leaked and the file
    # is not held open by this process while it is being written.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
        audio_path = tmp_audio.name
    tts.tts_to_file(text=story_text, file_path=audio_path)
    return audio_path
def create_video(images_with_text, audio_path):
    """Build a narrated slideshow video and return the path of the MP4 file.

    Args:
        images_with_text: iterable of ``(caption, image)`` pairs; each image
            must provide ``save(path)``. The caption is not rendered.
        audio_path: path of the narration audio file to attach.

    Returns:
        Path of the written ``.mp4`` file (a uniquely named temporary file
        that is not removed automatically).

    Raises:
        ValueError: if ``images_with_text`` is empty.
    """
    images_with_text = list(images_with_text)
    if not images_with_text:
        raise ValueError("create_video requires at least one image")

    min_seconds_per_image = 4
    audio = AudioFileClip(audio_path)
    try:
        # Each slide is shown for at least 4 seconds, and longer when needed
        # so that the picture track is not shorter than the narration.
        duration_per_image = max(
            min_seconds_per_image,
            audio.duration / len(images_with_text),
        )

        # Frames go to a private directory: no name clashes between
        # concurrent calls and nothing left behind in the working directory.
        with tempfile.TemporaryDirectory() as frame_dir:
            clips = []
            for i, (_caption, image) in enumerate(images_with_text):
                img_path = os.path.join(frame_dir, f"frame_{i}.png")
                image.save(img_path)
                clip = ImageClip(img_path).set_duration(duration_per_image)
                clips.append(clip.set_position("center").resize(height=720))

            video = concatenate_videoclips(clips, method="compose")
            video = video.set_audio(audio)

            # Unique output name so simultaneous requests do not overwrite
            # each other's result.
            fd, output_path = tempfile.mkstemp(
                prefix="output_story_video_", suffix=".mp4"
            )
            os.close(fd)
            video.write_videofile(output_path, fps=24)
    finally:
        # Release the audio reader even if rendering fails.
        audio.close()
    return output_path
def full_pipeline(prompt):
    """Run story, image, narration and video generation for ``prompt``.

    Returns a ``(video_path, story)`` tuple, in the order expected by the
    interface outputs.
    """
    story_text = generate_story(prompt)
    illustrated = generate_images(story_text)
    narration_file = create_narration(story_text)
    rendered = create_video(illustrated, narration_file)
    return rendered, story_text
# Gradio UI: one prompt box in, the rendered video and the story text out.
prompt_input = gr.Textbox(lines=3, placeholder="Enter your story prompt here...")
result_outputs = [
    gr.Video(label="Generated Story Animation"),
    gr.Textbox(label="Generated Story"),
]

iface = gr.Interface(
    fn=full_pipeline,
    inputs=prompt_input,
    outputs=result_outputs,
    title="Prompt-to-Animated Story Generator",
    description="Enter a story prompt and get an animated video with narration",
)

# Start the web server for the interface.
iface.launch()