Spaces:

Manasa1
/

AI_Comedy_Show

Runtime error

App Files Files Community

Manasa1 committed on Oct 14, 2024

Commit

0b60483

verified ·

1 Parent(s): 859e5f2

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -98

app.py CHANGED Viewed

@@ -3,9 +3,10 @@ import torch
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from TTS.api import TTS
 import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.animation as animation
-import re
 # Initialize text generation model (GPT-2)
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
@@ -14,9 +15,15 @@ model = AutoModelForCausalLM.from_pretrained("gpt2")
 # Initialize TTS model
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 def generate_text(prompt, max_length=200):
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2)
     return tokenizer.decode(output[0], skip_special_tokens=True)
 def generate_speech(text):
@@ -24,110 +31,65 @@ def generate_speech(text):
     tts.tts_to_file(text=text, file_path=output_path)
     return output_path
-def parse_script(script):
-    lines = script.split('\n')
-    scenes = []
-    current_scene = {"characters": set(), "actions": []}
-    for line in lines:
-        if line.strip():
-            if ':' in line:
-                character, action = line.split(':', 1)
-                current_scene["characters"].add(character.strip())
-                current_scene["actions"].append((character.strip(), action.strip()))
-            else:
-                if current_scene["actions"]:
-                    scenes.append(current_scene)
-                    current_scene = {"characters": set(), "actions": []}
-    if current_scene["actions"]:
-        scenes.append(current_scene)
-    return scenes
-def create_stick_figure(ax, x, y, color):
-    circle = plt.Circle((x, y+0.1), 0.1, fc=color)
-    line = plt.Line2D([x, x], [y-0.3, y], color=color)
-    left_arm = plt.Line2D([x-0.2, x], [y, y-0.1], color=color)
-    right_arm = plt.Line2D([x, x+0.2], [y-0.1, y], color=color)
-    left_leg = plt.Line2D([x-0.1, x], [y-0.5, y-0.3], color=color)
-    right_leg = plt.Line2D([x, x+0.1], [y-0.3, y-0.5], color=color)
-    ax.add_artist(circle)
-    ax.add_artist(line)
-    ax.add_artist(left_arm)
-    ax.add_artist(right_arm)
-    ax.add_artist(left_leg)
-    ax.add_artist(right_leg)
-def animate_scene(scene, ax):
-    characters = list(scene["characters"])
-    colors = plt.cm.get_cmap('Set3')(np.linspace(0, 1, len(characters)))
-    character_positions = {char: (i/(len(characters)-1) if len(characters) > 1 else 0.5, 0.5) for i, char in enumerate(characters)}
-    def init():
-        ax.clear()
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
-        for char, (x, y) in character_positions.items():
-            create_stick_figure(ax, x, y, colors[characters.index(char)])
-        return []
-    def animate(frame):
-        ax.clear()
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
-        action = scene["actions"][frame % len(scene["actions"])]
-        speaking_char, text = action
-        for char, (x, y) in character_positions.items():
-            if char == speaking_char:
-                y += 0.05 * np.sin(frame * 0.5)  # Make the speaking character bounce
-            create_stick_figure(ax, x, y, colors[characters.index(char)])
-        ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)
-        return []
-    return animation.FuncAnimation(fig, animate, init_func=init, frames=len(scene["actions"])*5, interval=1000, blit=True)
-def create_character_animation(script):
-    scenes = parse_script(script)
-    fig, ax = plt.subplots(figsize=(10, 6))
-    animations = [animate_scene(scene, ax) for scene in scenes]
-    # Combine all animations
-    combined_animation = animation.ArtistAnimation(fig, sum([anim._framedata for anim in animations], []), interval=1000, blit=True, repeat_delay=1000)
-    # Save animation as gif
-    combined_animation.save('character_animation.gif', writer='pillow')
-    return 'character_animation.gif'
 def generate_comedy_animation(prompt):
     script = generate_text(f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action.")
-    animation_path = create_character_animation(script)
     speech_path = generate_speech(script)
-    return script, animation_path, speech_path
 # Gradio Interface
 with gr.Blocks() as app:
-    gr.Markdown("## Character-based Animation Generator")
-    comedy_prompt = gr.Textbox(label="Enter comedy prompt")
-    comedy_generate_btn = gr.Button("Generate Comedy Animation")
-    comedy_script = gr.Textbox(label="Generated Comedy Script")
-    comedy_animation = gr.Image(label="Comedy Animation")
-    comedy_audio = gr.Audio(label="Comedy Speech")
-    comedy_generate_btn.click(
-        generate_comedy_animation,
-        inputs=comedy_prompt,
-        outputs=[comedy_script, comedy_animation, comedy_audio]
-    )
 app.launch()

 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from TTS.api import TTS
 import numpy as np
+from PIL import Image
+from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
+from torchvision.io import write_video
+import os
 # Initialize text generation model (GPT-2)
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 # Initialize TTS model
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
+# Initialize Stable Diffusion pipeline
+# Run on the GPU in half precision when CUDA is available; otherwise fall back
+# to CPU in float32, because `.to("cuda")` raises on a host without a GPU and
+# float16 inference is not generally supported on CPU.
+sd_device = "cuda" if torch.cuda.is_available() else "cpu"
+pipe = StableDiffusionPipeline.from_pretrained(
+    "CompVis/stable-diffusion-v1-4",
+    torch_dtype=torch.float16 if sd_device == "cuda" else torch.float32,
+)
+# Swap the default scheduler for DPM-Solver++ built from the same config.
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+pipe = pipe.to(sd_device)
 def generate_text(prompt, max_length=200):
+    """Generate text for `prompt` with the module-level GPT-2 tokenizer and model.
+
+    Args:
+        prompt: Text that is encoded and passed to `model.generate`.
+        max_length: Value forwarded as `max_length` to `model.generate`.
+
+    Returns:
+        The first generated sequence decoded to a string, with special
+        tokens skipped.
+    """
+    # NOTE(review): in transformers, `max_length` presumably counts the prompt
+    # tokens as well, and the decoded sequence likely starts with the prompt
+    # text itself -- confirm against the `generate` documentation.
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    # A single unpadded prompt: mark every position as attended.
+    attention_mask = torch.ones_like(input_ids)
+    # The EOS id is passed as the pad id explicitly (presumably because GPT-2
+    # defines no pad token -- TODO confirm).
+    output = model.generate(input_ids, attention_mask=attention_mask, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2, pad_token_id=tokenizer.eos_token_id)
     return tokenizer.decode(output[0], skip_special_tokens=True)
 def generate_speech(text):
     tts.tts_to_file(text=text, file_path=output_path)
     return output_path
+def generate_video_frames(prompt, num_frames=30):
+    """Render `num_frames` images from `prompt` with the module-level `pipe`.
+
+    Args:
+        prompt: Base text prompt shared by every frame.
+        num_frames: Number of images to generate.
+
+    Returns:
+        A list of NumPy arrays, one per generated image, in frame order.
+    """
+    # Tag each prompt with its 1-based position so the text differs per frame.
+    frame_prompts = (
+        f"{prompt}, frame {index} of {num_frames}"
+        for index in range(1, num_frames + 1)
+    )
+    return [np.array(pipe(text).images[0]) for text in frame_prompts]
+def create_video_from_frames(frames, output_path="output_video.mp4", fps=10):
+    """Encode a sequence of image frames into a video file.
+
+    Args:
+        frames: Non-empty sequence of equally sized image arrays in
+            (height, width, channels) layout, as returned by
+            `generate_video_frames`.
+        output_path: Destination path of the video file.
+        fps: Frame rate passed to `write_video`.
+
+    Returns:
+        `output_path`, once the video has been written.
+
+    Raises:
+        ValueError: If `frames` is empty.
+    """
+    if len(frames) == 0:
+        raise ValueError("frames must contain at least one image")
+    # torchvision's write_video expects a (T, H, W, C) tensor. Stacking the
+    # channel-last frames already yields that layout, so no permute is applied
+    # (the previous permute to (T, C, H, W) produced an invalid frame layout).
+    frames_tensor = torch.from_numpy(np.stack(frames))
+    write_video(output_path, frames_tensor, fps=fps)
+    return output_path
 def generate_comedy_animation(prompt):
+    """Produce a comedy script, a video and a speech file for `prompt`.
+
+    Args:
+        prompt: Topic of the comedy scene, inserted into both the text
+            prompt and the image prompt.
+
+    Returns:
+        A tuple `(script, video_path, speech_path)`: the generated text,
+        the path returned by `create_video_from_frames`, and the path
+        returned by `generate_speech`.
+    """
     script = generate_text(f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action.")
+    # The video is driven by the user's prompt, not by the generated script.
+    video_prompt = f"A comedic scene with two characters: {prompt}"
+    frames = generate_video_frames(video_prompt)
+    video_path = create_video_from_frames(frames)
+    # The whole generated script is passed to text-to-speech.
     speech_path = generate_speech(script)
+    return script, video_path, speech_path
+def generate_kids_music_animation(theme):
+    """Produce children's song lyrics, a video and a speech file for `theme`.
+
+    Args:
+        theme: Subject of the song, inserted into both the lyrics prompt
+            and the image prompt.
+
+    Returns:
+        A tuple `(lyrics, video_path, speech_path)`.
+    """
+    lyrics_request = f"Write short and simple lyrics for a children's song about {theme}. Each line should be on a new line. Don't include 'Verse' or 'Chorus' labels."
+    lyrics = generate_text(lyrics_request)
+    # The video prompt is built from the theme, not from the generated lyrics.
+    scene_description = f"A colorful, animated music video for children about {theme}"
+    video_path = create_video_from_frames(generate_video_frames(scene_description))
+    # Speech is synthesized last, from the full lyrics text.
+    return lyrics, video_path, generate_speech(lyrics)
 # Gradio Interface
+# Two tabs with the same layout: a text input, a button, and three outputs
+# (generated text, video, audio) filled by the tab's generator function.
 with gr.Blocks() as app:
+    gr.Markdown("## AI-Generated Video and Audio Content")
+    # Tab 1: comedy script + video + speech from a free-text prompt.
+    with gr.Tab("Comedy Animation"):
+        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
+        comedy_generate_btn = gr.Button("Generate Comedy Animation")
+        comedy_script = gr.Textbox(label="Generated Comedy Script")
+        comedy_animation = gr.Video(label="Comedy Animation")
+        comedy_audio = gr.Audio(label="Comedy Speech")
+        # Output order matches the (script, video_path, speech_path) tuple
+        # returned by generate_comedy_animation.
+        comedy_generate_btn.click(
+            generate_comedy_animation,
+            inputs=comedy_prompt,
+            outputs=[comedy_script, comedy_animation, comedy_audio]
+        )
+    # Tab 2: children's song lyrics + video + speech from a theme.
+    with gr.Tab("Kids Music Animation"):
+        music_theme = gr.Textbox(label="Enter music theme for kids")
+        music_generate_btn = gr.Button("Generate Kids Music Animation")
+        music_lyrics = gr.Textbox(label="Generated Lyrics")
+        music_animation = gr.Video(label="Music Animation")
+        music_audio = gr.Audio(label="Music Audio")
+        # Output order matches the (lyrics, video_path, speech_path) tuple
+        # returned by generate_kids_music_animation.
+        music_generate_btn.click(
+            generate_kids_music_animation,
+            inputs=music_theme,
+            outputs=[music_lyrics, music_animation, music_audio]
+        )
+# Start the app at import time; there is no __main__ guard in this file.
 app.launch()