Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,10 @@ import torch
|
|
3 |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
4 |
from TTS.api import TTS
|
5 |
import numpy as np
|
6 |
-
|
7 |
-
import
|
8 |
-
import
|
|
|
9 |
|
10 |
# Initialize text generation model (GPT-2)
|
11 |
tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
@@ -14,9 +15,15 @@ model = AutoModelForCausalLM.from_pretrained("gpt2")
|
|
14 |
# Initialize TTS model
|
15 |
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
|
16 |
|
|
|
|
|
|
|
|
|
|
|
17 |
def generate_text(prompt, max_length=200):
|
18 |
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
19 |
-
|
|
|
20 |
return tokenizer.decode(output[0], skip_special_tokens=True)
|
21 |
|
22 |
def generate_speech(text):
|
@@ -24,110 +31,65 @@ def generate_speech(text):
|
|
24 |
tts.tts_to_file(text=text, file_path=output_path)
|
25 |
return output_path
|
26 |
|
27 |
-
def
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
character, action = line.split(':', 1)
|
36 |
-
current_scene["characters"].add(character.strip())
|
37 |
-
current_scene["actions"].append((character.strip(), action.strip()))
|
38 |
-
else:
|
39 |
-
if current_scene["actions"]:
|
40 |
-
scenes.append(current_scene)
|
41 |
-
current_scene = {"characters": set(), "actions": []}
|
42 |
-
|
43 |
-
if current_scene["actions"]:
|
44 |
-
scenes.append(current_scene)
|
45 |
-
|
46 |
-
return scenes
|
47 |
-
|
48 |
-
def create_stick_figure(ax, x, y, color):
|
49 |
-
circle = plt.Circle((x, y+0.1), 0.1, fc=color)
|
50 |
-
line = plt.Line2D([x, x], [y-0.3, y], color=color)
|
51 |
-
left_arm = plt.Line2D([x-0.2, x], [y, y-0.1], color=color)
|
52 |
-
right_arm = plt.Line2D([x, x+0.2], [y-0.1, y], color=color)
|
53 |
-
left_leg = plt.Line2D([x-0.1, x], [y-0.5, y-0.3], color=color)
|
54 |
-
right_leg = plt.Line2D([x, x+0.1], [y-0.3, y-0.5], color=color)
|
55 |
-
|
56 |
-
ax.add_artist(circle)
|
57 |
-
ax.add_artist(line)
|
58 |
-
ax.add_artist(left_arm)
|
59 |
-
ax.add_artist(right_arm)
|
60 |
-
ax.add_artist(left_leg)
|
61 |
-
ax.add_artist(right_leg)
|
62 |
-
|
63 |
-
def animate_scene(scene, ax):
|
64 |
-
characters = list(scene["characters"])
|
65 |
-
colors = plt.cm.get_cmap('Set3')(np.linspace(0, 1, len(characters)))
|
66 |
-
character_positions = {char: (i/(len(characters)-1) if len(characters) > 1 else 0.5, 0.5) for i, char in enumerate(characters)}
|
67 |
-
|
68 |
-
def init():
|
69 |
-
ax.clear()
|
70 |
-
ax.set_xlim(0, 1)
|
71 |
-
ax.set_ylim(0, 1)
|
72 |
-
ax.axis('off')
|
73 |
-
for char, (x, y) in character_positions.items():
|
74 |
-
create_stick_figure(ax, x, y, colors[characters.index(char)])
|
75 |
-
return []
|
76 |
-
|
77 |
-
def animate(frame):
|
78 |
-
ax.clear()
|
79 |
-
ax.set_xlim(0, 1)
|
80 |
-
ax.set_ylim(0, 1)
|
81 |
-
ax.axis('off')
|
82 |
-
|
83 |
-
action = scene["actions"][frame % len(scene["actions"])]
|
84 |
-
speaking_char, text = action
|
85 |
-
|
86 |
-
for char, (x, y) in character_positions.items():
|
87 |
-
if char == speaking_char:
|
88 |
-
y += 0.05 * np.sin(frame * 0.5) # Make the speaking character bounce
|
89 |
-
create_stick_figure(ax, x, y, colors[characters.index(char)])
|
90 |
-
|
91 |
-
ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)
|
92 |
-
return []
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
100 |
-
animations = [animate_scene(scene, ax) for scene in scenes]
|
101 |
-
|
102 |
-
# Combine all animations
|
103 |
-
combined_animation = animation.ArtistAnimation(fig, sum([anim._framedata for anim in animations], []), interval=1000, blit=True, repeat_delay=1000)
|
104 |
-
|
105 |
-
# Save animation as gif
|
106 |
-
combined_animation.save('character_animation.gif', writer='pillow')
|
107 |
-
|
108 |
-
return 'character_animation.gif'
|
109 |
|
110 |
def generate_comedy_animation(prompt):
|
111 |
script = generate_text(f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action.")
|
112 |
-
|
|
|
|
|
113 |
speech_path = generate_speech(script)
|
114 |
-
return script,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
# Gradio Interface
|
117 |
with gr.Blocks() as app:
|
118 |
-
gr.Markdown("##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
app.launch()
|
133 |
|
|
|
3 |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
4 |
from TTS.api import TTS
|
5 |
import numpy as np
|
6 |
+
from PIL import Image
|
7 |
+
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
|
8 |
+
from torchvision.io import write_video
|
9 |
+
import os
|
10 |
|
11 |
# Initialize text generation model (GPT-2)
|
12 |
tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
|
|
15 |
# Initialize TTS model
|
16 |
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
|
17 |
|
18 |
+
# Initialize Stable Diffusion pipeline.
# Pick the device at runtime: moving the pipeline to "cuda" raises on hosts
# without a GPU, and float16 weights are only a safe choice on CUDA, so fall
# back to float32 on CPU instead of crashing at import time.
sd_device = "cuda" if torch.cuda.is_available() else "cpu"
sd_dtype = torch.float16 if sd_device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=sd_dtype)
# Replace the default scheduler with the multistep DPM-Solver one, reusing
# the default scheduler's configuration.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(sd_device)
|
22 |
+
|
23 |
def generate_text(prompt, max_length=200):
    """Generate text for ``prompt`` using the module-level tokenizer and model.

    The prompt is encoded to a tensor, passed to ``model.generate`` together
    with an all-ones attention mask, and the first returned sequence is
    decoded with special tokens skipped.

    Args:
        prompt: Text to encode and feed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded first sequence as a string.
    """
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
    generation_kwargs = dict(
        attention_mask=torch.ones_like(prompt_ids),
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # The tokenizer's EOS id is passed as the padding id.
        pad_token_id=tokenizer.eos_token_id,
    )
    sequences = model.generate(prompt_ids, **generation_kwargs)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
|
28 |
|
29 |
def generate_speech(text):
|
|
|
31 |
tts.tts_to_file(text=text, file_path=output_path)
|
32 |
return output_path
|
33 |
|
34 |
+
def generate_video_frames(prompt, num_frames=30):
    """Render ``num_frames`` images for ``prompt`` and return them as arrays.

    Each frame comes from its own call to the module-level ``pipe``; the
    1-based frame position is appended to the prompt text to vary it.

    Args:
        prompt: Base text prompt shared by all frames.
        num_frames: Number of images to generate.

    Returns:
        A list with one ``np.array`` per generated image, in frame order.
    """
    def render(position):
        # Build the per-frame prompt, then convert the first image to an array.
        varied_prompt = f"{prompt}, frame {position} of {num_frames}"
        return np.array(pipe(varied_prompt).images[0])

    return [render(index + 1) for index in range(num_frames)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
+
def create_video_from_frames(frames, output_path="output_video.mp4", fps=10):
    """Encode a sequence of image frames into a video file.

    Args:
        frames: Non-empty sequence of equally sized frames. Assumed to be
            height x width x channel uint8 arrays (what ``np.array`` yields
            for an RGB image) -- confirm against the caller.
        output_path: Destination file for the encoded video.
        fps: Frames per second of the written video.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # len() rather than truthiness so an ndarray input is also accepted.
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")
    # write_video takes frames in (T, H, W, C) layout, which is exactly what
    # stacking the per-frame arrays produces; permuting to channels-first
    # would hand the encoder a mis-shaped tensor.
    frames_tensor = torch.from_numpy(np.asarray(frames))
    write_video(output_path, frames_tensor, fps=fps)
    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def generate_comedy_animation(prompt):
    """Produce a comedy script, a video and speech audio for ``prompt``.

    The script is generated first, then the video frames are rendered and
    encoded, and finally the script is passed to ``generate_speech``.

    Returns:
        A ``(script, video_path, speech_path)`` tuple.
    """
    script_request = (
        f"Write a short comedy script with two characters about {prompt}. "
        "Use the format 'Character: Action' for each line of dialogue or action."
    )
    script_text = generate_text(script_request)
    scene_frames = generate_video_frames(f"A comedic scene with two characters: {prompt}")
    rendered_video = create_video_from_frames(scene_frames)
    narration = generate_speech(script_text)
    return script_text, rendered_video, narration
|
55 |
+
|
56 |
+
def generate_kids_music_animation(theme):
    """Produce children's song lyrics, a video and speech audio for ``theme``.

    The lyrics are generated first, then the video frames are rendered and
    encoded, and finally the lyrics are passed to ``generate_speech``.

    Returns:
        A ``(lyrics, video_path, speech_path)`` tuple.
    """
    lyrics_request = (
        f"Write short and simple lyrics for a children's song about {theme}. "
        "Each line should be on a new line. "
        "Don't include 'Verse' or 'Chorus' labels."
    )
    song_lyrics = generate_text(lyrics_request)
    clip_frames = generate_video_frames(f"A colorful, animated music video for children about {theme}")
    rendered_video = create_video_from_frames(clip_frames)
    sung_audio = generate_speech(song_lyrics)
    return song_lyrics, rendered_video, sung_audio
|
63 |
|
64 |
# Gradio Interface
|
65 |
with gr.Blocks() as app:
    gr.Markdown("## AI-Generated Video and Audio Content")

    # Tab 1: comedy prompt in; script text, video and speech audio out.
    with gr.Tab("Comedy Animation"):
        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
        comedy_generate_btn = gr.Button("Generate Comedy Animation")
        comedy_script = gr.Textbox(label="Generated Comedy Script")
        comedy_animation = gr.Video(label="Comedy Animation")
        comedy_audio = gr.Audio(label="Comedy Speech")

        # Output order matches the tuple returned by generate_comedy_animation.
        comedy_generate_btn.click(fn=generate_comedy_animation, inputs=comedy_prompt, outputs=[comedy_script, comedy_animation, comedy_audio])

    # Tab 2: song theme in; lyrics text, video and speech audio out.
    with gr.Tab("Kids Music Animation"):
        music_theme = gr.Textbox(label="Enter music theme for kids")
        music_generate_btn = gr.Button("Generate Kids Music Animation")
        music_lyrics = gr.Textbox(label="Generated Lyrics")
        music_animation = gr.Video(label="Music Animation")
        music_audio = gr.Audio(label="Music Audio")

        # Output order matches the tuple returned by generate_kids_music_animation.
        music_generate_btn.click(fn=generate_kids_music_animation, inputs=music_theme, outputs=[music_lyrics, music_animation, music_audio])

# Start the Gradio server for the interface defined above.
app.launch()
|
95 |
|