Space status: Runtime error
Update app.py
app.py CHANGED
@@ -3,14 +3,8 @@ import torch
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from TTS.api import TTS
 import numpy as np
-import librosa
-import soundfile as sf
 import matplotlib.pyplot as plt
 import matplotlib.animation as animation
-from mpl_toolkits.mplot3d import Axes3D
-import io
-import base64
-import os
 import re

 # Initialize text generation model (GPT-2)
@@ -20,109 +14,120 @@ model = AutoModelForCausalLM.from_pretrained("gpt2")
 # Initialize TTS model
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

-
-def generate_text(prompt, max_length=100):
+def generate_text(prompt, max_length=200):
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
     output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2)
     return tokenizer.decode(output[0], skip_special_tokens=True)

-# Function to generate speech using TTS
 def generate_speech(text):
     output_path = "generated_speech.wav"
     tts.tts_to_file(text=text, file_path=output_path)
     return output_path

-
-
-
-
-
-
-
-
+def parse_script(script):
+    lines = script.split('\n')
+    scenes = []
+    current_scene = {"characters": set(), "actions": []}
+
+    for line in lines:
+        if line.strip():
+            if ':' in line:
+                character, action = line.split(':', 1)
+                current_scene["characters"].add(character.strip())
+                current_scene["actions"].append((character.strip(), action.strip()))
+        else:
+            if current_scene["actions"]:
+                scenes.append(current_scene)
+                current_scene = {"characters": set(), "actions": []}
+
+    if current_scene["actions"]:
+        scenes.append(current_scene)
+
+    return scenes
+
+def create_stick_figure(ax, x, y, color):
+    circle = plt.Circle((x, y+0.1), 0.1, fc=color)
+    line = plt.Line2D([x, x], [y-0.3, y], color=color)
+    left_arm = plt.Line2D([x-0.2, x], [y, y-0.1], color=color)
+    right_arm = plt.Line2D([x, x+0.2], [y-0.1, y], color=color)
+    left_leg = plt.Line2D([x-0.1, x], [y-0.5, y-0.3], color=color)
+    right_leg = plt.Line2D([x, x+0.1], [y-0.3, y-0.5], color=color)

-
-
-
+    ax.add_artist(circle)
+    ax.add_artist(line)
+    ax.add_artist(left_arm)
+    ax.add_artist(right_arm)
+    ax.add_artist(left_leg)
+    ax.add_artist(right_leg)
+
+def animate_scene(scene, ax):
+    characters = list(scene["characters"])
+    colors = plt.cm.get_cmap('Set3')(np.linspace(0, 1, len(characters)))
+    character_positions = {char: (i/(len(characters)-1) if len(characters) > 1 else 0.5, 0.5) for i, char in enumerate(characters)}
+
+    def init():
+        ax.clear()
+        ax.set_xlim(0, 1)
+        ax.set_ylim(0, 1)
+        ax.axis('off')
+        for char, (x, y) in character_positions.items():
+            create_stick_figure(ax, x, y, colors[characters.index(char)])
+        return []
+
+    def animate(frame):
+        ax.clear()
+        ax.set_xlim(0, 1)
+        ax.set_ylim(0, 1)
+        ax.axis('off')

-
-
-    for txt in ax.texts:
-        txt.remove()
-    ax.text2D(0.05, 0.95, keyword, transform=ax.transAxes)
+        action = scene["actions"][frame % len(scene["actions"])]
+        speaking_char, text = action

-
-
-
-
-
-
-
-    x = np.sin(t) * (1 + len(keywords)/20)
-    y = np.cos(t) * (1 + len(keywords)/20)
-    z = t/10
-
-    data = np.array([x, y, z])
-    line, = ax.plot(data[0, 0:1], data[1, 0:1], data[2, 0:1])
+        for char, (x, y) in character_positions.items():
+            if char == speaking_char:
+                y += 0.05 * np.sin(frame * 0.5)  # Make the speaking character bounce
+            create_stick_figure(ax, x, y, colors[characters.index(char)])
+
+        ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)
+        return []

-
-    ax.set_xlim3d([-2.0, 2.0])
-    ax.set_xlabel('X')
-    ax.set_ylim3d([-2.0, 2.0])
-    ax.set_ylabel('Y')
-    ax.set_zlim3d([0.0, 4.0])
-    ax.set_zlabel('Z')
+    return animation.FuncAnimation(fig, animate, init_func=init, frames=len(scene["actions"])*5, interval=1000, blit=True)

-
+def create_character_animation(script):
+    scenes = parse_script(script)
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+    animations = [animate_scene(scene, ax) for scene in scenes]
+
+    # Combine all animations
+    combined_animation = animation.ArtistAnimation(fig, sum([anim._framedata for anim in animations], []), interval=1000, blit=True, repeat_delay=1000)

     # Save animation as gif
-
+    combined_animation.save('character_animation.gif', writer='pillow')

     return 'character_animation.gif'

-# Main function to generate comedy animation
 def generate_comedy_animation(prompt):
-    script = generate_text(f"Write a short comedy script about {prompt}: ")
+    script = generate_text(f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action.")
     animation_path = create_character_animation(script)
     speech_path = generate_speech(script)
     return script, animation_path, speech_path

-# Main function to generate kids music animation
-def generate_kids_music_animation(theme):
-    lyrics = generate_text(f"Write a short children's song about {theme}: ")
-    animation_path = create_character_animation(lyrics)
-    speech_path = generate_speech(lyrics)
-    return lyrics, animation_path, speech_path
-
 # Gradio Interface
 with gr.Blocks() as app:
-    gr.Markdown("##
-
-    with gr.Tab("Comedy Animation"):
-        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
-        comedy_generate_btn = gr.Button("Generate Comedy Animation")
-        comedy_script = gr.Textbox(label="Generated Comedy Script")
-        comedy_animation = gr.Image(label="Comedy Animation")
-        comedy_audio = gr.Audio(label="Comedy Speech")
-
-        comedy_generate_btn.click(
-            generate_comedy_animation,
-            inputs=comedy_prompt,
-            outputs=[comedy_script, comedy_animation, comedy_audio]
-        )
+    gr.Markdown("## Character-based Animation Generator")

-
-
-
-
-
-
-
-
-
-
-
-        )
+    comedy_prompt = gr.Textbox(label="Enter comedy prompt")
+    comedy_generate_btn = gr.Button("Generate Comedy Animation")
+    comedy_script = gr.Textbox(label="Generated Comedy Script")
+    comedy_animation = gr.Image(label="Comedy Animation")
+    comedy_audio = gr.Audio(label="Comedy Speech")
+
+    comedy_generate_btn.click(
+        generate_comedy_animation,
+        inputs=comedy_prompt,
+        outputs=[comedy_script, comedy_animation, comedy_audio]
+    )

 app.launch()

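The rewritten prompt in generate_comedy_animation asks GPT-2 for one 'Character: Action' line per beat, which is exactly what the new parse_script helper splits on: blank lines close a scene, and each 'Name: text' line becomes an (actor, action) pair. A minimal usage sketch, assuming the parse_script definition from this commit is in scope; the sample script text is invented for illustration only.

# Usage sketch for the parse_script helper added in this commit.
# The sample script below is made up for illustration.
sample_script = """Alice: walks in carrying a giant rubber duck
Bob: stares at the duck

Bob: asks where the duck came from
Alice: shrugs dramatically"""

scenes = parse_script(sample_script)

# The blank line closes the first scene, so two scenes come back:
print(len(scenes))              # 2
print(scenes[0]["characters"])  # {'Alice', 'Bob'} (a set, order may vary)
print(scenes[1]["actions"][0])  # ('Bob', 'asks where the duck came from')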
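As committed, create_character_animation may still fail at runtime: inside animate_scene, the returned FuncAnimation references a fig name that is never defined in that function's scope, and the ArtistAnimation call reads anim._framedata, a private attribute that FuncAnimation objects are not documented to provide. Below is a minimal restructuring sketch, not the author's code, that drives a single FuncAnimation over every parsed action and relies only on the committed parse_script and create_stick_figure helpers; the "Narrator" fallback is an added assumption for scripts where GPT-2 ignores the requested format.

# Hypothetical restructuring sketch (not the committed code).
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

def create_character_animation(script):
    scenes = parse_script(script)                      # helper from this commit
    actions = [a for scene in scenes for a in scene["actions"]]
    if not actions:                                    # GPT-2 may ignore the requested format
        actions = [("Narrator", script.strip()[:80])]
    characters = sorted({char for char, _ in actions})
    colors = plt.cm.Set3(np.linspace(0, 1, len(characters)))
    positions = {c: ((i / (len(characters) - 1)) if len(characters) > 1 else 0.5, 0.5)
                 for i, c in enumerate(characters)}

    fig, ax = plt.subplots(figsize=(10, 6))

    def animate(frame):
        ax.clear()
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        speaking_char, text = actions[frame % len(actions)]
        for char, (x, y) in positions.items():
            if char == speaking_char:
                y += 0.05 * np.sin(frame * 0.5)        # bounce the speaking character
            create_stick_figure(ax, x, y, colors[characters.index(char)])  # helper from this commit
        ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)

    anim = animation.FuncAnimation(fig, animate, frames=len(actions) * 5, interval=1000)
    anim.save('character_animation.gif', writer='pillow')
    plt.close(fig)
    return 'character_animation.gif'

Saving through the pillow writer keeps the output a GIF file path, which the existing gr.Image output component can display unchanged.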