Manasa1 committed (verified)
Commit 0b60483 · Parent(s): 859e5f2

Update app.py

Files changed (1): app.py (+60 −98)
app.py CHANGED
@@ -3,9 +3,10 @@ import torch
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from TTS.api import TTS
 import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.animation as animation
-import re
+from PIL import Image
+from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
+from torchvision.io import write_video
+import os
 
 # Initialize text generation model (GPT-2)
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
@@ -14,9 +15,15 @@ model = AutoModelForCausalLM.from_pretrained("gpt2")
 # Initialize TTS model
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
 
+# Initialize Stable Diffusion pipeline
+pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+pipe = pipe.to("cuda")
+
 def generate_text(prompt, max_length=200):
     input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2)
+    attention_mask = torch.ones_like(input_ids)
+    output = model.generate(input_ids, attention_mask=attention_mask, max_length=max_length, num_return_sequences=1, no_repeat_ngram_size=2, pad_token_id=tokenizer.eos_token_id)
     return tokenizer.decode(output[0], skip_special_tokens=True)
 
 def generate_speech(text):
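
Review note on the pipeline initialization above: pipe.to("cuda") assumes a GPU is always available, and float16 weights will not run on CPU, so this revision fails outright on CPU-only machines. A minimal fallback sketch under that assumption (the device and dtype names are illustrative, not part of this commit):

import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

# float16 is only safe on GPU; fall back to float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=dtype)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)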
@@ -24,110 +31,65 @@ def generate_speech(text):
     tts.tts_to_file(text=text, file_path=output_path)
     return output_path
 
-def parse_script(script):
-    lines = script.split('\n')
-    scenes = []
-    current_scene = {"characters": set(), "actions": []}
-
-    for line in lines:
-        if line.strip():
-            if ':' in line:
-                character, action = line.split(':', 1)
-                current_scene["characters"].add(character.strip())
-                current_scene["actions"].append((character.strip(), action.strip()))
-            else:
-                if current_scene["actions"]:
-                    scenes.append(current_scene)
-                    current_scene = {"characters": set(), "actions": []}
-
-    if current_scene["actions"]:
-        scenes.append(current_scene)
-
-    return scenes
-
-def create_stick_figure(ax, x, y, color):
-    circle = plt.Circle((x, y+0.1), 0.1, fc=color)
-    line = plt.Line2D([x, x], [y-0.3, y], color=color)
-    left_arm = plt.Line2D([x-0.2, x], [y, y-0.1], color=color)
-    right_arm = plt.Line2D([x, x+0.2], [y-0.1, y], color=color)
-    left_leg = plt.Line2D([x-0.1, x], [y-0.5, y-0.3], color=color)
-    right_leg = plt.Line2D([x, x+0.1], [y-0.3, y-0.5], color=color)
-
-    ax.add_artist(circle)
-    ax.add_artist(line)
-    ax.add_artist(left_arm)
-    ax.add_artist(right_arm)
-    ax.add_artist(left_leg)
-    ax.add_artist(right_leg)
-
-def animate_scene(scene, ax):
-    characters = list(scene["characters"])
-    colors = plt.cm.get_cmap('Set3')(np.linspace(0, 1, len(characters)))
-    character_positions = {char: (i/(len(characters)-1) if len(characters) > 1 else 0.5, 0.5) for i, char in enumerate(characters)}
-
-    def init():
-        ax.clear()
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
-        for char, (x, y) in character_positions.items():
-            create_stick_figure(ax, x, y, colors[characters.index(char)])
-        return []
-
-    def animate(frame):
-        ax.clear()
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
-
-        action = scene["actions"][frame % len(scene["actions"])]
-        speaking_char, text = action
-
-        for char, (x, y) in character_positions.items():
-            if char == speaking_char:
-                y += 0.05 * np.sin(frame * 0.5)  # Make the speaking character bounce
-            create_stick_figure(ax, x, y, colors[characters.index(char)])
-
-        ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)
-        return []
-
-    return animation.FuncAnimation(fig, animate, init_func=init, frames=len(scene["actions"])*5, interval=1000, blit=True)
-
-def create_character_animation(script):
-    scenes = parse_script(script)
-
-    fig, ax = plt.subplots(figsize=(10, 6))
-    animations = [animate_scene(scene, ax) for scene in scenes]
-
-    # Combine all animations
-    combined_animation = animation.ArtistAnimation(fig, sum([anim._framedata for anim in animations], []), interval=1000, blit=True, repeat_delay=1000)
-
-    # Save animation as gif
-    combined_animation.save('character_animation.gif', writer='pillow')
-
-    return 'character_animation.gif'
+def generate_video_frames(prompt, num_frames=30):
+    frames = []
+    for i in range(num_frames):
+        # Add some variation to the prompt for each frame
+        frame_prompt = f"{prompt}, frame {i+1} of {num_frames}"
+        image = pipe(frame_prompt).images[0]
+        frames.append(np.array(image))
+    return frames
+
+def create_video_from_frames(frames, output_path="output_video.mp4", fps=10):
+    frames_tensor = torch.from_numpy(np.array(frames))  # write_video expects uint8 frames in [T, H, W, C] order
+    write_video(output_path, frames_tensor, fps=fps)
+    return output_path
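
Review note on generate_video_frames: each pipe(frame_prompt) call samples a fresh random latent, so the 30 frames come out as essentially unrelated still images, and 30 runs at the default step count are slow. One possible mitigation, sketched with an illustrative fixed seed and reduced step count (the seed and steps parameters are assumptions, not part of this commit):

def generate_video_frames(prompt, num_frames=30, seed=0, steps=20):
    frames = []
    for i in range(num_frames):
        frame_prompt = f"{prompt}, frame {i+1} of {num_frames}"
        # Re-seed per frame so every frame starts from the same latent;
        # only the prompt varies, which preserves some visual continuity.
        generator = torch.Generator(device="cuda").manual_seed(seed)
        image = pipe(frame_prompt, generator=generator, num_inference_steps=steps).images[0]
        frames.append(np.array(image))
    return frames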
 
 def generate_comedy_animation(prompt):
     script = generate_text(f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action.")
-    animation_path = create_character_animation(script)
+    video_prompt = f"A comedic scene with two characters: {prompt}"
+    frames = generate_video_frames(video_prompt)
+    video_path = create_video_from_frames(frames)
     speech_path = generate_speech(script)
-    return script, animation_path, speech_path
+    return script, video_path, speech_path
+
+def generate_kids_music_animation(theme):
+    lyrics = generate_text(f"Write short and simple lyrics for a children's song about {theme}. Each line should be on a new line. Don't include 'Verse' or 'Chorus' labels.")
+    video_prompt = f"A colorful, animated music video for children about {theme}"
+    frames = generate_video_frames(video_prompt)
+    video_path = create_video_from_frames(frames)
+    speech_path = generate_speech(lyrics)
+    return lyrics, video_path, speech_path
 
 # Gradio Interface
 with gr.Blocks() as app:
-    gr.Markdown("## Character-based Animation Generator")
+    gr.Markdown("## AI-Generated Video and Audio Content")
 
-    comedy_prompt = gr.Textbox(label="Enter comedy prompt")
-    comedy_generate_btn = gr.Button("Generate Comedy Animation")
-    comedy_script = gr.Textbox(label="Generated Comedy Script")
-    comedy_animation = gr.Image(label="Comedy Animation")
-    comedy_audio = gr.Audio(label="Comedy Speech")
+    with gr.Tab("Comedy Animation"):
+        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
+        comedy_generate_btn = gr.Button("Generate Comedy Animation")
+        comedy_script = gr.Textbox(label="Generated Comedy Script")
+        comedy_animation = gr.Video(label="Comedy Animation")
+        comedy_audio = gr.Audio(label="Comedy Speech")
 
-    comedy_generate_btn.click(
-        generate_comedy_animation,
-        inputs=comedy_prompt,
-        outputs=[comedy_script, comedy_animation, comedy_audio]
-    )
+        comedy_generate_btn.click(
+            generate_comedy_animation,
+            inputs=comedy_prompt,
+            outputs=[comedy_script, comedy_animation, comedy_audio]
+        )
+
+    with gr.Tab("Kids Music Animation"):
+        music_theme = gr.Textbox(label="Enter music theme for kids")
+        music_generate_btn = gr.Button("Generate Kids Music Animation")
+        music_lyrics = gr.Textbox(label="Generated Lyrics")
+        music_animation = gr.Video(label="Music Animation")
+        music_audio = gr.Audio(label="Music Audio")
+
+        music_generate_btn.click(
+            generate_kids_music_animation,
+            inputs=music_theme,
+            outputs=[music_lyrics, music_animation, music_audio]
+        )
 
 app.launch()
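
Review note on the interface: each button click now triggers dozens of diffusion runs plus TTS synthesis, so a single request can take minutes. Gradio's built-in queue keeps such long jobs from hitting request timeouts; a small sketch using the standard Blocks API (not part of this commit):

app.queue()   # serialize long-running generation requests
app.launch()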