ruizhaocv committed on
Commit
ab23a40
1 Parent(s): fc58c48

Upload 2 files

Browse files
demo/MotionDirector_gradio.py CHANGED
@@ -21,12 +21,14 @@ with gr.Blocks() as demo:
21
  </a>
22
  <div>
23
  <h1 >MotionDirector: Motion Customization of Text-to-Video Diffusion Models</h1>
24
- <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned 🔥! Give us a star ✨ on Github for the latest update.</h5>
 
25
  </br>
26
  <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
27
- <a href="https://arxiv.org/abs/2310.08465"><img src="https://img.shields.io/badge/arXiv-MotionDirector-b31b1b.svg"></a>&nbsp;&nbsp;
28
- <a href='https://showlab.github.io/MotionDirector'><img src='https://img.shields.io/badge/Project_Page-MotionDirector-green'></a>&nbsp;&nbsp;
29
- <a href='https://github.com/showlab/MotionDirector'><img src='https://img.shields.io/badge/Github-MotionDirector-blue'></a>&nbsp;&nbsp;
 
30
  </div>
31
  </div>
32
  </div>
@@ -43,15 +45,24 @@ with gr.Blocks() as demo:
43
 
44
  with gr.Row():
45
  model_select = gr.Dropdown(
46
- ["1-1: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom)",
47
  "1-2: [Cinematic Shots] -- Zoom In",
48
  "1-3: [Cinematic Shots] -- Zoom Out",
 
 
 
 
 
 
 
 
49
  "2-1: [Object Trajectory] -- Right to Left",
50
  "2-2: [Object Trajectory] -- Left to Right",
51
  "3-1: [Sports Concepts] -- Riding Bicycle",
52
  "3-2: [Sports Concepts] -- Riding Horse",
53
  "3-3: [Sports Concepts] -- Lifting Weights",
54
- "3-4: [Sports Concepts] -- Playing Golf"
 
55
  ],
56
  label="MotionDirector",
57
  info="Which MotionDirector would you like to use!"
@@ -74,19 +85,31 @@ with gr.Blocks() as demo:
74
  gr.Examples(
75
  fn=motiondirector,
76
  examples=[
77
- ["1-1: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom)", "A lion sitting on top of a cliff captured with a dolly zoom.", 1675932],
78
- ["1-2: [Cinematic Shots] -- Zoom In", "A firefighter standing in front of a burning forest captured with a zoom in.", 1429227],
79
- ["1-3: [Cinematic Shots] -- Zoom Out", "A lion sitting on top of a cliff captured with a zoom out.", 1767994],
80
- ["2-1: [Object Trajectory] -- Right to Left", "A tank is running on the moon.", 8551187],
 
 
 
 
 
 
 
 
 
 
 
81
  ["2-2: [Object Trajectory] -- Left to Right", "A tiger is running in the forest.", 3463673],
82
  ["3-1: [Sports Concepts] -- Riding Bicycle", "An astronaut is riding a bicycle past the pyramids Mars 4K high quailty highly detailed.", 4422954],
83
  ["3-2: [Sports Concepts] -- Riding Horse", "A man riding an elephant through the jungle.", 6230765],
84
  ["3-3: [Sports Concepts] -- Lifting Weights", "A panda is lifting weights in a garden.", 1699276],
85
- ["3-4: [Sports Concepts] -- Playing Golf", "A man is playing golf in front of the White House.", 8870450],
 
86
  ],
87
  inputs=[model_select, text_pormpt, random_seed],
88
  outputs=generated_video,
89
  )
90
 
91
  demo.queue(max_size=15)
92
- demo.launch(share=False)
 
21
  </a>
22
  <div>
23
  <h1 >MotionDirector: Motion Customization of Text-to-Video Diffusion Models</h1>
24
+ <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned 🔥!</h5>
25
+ <h5 style="margin: 0;"> If you like our project, please give us a star ✨ on Github for the latest update.</h5>
26
  </br>
27
  <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
28
+ <a href="https://arxiv.org/abs/2310.08465"></a>
29
+ <a href="https://arxiv.org/abs/2310.08465"><img src="https://img.shields.io/badge/arXiv-2310.08465-b31b1b.svg"></a>&nbsp;&nbsp;
30
+ <a href="https://showlab.github.io/MotionDirector"><img src="https://img.shields.io/badge/Project_Page-MotionDirector-green"></a>&nbsp;&nbsp;
31
+ <a href="https://github.com/showlab/MotionDirector"><img src="https://img.shields.io/badge/Github-Code-blue"></a>&nbsp;&nbsp;
32
  </div>
33
  </div>
34
  </div>
 
45
 
46
  with gr.Row():
47
  model_select = gr.Dropdown(
48
+ ["1-1: [Cinematic Shots] -- Zoom Out",
49
  "1-2: [Cinematic Shots] -- Zoom In",
50
  "1-3: [Cinematic Shots] -- Zoom Out",
51
+ "1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1",
52
+ "1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2",
53
+ "1-5: [Cinematic Shots] -- Follow",
54
+ "1-6: [Cinematic Shots] -- Reverse Follow",
55
+ "1-7: [Cinematic Shots] -- Chest Transition",
56
+ "1-8: [Cinematic Shots] -- Mini Jib Reveal",
57
+ "1-9: [Cinematic Shots] -- Orbit",
58
+ "1-10: [Cinematic Shots] -- Pull Back",
59
  "2-1: [Object Trajectory] -- Right to Left",
60
  "2-2: [Object Trajectory] -- Left to Right",
61
  "3-1: [Sports Concepts] -- Riding Bicycle",
62
  "3-2: [Sports Concepts] -- Riding Horse",
63
  "3-3: [Sports Concepts] -- Lifting Weights",
64
+ "3-4: [Sports Concepts] -- Playing Golf",
65
+ "3-5: [Sports Concepts] -- Skateboarding",
66
  ],
67
  label="MotionDirector",
68
  info="Which MotionDirector would you like to use!"
 
85
  gr.Examples(
86
  fn=motiondirector,
87
  examples=[
88
+ ["1-1: [Cinematic Shots] -- Zoom Out", "A spaceman standing on the moon captured with a zoom out.",
89
+ 8323920],
90
+ ["1-2: [Cinematic Shots] -- Zoom In", "A polar bear standing at the top of a snowy mountain captured with a zoom in.", 7938587],
91
+ ["1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1", "A panda standing in front of an ancient Chinese temple captured with a dolly zoom.", 8238823],
92
+ ["1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2", "A lion sitting on top of a cliff captured with a dolly zoom.", 1675932],
93
+ ["1-5: [Cinematic Shots] -- Follow", "A fireman is walking through fire captured with a follow cinematic shot.", 2927089],
94
+ ["1-6: [Cinematic Shots] -- Reverse Follow", "A fireman is walking through fire captured with a reverse follow cinematic shot.", 9759630],
95
+ ["1-7: [Cinematic Shots] -- Chest Transition", "An ancient Roman soldier walks through the crowd on the street captured with a chest transition cinematic shot.", 3982271],
96
+ ["1-8: [Cinematic Shots] -- Mini Jib Reveal",
97
+ "A British Redcoat soldier is walking through the mountains captured with a mini jib reveal cinematic shot.",
98
+ 566917],
99
+ ["1-9: [Cinematic Shots] -- Orbit", "A spaceman on the moon captured with an orbit cinematic shot.", 5899496],
100
+ ["1-10: [Cinematic Shots] -- Pull Back", "A spaceman on the moon looking at a lunar rover captured with a pull back cinematic shot.",
101
+ 5585865],
102
+ ["2-1: [Object Trajectory] -- Right to Left", "A tank is running on the moon.", 2047046],
103
  ["2-2: [Object Trajectory] -- Left to Right", "A tiger is running in the forest.", 3463673],
104
  ["3-1: [Sports Concepts] -- Riding Bicycle", "An astronaut is riding a bicycle past the pyramids Mars 4K high quailty highly detailed.", 4422954],
105
  ["3-2: [Sports Concepts] -- Riding Horse", "A man riding an elephant through the jungle.", 6230765],
106
  ["3-3: [Sports Concepts] -- Lifting Weights", "A panda is lifting weights in a garden.", 1699276],
107
+ ["3-4: [Sports Concepts] -- Playing Golf", "A monkey is playing golf on a field full of flowers.", 4156856],
108
+ ["3-5: [Sports Concepts] -- Skateboarding", "An astronaut is skateboarding on Mars.", 6615212],
109
  ],
110
  inputs=[model_select, text_pormpt, random_seed],
111
  outputs=generated_video,
112
  )
113
 
114
  demo.queue(max_size=15)
115
+ demo.launch(share=True)
demo/motiondirector.py CHANGED
@@ -85,11 +85,25 @@ def prepare_input_latents(
85
  height: int,
86
  width: int,
87
  latents_path:str,
88
- noise_prior: float
 
89
  ):
90
  # initialize with random gaussian noise
91
  scale = pipe.vae_scale_factor
92
  shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  if noise_prior > 0.:
94
  cached_latents = torch.load(latents_path)
95
  if 'inversion_noise' not in cached_latents:
@@ -139,20 +153,6 @@ class MotionDirector():
139
  latents_path = f"{latents_folder}/{random.choice(os.listdir(latents_folder))}"
140
  assert os.path.exists(lora_path)
141
 
142
- if '1-' in model_select:
143
- noise_prior = 0.3
144
- elif '2-' in model_select:
145
- noise_prior = 0.5
146
- elif '3-' in model_select:
147
- noise_prior = 0.
148
- else:
149
- noise_prior = 0.
150
-
151
- if random_seed > 1000:
152
- torch.manual_seed(random_seed)
153
- else:
154
- random_seed = random.randint(100, 10000000)
155
- torch.manual_seed(random_seed)
156
  device = "cuda"
157
  with torch.autocast(device, dtype=torch.half):
158
  # prepare input latents
@@ -164,7 +164,8 @@ class MotionDirector():
164
  height=384,
165
  width=384,
166
  latents_path=latents_path,
167
- noise_prior=noise_prior
 
168
  )
169
  video_frames = self.pipe(
170
  prompt=text_pormpt,
@@ -177,7 +178,6 @@ class MotionDirector():
177
  latents=init_latents
178
  ).frames
179
 
180
-
181
  out_file = f"{out_name}_{random_seed}.mp4"
182
  os.makedirs(os.path.dirname(out_file), exist_ok=True)
183
  export_to_video(video_frames, out_file, 8)
 
85
  height: int,
86
  width: int,
87
  latents_path:str,
88
+ model_select: str,
89
+ random_seed: int,
90
  ):
91
  # initialize with random gaussian noise
92
  scale = pipe.vae_scale_factor
93
  shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)
94
+ if random_seed > 1000:
95
+ torch.manual_seed(random_seed)
96
+ else:
97
+ random_seed = random.randint(100, 10000000)
98
+ torch.manual_seed(random_seed)
99
+ if '1-' in model_select:
100
+ noise_prior = 0.3
101
+ elif '2-' in model_select:
102
+ noise_prior = 0.5
103
+ elif '3-' in model_select:
104
+ noise_prior = 0.
105
+ else:
106
+ noise_prior = 0.
107
  if noise_prior > 0.:
108
  cached_latents = torch.load(latents_path)
109
  if 'inversion_noise' not in cached_latents:
 
153
  latents_path = f"{latents_folder}/{random.choice(os.listdir(latents_folder))}"
154
  assert os.path.exists(lora_path)
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  device = "cuda"
157
  with torch.autocast(device, dtype=torch.half):
158
  # prepare input latents
 
164
  height=384,
165
  width=384,
166
  latents_path=latents_path,
167
+ model_select=model_select,
168
+ random_seed=random_seed
169
  )
170
  video_frames = self.pipe(
171
  prompt=text_pormpt,
 
178
  latents=init_latents
179
  ).frames
180
 
 
181
  out_file = f"{out_name}_{random_seed}.mp4"
182
  os.makedirs(os.path.dirname(out_file), exist_ok=True)
183
  export_to_video(video_frames, out_file, 8)