jbilcke-hf (HF staff) committed f9a691e
Parent(s): 7e1bff8

Update app.py

Files changed (1):
  1. app.py +92 -12

app.py CHANGED
@@ -3,12 +3,17 @@ import torch
 import os
 import base64
 import uuid
+import tempfile
+import numpy as np
+import cv2
+import subprocess
 
 from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 from PIL import Image
 
+
 SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
 
 # Constants
@@ -29,9 +34,11 @@ dtype = torch.float16
 pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
 pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
 
-import tempfile
-import numpy as np
-import cv2
+# ----------------------------- VIDEO ENCODING ---------------------------------
+# Unfortunately, the Hugging Face Diffusers utils hardcode MP4V as a codec,
+# which is not supported by all browsers. This is a critical issue for AiTube,
+# so we are forced to implement our own encoding algorithm.
+# ------------------------------------------------------------------------------
 
 def export_to_video_file(video_frames, output_video_path=None, fps=10):
     if output_video_path is None:
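The body of export_to_video_file falls between these two hunks and is untouched by this commit, so the diff elides it. For context, here is a minimal sketch of the kind of encoder the banner comment describes, writing VP8/WebM via OpenCV instead of the mp4v default; the body below is an assumption for illustration, not the committed implementation:

import tempfile
import numpy as np
import cv2

def export_to_video_file(video_frames, output_video_path=None, fps=10):
    # Hypothetical body: the committed implementation is not shown in this diff.
    if output_video_path is None:
        output_video_path = tempfile.NamedTemporaryFile(suffix=".webm", delete=False).name

    # AnimateDiff returns PIL images; normalize to uint8 RGB arrays for OpenCV.
    frames = [np.asarray(frame, dtype=np.uint8) for frame in video_frames]
    height, width = frames[0].shape[:2]

    # VP80 (VP8 in a WebM container) plays natively in browsers, unlike mp4v.
    fourcc = cv2.VideoWriter_fourcc(*"VP80")
    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    for frame in frames:
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR
    writer.release()

    return output_video_path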
@@ -55,7 +62,76 @@ def export_to_video_file(video_frames, output_video_path=None, fps=10):
 
     return output_video_path
 
-def generate_image(secret_token, prompt, base, width, height, motion, step):
+# ----------------------------- FRAME INTERPOLATION ---------------------------------
+# We cannot afford to use AI-based algorithms such as FILM or ST-MFNet;
+# those are way too slow for AiTube, which needs things to be as fast as possible.
+# -----------------------------------------------------------------------------------
+
+def interpolate_video_frames(file_path, output_fps=10, desired_duration=2):
+    """
+    Interpolates frames in a video file to adjust frame rate and duration using ffmpeg's minterpolate.
+
+    Parameters:
+    file_path (str): Path to the input video file.
+    output_fps (int): Target frames per second for the output video.
+    desired_duration (int): Desired duration of the video in seconds.
+
+    Returns:
+    str: The file path of the modified video.
+    """
+    # Calculate the input fps required to stretch the video to the desired duration
+    input_fps = find_input_fps(file_path, desired_duration)
+
+    # Construct the ffmpeg command for interpolation
+    cmd = [
+        'ffmpeg',
+        '-i', file_path,  # input file
+        '-filter:v', f'minterpolate=fps={output_fps}',  # minterpolate filter options
+        '-r', str(output_fps),  # output frame rate
+        '-y',  # overwrite output files without asking
+        file_path  # output file (overwrites the original)
+    ]
+
+    # Execute the command
+    try:
+        subprocess.run(cmd, check=True)
+        print("Video interpolation successful.")
+    except subprocess.CalledProcessError as e:
+        print("Failed to interpolate video. Error:", e)
+
+    return file_path
+
+def find_input_fps(file_path, desired_duration):
+    """
+    Determine the input fps that, when stretched to the desired duration, matches the original video length.
+
+    Parameters:
+    file_path (str): Path to the video file.
+    desired_duration (int or float): Desired duration in seconds.
+
+    Returns:
+    float: Calculated input fps.
+    """
+    # FFprobe command to find the duration of the video
+    ffprobe_cmd = [
+        'ffprobe',
+        '-v', 'error',
+        '-show_entries', 'format=duration',
+        '-of', 'default=noprint_wrappers=1:nokey=1',
+        file_path
+    ]
+
+    try:
+        result = subprocess.run(ffprobe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        duration = float(result.stdout.strip())
+        input_fps = duration / desired_duration
+    except Exception as e:
+        print("Failed to get video duration. Error:", e)
+        input_fps = 10  # assume a default value if unable to fetch duration
+
+    return input_fps
+
+def generate_image(secret_token, prompt, base, width, height, motion, step, desired_duration, desired_fps):
     if secret_token != SECRET_TOKEN:
         raise gr.Error(
             f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
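Note that, as committed, interpolate_video_frames computes input_fps but never passes it to the ffmpeg command, and the command reads from and writes to the same path, which ffmpeg cannot do (it truncates the output file before the input is fully read). A sketch of one way to get the stretch-then-interpolate behavior the docstring describes, writing to a temporary path first; the setpts approach and the file naming are assumptions, not part of the commit:

import os
import subprocess

def interpolate_video_frames_sketch(file_path, output_fps=10, desired_duration=2):
    # Probe the current duration, as the committed find_input_fps does.
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", file_path],
        capture_output=True, text=True, check=True)
    duration = float(probe.stdout.strip())

    # Stretch playback to the desired duration, then let minterpolate
    # synthesize the intermediate frames at the target fps.
    stretch = desired_duration / duration
    tmp_path = file_path + ".interp.webm"  # hypothetical naming scheme
    subprocess.run(
        ["ffmpeg", "-i", file_path,
         "-filter:v", f"setpts={stretch}*PTS,minterpolate=fps={output_fps}",
         "-r", str(output_fps), "-y", tmp_path],
        check=True)

    # ffmpeg cannot edit a file in place, so swap in the result afterwards.
    os.replace(tmp_path, file_path)
    return file_path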
@@ -98,7 +174,11 @@ def generate_image(secret_token, prompt, base, width, height, motion, step):
 
     # I think we are losing time here too, converting to webm is too slow; we should return
     # the frames unencoded to the frontend renderer
-    export_to_video_file(output.frames[0], path, fps=10)
+    path = export_to_video_file(output.frames[0], path, fps=10)
+
+    # Optional frame interpolation
+    if desired_duration != 2 or desired_fps != 10:
+        path = interpolate_video_frames(path, output_fps=desired_fps, desired_duration=desired_duration)
 
     # Read the content of the video file and encode it to base64
     with open(path, "rb") as video_file:
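The base64 step that follows is also elided by the diff; since generate_image returns a string into the output_video_base64 text field, it presumably reads roughly like this (a sketch; the helper name and the data-URI prefix are assumptions):

import base64

def encode_video_as_base64(path):
    # Hypothetical helper matching what the end of generate_image appears to do.
    with open(path, "rb") as video_file:
        video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
    # A data URI would let the frontend feed the payload straight into a
    # <video> element without a second request.
    return f"data:video/webm;base64,{video_base64}"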
@@ -139,22 +219,21 @@ with gr.Blocks() as demo:
             "ToonYou",
             "epiCRealism",
         ],
-        value=base_loaded,
-        interactive=True
+        value=base_loaded
     )
     width = gr.Slider(
         label='Width',
         minimum=128,
         maximum=2048,
         step=32,
-        value=1024,
+        value=512,
     )
     height = gr.Slider(
         label='Height',
         minimum=128,
         maximum=2048,
         step=32,
-        value=1024,
+        value=256,
     )
     select_motion = gr.Dropdown(
         label='Motion',
@@ -170,7 +249,6 @@
             ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
         ],
         value="",
-        interactive=True
     )
     select_step = gr.Dropdown(
         label='Inference steps',
@@ -180,15 +258,17 @@
             ('4-Step', 4),
             ('8-Step', 8)],
         value=4,
-        interactive=True
     )
+    duration_slider = gr.Slider(label="Desired Duration (seconds)", min_value=2, max_value=30, value=2, step=1)
+    fps_slider = gr.Slider(label="Desired Frames Per Second", min_value=10, max_value=60, value=10, step=1)
+
     submit = gr.Button()
 
     output_video_base64 = gr.Text()
 
     submit.click(
         fn=generate_image,
-        inputs=[secret_token, prompt, select_base, width, height, select_motion, select_step],
+        inputs=[secret_token, prompt, select_base, width, height, select_motion, select_step, duration_slider, fps_slider],
         outputs=output_video_base64,
     )
 
 
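One caveat on the two new sliders: gr.Slider's signature uses minimum and maximum, not min_value and max_value, so the ranges as committed are unlikely to take effect. The same controls under Gradio's documented keyword names would be:

duration_slider = gr.Slider(label="Desired Duration (seconds)", minimum=2, maximum=30, value=2, step=1)
fps_slider = gr.Slider(label="Desired Frames Per Second", minimum=10, maximum=60, value=10, step=1)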