# NOTE: This file was captured from a Hugging Face Spaces file view
# (Space status: paused; file size: 8,516 bytes). The page's commit-hash
# and line-number gutters were extraction residue and have been dropped.
import gradio as gr
import os
import subprocess
import cv2
import numpy as np
from moviepy.editor import VideoFileClip, concatenate_videoclips
import math
from huggingface_hub import snapshot_download
# Set before any model code runs. Presumably meant to make CUDA errors easier
# to trace -- TODO confirm this is still needed.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Hugging Face Hub repositories downloaded at import time.
model_ids = [
    'runwayml/stable-diffusion-v1-5',
    'lllyasviel/sd-controlnet-depth',
    'lllyasviel/sd-controlnet-canny',
    'lllyasviel/sd-controlnet-openpose',
]

# Each repository is stored under checkpoints/<last segment of its repo id>.
for model_id in model_ids:
    _, _, model_name = model_id.rpartition('/')
    snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
def get_frame_count(filepath):
    """Build the Gradio update that sizes the frame slider to a video.

    Args:
        filepath: Path of the uploaded video, or None when the video input
            has been cleared.

    Returns:
        ``gr.update(maximum=<frame count>)`` when a video is given, otherwise
        an update resetting the slider to value 1 with a maximum of 12.
    """
    if filepath is None:
        return gr.update(value=1, maximum=12)

    video = cv2.VideoCapture(filepath)
    try:
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if querying the property fails.
        video.release()

    # NOTE: a cap of 24 frames used to be applied here to avoid cuDNN errors;
    # it is currently disabled, so the full frame count is exposed.

    # An unreadable file reports no frames; never push the slider maximum
    # below 1, which is the slider's minimum.
    return gr.update(maximum=max(frame_count, 1))
def get_video_dimension(filepath):
    """Read the basic properties of a video file with OpenCV.

    Args:
        filepath: Path of the video to inspect.

    Returns:
        A ``(width, height, fps, frame_count)`` tuple; every entry is the
        OpenCV property value truncated to ``int``.
    """
    capture = cv2.VideoCapture(filepath)
    wanted = (
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FPS,
        cv2.CAP_PROP_FRAME_COUNT,
    )
    # Query all properties before the capture is released.
    dimensions = tuple(int(capture.get(prop)) for prop in wanted)
    capture.release()
    return dimensions
def resize_video(input_vid, output_vid, width, height, fps):
    """Re-encode a video with every frame resized to ``width`` x ``height``.

    Args:
        input_vid: Path of the video to read.
        output_vid: Path the resized video is written to (``mp4v`` codec).
        width: Target frame width in pixels.
        height: Target frame height in pixels.
        fps: Frame rate declared for the output video.

    Returns:
        ``output_vid``, unchanged, so the call can be used inline.
    """
    print("RESIZING ...")

    video = cv2.VideoCapture(input_vid)
    output_video = None
    try:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        output_video = cv2.VideoWriter(output_vid, fourcc, fps, (width, height))

        # Copy frames until the reader reports that no frame is left.
        while True:
            ret, frame = video.read()
            if not ret:
                break
            output_video.write(cv2.resize(frame, (width, height)))
    finally:
        # Always release both handles so a failure mid-way does not leave the
        # input locked or the output file unfinalised.
        video.release()
        if output_video is not None:
            output_video.release()

    print("RESIZE VIDEO DONE!")
    return output_vid
def normalize_and_save_video(input_video_path, output_video_path):
    """Re-encode a video after passing each frame through a [0, 1] round trip.

    Every frame is converted to float32, scaled to the range [0, 1], scaled
    back to [0, 255] and written with the ``mp4v`` codec at the input's size
    and frame rate.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path the re-encoded video is written to.

    Returns:
        ``output_video_path``, unchanged.
    """
    print("NORMALIZING ...")

    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # e.g. 'mp4v', 'XVID', 'MPEG'
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # Read until the decoder runs out of frames (same strategy as
        # resize_video) instead of trusting the container's frame-count
        # metadata.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Normalize pixel values to the range [0, 1].
            normalized = frame.astype(np.float32) / 255.0

            # Round before casting: a plain astype() truncates, so a value
            # that lands just below an integer after the float round trip
            # would otherwise lose one intensity level.
            restored = np.rint(normalized * 255.0).astype(np.uint8)

            out.write(restored)
    finally:
        # Release both handles even when reading or writing fails.
        cap.release()
        if out is not None:
            out.release()

    print("NORMALIZE DONE!")
    return output_video_path
def make_nearest_multiple_of_32(number):
    """Snap ``number`` to the closest multiple of 32.

    A value exactly halfway between two multiples (remainder 16) is rounded
    down, so inputs of 16 or less map to 0.

    Args:
        number: The value to snap.

    Returns:
        The multiple of 32 nearest to ``number``.
    """
    remainder = number % 32
    # Step back to the lower multiple, or forward to the next one.
    adjustment = -remainder if remainder <= 16 else 32 - remainder
    return number + adjustment
def run_inference(prompt, video_path, condition, video_length, seed, steps):
    """Resize the input video and run ``inference.py`` on it.

    The input is scaled to at most 512 pixels wide (keeping the aspect
    ratio), snapped to multiples of 32, capped at 12 fps and written to
    ``resized.mp4``. ``inference.py`` is then run as a subprocess and is
    expected to produce ``output/result.mp4``.

    Args:
        prompt: Text prompt forwarded to ``inference.py``.
        video_path: Path of the uploaded video.
        condition: Condition name forwarded to ``inference.py``.
        video_length: Number of frames to process; values above 12 add the
            ``--is_long_video`` flag.
        seed: Random seed; floored to an integer.
        steps: Number of inference steps.

    Returns:
        A ``(status, video)`` tuple: ``("done", <result path>)`` on success,
        or ``(<error message>, None)`` when the run cannot be completed.
    """
    if not video_path:
        return "Runtime Error: no input video was provided", None

    # UI widgets may deliver numbers as floats; the command line below needs
    # plain integers.
    seed = math.floor(seed)
    video_length = int(video_length)
    steps = int(steps)

    # Probe the input once instead of re-opening it for every property.
    o_width, o_height, target_fps, _ = get_video_dimension(video_path)
    if o_width <= 0 or o_height <= 0:
        return "Runtime Error: could not read the input video", None

    # Prepare dimensions: shrink to 512 px wide while keeping the aspect
    # ratio; smaller videos keep their size (previously this path left the
    # dimensions undefined and crashed).
    if o_width > 512:
        n_height = int(o_height / o_width * 512)
        n_width = 512
    else:
        n_width, n_height = o_width, o_height

    if target_fps > 12:
        print("FPS is too high")
        target_fps = 12
    print(f"INPUT FPS: {target_fps}")

    # Snap to multiples of 32; never let a very small side collapse to 0.
    r_width = max(32, make_nearest_multiple_of_32(n_width))
    r_height = max(32, make_nearest_multiple_of_32(n_height))
    print(f"multiple of 32 sizes : {r_width}x{r_height}")

    # Start from a clean resized file.
    if os.path.exists('resized.mp4'):
        os.remove('resized.mp4')
    resized = resize_video(video_path, 'resized.mp4', r_width, r_height, target_fps)

    output_path = 'output/'
    os.makedirs(output_path, exist_ok=True)
    video_path_output = os.path.join(output_path, "result.mp4")

    # Remove any stale result so an old video is never reported as new.
    if os.path.exists(video_path_output):
        os.remove(video_path_output)

    print("RUNNING INFERENCE ...")

    # Pass arguments as a list (no shell) so the user-supplied prompt cannot
    # break quoting or inject shell commands.
    command = [
        "python", "inference.py",
        "--prompt", prompt,
        "--inference_steps", str(steps),
        "--condition", condition,
        "--video_path", resized,
        "--output_path", output_path,
        "--temp_chunk_path", "result",
        "--width", str(r_width),
        "--height", str(r_height),
        "--fps", str(target_fps),
        "--seed", str(seed),
        "--video_length", str(video_length),
        "--smoother_steps", "19", "20",
    ]
    if video_length > 12:
        command.append("--is_long_video")

    try:
        completed = subprocess.run(command)
    except OSError as e:
        # The interpreter could not be started at all.
        return f"Runtime Error: {e}", None

    # Errors inside inference.py (CUDA or otherwise) happen in the child
    # process and only show up here as a non-zero exit code.
    if completed.returncode != 0:
        return (
            f"Runtime Error: inference.py exited with code {completed.returncode}",
            None,
        )
    if not os.path.exists(video_path_output):
        return "Runtime Error: inference finished without producing a result video", None

    print("FINISHED !")
    return "done", video_path_output
# Page-level CSS: centre the main column and cap its width.
css="""
#col-container {max-width: 810px; margin-left: auto; margin-right: auto;}
"""

# NOTE(review): the nesting of the layout containers below was reconstructed
# from a copy of this file that had lost its indentation -- confirm the
# layout against the running app.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
<h1 style="text-align: center;">ControlVideo</h1>
""")
        with gr.Row():
            # Left side: inputs and generation settings.
            with gr.Column():
                video_path = gr.Video(source="upload", type="filepath", visible=True)
                prompt = gr.Textbox(label="prompt")
                with gr.Column():
                    video_length = gr.Slider(label="Video length", info="How many frames do you want to process ? For demo purpose, max is set to 24", minimum=1, maximum=12, step=1, value=2)
                    with gr.Row():
                        condition = gr.Dropdown(label="Condition", choices=["depth", "canny", "pose"], value="depth")
                        seed = gr.Number(label="seed", value=42)
                    inference_steps = gr.Slider(label="Inference steps", minimum=25, maximum=50, step=1, value=25)
                submit_btn = gr.Button("Submit")
            # Right side: generated video and status message.
            with gr.Column():
                video_res = gr.Video(label="result")
                status = gr.Textbox(label="result")

    # Re-size the frame slider whenever the input video changes.
    video_path.change(fn=get_frame_count,
                      inputs=[video_path],
                      outputs=[video_length],
                      queue=False
                      )

    # The inputs are listed in the order of run_inference's parameters.
    submit_btn.click(fn=run_inference,
                     inputs=[prompt,
                             video_path,
                             condition,
                             video_length,
                             seed,
                             inference_steps
                             ],
                     outputs=[status, video_res])

# Queue at most 12 pending requests, then start the app.
demo.queue(max_size=12).launch()