from PIL import Image
import os
import cv2
import numpy as np
from moviepy.editor import *
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
import gc

torch.backends.cuda.matmul.allow_tf32 = True

# Load the brightness ControlNet and attach it to Stable Diffusion v1.5
controlnet = ControlNetModel.from_pretrained(
    "ioclab/control_v1p_sd15_brightness",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Memory-saving options
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
pipe.enable_attention_slicing()


def get_frames(video_in):
    """Resize the video to 512px height, cap the frame rate at 30 fps, and split it into JPEG frames."""
    frames = []
    # resize the video
    clip = VideoFileClip(video_in)
    # check fps
    if clip.fps > 30:
        print("video rate is over 30 fps, resetting to 30")
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile("video_resized.mp4", fps=30)
    else:
        print("video rate is OK")
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile("video_resized.mp4", fps=clip.fps)
    print("video resized to 512 height")

    # Open the resized video with CV2 and write every frame to disk
    cap = cv2.VideoCapture("video_resized.mp4")
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("video fps: " + str(fps))
    i = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if ret == False:
            break
        cv2.imwrite('kang' + str(i) + '.jpg', frame)
        frames.append('kang' + str(i) + '.jpg')
        i += 1

    cap.release()
    cv2.destroyAllWindows()
    print("broke the video into frames")

    return frames, fps


def create_video(frames, fps):
    """Reassemble the processed frames into an MP4 at the given frame rate."""
    print("building video result")
    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile("_result.mp4", fps=fps)
    return "_result.mp4"


def process_brightness(
    prompt,
    negative_prompt,
    conditioning_image,
    num_inference_steps=30,
    size=512,
    guidance_scale=7.0,
    seed=1234,
):
    """Run the brightness ControlNet pipeline on a single frame (an RGB NumPy array)."""
    conditioning_image_raw = Image.fromarray(conditioning_image)
    # the brightness ControlNet is conditioned on a grayscale image
    conditioning_image = conditioning_image_raw.convert('L')

    g_cpu = torch.Generator()
    if seed == -1:
        generator = g_cpu.manual_seed(g_cpu.seed())
    else:
        generator = g_cpu.manual_seed(seed)

    output_image = pipe(
        prompt,
        conditioning_image,
        height=size,
        width=size,
        num_inference_steps=num_inference_steps,
        generator=generator,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=1.0,
    ).images[0]

    del conditioning_image, conditioning_image_raw
    gc.collect()

    return output_image


def infer(video_in, trim_value, prompt, negative_prompt, num_inference_steps=30, size=512, guidance_scale=7.0, seed=1234):
    # 1. break video into frames and get FPS
    break_vid = get_frames(video_in)
    frames_list = break_vid[0]
    fps = break_vid[1]
    n_frame = int(trim_value * fps)

    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)

    # 2. prepare the result frames
    result_frames = []
    print("set stop frames to: " + str(n_frame))

    for i, image in enumerate(frames_list[0:int(n_frame)]):
        conditioning_image = Image.open(image).convert("RGB")
        conditioning_image = np.array(conditioning_image)

        # pass the UI settings through instead of hard-coded values
        output_frame = process_brightness(
            prompt,
            negative_prompt,
            conditioning_image,
            num_inference_steps=num_inference_steps,
            size=size,
            guidance_scale=guidance_scale,
            seed=seed,
        )
        print(output_frame)

        output_frame.save("_frame_" + str(i) + ".jpeg")
        result_frames.append("_frame_" + str(i) + ".jpeg")
        print("frame " + str(i) + "/" + str(n_frame) + ": done;")

    # 3. rebuild the video from the processed frames
    final_vid = create_video(result_frames, fps)
    return final_vid


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # ControlNet on Brightness • Video
        A demo of ControlNet conditioned on brightness, applied to video frame by frame.
        """)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
            )
            video_in = gr.Video(
                label="Conditioning Video",
                source="upload",
                type="filepath"
            )
            trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
            with gr.Accordion('Advanced options', open=False):
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        10, 40, 20, step=1, label="Steps",
                    )
                    size = gr.Slider(
                        256, 768, 512, step=128, label="Size",
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1,
                        maximum=30.0,
                        value=7.0,
                        step=0.1
                    )
                    seed = gr.Slider(
                        label='Seed',
                        value=-1,
                        minimum=-1,
                        maximum=2147483647,
                        step=1,
                        # randomize=True
                    )
            submit_btn = gr.Button(
                value="Submit",
                variant="primary"
            )
        with gr.Column(min_width=300):
            output = gr.Video(
                label="Result",
            )

    submit_btn.click(
        fn=infer,
        inputs=[
            video_in,
            trim_in,
            prompt,
            negative_prompt,
            num_inference_steps,
            size,
            guidance_scale,
            seed
        ],
        outputs=output
    )

    gr.Markdown(
        """
        * [Dataset](https://huggingface.co/datasets/ioclab/grayscale_image_aesthetic_3M)
        * [Diffusers model](https://huggingface.co/ioclab/control_v1p_sd15_brightness), [Web UI model](https://huggingface.co/ioclab/ioc-controlnet)
        * [Training Report](https://api.wandb.ai/links/ciaochaos/oot5cui2), [Doc (Chinese)](https://aigc.ioclab.com/sd-showcase/brightness-controlnet.html)
        """)

demo.launch()
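
# --- Optional single-frame smoke test (not part of the original app) ---------
# A minimal sketch for sanity-checking the pipeline on one image before pushing
# a whole video through the Gradio UI. "test_frame.jpg" and the prompt strings
# below are placeholders/assumptions, not values from the app. Because
# demo.launch() above blocks, run this from a separate script (or temporarily
# comment out the launch call), then uncomment the lines below.
#
# test_image = np.array(Image.open("test_frame.jpg").convert("RGB"))
# preview = process_brightness(
#     prompt="a watercolor painting of a city street",
#     negative_prompt="low quality, blurry",
#     conditioning_image=test_image,
#     num_inference_steps=20,
#     size=512,
#     guidance_scale=7.0,
#     seed=1234,
# )
# preview.save("_preview.jpeg")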