DeepBeepMeep committed

Commit 6d57bc7 · Parent(s): 26a6613

fixed vace bugs

Files changed:
- wan/diffusion_forcing.py +6 -3
- wan/image2video.py +7 -13
- wan/modules/model.py +1 -1
- wan/text2video.py +6 -4
- wan/utils/utils.py +10 -0
- wan/utils/vace_preprocessor.py +7 -37
- wgp.py +225 -198
wan/diffusion_forcing.py CHANGED

@@ -15,6 +15,7 @@ from .modules.model import WanModel
 from .modules.t5 import T5EncoderModel
 from .modules.vae import WanVAE
 from wan.modules.posemb_layers import get_rotary_pos_embed
+from wan.utils.utils import calculate_new_dimensions
 from .utils.fm_solvers import (FlowDPMSolverMultistepScheduler,
                                get_sampling_sigmas, retrieve_timesteps)
 from .utils.fm_solvers_unipc import FlowUniPCMultistepScheduler
@@ -190,6 +191,7 @@ class DTT2V:
         input_video = None,
         height: int = 480,
         width: int = 832,
+        fit_into_canvas = True,
         num_frames: int = 97,
         num_inference_steps: int = 50,
         shift: float = 1.0,
@@ -221,15 +223,16 @@ class DTT2V:
         i2v_extra_kwrags = {}
         prefix_video = None
         predix_video_latent_length = 0
+
         if input_video != None:
             _ , _ , height, width  = input_video.shape
         elif image != None:
             image = image[0]
             frame_width, frame_height  = image.size
-
-            height = (int(frame_height * scale) // 16) * 16
-            width = (int(frame_width * scale) // 16) * 16
+            height, width = calculate_new_dimensions(height, width, frame_height, frame_width, fit_into_canvas)
             image = np.array(image.resize((width, height))).transpose(2, 0, 1)
+
+
         latent_length = (num_frames - 1) // 4 + 1
         latent_height = height // 8
         latent_width = width // 8
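For orientation, a minimal standalone sketch of the resize path this hunk switches to (the helper is the one added to wan/utils/utils.py below; the 1280x720 image and 832x480 canvas are illustrative, and the import assumes the repo is on the path):

from PIL import Image
import numpy as np
from wan.utils.utils import calculate_new_dimensions  # helper added in this commit

# Illustrative inputs: a 1280x720 conditioning image, an 832x480 canvas.
image = Image.new("RGB", (1280, 720))
height, width = 480, 832

frame_width, frame_height = image.size
# Replaces the old inline "* scale // 16 * 16" math with the shared helper.
height, width = calculate_new_dimensions(height, width, frame_height, frame_width, fit_into_canvas=True)
image = np.array(image.resize((width, height))).transpose(2, 0, 1)
print(image.shape)  # (3, 464, 832)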
    	
wan/image2video.py CHANGED

@@ -25,7 +25,7 @@ from .utils.fm_solvers import (FlowDPMSolverMultistepScheduler,
                                get_sampling_sigmas, retrieve_timesteps)
 from .utils.fm_solvers_unipc import FlowUniPCMultistepScheduler
 from wan.modules.posemb_layers import get_rotary_pos_embed
-from wan.utils.utils import resize_lanczos
+from wan.utils.utils import resize_lanczos, calculate_new_dimensions
 
 def optimized_scale(positive_flat, negative_flat):
 
@@ -120,7 +120,7 @@ class WanI2V:
         img2 = None,
         height =720,
         width = 1280,
-
+        fit_into_canvas = True,
         frame_num=81,
         shift=5.0,
         sample_solver='unipc',
@@ -188,22 +188,16 @@ class WanI2V:
             if add_frames_for_end_image:
                 frame_num +=1
                 lat_frames = int((frame_num - 2) // self.vae_stride[0] + 2)
-
-
+
         h, w = img.shape[1:]
-        # aspect_ratio = h / w
-
-        scale1  = min(height / h, width /  w)
-        scale2  = min(height / h, width /  w)
-        scale = max(scale1, scale2) 
-        new_height = int(h * scale) 
-        new_width = int(w * scale) 
 
+        h, w = calculate_new_dimensions(height, width, h, w, fit_into_canvas)
+
         lat_h = round(
-            new_height // self.vae_stride[1] //
+            h // self.vae_stride[1] //
             self.patch_size[1] * self.patch_size[1])
         lat_w = round(
-            new_width // self.vae_stride[2] //
+            w // self.vae_stride[2] //
             self.patch_size[2] * self.patch_size[2])
         h = lat_h * self.vae_stride[1]
         w = lat_w * self.vae_stride[2]
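After the helper picks h and w, the hunk snaps them to the latent grid. A quick standalone check of that rounding; vae_stride = (4, 8, 8) and patch_size = (1, 2, 2) are assumptions matching the usual Wan configuration:

# Standalone check of the latent rounding above; strides are assumptions.
vae_stride = (4, 8, 8)
patch_size = (1, 2, 2)

h, w = 464, 832  # e.g. output of calculate_new_dimensions

# Down to latent cells, then snapped to patch multiples.
lat_h = round(h // vae_stride[1] // patch_size[1] * patch_size[1])
lat_w = round(w // vae_stride[2] // patch_size[2] * patch_size[2])

# Pixel dimensions re-derived from the latent grid.
h, w = lat_h * vae_stride[1], lat_w * vae_stride[2]
print(lat_h, lat_w, h, w)  # 58 104 464 832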
    	
wan/modules/model.py CHANGED

@@ -963,7 +963,7 @@ class WanModel(ModelMixin, ConfigMixin):
             hints_list = [None ] *len(x_list)
         else:
             # Vace embeddings
-            c = [self.vace_patch_embedding(u.unsqueeze(0)) for u in vace_context]
+            c = [self.vace_patch_embedding(u.to(self.vace_patch_embedding.weight.dtype).unsqueeze(0)) for u in vace_context]
             c = [u.flatten(2).transpose(1, 2) for u in c]
             c = c[0]
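The fix casts each VACE context tensor to the patch-embedding weight dtype before the convolution, which avoids a dtype-mismatch RuntimeError when the context arrives in a different precision than the (typically bfloat16) weights. The pattern in isolation, with an illustrative stand-in Conv3d:

import torch
import torch.nn as nn

# Stand-in for self.vace_patch_embedding; channel sizes are illustrative.
vace_patch_embedding = nn.Conv3d(16, 128, kernel_size=(1, 2, 2),
                                 stride=(1, 2, 2)).to(torch.bfloat16)

u = torch.randn(16, 1, 32, 32)  # context tensor arriving as float32

# Before the fix, vace_patch_embedding(u.unsqueeze(0)) raises a
# dtype-mismatch RuntimeError; casting to the weight dtype fixes it.
c = vace_patch_embedding(u.to(vace_patch_embedding.weight.dtype).unsqueeze(0))
print(c.dtype)  # torch.bfloat16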
    	
wan/text2video.py CHANGED

@@ -177,15 +177,16 @@ class WanT2V:
     def vace_latent(self, z, m):
         return [torch.cat([zz, mm], dim=0) for zz, mm in zip(z, m)]
 
-    def prepare_source(self, src_video, src_mask, src_ref_images, total_frames, image_size,  device, original_video = False, keep_frames= [], start_frame = 0, pre_src_video = None):
+    def prepare_source(self, src_video, src_mask, src_ref_images, total_frames, image_size,  device, original_video = False, keep_frames= [], start_frame = 0,  fit_into_canvas = True, pre_src_video = None):
         image_sizes = []
         trim_video = len(keep_frames)
+        canvas_height, canvas_width = image_size
 
         for i, (sub_src_video, sub_src_mask, sub_pre_src_video) in enumerate(zip(src_video, src_mask,pre_src_video)):
             prepend_count = 0 if sub_pre_src_video == None else sub_pre_src_video.shape[1]
             num_frames = total_frames - prepend_count 
             if sub_src_mask is not None and sub_src_video is not None:
-                src_video[i], src_mask[i], _, _, _ = self.vid_proc.load_video_pair(sub_src_video, sub_src_mask, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame)
+                src_video[i], src_mask[i], _, _, _ = self.vid_proc.load_video_pair(sub_src_video, sub_src_mask, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame, canvas_height = canvas_height, canvas_width = canvas_width, fit_into_canvas = fit_into_canvas)
                 # src_video is [-1, 1], 0 = inpainting area (in fact 127  in [0, 255])
                 # src_mask is [-1, 1], 0 = preserve original video (in fact 127  in [0, 255]) and 1 = Inpainting (in fact 255  in [0, 255])
                 src_video[i] = src_video[i].to(device)
@@ -208,7 +209,7 @@ class WanT2V:
                     src_mask[i] = torch.ones_like(src_video[i], device=device)
                 image_sizes.append(image_size)
             else:
-                src_video[i], _, _, _ = self.vid_proc.load_video(sub_src_video, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame)
+                src_video[i], _, _, _ = self.vid_proc.load_video(sub_src_video, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame, canvas_height = canvas_height, canvas_width = canvas_width, fit_into_canvas = fit_into_canvas)
                 src_video[i] = src_video[i].to(device)
                 src_mask[i] = torch.zeros_like(src_video[i], device=device) if original_video else torch.ones_like(src_video[i], device=device)
                 if prepend_count > 0:
@@ -277,6 +278,7 @@ class WanT2V:
                 target_camera=None,
                 context_scale=1.0,
                 size=(1280, 720),
+                fit_into_canvas = True,
                 frame_num=81,
                 shift=5.0,
                 sample_solver='unipc',
@@ -430,7 +432,7 @@ class WanT2V:
             kwargs.update({'cam_emb': cam_emb})
 
         if vace:
-            ref_images_count = len(input_ref_images[0]) if input_ref_images != None else 0 
+            ref_images_count = len(input_ref_images[0]) if input_ref_images != None and input_ref_images[0] != None else 0 
             kwargs.update({'vace_context' : z, 'vace_context_scale' : context_scale})
             if overlapped_latents > 0:
                 z_reactive = [  zz[0:16, ref_images_count:overlapped_latents + ref_images_count].clone() for zz in z]
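The extra `input_ref_images[0] != None` check matters because the pipeline can hand over a list whose first slot is None when no reference images were supplied, and `len(None)` raises a TypeError. A minimal illustration:

# Minimal illustration of the strengthened guard; the value is illustrative.
input_ref_images = [None]  # a VACE run with no reference images attached

# Old guard: len(input_ref_images[0]) -> TypeError, since len(None) is invalid.
ref_images_count = len(input_ref_images[0]) \
    if input_ref_images != None and input_ref_images[0] != None else 0
print(ref_images_count)  # 0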
    	
wan/utils/utils.py CHANGED

@@ -67,7 +67,17 @@ def remove_background(img, session=None):
     return torch.from_numpy(np.array(img).astype(np.float32) / 255.0).movedim(-1, 0)
 
 
+def calculate_new_dimensions(canvas_height, canvas_width, height, width, fit_into_canvas):
+    if fit_into_canvas:
+        scale1  = min(canvas_height / height, canvas_width / width)
+        scale2  = min(canvas_width / height, canvas_height / width)
+        scale = max(scale1, scale2) 
+    else:
+        scale = (canvas_height * canvas_width / (height * width))**(1/2)
 
+    new_height = round( height * scale / 16) * 16
+    new_width = round( width * scale / 16) * 16
+    return new_height, new_width
 
 def resize_and_remove_background(img_list, budget_width, budget_height, rm_background, fit_into_canvas = False ):
     if rm_background:
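Worked numbers for the new helper, fitting a 1280x720 source onto an 832x480 canvas: with `fit_into_canvas` the source is scaled to fit the canvas bounds (or their swapped orientation), without it only the canvas pixel budget is matched; both results are snapped to multiples of 16:

# Standalone copy of the helper added above, for a quick check.
def calculate_new_dimensions(canvas_height, canvas_width, height, width, fit_into_canvas):
    if fit_into_canvas:
        scale1 = min(canvas_height / height, canvas_width / width)
        scale2 = min(canvas_width / height, canvas_height / width)
        scale = max(scale1, scale2)
    else:
        # Match the canvas pixel budget, ignoring its aspect ratio.
        scale = (canvas_height * canvas_width / (height * width)) ** 0.5
    new_height = round(height * scale / 16) * 16
    new_width = round(width * scale / 16) * 16
    return new_height, new_width

# 480x832 canvas (h, w), 720x1280 source (h, w):
print(calculate_new_dimensions(480, 832, 720, 1280, True))   # (464, 832) - fits the canvas
print(calculate_new_dimensions(480, 832, 720, 1280, False))  # (480, 848) - same pixel budget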
    	
wan/utils/vace_preprocessor.py CHANGED

@@ -5,6 +5,7 @@ from PIL import Image
 import torch
 import torch.nn.functional as F
 import torchvision.transforms.functional as TF
+from .utils import calculate_new_dimensions
 
 
 class VaceImageProcessor(object):
@@ -182,53 +183,22 @@ class VaceVideoProcessor(object):
 
 
 
-    def _get_frameid_bbox_adjust_last(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0, start_frame =0):
+    def _get_frameid_bbox_adjust_last(self, fps, video_frames_count, canvas_height, canvas_width, h, w,  fit_into_canvas, crop_box, rng, max_frames= 0, start_frame =0):
         from wan.utils.utils import resample
 
         target_fps = self.max_fps
 
-        # video_frames_count = len(frame_timestamps)
-
         frame_ids= resample(fps, video_frames_count, max_frames, target_fps, start_frame )
 
         x1, x2, y1, y2 = [0, w, 0, h] if crop_box is None else crop_box
         h, w = y2 - y1, x2 - x1
-
-        df, dh, dw = self.downsample
-        seq_len  = self.seq_len
-        # min/max area of the [latent video]
-        min_area_z = self.min_area / (dh * dw)
-        # max_area_z = min(seq_len, self.max_area / (dh * dw), (h // dh) * (w // dw))
-        max_area_z = min_area_z # workaround bug
-        # sample a frame number of the [latent video]
-        rand_area_z = np.square(np.power(2, rng.uniform(
-            np.log2(np.sqrt(min_area_z)),
-            np.log2(np.sqrt(max_area_z))
-        )))
-
-        seq_len =  max_area_z * ((max_frames- start_frame - 1) // df +1)
-
-        # of = min(
-        #     (len(frame_ids) - 1) // df + 1,
-        #     int(seq_len / rand_area_z)
-        # )
-        of = (len(frame_ids) - 1) // df + 1
-
-
-        # deduce target shape of the [latent video]
-        # target_area_z = min(max_area_z, int(seq_len / of))
-        target_area_z = max_area_z
-        oh = round(np.sqrt(target_area_z * ratio))
-        ow = int(target_area_z / oh)
-        of = (of - 1) * df + 1
-        oh *= dh
-        ow *= dw
+        oh, ow = calculate_new_dimensions(canvas_height, canvas_width, h, w, fit_into_canvas)
 
         return frame_ids, (x1, x2, y1, y2), (oh, ow), target_fps
 
-    def _get_frameid_bbox(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0, start_frame= 0):
+    def _get_frameid_bbox(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0, start_frame= 0, canvas_height = 0, canvas_width = 0, fit_into_canvas= True):
         if self.keep_last:
-            return self._get_frameid_bbox_adjust_last(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames, start_frame= start_frame)
+            return self._get_frameid_bbox_adjust_last(fps, video_frames_count, canvas_height, canvas_width, h, w, fit_into_canvas, crop_box, rng, max_frames= max_frames, start_frame= start_frame)
         else:
            return self._get_frameid_bbox_default(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames)
 
@@ -238,7 +208,7 @@ class VaceVideoProcessor(object):
     def load_video_pair(self, data_key, data_key2, crop_box=None, seed=2024, **kwargs):
         return self.load_video_batch(data_key, data_key2, crop_box=crop_box, seed=seed, **kwargs)
 
-    def load_video_batch(self, *data_key_batch, crop_box=None, seed=2024, max_frames= 0, trim_video =0, start_frame = 0, **kwargs):
+    def load_video_batch(self, *data_key_batch, crop_box=None, seed=2024, max_frames= 0, trim_video =0, start_frame = 0, canvas_height = 0, canvas_width = 0, fit_into_canvas = False, **kwargs):
         rng = np.random.default_rng(seed + hash(data_key_batch[0]) % 10000)
         # read video
         import decord
@@ -269,7 +239,7 @@ class VaceVideoProcessor(object):
             h, w = src_video.shape[1:3]
         else:
             h, w = readers[0].next().shape[:2]
-        frame_ids, (x1, x2, y1, y2), (oh, ow), fps = self._get_frameid_bbox(fps, length, h, w, crop_box, rng, max_frames=max_frames, start_frame = start_frame )
+        frame_ids, (x1, x2, y1, y2), (oh, ow), fps = self._get_frameid_bbox(fps, length, h, w, crop_box, rng,  canvas_height = canvas_height, canvas_width = canvas_width, fit_into_canvas = fit_into_canvas,  max_frames=max_frames, start_frame = start_frame )
 
         # preprocess video
         videos = [reader.get_batch(frame_ids)[:, y1:y2, x1:x2, :] for reader in readers]
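With the old latent-area sampling gone, the output size now comes entirely from the canvas arguments threaded down from `prepare_source`. A hedged sketch of a call into the reworked loader (`vid_proc` stands for a configured VaceVideoProcessor instance, and the file names are placeholders):

# Sketch only: vid_proc is assumed to be a configured VaceVideoProcessor.
src_video, src_mask, *_ = vid_proc.load_video_pair(
    "guide.mp4", "mask.mp4",        # placeholder paths
    max_frames=81,
    trim_video=81,
    start_frame=0,
    canvas_height=480,              # new: target canvas from prepare_source
    canvas_width=832,
    fit_into_canvas=True,           # new: fit bounds vs. match pixel budget
)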
    	
        wgp.py
    CHANGED
    
    | @@ -84,7 +84,6 @@ def format_time(seconds): | |
| 84 | 
             
                    hours = int(seconds // 3600)
         | 
| 85 | 
             
                    minutes = int((seconds % 3600) // 60)
         | 
| 86 | 
             
                    return f"{hours}h {minutes}m"
         | 
| 87 | 
            -
             | 
| 88 | 
             
            def pil_to_base64_uri(pil_image, format="png", quality=75):
         | 
| 89 | 
             
                if pil_image is None:
         | 
| 90 | 
             
                    return None
         | 
| @@ -275,12 +274,12 @@ def process_prompt_and_add_tasks(state, model_choice): | |
| 275 | 
             
                    video_guide = inputs["video_guide"]
         | 
| 276 | 
             
                    video_mask = inputs["video_mask"]
         | 
| 277 |  | 
| 278 | 
            -
                    if "1.3B" in model_filename :                
         | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 281 | 
            -
             | 
| 282 | 
            -
             | 
| 283 | 
            -
             | 
| 284 | 
             
                    if "I" in video_prompt_type:
         | 
| 285 | 
             
                        if image_refs == None:
         | 
| 286 | 
             
                            gr.Info("You must provide at least one Refererence Image")
         | 
| @@ -1995,7 +1994,8 @@ def apply_changes(  state, | |
| 1995 | 
             
                                boost_choice = 1,
         | 
| 1996 | 
             
                                clear_file_list = 0,
         | 
| 1997 | 
             
                                preload_model_policy_choice = 1,
         | 
| 1998 | 
            -
                                UI_theme_choice = "default"
         | 
|  | |
| 1999 | 
             
            ):
         | 
| 2000 | 
             
                if args.lock_config:
         | 
| 2001 | 
             
                    return
         | 
| @@ -2016,7 +2016,8 @@ def apply_changes(  state, | |
| 2016 | 
             
                                 "boost" : boost_choice,
         | 
| 2017 | 
             
                                 "clear_file_list" : clear_file_list,
         | 
| 2018 | 
             
                                 "preload_model_policy" : preload_model_policy_choice,
         | 
| 2019 | 
            -
                                 "UI_theme" : UI_theme_choice
         | 
|  | |
| 2020 | 
             
                                   }
         | 
| 2021 |  | 
| 2022 | 
             
                if Path(server_config_filename).is_file():
         | 
| @@ -2050,7 +2051,7 @@ def apply_changes(  state, | |
| 2050 | 
             
                transformer_quantization = server_config["transformer_quantization"]
         | 
| 2051 | 
             
                transformer_types = server_config["transformer_types"]
         | 
| 2052 |  | 
| 2053 | 
            -
                if  all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list"] for change in changes ):
         | 
| 2054 | 
             
                    model_choice = gr.Dropdown()
         | 
| 2055 | 
             
                else:
         | 
| 2056 | 
             
                    reload_needed = True
         | 
| @@ -2413,7 +2414,7 @@ def generate_video( | |
| 2413 | 
             
                file_list = gen["file_list"]
         | 
| 2414 | 
             
                prompt_no = gen["prompt_no"]
         | 
| 2415 |  | 
| 2416 | 
            -
             | 
| 2417 | 
             
                # if wan_model == None:
         | 
| 2418 | 
             
                #     gr.Info("Unable to generate a Video while a new configuration is being applied.")
         | 
| 2419 | 
             
                #     return
         | 
| @@ -2555,7 +2556,7 @@ def generate_video( | |
| 2555 | 
             
                source_video = None
         | 
| 2556 | 
             
                target_camera = None
         | 
| 2557 | 
             
                if "recam" in model_filename:
         | 
| 2558 | 
            -
                    source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= video_length, start_frame = 0, fit_canvas=  | 
| 2559 | 
             
                    target_camera = model_mode
         | 
| 2560 |  | 
| 2561 | 
             
                audio_proj_split = None
         | 
| @@ -2646,7 +2647,7 @@ def generate_video( | |
| 2646 | 
             
                        elif diffusion_forcing:
         | 
| 2647 | 
             
                            if video_source != None and len(video_source) > 0 and window_no == 1:
         | 
| 2648 | 
             
                                keep_frames_video_source= 1000 if len(keep_frames_video_source) ==0 else int(keep_frames_video_source) 
         | 
| 2649 | 
            -
                                prefix_video  = preprocess_video(None, width=width, height=height,video_in=video_source, max_frames= keep_frames_video_source , start_frame = 0, fit_canvas=  | 
| 2650 | 
             
                                prefix_video  = prefix_video .permute(3, 0, 1, 2)
         | 
| 2651 | 
             
                                prefix_video  = prefix_video .float().div_(127.5).sub_(1.) # c, f, h, w
         | 
| 2652 | 
             
                                prefix_video_frames_count = prefix_video.shape[1]
         | 
| @@ -2675,13 +2676,13 @@ def generate_video( | |
| 2675 |  | 
| 2676 | 
             
                                if preprocess_type != None :
         | 
| 2677 | 
             
                                    send_cmd("progress", progress_args)
         | 
| 2678 | 
            -
                                    video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= video_length if window_no == 1 else video_length - reuse_frames, start_frame = guide_start_frame, fit_canvas =  | 
| 2679 | 
             
                            keep_frames_parsed, error = parse_keep_frames_video_guide(keep_frames_video_guide, max_frames_to_generate)
         | 
| 2680 | 
             
                            if len(error) > 0:
         | 
| 2681 | 
             
                                raise gr.Error(f"invalid keep frames {keep_frames_video_guide}")
         | 
| 2682 | 
             
                            keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + video_length]
         | 
| 2683 | 
             
                            if window_no == 1:
         | 
| 2684 | 
            -
                                image_size = VACE_SIZE_CONFIGS[resolution_reformated] # default frame dimensions until it is set by video_src (if there is any)
         | 
| 2685 | 
             
                            src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide_copy],
         | 
| 2686 | 
             
                                                                                    [video_mask_copy ],
         | 
| 2687 | 
             
                                                                                    [image_refs_copy], 
         | 
| @@ -2689,10 +2690,11 @@ def generate_video( | |
| 2689 | 
             
                                                                                    original_video= "O" in video_prompt_type,
         | 
| 2690 | 
             
                                                                                    keep_frames=keep_frames_parsed,
         | 
| 2691 | 
             
                                                                                    start_frame = guide_start_frame,
         | 
| 2692 | 
            -
                                                                                    pre_src_video = [pre_video_guide]
         | 
|  | |
| 2693 | 
             
                                                                                    )
         | 
| 2694 | 
            -
                            if window_no == 1 and src_video != None and len(src_video) > 0:
         | 
| 2695 | 
            -
             | 
| 2696 | 
             
                        prompts_max = gen["prompts_max"]
         | 
| 2697 | 
             
                        status = get_latest_status(state)
         | 
| 2698 |  | 
| @@ -2722,6 +2724,7 @@ def generate_video( | |
| 2722 | 
             
                                    # max_area=MAX_AREA_CONFIGS[resolution_reformated], 
         | 
| 2723 | 
             
                                    height =  height,
         | 
| 2724 | 
             
                                    width = width,
         | 
|  | |
| 2725 | 
             
                                    shift=flow_shift,
         | 
| 2726 | 
             
                                    sampling_steps=num_inference_steps,
         | 
| 2727 | 
             
                                    guide_scale=guidance_scale,
         | 
| @@ -2750,6 +2753,7 @@ def generate_video( | |
| 2750 | 
             
                                    input_video= pre_video_guide,
         | 
| 2751 | 
             
                                    height =  height,
         | 
| 2752 | 
             
                                    width = width,
         | 
|  | |
| 2753 | 
             
                                    seed = seed,
         | 
| 2754 | 
             
                                    num_frames =  (video_length // 4)* 4 + 1, #377
         | 
| 2755 | 
             
                                    num_inference_steps = num_inference_steps,
         | 
| @@ -2777,6 +2781,7 @@ def generate_video( | |
| 2777 | 
             
                                    target_camera= target_camera,
         | 
| 2778 | 
             
                                    frame_num=(video_length // 4)* 4 + 1,
         | 
| 2779 | 
             
                                    size=(width, height),
         | 
|  | |
| 2780 | 
             
                                    shift=flow_shift,
         | 
| 2781 | 
             
                                    sampling_steps=num_inference_steps,
         | 
| 2782 | 
             
                                    guide_scale=guidance_scale,
         | 
| @@ -4042,39 +4047,35 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non | |
| 4042 | 
             
                            wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
         | 
| 4043 | 
             
                            wizard_variables_var = gr.Text(wizard_variables, visible = False)
         | 
| 4044 | 
             
                        with gr.Row():
         | 
| 4045 | 
            -
                            if test_class_i2v(model_filename) | 
| 4046 | 
            -
                                 | 
| 4047 | 
            -
                                     | 
| 4048 | 
            -
             | 
| 4049 | 
            -
             | 
| 4050 | 
            -
                                        ("480p (same amount of pixels)", "832x480"),
         | 
| 4051 | 
            -
                                    ],
         | 
| 4052 | 
            -
                                    value=ui_defaults.get("resolution","480p"),
         | 
| 4053 | 
            -
                                    label="Resolution (video will have the same height / width ratio than the original image)"
         | 
| 4054 | 
            -
                                )
         | 
| 4055 | 
             
                            else:
         | 
| 4056 | 
            -
                                 | 
| 4057 | 
            -
             | 
| 4058 | 
            -
             | 
| 4059 | 
            -
             | 
| 4060 | 
            -
             | 
| 4061 | 
            -
             | 
| 4062 | 
            -
             | 
| 4063 | 
            -
             | 
| 4064 | 
            -
             | 
| 4065 | 
            -
             | 
| 4066 | 
            -
             | 
| 4067 | 
            -
             | 
| 4068 | 
            -
             | 
| 4069 | 
            -
             | 
| 4070 | 
            -
             | 
| 4071 | 
            -
             | 
| 4072 | 
            -
             | 
| 4073 | 
            -
             | 
| 4074 | 
            -
                                     | 
| 4075 | 
            -
             | 
| 4076 | 
            -
             | 
| 4077 | 
            -
                                 | 
|  | |
| 4078 | 
             
                        with gr.Row():
         | 
| 4079 | 
             
                            if recammaster:
         | 
| 4080 | 
             
                                video_length = gr.Slider(5, 193, value=ui_defaults.get("video_length", 81), step=4, label="Number of frames (16 = 1s), locked", interactive= False)
         | 
| @@ -4556,156 +4557,181 @@ def generate_configuration_tab(state, blocks, header, model_choice): | |
| 4556 | 
             
                with gr.Column():
         | 
| 4557 | 
             
                    model_list = []
         | 
| 4558 |  | 
| 4559 | 
            -
                    for model_type in model_types:
         | 
| 4560 | 
            -
                        choice = get_model_filename(model_type, transformer_quantization)
         | 
| 4561 | 
            -
                        model_list.append(choice)
         | 
| 4562 | 
            -
                    dropdown_choices = [ ( get_model_name(choice),  get_model_type(choice) ) for choice in model_list]
         | 
| 4563 | 
            -
                    transformer_types_choices = gr.Dropdown(
         | 
| 4564 | 
            -
                        choices= dropdown_choices,
         | 
| 4565 | 
            -
                        value= transformer_types,
         | 
| 4566 | 
            -
                        label= "Selectable Wan Transformer Models (keep empty to get All of them)",
         | 
| 4567 | 
            -
                        scale= 2,
         | 
| 4568 | 
            -
                        multiselect= True
         | 
| 4569 | 
            -
                        )
         | 
| 4570 |  | 
| 4571 | 
            -
                     | 
| 4572 | 
            -
                         | 
| 4573 | 
            -
             | 
| 4574 | 
            -
                             | 
| 4575 | 
            -
             | 
| 4576 | 
            -
             | 
| 4577 | 
            -
             | 
| 4578 | 
            -
             | 
| 4579 | 
            -
             | 
| 4580 | 
            -
             | 
| 4581 | 
            -
             | 
| 4582 | 
            -
             | 
| 4583 | 
            -
             | 
| 4584 | 
            -
             | 
| 4585 | 
            -
             | 
| 4586 | 
            -
             | 
| 4587 | 
            -
             | 
| 4588 | 
            -
             | 
| 4589 | 
            -
             | 
| 4590 | 
            -
             | 
| 4591 | 
            -
             | 
| 4592 | 
            -
             | 
| 4593 | 
            -
             | 
| 4594 | 
            -
             | 
| 4595 | 
            -
             | 
| 4596 | 
            -
             | 
| 4597 | 
            -
             | 
| 4598 | 
            -
             | 
| 4599 | 
            -
             | 
| 4600 | 
            -
             | 
| 4601 | 
            -
             | 
| 4602 | 
            -
             | 
| 4603 | 
            -
             | 
| 4604 | 
            -
             | 
| 4605 | 
            -
             | 
| 4606 | 
            -
             | 
| 4607 | 
            -
             | 
| 4608 | 
            -
             | 
| 4609 | 
            -
             | 
| 4610 | 
            -
             | 
| 4611 | 
            -
             | 
| 4612 | 
            -
             | 
| 4613 | 
            -
             | 
| 4614 | 
            -
             | 
| 4615 | 
            -
             | 
| 4616 | 
            -
             | 
| 4617 | 
            -
             | 
| 4618 | 
            -
             | 
| 4619 | 
            -
                             | 
| 4620 | 
            -
             | 
| 4621 | 
            -
             | 
| 4622 | 
            -
             | 
| 4623 | 
            -
             | 
| 4624 | 
            -
             | 
| 4625 | 
            -
             | 
| 4626 | 
            -
             | 
| 4627 | 
            -
                             | 
| 4628 | 
            -
             | 
| 4629 | 
            -
             | 
| 4630 | 
            -
             | 
| 4631 | 
            -
             | 
| 4632 | 
            -
             | 
| 4633 | 
            -
             | 
| 4634 | 
            -
             | 
| 4635 | 
            -
             | 
| 4636 | 
            -
             | 
| 4637 | 
            -
             | 
| 4638 | 
            -
             | 
| 4639 | 
            -
             | 
| 4640 | 
            -
             | 
| 4641 | 
            -
             | 
| 4642 | 
            -
             | 
| 4643 | 
            -
             | 
| 4644 | 
            -
             | 
| 4645 | 
            -
             | 
| 4646 | 
            -
             | 
| 4647 | 
            -
             | 
| 4648 | 
            -
             | 
| 4649 | 
            -
             | 
| 4650 | 
            -
             | 
| 4651 | 
            -
             | 
| 4652 | 
            -
             | 
| 4653 | 
            -
             | 
| 4654 | 
            -
             | 
| 4655 | 
            -
                             | 
| 4656 | 
            -
             | 
| 4657 | 
            -
             | 
| 4658 | 
            -
             | 
| 4659 | 
            -
             | 
| 4660 | 
            -
                         | 
| 4661 | 
            -
             | 
| 4662 | 
            -
             | 
| 4663 | 
            -
             | 
| 4664 | 
            -
             | 
| 4665 | 
            -
             | 
| 4666 | 
            -
             | 
| 4667 | 
            -
             | 
| 4668 | 
            -
             | 
| 4669 | 
            -
             | 
| 4670 | 
            -
             | 
| 4671 | 
            -
             | 
| 4672 | 
            -
             | 
| 4673 | 
            -
             | 
| 4674 | 
            -
             | 
| 4675 | 
            -
             | 
| 4676 | 
            -
             | 
| 4677 | 
            -
             | 
| 4678 | 
            -
                             | 
| 4679 | 
            -
             | 
| 4680 | 
            -
             | 
| 4681 | 
            -
             | 
| 4682 | 
            -
             | 
| 4683 | 
            -
             | 
| 4684 | 
            -
             | 
| 4685 | 
            -
             | 
| 4686 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 4687 |  | 
| 4688 | 
            -
                    clear_file_list_choice = gr.Dropdown(
         | 
| 4689 | 
            -
                        choices=[
         | 
| 4690 | 
            -
                            ("None", 0),
         | 
| 4691 | 
            -
                            ("Keep the last video", 1),
         | 
| 4692 | 
            -
                            ("Keep the last 5 videos", 5),
         | 
| 4693 | 
            -
                            ("Keep the last 10 videos", 10),
         | 
| 4694 | 
            -
                            ("Keep the last 20 videos", 20),
         | 
| 4695 | 
            -
                            ("Keep the last 30 videos", 30),
         | 
| 4696 | 
            -
                        ],
         | 
| 4697 | 
            -
                        value=server_config.get("clear_file_list", 5),
         | 
| 4698 | 
            -
                        label="Keep Previously Generated Videos when starting a Generation Batch"
         | 
| 4699 | 
            -
                    )
         | 
| 4700 |  | 
| 4701 | 
            -
                    UI_theme_choice = gr.Dropdown(
         | 
| 4702 | 
            -
                        choices=[
         | 
| 4703 | 
            -
                            ("Blue Sky", "default"),
         | 
| 4704 | 
            -
                            ("Classic Gradio", "gradio"),
         | 
| 4705 | 
            -
                        ],
         | 
| 4706 | 
            -
                        value=server_config.get("UI_theme_choice", "default"),
         | 
| 4707 | 
            -
                        label="User Interface Theme. You will need to restart the App the see new Theme."
         | 
| 4708 | 
            -
                    )
         | 
| 4709 |  | 
| 4710 |  | 
| 4711 | 
             
                    msg = gr.Markdown()            
         | 
| @@ -4728,7 +4754,8 @@ def generate_configuration_tab(state, blocks, header, model_choice): | |
| 4728 | 
             
                                boost_choice,
         | 
| 4729 | 
             
                                clear_file_list_choice,
         | 
| 4730 | 
             
                                preload_model_policy_choice,
         | 
| 4731 | 
            -
                                UI_theme_choice
         | 
|  | |
| 4732 | 
             
                            ],
         | 
| 4733 | 
             
                            outputs= [msg , header, model_choice]
         | 
| 4734 | 
             
                    )
         | 
|  | |
| 84 | 
             
                    hours = int(seconds // 3600)
         | 
| 85 | 
             
                    minutes = int((seconds % 3600) // 60)
         | 
| 86 | 
             
                    return f"{hours}h {minutes}m"
         | 
|  | |
| 87 | 
             
            def pil_to_base64_uri(pil_image, format="png", quality=75):
         | 
| 88 | 
             
                if pil_image is None:
         | 
| 89 | 
             
                    return None
         | 
|  | |
274        video_guide = inputs["video_guide"]
275        video_mask = inputs["video_mask"]
276
277 +      # if "1.3B" in model_filename:
278 +      #     resolution_reformated = str(height) + "*" + str(width)
279 +      #     if not resolution_reformated in VACE_SIZE_CONFIGS:
280 +      #         res = (" and ").join(VACE_SIZE_CONFIGS.keys())
281 +      #         gr.Info(f"Video Resolution for Vace model is not supported. Only {res} resolutions are allowed.")
282 +      #         return
283        if "I" in video_prompt_type:
284            if image_refs == None:
285                gr.Info("You must provide at least one Reference Image")
...
1994               boost_choice = 1,
1995               clear_file_list = 0,
1996               preload_model_policy_choice = 1,
1997 +             UI_theme_choice = "default",
1998 +             fit_canvas_choice = 0
1999    ):
2000        if args.lock_config:
2001            return
...
2016                "boost" : boost_choice,
2017                "clear_file_list" : clear_file_list,
2018                "preload_model_policy" : preload_model_policy_choice,
2019 +              "UI_theme" : UI_theme_choice,
2020 +              "fit_canvas": fit_canvas_choice,
2021                }
2022
2023        if Path(server_config_filename).is_file():
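After this change the dictionary persisted to server_config_filename carries the two new keys next to the existing ones. A hypothetical excerpt of that JSON file (values illustrative only):

{
    "attention_mode": "auto",
    "metadata_type": "metadata",
    "clear_file_list": 5,
    "preload_model_policy": [],
    "UI_theme": "default",
    "fit_canvas": 0
}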
...
2051        transformer_quantization = server_config["transformer_quantization"]
2052        transformer_types = server_config["transformer_types"]
2053
2054 +      if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas"] for change in changes):
2055            model_choice = gr.Dropdown()
2056        else:
2057            reload_needed = True
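The all(...) test above decides whether a model reload can be skipped: fit_canvas now counts as a "light" setting that only affects pre-processing, not the loaded weights. A schematic restatement of the same logic (changes is assumed to be the list of modified config keys):

LIGHT_SETTINGS = {"attention_mode", "vae_config", "boost", "save_path",
                  "metadata_type", "clear_file_list", "fit_canvas"}

def needs_model_reload(changes):
    # Any modified key outside the light set (quantization, text encoder,
    # transformer type, ...) forces a reload before the next generation.
    return not set(changes).issubset(LIGHT_SETTINGS)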
...
2414        file_list = gen["file_list"]
2415        prompt_no = gen["prompt_no"]
2416
2417 +      fit_canvas = server_config.get("fit_canvas", 0)
2418        # if wan_model == None:
2419        #     gr.Info("Unable to generate a Video while a new configuration is being applied.")
2420        #     return
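fit_canvas, read once per generation here, selects between the two resize policies described by the settings dropdown added further below: 0 treats the requested width x height as a pixel budget while preserving the source aspect ratio, 1 fits the source inside the requested dimensions. A sketch of the arithmetic under that reading (calculate_new_dimensions in wan/utils/utils.py is the actual implementation; this is not its verbatim code):

def new_dimensions_sketch(req_height, req_width, src_height, src_width, fit_into_canvas, block=16):
    if fit_into_canvas:
        # Fit inside the requested canvas: the output may be smaller than requested.
        scale = min(req_height / src_height, req_width / src_width)
    else:
        # Pixel budget: keep roughly req_height * req_width total pixels and the
        # source aspect ratio, so one side may exceed the requested dimensions.
        scale = ((req_height * req_width) / (src_height * src_width)) ** 0.5
    # Round down to a multiple of the model's block size.
    return (int(src_height * scale) // block) * block, (int(src_width * scale) // block) * block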
...
2556        source_video = None
2557        target_camera = None
2558        if "recam" in model_filename:
2559 +          source_video = preprocess_video("", width=width, height=height, video_in=video_source, max_frames=video_length, start_frame=0, fit_canvas=fit_canvas)
2560            target_camera = model_mode
2561
2562        audio_proj_split = None
...
2647            elif diffusion_forcing:
2648                if video_source != None and len(video_source) > 0 and window_no == 1:
2649                    keep_frames_video_source = 1000 if len(keep_frames_video_source) == 0 else int(keep_frames_video_source)
2650 +                  prefix_video = preprocess_video(None, width=width, height=height, video_in=video_source, max_frames=keep_frames_video_source, start_frame=0, fit_canvas=fit_canvas, target_fps=fps)
2651                    prefix_video = prefix_video.permute(3, 0, 1, 2)
2652                    prefix_video = prefix_video.float().div_(127.5).sub_(1.)  # c, f, h, w
2653                    prefix_video_frames_count = prefix_video.shape[1]
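The two in-place ops after preprocess_video map uint8 pixel values into the [-1, 1] float range the VAE expects: x / 127.5 - 1 sends 0 to -1.0 and 255 to 1.0. For instance:

import torch

frames = torch.tensor([0., 127.5, 255.])   # raw pixel intensities
print(frames.div(127.5).sub(1.))           # tensor([-1.,  0.,  1.])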
...
2676
2677                if preprocess_type != None:
2678                    send_cmd("progress", progress_args)
2679 +                  video_guide_copy = preprocess_video(preprocess_type, width=width, height=height, video_in=video_guide, max_frames=video_length if window_no == 1 else video_length - reuse_frames, start_frame=guide_start_frame, fit_canvas=fit_canvas, target_fps=fps)
2680                keep_frames_parsed, error = parse_keep_frames_video_guide(keep_frames_video_guide, max_frames_to_generate)
2681                if len(error) > 0:
2682                    raise gr.Error(f"invalid keep frames {keep_frames_video_guide}")
2683                keep_frames_parsed = keep_frames_parsed[guide_start_frame: guide_start_frame + video_length]
2684                if window_no == 1:
2685 +                  image_size = (height, width)  # VACE_SIZE_CONFIGS[resolution_reformated] # default frame dimensions until it is set by video_src (if there is any)
2686                src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide_copy],
2687                                                                               [video_mask_copy],
2688                                                                               [image_refs_copy],
...
2690                                                                               original_video="O" in video_prompt_type,
2691                                                                               keep_frames=keep_frames_parsed,
2692                                                                               start_frame=guide_start_frame,
2693 +                                                                             pre_src_video=[pre_video_guide],
2694 +                                                                             fit_into_canvas=fit_canvas
2695                                                                               )
2696 +          # if window_no == 1 and src_video != None and len(src_video) > 0:
2697 +          #     image_size = src_video[0].shape[-2:]
2698            prompts_max = gen["prompts_max"]
2699            status = get_latest_status(state)
2700
...
2724                    # max_area=MAX_AREA_CONFIGS[resolution_reformated],
2725                    height=height,
2726                    width=width,
2727 +                  fit_into_canvas=fit_canvas,
2728                    shift=flow_shift,
2729                    sampling_steps=num_inference_steps,
2730                    guide_scale=guidance_scale,
...
2753                    input_video=pre_video_guide,
2754                    height=height,
2755                    width=width,
2756 +                  fit_into_canvas=fit_canvas,
2757                    seed=seed,
2758                    num_frames=(video_length // 4) * 4 + 1,  # 377
2759                    num_inference_steps=num_inference_steps,
...
2781                    target_camera=target_camera,
2782                    frame_num=(video_length // 4) * 4 + 1,
2783                    size=(width, height),
2784 +                  fit_into_canvas=fit_canvas,
2785                    shift=flow_shift,
2786                    sampling_steps=num_inference_steps,
2787                    guide_scale=guidance_scale,
...
4047                        wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible=False)
4048                        wizard_variables_var = gr.Text(wizard_variables, visible=False)
4049                    with gr.Row():
4050 +                      if test_class_i2v(model_filename):
4051 +                          if server_config.get("fit_canvas", 0) == 1:
4052 +                              label = "Max Resolution (as it may be less depending on video width / height ratio)"
4053 +                          else:
4054 +                              label = "Max Resolution (as it may be less depending on video width / height ratio)"
4055                        else:
4056 +                          label = "Max Resolution (as it may be less depending on video width / height ratio)"
4057 +                      resolution = gr.Dropdown(
4058 +                          choices=[
4059 +                              # 720p
4060 +                              ("1280x720 (16:9, 720p)", "1280x720"),
4061 +                              ("720x1280 (9:16, 720p)", "720x1280"),
4062 +                              ("1024x1024 (1:1, 720p)", "1024x1024"),
4063 +                              ("832x1104 (3:4, 720p)", "832x1104"),
4064 +                              ("1104x832 (4:3, 720p)", "1104x832"),
4065 +                              ("960x960 (1:1, 720p)", "960x960"),
4066 +                              # 480p
4067 +                              ("960x544 (16:9, 540p)", "960x544"),
4068 +                              ("544x960 (9:16, 540p)", "544x960"),
4069 +                              ("832x480 (16:9, 480p)", "832x480"),
4070 +                              ("480x832 (9:16, 480p)", "480x832"),
4071 +                              ("832x624 (4:3, 480p)", "832x624"),
4072 +                              ("624x832 (3:4, 480p)", "624x832"),
4073 +                              ("720x720 (1:1, 480p)", "720x720"),
4074 +                              ("512x512 (1:1, 480p)", "512x512"),
4075 +                          ],
4076 +                          value=ui_defaults.get("resolution", "832x480"),
4077 +                          label=label
4078 +                      )
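Each dropdown value is a "WIDTHxHEIGHT" string (width first, matching the labels), so downstream code has to split it back into two integers. A hypothetical one-line parser illustrating the format (the repository does this elsewhere; the helper name is invented):

def parse_resolution(resolution: str) -> tuple[int, int]:
    # "832x480" -> (832, 480)
    width, height = map(int, resolution.split("x"))
    return width, height

A malformed value such as "1024x024" would silently parse to a height of 24, which is why the dropdown values must match their labels exactly.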
4079                    with gr.Row():
4080                        if recammaster:
4081                            video_length = gr.Slider(5, 193, value=ui_defaults.get("video_length", 81), step=4, label="Number of frames (16 = 1s), locked", interactive=False)
...
4557        with gr.Column():
4558            model_list = []
4559
4560
4561 +          with gr.Tabs():
4562 +              # with gr.Row(visible=advanced_ui) as advanced_row:
4563 +              with gr.Tab("General"):
4564 +                  for model_type in model_types:
4565 +                      choice = get_model_filename(model_type, transformer_quantization)
4566 +                      model_list.append(choice)
4567 +                  dropdown_choices = [(get_model_name(choice), get_model_type(choice)) for choice in model_list]
4568 +                  transformer_types_choices = gr.Dropdown(
4569 +                      choices=dropdown_choices,
4570 +                      value=transformer_types,
4571 +                      label="Selectable Wan Transformer Models (keep empty to get all of them)",
4572 +                      scale=2,
4573 +                      multiselect=True
4574 +                  )
4575 +
4576 +                  fit_canvas_choice = gr.Dropdown(
4577 +                      choices=[
4578 +                          ("Dimensions correspond to the Pixel Budget (the Prompt Image/Video will be resized to match this pixel budget; the output video height or width may exceed the requested dimensions)", 0),
4579 +                          ("Dimensions correspond to the Maximum Width and Height (the Prompt Image/Video will be resized to fit into these dimensions; the output video may be smaller)", 1),
4580 +                      ],
4581 +                      value=server_config.get("fit_canvas", 0),
4582 +                      label="Generated Video Dimensions when Prompt contains an Image or a Video",
4583 +                      interactive=not lock_ui_attention
4584 +                  )
4585 +
4586 +
4587 +                  def check(mode):
4588 +                      if not mode in attention_modes_installed:
4589 +                          return " (NOT INSTALLED)"
4590 +                      elif not mode in attention_modes_supported:
4591 +                          return " (NOT SUPPORTED)"
4592 +                      else:
4593 +                          return ""
4594 +                  attention_choice = gr.Dropdown(
4595 +                      choices=[
4596 +                          ("Auto: pick sage2 > sage > sdpa depending on what is installed", "auto"),
4597 +                          ("Scaled Dot Product Attention: default, always available", "sdpa"),
4598 +                          ("Flash" + check("flash") + ": good quality - requires additional install (usually complex to set up on Windows without WSL)", "flash"),
4599 +                          ("Xformers" + check("xformers") + ": good quality, may consume less VRAM - requires additional install (usually complex to set up on Windows without WSL)", "xformers"),
4600 +                          ("Sage" + check("sage") + ": 30% faster but slightly worse quality - requires additional install (usually complex to set up on Windows without WSL)", "sage"),
4601 +                          ("Sage2" + check("sage2") + ": 40% faster but slightly worse quality - requires additional install (usually complex to set up on Windows without WSL)", "sage2"),
4602 +                      ],
4603 +                      value=attention_mode,
4604 +                      label="Attention Type",
4605 +                      interactive=not lock_ui_attention
4606 +                  )
4607 +
4608 +
4609 +                  metadata_choice = gr.Dropdown(
4610 +                      choices=[
4611 +                          ("Export JSON files", "json"),
4612 +                          ("Add metadata to video", "metadata"),
4613 +                          ("Neither", "none")
4614 +                      ],
4615 +                      value=server_config.get("metadata_type", "metadata"),
4616 +                      label="Metadata Handling"
4617 +                  )
4618 +                  preload_model_policy_choice = gr.CheckboxGroup([("Preload Model while Launching the App", "P"), ("Preload Model while Switching Model", "S"), ("Unload Model when Queue is Done", "U")],
4619 +                      value=server_config.get("preload_model_policy", []),
4620 +                      label="RAM Loading / Unloading Model Policy (in any case VRAM will be freed once the queue has been processed)"
4621 +                  )
4622 +
4623 +                  clear_file_list_choice = gr.Dropdown(
4624 +                      choices=[
4625 +                          ("None", 0),
4626 +                          ("Keep the last video", 1),
4627 +                          ("Keep the last 5 videos", 5),
4628 +                          ("Keep the last 10 videos", 10),
4629 +                          ("Keep the last 20 videos", 20),
4630 +                          ("Keep the last 30 videos", 30),
4631 +                      ],
4632 +                      value=server_config.get("clear_file_list", 5),
4633 +                      label="Keep Previously Generated Videos when starting a new Generation Batch"
4634 +                  )
4635 +
4636 +                  UI_theme_choice = gr.Dropdown(
4637 +                      choices=[
4638 +                          ("Blue Sky", "default"),
4639 +                          ("Classic Gradio", "gradio"),
4640 +                      ],
4641 +                      value=server_config.get("UI_theme_choice", "default"),
4642 +                      label="User Interface Theme. You will need to restart the App to see the new Theme."
4643 +                  )
4644 +
4645 +                  save_path_choice = gr.Textbox(
4646 +                      label="Output Folder for Generated Videos",
4647 +                      value=server_config.get("save_path", save_path)
4648 +                  )
4649 +
4650 +              with gr.Tab("Performance"):
4651 +
4652 +                  quantization_choice = gr.Dropdown(
4653 +                      choices=[
4654 +                          ("Scaled Int8 Quantization (recommended)", "int8"),
4655 +                          ("16 bits (no quantization)", "bf16"),
4656 +                      ],
4657 +                      value=transformer_quantization,
4658 +                      label="Wan Transformer Model Quantization Type (if available)",
4659 +                  )
4660 +
4661 +                  mixed_precision_choice = gr.Dropdown(
4662 +                      choices=[
4663 +                          ("16 bits only, requires less VRAM", "0"),
4664 +                          ("Mixed 16 / 32 bits, slightly more VRAM needed but better Quality", "1"),
4665 +                      ],
4666 +                      value=server_config.get("mixed_precision", "0"),
4667 +                      label="Transformer Engine Calculation"
4668 +                  )
4669 +
4670 +                  index = text_encoder_choices.index(text_encoder_filename)
4671 +                  index = 0 if index == 0 else index
4672 +                  text_encoder_choice = gr.Dropdown(
4673 +                      choices=[
4674 +                          ("UMT5 XXL 16 bits - unquantized text encoder, better quality but uses more RAM", 0),
4675 +                          ("UMT5 XXL quantized to 8 bits - quantized text encoder, slightly worse quality but uses less RAM", 1),
4676 +                      ],
4677 +                      value=index,
4678 +                      label="Text Encoder model"
4679 +                  )
4680 +
4681 +                  VAE_precision_choice = gr.Dropdown(
4682 +                      choices=[
4683 +                          ("16 bits, requires less VRAM and faster", "16"),
4684 +                          ("32 bits, requires twice as much VRAM and is slower, but recommended with Sliding Windows", "32"),
4685 +                      ],
4686 +                      value=server_config.get("vae_precision", "16"),
4687 +                      label="VAE Encoding / Decoding precision"
4688 +                  )
4689 +
4690 +                  gr.Text("Beware: when restarting the server or changing a resolution or video duration, the first step of generation for a duration / resolution may last a few minutes due to recompilation", interactive=False, show_label=False)
4691 +                  compile_choice = gr.Dropdown(
4692 +                      choices=[
4693 +                          ("ON: works only on Linux / WSL", "transformer"),
4694 +                          ("OFF: no other choice if you have Windows without using WSL", ""),
4695 +                      ],
4696 +                      value=compile,
4697 +                      label="Compile Transformer (up to 50% faster and 30% more frames but requires Linux / WSL and Flash or Sage attention)",
4698 +                      interactive=not lock_ui_compile
4699 +                  )
4700 +
4701 +                  vae_config_choice = gr.Dropdown(
4702 +                      choices=[
4703 +                          ("Auto", 0),
4704 +                          ("Disabled (faster but may require up to 22 GB of VRAM)", 1),
4705 +                          ("256 x 256: if at least 8 GB of VRAM", 2),
4706 +                          ("128 x 128: if at least 6 GB of VRAM", 3),
4707 +                      ],
4708 +                      value=vae_config,
4709 +                      label="VAE Tiling - reduces the high VRAM requirements of VAE decoding and encoding (if enabled it will be slower)"
4710 +                  )
4711 +
4712 +                  boost_choice = gr.Dropdown(
4713 +                      choices=[
4714 +                          # ("Auto (ON if Video longer than 5s)", 0),
4715 +                          ("ON", 1),
4716 +                          ("OFF", 2),
4717 +                      ],
4718 +                      value=boost,
4719 +                      label="Boost: gives a 10% speedup without losing quality at the cost of a little VRAM (up to 1 GB at max frames and resolution)"
4720 +                  )
4721 +
4722 +                  profile_choice = gr.Dropdown(
4723 +                      choices=[
4724 +                          ("HighRAM_HighVRAM, profile 1: at least 48 GB of RAM and 24 GB of VRAM, the fastest for short videos on a RTX 3090 / RTX 4090", 1),
4725 +                          ("HighRAM_LowVRAM, profile 2 (Recommended): at least 48 GB of RAM and 12 GB of VRAM, the most versatile profile with high RAM, better suited for RTX 3070/3080/4070/4080 or for RTX 3090 / RTX 4090 with large picture batches or long videos", 2),
4726 +                          ("LowRAM_HighVRAM, profile 3: at least 32 GB of RAM and 24 GB of VRAM, adapted for RTX 3090 / RTX 4090 with limited RAM, for good speed on short videos", 3),
4727 +                          ("LowRAM_LowVRAM, profile 4 (Default): at least 32 GB of RAM and 12 GB of VRAM, if you have little VRAM or want to generate longer videos", 4),
4728 +                          ("VerylowRAM_LowVRAM, profile 5 (Fail safe): at least 16 GB of RAM and 10 GB of VRAM, if you don't have much it won't be fast but maybe it will work", 5)
4729 +                      ],
4730 +                      value=profile,
4731 +                      label="Profile (for power users only, no need to change it)"
4732 +                  )
4733
4734
4735
4736
4737        msg = gr.Markdown()
...
4754                   boost_choice,
4755                   clear_file_list_choice,
4756                   preload_model_policy_choice,
4757 +                 UI_theme_choice,
4758 +                 fit_canvas_choice
4759               ],
4760               outputs=[msg, header, model_choice]
4761       )
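The inputs list above is positional: Gradio passes each component's current value to the callback in this exact order, which is why fit_canvas_choice is appended both here and as a trailing parameter of the settings-apply function earlier in the diff. A minimal, self-contained illustration of the pattern (component and callback names hypothetical):

import gradio as gr

def apply_changes(theme, fit_canvas):
    # Arguments arrive in the same order as the `inputs` list below.
    return f"Saved: UI_theme={theme}, fit_canvas={fit_canvas}"

with gr.Blocks() as demo:
    theme_dd = gr.Dropdown(choices=[("Blue Sky", "default"), ("Classic Gradio", "gradio")], value="default")
    fit_dd = gr.Dropdown(choices=[("Pixel budget", 0), ("Max width/height", 1)], value=0)
    msg = gr.Markdown()
    gr.Button("Apply").click(apply_changes, inputs=[theme_dd, fit_dd], outputs=[msg])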
