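"""VidToMe pipeline wrapper.

Exposes VidToMe's inversion and generation stages behind a single
DiffusionPipeline-style interface: build the pipeline once, then call it with a
video path and prompts to produce an edited video.
"""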
from diffusers import DiffusionPipeline
from .invert import Inverter
from .generate import Generator
from .utils import init_model, seed_everything, get_frame_ids
import torch
from omegaconf import OmegaConf

class VidToMePipeline(DiffusionPipeline):
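    """Diffusers-style pipeline for VidToMe video editing.

    Runs the two-stage VidToMe workflow: inversion of the input video into
    latents, followed by prompt-guided generation with local and global token
    merging across frames.
    """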
    def __init__(self, device="cuda", sd_version="1.5", float_precision="fp16", height=512, width=512):
        super().__init__()
        # Register configuration parameters with the diffusers ConfigMixin so they
        # travel with the pipeline config.
        self.register_to_config(device=device, sd_version=sd_version,
                                float_precision=float_precision, height=height, width=width)
        self.sd_version = sd_version
        self.float_precision = float_precision
        self.height = height
        self.width = width
        # Initialize the core pipeline components: base pipeline, scheduler, and model key.
        pipe, scheduler, model_key = init_model(device, sd_version, None, "none", float_precision)
        self.pipe = pipe
        self.scheduler = scheduler
        self.model_key = model_key

    def __call__(self, video_path=None, video_prompt=None, edit_prompt=None, 
                 control_type="none", n_timesteps=50, guidance_scale=7.5, 
                 negative_prompt="ugly, blurry, low res", frame_range=None, 
                 use_lora=False, seed=123, local_merge_ratio=0.9, global_merge_ratio=0.8):
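        """Run the full edit: build the config, invert the input video into
        latents, then generate the edited frames at the configured output path."""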
        
        # Build the run configuration from the user inputs.
        config = self._build_config(video_path, video_prompt, edit_prompt, control_type, 
                                    n_timesteps, guidance_scale, negative_prompt, 
                                    frame_range, use_lora, seed, local_merge_ratio, global_merge_ratio)
        
        # Seed for reproducibility; change as needed.
        seed_everything(config['seed'])

        # inversion stage
        print("Start inversion!")
        inversion = Inverter(self.pipe, self.scheduler, config)
        inversion(config['input_path'], config['inversion']['save_path'])

        # generation stage
        print("Start generation!")
        generator = Generator(self.pipe, self.scheduler, config)
        frame_ids = get_frame_ids(config['generation']['frame_range'], None)
        generator(config['input_path'], config['generation']['latents_path'], 
                  config['generation']['output_path'], frame_ids=frame_ids)
        print(f"Output generated at: {config['generation']['output_path']}")

    def _build_config(self, video_path, video_prompt, edit_prompt, control_type,
                      n_timesteps, guidance_scale, negative_prompt, frame_range,
                      use_lora, seed, local_merge_ratio, global_merge_ratio):
        # Construct the run configuration with OmegaConf, combining the user's
        # call arguments with the settings the pipeline was constructed with.
        config = OmegaConf.create({
            'sd_version': self.sd_version,
            'input_path': video_path,
            'work_dir': "outputs/",
            'height': self.height,
            'width': self.width,
            'inversion': {
                'prompt': video_prompt or "Default video prompt.",
                'save_path': "outputs/latents",
                'steps': 50,
                'save_intermediate': False
            },
            'generation': {
                'control': control_type,
                'guidance_scale': guidance_scale,
                'n_timesteps': n_timesteps,
                'negative_prompt': negative_prompt,
                'prompt': edit_prompt or "Default edit prompt.",
                'latents_path': "outputs/latents",
                'output_path': "outputs/final",
                'frame_range': frame_range or [0, 32],
                'use_lora': use_lora,
                'local_merge_ratio': local_merge_ratio,
                'global_merge_ratio': global_merge_ratio
            },
            'seed': seed,
            'device': self.config.device,  # device registered in __init__
            'float_precision': self.float_precision
        })
        return config

# Sample usage (the video path and prompts below are placeholders).
if __name__ == "__main__":
    pipeline = VidToMePipeline(device="cuda", sd_version="2.1", float_precision="fp16")
    pipeline(video_path="path/to/video.mp4", video_prompt="A beautiful scene of a sunset",
             edit_prompt="Make the sunset look more vibrant", control_type="depth", n_timesteps=50)