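"""Two-stage StableCascade image upscaler.

Stage C runs a super-resolution ControlNet conditioned on the input image's
upscaled EfficientNet latents; Stage B expands the sampled latents, and
Stage A decodes them back to pixels.
"""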
import os
import warnings

import yaml
import torch
import torchvision
from tqdm import tqdm

from inference.utils import *
from train import ControlNetCore, WurstCoreB

warnings.filterwarnings("ignore")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class UpscaleCascade:
    def __init__(self) -> None:
        # Stage C: ControlNet variant of the Stage C model, set up for super-resolution.
        self.config_file = './configs/inference/controlnet_c_3b_sr.yaml'
        with open(self.config_file, "r", encoding="utf-8") as file:
            loaded_config = yaml.safe_load(file)
        self.core = ControlNetCore(config_dict=loaded_config, device=device, training=False)

        # Stage B: turns Stage C latents back into Stage A latents.
        self.config_file_b = './configs/inference/stage_b_3b.yaml'
        with open(self.config_file_b, "r", encoding="utf-8") as file:
            loaded_config_b = yaml.safe_load(file)
        self.core_b = WurstCoreB(config_dict=loaded_config_b, device=device, training=False)

        self.extras = self.core.setup_extras_pre()
        self.models = self.core.setup_models(self.extras)
        self.models.generator.eval().requires_grad_(False)
        print("CONTROLNET READY")

        self.extras_b = self.core_b.setup_extras_pre()
        self.models_b = self.core_b.setup_models(self.extras_b, skip_clip=True)
        # Reuse Stage C's tokenizer and text encoder instead of loading CLIP twice.
        self.models_b = WurstCoreB.Models(
            **{**self.models_b.to_dict(), 'tokenizer': self.models.tokenizer, 'text_model': self.models.text_model}
        )
        self.models_b.generator.eval().requires_grad_(False)
        print("STAGE B READY")

        self.caption = "a photo of image"  # default caption; upscale_image takes its own
        self.cnet_multiplier = 1.0  # strength of the ControlNet conditioning

        # Stage C sampling settings
        self.extras.sampling_configs['cfg'] = 1
        self.extras.sampling_configs['shift'] = 2
        self.extras.sampling_configs['timesteps'] = 20
        self.extras.sampling_configs['t_start'] = 1.0

        # Stage B sampling settings
        self.extras_b.sampling_configs['cfg'] = 1.1
        self.extras_b.sampling_configs['shift'] = 1
        self.extras_b.sampling_configs['timesteps'] = 10
        self.extras_b.sampling_configs['t_start'] = 1.0

    def upscale_image(self, caption, image_pil, scale_factor):
        batch_size = 1
        images = resize_image(image_pil).unsqueeze(0).expand(batch_size, -1, -1, -1)
        batch = {'images': images}

        with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
            # Encode the input to EfficientNet latents, upscale them, and run
            # the ControlNet over the result to get the conditioning features.
            effnet_latents = self.core.encode_latents(batch, self.models, self.extras)
            effnet_latents_up = torch.nn.functional.interpolate(effnet_latents, scale_factor=scale_factor, mode="nearest")
            cnet = self.models.controlnet(effnet_latents_up)
            cnet_uncond = cnet  # the unconditional pass keeps the same ControlNet features
            cnet_input = torch.nn.functional.interpolate(images, scale_factor=scale_factor, mode="nearest")  # pixel-space preview; not used below

            # Derive the target resolution from the upscaled ControlNet features.
            height, width = int(cnet[0].size(-2) * 32 * 4 / 3), int(cnet[0].size(-1) * 32 * 4 / 3)
            stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)

            batch['captions'] = [caption] * batch_size
            conditions = self.core.get_conditions(batch, self.models, self.extras, is_eval=True, is_unconditional=False, eval_image_embeds=False)
            unconditions = self.core.get_conditions(batch, self.models, self.extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
            conditions['cnet'] = [c.clone() * self.cnet_multiplier if c is not None else c for c in cnet]
            unconditions['cnet'] = [c.clone() * self.cnet_multiplier if c is not None else c for c in cnet_uncond]
            conditions_b = self.core_b.get_conditions(batch, self.models_b, self.extras_b, is_eval=True, is_unconditional=False)
            unconditions_b = self.core_b.get_conditions(batch, self.models_b, self.extras_b, is_eval=True, is_unconditional=True)

            # Stage C: sample the compressed latents under ControlNet guidance.
            sampling_c = self.extras.gdf.sample(
                self.models.generator, conditions, stage_c_latent_shape,
                unconditions, device=device, **self.extras.sampling_configs,
            )
            for (sampled_c, _, _) in tqdm(sampling_c, total=self.extras.sampling_configs['timesteps']):
                pass  # drain the generator; sampled_c holds the final Stage C latent

            # Stage B: expand the Stage C latents into Stage A latents.
            conditions_b['effnet'] = sampled_c
            unconditions_b['effnet'] = torch.zeros_like(sampled_c)
            sampling_b = self.extras_b.gdf.sample(
                self.models_b.generator, conditions_b, stage_b_latent_shape,
                unconditions_b, device=device, **self.extras_b.sampling_configs
            )
            for (sampled_b, _, _) in tqdm(sampling_b, total=self.extras_b.sampling_configs['timesteps']):
                pass  # drain the generator; sampled_b holds the final Stage B latent

            # Stage A: decode latents to pixels.
            sampled = self.models_b.stage_a.decode(sampled_b).float()

        upscale = show_images(sampled, return_images=True)
        return upscale
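

# Example usage: a minimal sketch. "input.png" and the 2x factor are
# hypothetical, the configs/weights referenced in __init__ must be on disk,
# and show_images(return_images=True) is assumed to return a PIL image.
if __name__ == "__main__":
    from PIL import Image

    upscaler = UpscaleCascade()
    source = Image.open("input.png").convert("RGB")
    result = upscaler.upscale_image("a photo of image", source, scale_factor=2)
    result.save("upscaled.png")  # assumes a single PIL image is returned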