dreamgaussian

Runtime error

App Files Files Community

dreamgaussian / main2.py

jiawei011

init

12b7f59 10 months ago

raw

history blame

No virus

23.7 kB

	import os
	import cv2
	import time
	import tqdm
	import numpy as np
	import dearpygui.dearpygui as dpg

	import torch
	import torch.nn.functional as F

	import trimesh
	import rembg

	from cam_utils import orbit_camera, OrbitCamera
	from mesh_renderer import Renderer

	# from kiui.lpips import LPIPS

	class GUI:
	def __init__(self, opt):
	    """Build the camera, renderer and training state; optionally start the GUI.

	    Args:
	        opt: Configuration object. This method reads ``gui``, ``W``, ``H``,
	            ``radius``, ``fovy``, ``input`` and ``prompt`` from it and passes
	            the whole object on to ``Renderer``.
	    """
	    self.opt = opt  # shared with the trainer's opt to support in-place modification of rendering parameters.
	    self.gui = opt.gui  # enable gui
	    self.W = opt.W
	    self.H = opt.H
	    self.cam = OrbitCamera(opt.W, opt.H, r=opt.radius, fovy=opt.fovy)

	    self.mode = "image"
	    self.seed = "random"
	    self.last_seed = None  # filled in by seed_everything()

	    # Row-major [H, W, 3] layout, the same layout test_step() writes into
	    # this buffer (the previous (W, H, 3) shape only matched when W == H).
	    self.buffer_image = np.ones((self.H, self.W, 3), dtype=np.float32)
	    self.need_update = True  # update buffer_image

	    # models
	    self.device = torch.device("cuda")
	    self.bg_remover = None

	    self.guidance_sd = None
	    self.guidance_zero123 = None

	    self.enable_sd = False
	    self.enable_zero123 = False

	    # renderer
	    self.renderer = Renderer(opt).to(self.device)

	    # input image
	    self.input_img = None
	    self.input_mask = None
	    self.input_img_torch = None
	    self.input_mask_torch = None
	    self.input_img_torch_channel_last = None  # set by prepare_train()
	    self.overlay_input_img = False
	    self.overlay_input_img_ratio = 0.5

	    # input text
	    self.prompt = ""
	    self.negative_prompt = ""

	    # training stuff
	    self.training = False
	    self.optimizer = None
	    self.fixed_cam = None  # (pose, perspective) pair, set by prepare_train()
	    self.step = 0
	    self.train_steps = 1  # steps per rendering loop
	    # self.lpips_loss = LPIPS(net='vgg').to(self.device)

	    # load input data from cmdline
	    if self.opt.input is not None:
	        self.load_input(self.opt.input)

	    # override prompt from cmdline (takes precedence over a caption file
	    # that load_input() may have picked up)
	    if self.opt.prompt is not None:
	        self.prompt = self.opt.prompt

	    if self.gui:
	        dpg.create_context()
	        self.register_dpg()
	        self.test_step()

	def __del__(self):
	if self.gui:
	dpg.destroy_context()

	def seed_everything(self):
	try:
	seed = int(self.seed)
	except:
	seed = np.random.randint(0, 1000000)

	os.environ["PYTHONHASHSEED"] = str(seed)
	np.random.seed(seed)
	torch.manual_seed(seed)
	torch.cuda.manual_seed(seed)
	torch.backends.cudnn.deterministic = True
	torch.backends.cudnn.benchmark = True

	self.last_seed = seed

	def prepare_train(self):
	    """Reset the step counter and set up everything ``train_step`` consumes.

	    Creates the optimizer and the fixed reference camera, decides which
	    guidance models are enabled (loading them on first use), converts the
	    input image and mask to tensors, and precomputes guidance embeddings.
	    """
	    opt = self.opt
	    has_input = self.input_img is not None

	    self.step = 0

	    # setup training
	    self.optimizer = torch.optim.Adam(self.renderer.get_params())

	    # default camera
	    default_pose = orbit_camera(opt.elevation, 0, opt.radius)
	    self.fixed_cam = (default_pose, self.cam.perspective)

	    # SD guidance needs a prompt, zero123 guidance needs an input image
	    self.enable_sd = opt.lambda_sd > 0 and self.prompt != ""
	    self.enable_zero123 = opt.lambda_zero123 > 0 and has_input

	    # lazy load guidance model
	    if self.enable_sd and self.guidance_sd is None:
	        print(f"[INFO] loading SD...")
	        from guidance.sd_utils import StableDiffusion
	        self.guidance_sd = StableDiffusion(self.device)
	        print(f"[INFO] loaded SD!")

	    if self.enable_zero123 and self.guidance_zero123 is None:
	        print(f"[INFO] loading zero123...")
	        from guidance.zero123_utils import Zero123
	        self.guidance_zero123 = Zero123(self.device)
	        print(f"[INFO] loaded zero123!")

	    # input image
	    if has_input:
	        ref_hw = (opt.ref_size, opt.ref_size)

	        # channel-last numpy image -> [1, C, h, w] tensor, resized to the reference size
	        img_t = torch.from_numpy(self.input_img).permute(2, 0, 1).unsqueeze(0).to(self.device)
	        self.input_img_torch = F.interpolate(img_t, ref_hw, mode="bilinear", align_corners=False)

	        mask_t = torch.from_numpy(self.input_mask).permute(2, 0, 1).unsqueeze(0).to(self.device)
	        self.input_mask_torch = F.interpolate(mask_t, ref_hw, mode="bilinear", align_corners=False)

	        # channel-last copy, compared against the renderer output in train_step
	        self.input_img_torch_channel_last = self.input_img_torch[0].permute(1, 2, 0).contiguous()

	    # prepare embeddings
	    with torch.no_grad():
	        if self.enable_sd:
	            self.guidance_sd.get_text_embeds([self.prompt], [self.negative_prompt])

	        if self.enable_zero123:
	            self.guidance_zero123.get_img_embeds(self.input_img_torch)

	def train_step(self):
	    """Run ``self.train_steps`` optimisation iterations and update the log.

	    Each iteration adds an MSE loss against the input image from the fixed
	    camera (when an input image tensor exists), renders ``opt.batch_size``
	    random views, and adds an MSE loss between those renders and their
	    refined versions from the enabled guidance models. The elapsed time is
	    measured with CUDA events.

	    When no loss term is active the accumulated loss is still the plain
	    number ``0``; the optimizer step is skipped instead of failing on
	    ``loss.backward()``.
	    """
	    starter = torch.cuda.Event(enable_timing=True)
	    ender = torch.cuda.Event(enable_timing=True)
	    starter.record()

	    for _ in range(self.train_steps):

	        self.step += 1
	        # only consumed by the commented-out train_step() guidance losses below
	        step_ratio = min(1, self.step / self.opt.iters_refine)

	        loss = 0

	        ### known view
	        if self.input_img_torch is not None:

	            ssaa = min(2.0, max(0.125, 2 * np.random.random()))
	            out = self.renderer.render(*self.fixed_cam, self.opt.ref_size, self.opt.ref_size, ssaa=ssaa)

	            # rgb loss
	            image = out["image"]  # [H, W, 3] in [0, 1]
	            valid_mask = ((out["alpha"] > 0) & (out["viewcos"] > 0.5)).detach()
	            loss = loss + F.mse_loss(image * valid_mask, self.input_img_torch_channel_last * valid_mask)

	        ### novel view (manual batch)
	        render_resolution = 512
	        images = []
	        vers, hors, radii = [], [], []
	        # avoid too large elevation (> 80 or < -80), and make sure it always cover [-30, 30]
	        min_ver = max(min(-30, -30 - self.opt.elevation), -80 - self.opt.elevation)
	        max_ver = min(max(30, 30 - self.opt.elevation), 80 - self.opt.elevation)
	        for _ in range(self.opt.batch_size):

	            # render random view
	            ver = np.random.randint(min_ver, max_ver)
	            hor = np.random.randint(-180, 180)
	            radius = 0

	            vers.append(ver)
	            hors.append(hor)
	            radii.append(radius)

	            pose = orbit_camera(self.opt.elevation + ver, hor, self.opt.radius + radius)

	            # random render resolution
	            ssaa = min(2.0, max(0.125, 2 * np.random.random()))
	            out = self.renderer.render(pose, self.cam.perspective, render_resolution, render_resolution, ssaa=ssaa)

	            image = out["image"]  # [H, W, 3] in [0, 1]
	            image = image.permute(2, 0, 1).contiguous().unsqueeze(0)  # [1, 3, H, W] in [0, 1]

	            images.append(image)

	        images = torch.cat(images, dim=0)

	        # import kiui
	        # kiui.lo(hor, ver)
	        # kiui.vis.plot_image(image)

	        # guidance loss
	        if self.enable_sd:

	            # loss = loss + self.opt.lambda_sd * self.guidance_sd.train_step(images, step_ratio)
	            refined_images = self.guidance_sd.refine(images, strength=0.6).float()
	            refined_images = F.interpolate(refined_images, (render_resolution, render_resolution), mode="bilinear", align_corners=False)
	            loss = loss + self.opt.lambda_sd * F.mse_loss(images, refined_images)

	        if self.enable_zero123:
	            # loss = loss + self.opt.lambda_zero123 * self.guidance_zero123.train_step(images, vers, hors, radii, step_ratio)
	            refined_images = self.guidance_zero123.refine(images, vers, hors, radii, strength=0.6).float()
	            refined_images = F.interpolate(refined_images, (render_resolution, render_resolution), mode="bilinear", align_corners=False)
	            loss = loss + self.opt.lambda_zero123 * F.mse_loss(images, refined_images)
	            # loss = loss + self.opt.lambda_zero123 * self.lpips_loss(images, refined_images)

	        # optimize step
	        # With no input image and no guidance enabled, ``loss`` is still the
	        # int 0 and has no ``backward``; there is nothing to optimise then.
	        if torch.is_tensor(loss):
	            loss.backward()
	            self.optimizer.step()
	            self.optimizer.zero_grad()

	    ender.record()
	    torch.cuda.synchronize()
	    t = starter.elapsed_time(ender)

	    self.need_update = True

	    if self.gui:
	        loss_value = loss.item() if torch.is_tensor(loss) else float(loss)
	        dpg.set_value("_log_train_time", f"{t:.4f}ms")
	        dpg.set_value(
	            "_log_train_log",
	            f"step = {self.step: 5d} (+{self.train_steps: 2d}) loss = {loss_value:.4f}",
	        )

	    # dynamic train steps (no need for now)
	    # max allowed train time per-frame is 500 ms
	    # full_t = t / self.train_steps * 16
	    # train_steps = min(16, max(4, int(16 * 500 / full_t)))
	    # if train_steps > self.train_steps * 1.2 or train_steps < self.train_steps * 0.8:
	    #     self.train_steps = train_steps

	@torch.no_grad()
	def test_step(self):
	    """Re-render the current view into ``self.buffer_image`` if it is stale.

	    Does nothing unless ``self.need_update`` is set. The selected output
	    (``self.mode``) is rendered from the interactive camera, optionally
	    blended with the input image, and, in GUI mode, uploaded to the
	    ``"_texture"`` item together with the measured render time.
	    """
	    # ignore if no need to update
	    if not self.need_update:
	        return

	    starter = torch.cuda.Event(enable_timing=True)
	    ender = torch.cuda.Event(enable_timing=True)
	    starter.record()

	    # render image
	    out = self.renderer.render(self.cam.pose, self.cam.perspective, self.H, self.W)

	    buffer_image = out[self.mode]  # [H, W, 3]

	    if self.mode in ['depth', 'alpha']:
	        # single-channel outputs are tiled to three channels for display
	        buffer_image = buffer_image.repeat(1, 1, 3)
	        if self.mode == 'depth':
	            # min-max normalise depth into [0, 1]; the epsilon avoids 0/0 on a constant map
	            buffer_image = (buffer_image - buffer_image.min()) / (buffer_image.max() - buffer_image.min() + 1e-20)

	    self.buffer_image = buffer_image.contiguous().clamp(0, 1).detach().cpu().numpy()

	    # display input_image
	    if self.overlay_input_img and self.input_img is not None:
	        self.buffer_image = (
	            self.buffer_image * (1 - self.overlay_input_img_ratio)
	            + self.input_img * self.overlay_input_img_ratio
	        )

	    self.need_update = False

	    ender.record()
	    torch.cuda.synchronize()
	    t = starter.elapsed_time(ender)

	    if self.gui:
	        # clamp so that a 0 ms measurement cannot raise ZeroDivisionError
	        fps = int(1000 / max(t, 1e-6))
	        dpg.set_value("_log_infer_time", f"{t:.4f}ms ({fps} FPS)")
	        dpg.set_value(
	            "_texture", self.buffer_image
	        )  # buffer must be contiguous, else seg fault!


	def load_input(self, file):
	    """Load the input image (and an optional caption) from ``file``.

	    The image is read with OpenCV. Three-channel images are passed through
	    ``rembg`` to obtain an alpha channel. The result is resized to
	    ``(self.W, self.H)``, scaled to ``[0, 1]``, composited on a white
	    background and stored as RGB in ``self.input_img``, with the alpha
	    channel in ``self.input_mask``.

	    If ``file`` contains ``"_rgba.png"`` and the matching ``"_caption.txt"``
	    file exists, its stripped content becomes ``self.prompt``.

	    Args:
	        file: Path of the image to load.

	    Raises:
	        ValueError: If OpenCV cannot read an image from ``file``.
	    """
	    # load image
	    print(f'[INFO] load image from {file}...')
	    img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
	    if img is None:
	        # cv2.imread reports failure by returning None rather than raising
	        raise ValueError(f"Cannot read image from {file} (missing file or unsupported format).")
	    if img.ndim == 2:
	        # grayscale image has no channel axis: expand it so the channel
	        # check below sees three channels instead of the image width
	        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
	    if img.shape[-1] == 3:
	        # no alpha channel: estimate one by removing the background
	        if self.bg_remover is None:
	            self.bg_remover = rembg.new_session()
	        img = rembg.remove(img, session=self.bg_remover)

	    img = cv2.resize(
	        img, (self.W, self.H), interpolation=cv2.INTER_AREA
	    )
	    img = img.astype(np.float32) / 255.0

	    self.input_mask = img[..., 3:]
	    # white bg
	    self.input_img = img[..., :3] * self.input_mask + (
	        1 - self.input_mask
	    )
	    # bgr to rgb
	    self.input_img = self.input_img[..., ::-1].copy()

	    # load prompt
	    file_prompt = file.replace("_rgba.png", "_caption.txt")
	    # str.replace leaves the path unchanged when it has no "_rgba.png" part;
	    # without this check the image file itself would be read as the caption.
	    if file_prompt != file and os.path.exists(file_prompt):
	        print(f'[INFO] load prompt from {file_prompt}...')
	        with open(file_prompt, "r") as f:
	            self.prompt = f.read().strip()

	def save_model(self):
	    """Export the renderer's mesh to ``<outdir>/<save_path>.<mesh_format>``.

	    The output directory is created if it does not exist yet.
	    """
	    out_dir = self.opt.outdir
	    os.makedirs(out_dir, exist_ok=True)

	    file_name = self.opt.save_path + '.' + self.opt.mesh_format
	    path = os.path.join(out_dir, file_name)
	    self.renderer.export_mesh(path)

	    print(f"[INFO] save model to {path}.")

	def register_dpg(self):
	    """Create all Dear PyGui items: texture, windows, widgets, handlers, viewport.

	    Builds the render texture and the primary window that shows it, the
	    control window (seed / input / overlay / prompt / save / train /
	    rendering options), the mouse handlers that drive ``self.cam``, and
	    finally the viewport, which is shown before returning.

	    A Dear PyGui context must already exist (``__init__`` creates it before
	    calling this method).
	    """
	    ### register texture

	    with dpg.texture_registry(show=False):
	        dpg.add_raw_texture(
	            self.W,
	            self.H,
	            self.buffer_image,
	            format=dpg.mvFormat_Float_rgb,
	            tag="_texture",
	        )

	    ### register window

	    # the rendered image, as the primary window
	    with dpg.window(
	        tag="_primary_window",
	        width=self.W,
	        height=self.H,
	        pos=[0, 0],
	        no_move=True,
	        no_title_bar=True,
	        no_scrollbar=True,
	    ):
	        # add the texture
	        dpg.add_image("_texture")

	    # dpg.set_primary_window("_primary_window", True)

	    # control window
	    with dpg.window(
	        label="Control",
	        tag="_control_window",
	        width=600,
	        height=self.H,
	        pos=[self.W, 0],
	        no_move=True,
	        no_title_bar=True,
	    ):
	        # button theme
	        with dpg.theme() as theme_button:
	            with dpg.theme_component(dpg.mvButton):
	                dpg.add_theme_color(dpg.mvThemeCol_Button, (23, 3, 18))
	                dpg.add_theme_color(dpg.mvThemeCol_ButtonHovered, (51, 3, 47))
	                dpg.add_theme_color(dpg.mvThemeCol_ButtonActive, (83, 18, 83))
	                dpg.add_theme_style(dpg.mvStyleVar_FrameRounding, 5)
	                dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 3, 3)

	        # timer stuff
	        with dpg.group(horizontal=True):
	            dpg.add_text("Infer time: ")
	            dpg.add_text("no data", tag="_log_infer_time")

	        def callback_setattr(sender, app_data, user_data):
	            # generic setter: user_data names the attribute of self to update
	            setattr(self, user_data, app_data)

	        # init stuff
	        with dpg.collapsing_header(label="Initialize", default_open=True):

	            # seed stuff
	            def callback_set_seed(sender, app_data):
	                self.seed = app_data
	                self.seed_everything()

	            dpg.add_input_text(
	                label="seed",
	                default_value=self.seed,
	                on_enter=True,
	                callback=callback_set_seed,
	            )

	            # input stuff
	            def callback_select_input(sender, app_data):
	                # only one item
	                for k, v in app_data["selections"].items():
	                    dpg.set_value("_log_input", k)
	                    self.load_input(v)

	                self.need_update = True

	            with dpg.file_dialog(
	                directory_selector=False,
	                show=False,
	                callback=callback_select_input,
	                file_count=1,
	                tag="file_dialog_tag",
	                width=700,
	                height=400,
	            ):
	                dpg.add_file_extension("Images{.jpg,.jpeg,.png}")

	            with dpg.group(horizontal=True):
	                dpg.add_button(
	                    label="input",
	                    callback=lambda: dpg.show_item("file_dialog_tag"),
	                )
	                dpg.add_text("", tag="_log_input")

	            # overlay stuff
	            with dpg.group(horizontal=True):

	                def callback_toggle_overlay_input_img(sender, app_data):
	                    self.overlay_input_img = not self.overlay_input_img
	                    self.need_update = True

	                dpg.add_checkbox(
	                    label="overlay image",
	                    default_value=self.overlay_input_img,
	                    callback=callback_toggle_overlay_input_img,
	                )

	                def callback_set_overlay_input_img_ratio(sender, app_data):
	                    self.overlay_input_img_ratio = app_data
	                    self.need_update = True

	                dpg.add_slider_float(
	                    label="ratio",
	                    min_value=0,
	                    max_value=1,
	                    format="%.1f",
	                    default_value=self.overlay_input_img_ratio,
	                    callback=callback_set_overlay_input_img_ratio,
	                )

	            # prompt stuff

	            dpg.add_input_text(
	                label="prompt",
	                default_value=self.prompt,
	                callback=callback_setattr,
	                user_data="prompt",
	            )

	            dpg.add_input_text(
	                label="negative",
	                default_value=self.negative_prompt,
	                callback=callback_setattr,
	                user_data="negative_prompt",
	            )

	            # save current model
	            with dpg.group(horizontal=True):
	                dpg.add_text("Save: ")

	                dpg.add_button(
	                    label="model",
	                    tag="_button_save_model",
	                    callback=self.save_model,
	                )
	                dpg.bind_item_theme("_button_save_model", theme_button)

	                def callback_set_save_path(sender, app_data):
	                    # save_model() reads opt.save_path, so the edit has to be
	                    # written to self.opt; callback_setattr would only create
	                    # an unused self.save_path attribute.
	                    self.opt.save_path = app_data

	                dpg.add_input_text(
	                    label="",
	                    default_value=self.opt.save_path,
	                    callback=callback_set_save_path,
	                )

	        # training stuff
	        with dpg.collapsing_header(label="Train", default_open=True):
	            # lr and train button
	            with dpg.group(horizontal=True):
	                dpg.add_text("Train: ")

	                def callback_train(sender, app_data):
	                    # toggle training; prepare_train() runs on every (re)start
	                    if self.training:
	                        self.training = False
	                        dpg.configure_item("_button_train", label="start")
	                    else:
	                        self.prepare_train()
	                        self.training = True
	                        dpg.configure_item("_button_train", label="stop")

	                # dpg.add_button(
	                #     label="init", tag="_button_init", callback=self.prepare_train
	                # )
	                # dpg.bind_item_theme("_button_init", theme_button)

	                dpg.add_button(
	                    label="start", tag="_button_train", callback=callback_train
	                )
	                dpg.bind_item_theme("_button_train", theme_button)

	            with dpg.group(horizontal=True):
	                dpg.add_text("", tag="_log_train_time")
	                dpg.add_text("", tag="_log_train_log")

	        # rendering options
	        with dpg.collapsing_header(label="Rendering", default_open=True):
	            # mode combo
	            def callback_change_mode(sender, app_data):
	                self.mode = app_data
	                self.need_update = True

	            dpg.add_combo(
	                ("image", "depth", "alpha", "normal"),
	                label="mode",
	                default_value=self.mode,
	                callback=callback_change_mode,
	            )

	            # fov slider
	            def callback_set_fovy(sender, app_data):
	                self.cam.fovy = np.deg2rad(app_data)
	                self.need_update = True

	            dpg.add_slider_int(
	                label="FoV (vertical)",
	                min_value=1,
	                max_value=120,
	                format="%d deg",
	                # an int slider needs an int default; rad2deg returns a float
	                default_value=int(round(np.rad2deg(self.cam.fovy))),
	                callback=callback_set_fovy,
	            )

	    ### register camera handler
	    # every handler ignores events unless the render window has focus

	    def callback_camera_drag_rotate_or_draw_mask(sender, app_data):
	        if not dpg.is_item_focused("_primary_window"):
	            return

	        dx = app_data[1]
	        dy = app_data[2]

	        self.cam.orbit(dx, dy)
	        self.need_update = True

	    def callback_camera_wheel_scale(sender, app_data):
	        if not dpg.is_item_focused("_primary_window"):
	            return

	        delta = app_data

	        self.cam.scale(delta)
	        self.need_update = True

	    def callback_camera_drag_pan(sender, app_data):
	        if not dpg.is_item_focused("_primary_window"):
	            return

	        dx = app_data[1]
	        dy = app_data[2]

	        self.cam.pan(dx, dy)
	        self.need_update = True

	    with dpg.handler_registry():
	        # for camera moving
	        dpg.add_mouse_drag_handler(
	            button=dpg.mvMouseButton_Left,
	            callback=callback_camera_drag_rotate_or_draw_mask,
	        )
	        dpg.add_mouse_wheel_handler(callback=callback_camera_wheel_scale)
	        dpg.add_mouse_drag_handler(
	            button=dpg.mvMouseButton_Middle, callback=callback_camera_drag_pan
	        )

	    dpg.create_viewport(
	        title="Gaussian3D",
	        width=self.W + 600,
	        height=self.H + (45 if os.name == "nt" else 0),
	        resizable=False,
	    )

	    ### global theme
	    with dpg.theme() as theme_no_padding:
	        with dpg.theme_component(dpg.mvAll):
	            # set all padding to 0 to avoid scroll bar
	            dpg.add_theme_style(
	                dpg.mvStyleVar_WindowPadding, 0, 0, category=dpg.mvThemeCat_Core
	            )
	            dpg.add_theme_style(
	                dpg.mvStyleVar_FramePadding, 0, 0, category=dpg.mvThemeCat_Core
	            )
	            dpg.add_theme_style(
	                dpg.mvStyleVar_CellPadding, 0, 0, category=dpg.mvThemeCat_Core
	            )

	    dpg.bind_item_theme("_primary_window", theme_no_padding)

	    dpg.setup_dearpygui()

	    ### register a larger font
	    # get it from: https://github.com/lxgw/LxgwWenKai/releases/download/v1.300/LXGWWenKai-Regular.ttf
	    if os.path.exists("LXGWWenKai-Regular.ttf"):
	        with dpg.font_registry():
	            with dpg.font("LXGWWenKai-Regular.ttf", 18) as default_font:
	                dpg.bind_font(default_font)

	    # dpg.show_metrics()

	    dpg.show_viewport()

	def render(self):
	    """Drive the GUI main loop until Dear PyGui stops running.

	    Every frame runs a training step when training is active, refreshes the
	    display buffer through ``test_step`` and then draws one GUI frame.
	    Requires GUI mode (asserted).
	    """
	    assert self.gui
	    while True:
	        if not dpg.is_dearpygui_running():
	            break
	        # update texture every frame
	        if self.training:
	            self.train_step()
	        self.test_step()
	        dpg.render_dearpygui_frame()

	# no gui mode
	def train(self, iters=500):
	    """Train without a GUI for ``iters`` steps, then save the model.

	    With ``iters <= 0`` both the training setup and the loop are skipped;
	    the model is saved in every case.
	    """
	    if iters > 0:
	        self.prepare_train()
	        progress = tqdm.trange(iters)
	        for _ in progress:
	            self.train_step()
	    # save
	    self.save_model()


	if __name__ == "__main__":
	    # Script entry point: load the config, locate the stage-1 mesh, then run
	    # either the interactive GUI or headless training.
	    import argparse
	    from omegaconf import OmegaConf

	    parser = argparse.ArgumentParser()
	    parser.add_argument("--config", required=True, help="path to the yaml config file")
	    args, extras = parser.parse_known_args()

	    # override default config from cli: unrecognised arguments are merged
	    # on top of the values loaded from the yaml file
	    file_cfg = OmegaConf.load(args.config)
	    cli_cfg = OmegaConf.from_cli(extras)
	    opt = OmegaConf.merge(file_cfg, cli_cfg)

	    # auto find mesh from stage 1
	    if opt.mesh is None:
	        default_path = os.path.join(opt.outdir, opt.save_path + '_mesh.' + opt.mesh_format)
	        if not os.path.exists(default_path):
	            raise ValueError(f"Cannot find mesh from {default_path}, must specify --mesh explicitly!")
	        opt.mesh = default_path

	    gui = GUI(opt)

	    if not opt.gui:
	        gui.train(opt.iters_refine)
	    else:
	        gui.render()