# L4GM-demo / core/options.py
# Provenance: mirrored from GitHub (fffiloni's HF Space, commit 2cdb96e, verified), 3.77 kB.
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tyro
from dataclasses import dataclass
from typing import Tuple, Literal, Dict, Optional
@dataclass
class Options:
    """Configuration bundle for L4GM/LGM training, testing and rendering.

    Every attribute is a tyro-exposed command-line flag; defaults below
    define the base ('lrm') preset.
    """

    # --- model: U-Net that maps multi-view images to per-pixel Gaussians ---
    # side length of the square image fed to the U-Net
    input_size: int = 256
    # encoder channel widths, one entry per downsampling stage
    down_channels: Tuple[int, ...] = (64, 128, 256, 512, 1024, 1024)
    # whether each encoder stage uses attention
    down_attention: Tuple[bool, ...] = (False, False, False, True, True, True)
    # attention in the bottleneck block
    mid_attention: bool = True
    # decoder channel widths, one entry per upsampling stage
    up_channels: Tuple[int, ...] = (1024, 1024, 512, 256)
    # whether each decoder stage uses attention
    up_attention: Tuple[bool, ...] = (True, True, True, False)
    # side length of the Gaussian splat map the U-Net emits
    # (tied to input_size and the down/up stage counts above)
    splat_size: int = 64
    # resolution at which Gaussians are rasterized for supervision
    output_size: int = 256

    # --- dataset ---
    # data mode (only support s3 now)
    data_mode: str = '4d'
    # vertical field of view of the dataset cameras, degrees
    fovy: float = 49.1
    # near clipping plane
    znear: float = 0.5
    # far clipping plane
    zfar: float = 2.5
    # total views per sample (input + supervision)
    num_views: int = 12
    # views actually fed to the network
    num_input_views: int = 4
    # camera orbit radius; 1.5 makes better use of the [-1, 1]^3 volume
    cam_radius: float = 1.5
    # dataloader worker processes
    num_workers: int = 16
    # optional path to a data list file
    datalist: str = ''

    # --- training ---
    # output directory for checkpoints/logs
    workspace: str = './workspace'
    # checkpoint to resume from, if any
    resume: Optional[str] = None
    # per-GPU batch size
    batch_size: int = 8
    # steps of gradient accumulation before each optimizer update
    gradient_accumulation_steps: int = 1
    # total training epochs
    num_epochs: int = 30
    # weight of the LPIPS perceptual loss term
    lambda_lpips: float = 1.0
    # max gradient norm for clipping
    gradient_clip: float = 1.0
    # accelerate mixed-precision mode
    mixed_precision: str = 'bf16'
    # base learning rate
    lr: float = 4e-4
    # probability of applying grid-distortion augmentation
    prob_grid_distortion: float = 0.5
    # probability of applying camera-jitter augmentation
    prob_cam_jitter: float = 0.5
    # Gaussians predicted per output pixel
    gaussian_perpixel: int = 1

    # --- testing ---
    # image (or directory) to run inference on
    test_path: Optional[str] = None

    # --- misc ---
    # force the CUDA backend for nvdiffrast
    force_cuda_rast: bool = False
    # render a video with a Gaussian scaling-in effect
    fancy_video: bool = False

    # --- 4D ---
    # frames per animated sample
    num_frames: int = 8
    # enable temporal attention across frames
    use_temp_attn: bool = True
    # shuffle the order of input views
    shuffle_input: bool = True

    # --- s3 ---
    # sample batches by animation rather than by frame
    sample_by_anim: bool = True

    # --- interpolation ---
    # checkpoint for the interpolation model, if any
    interpresume: Optional[str] = None
    # temporal upsampling factor for interpolation
    interpolate_rate: int = 3
# Registry of named presets exposed as tyro subcommands, plus their help text.
config_doc: Dict[str, str] = {
    'lrm': 'the default settings for LGM',
    'big': 'big model with higher resolution Gaussians',
}

config_defaults: Dict[str, Options] = {
    # baseline preset: plain dataclass defaults
    'lrm': Options(),
    # larger preset: one extra decoder stage and higher-resolution Gaussians
    'big': Options(
        input_size=256,
        up_channels=(1024, 1024, 512, 256, 128),  # one more decoder
        up_attention=(True, True, True, False, False),
        splat_size=128,
        output_size=512,  # render & supervise Gaussians at a higher resolution.
        batch_size=1,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
        resume='pretrained/model_fp16_fixrot.safetensors',
    ),
}

# Union type over the presets, usable directly as a tyro CLI entry point.
AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)