# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tyro
from dataclasses import dataclass
from typing import Tuple, Literal, Dict, Optional


@dataclass
class Options:
    """CLI / experiment configuration, parsed by tyro.

    Groups every knob for the model architecture, dataset, training loop,
    testing, and 4D-specific behaviour.  Named presets live in
    ``config_defaults`` below and are exposed through ``AllConfigs``.
    """

    # --- model -----------------------------------------------------------
    # Side length (pixels) of the square image fed into the U-Net.
    input_size: int = 256
    # Channel widths of the U-Net encoder stages.
    down_channels: Tuple[int, ...] = (64, 128, 256, 512, 1024, 1024)
    # Whether each encoder stage uses attention (parallel to down_channels).
    down_attention: Tuple[bool, ...] = (False, False, False, True, True, True)
    # Attention in the U-Net bottleneck.
    mid_attention: bool = True
    # Channel widths of the U-Net decoder stages.
    up_channels: Tuple[int, ...] = (1024, 1024, 512, 256)
    # Whether each decoder stage uses attention (parallel to up_channels).
    up_attention: Tuple[bool, ...] = (True, True, True, False)
    # U-Net output resolution; determined by input_size and the U-Net depth.
    splat_size: int = 64
    # Resolution at which the Gaussians are rendered.
    output_size: int = 256

    # --- dataset ---------------------------------------------------------
    # Data mode (only s3 is supported at the moment).
    data_mode: str = '4d'
    # Vertical field of view (degrees) of the dataset cameras.
    fovy: float = 49.1
    # Camera near clipping plane.
    znear: float = 0.5
    # Camera far clipping plane.
    zfar: float = 2.5
    # Total number of views per sample (input + supervision).
    num_views: int = 12
    # Number of input views.
    num_input_views: int = 4
    # Camera orbit radius, chosen to make good use of the [-1, 1]^3 volume.
    cam_radius: float = 1.5
    # DataLoader worker processes.
    num_workers: int = 16
    # Optional path to a data list file.
    datalist: str = ''

    # --- training --------------------------------------------------------
    # Output / checkpoint directory.
    workspace: str = './workspace'
    # Checkpoint to resume from (None = train from scratch).
    resume: Optional[str] = None
    # Per-GPU batch size.
    batch_size: int = 8
    # Gradient accumulation steps.
    gradient_accumulation_steps: int = 1
    # Number of training epochs.
    num_epochs: int = 30
    # Weight of the LPIPS perceptual loss.
    lambda_lpips: float = 1.0
    # Gradient clipping threshold.
    gradient_clip: float = 1.0
    # Mixed-precision mode passed to the training framework.
    mixed_precision: str = 'bf16'
    # Learning rate.
    lr: float = 4e-4
    # Probability of applying grid-distortion augmentation.
    prob_grid_distortion: float = 0.5
    # Probability of applying camera-jitter augmentation.
    prob_cam_jitter: float = 0.5
    # Gaussians predicted per output pixel.
    gaussian_perpixel: int = 1

    # --- testing ---------------------------------------------------------
    # Image path used at test time.
    test_path: Optional[str] = None

    # --- misc ------------------------------------------------------------
    # Force the CUDA backend for nvdiffrast.
    force_cuda_rast: bool = False
    # Render a fancy video with a Gaussian scaling effect.
    fancy_video: bool = False

    # --- 4D --------------------------------------------------------------
    # Number of temporal frames.
    num_frames: int = 8
    # Enable temporal attention.
    use_temp_attn: bool = True
    # Shuffle the input views.
    shuffle_input: bool = True

    # --- s3 --------------------------------------------------------------
    # Sample by animation rather than uniformly.
    sample_by_anim: bool = True

    # --- interp ----------------------------------------------------------
    # Checkpoint to resume the interpolation model from.
    interpresume: Optional[str] = None
    # Temporal interpolation factor.
    interpolate_rate: int = 3


# Named presets and their one-line descriptions, keyed identically so tyro
# can turn them into subcommands.
config_doc: Dict[str, str] = {
    'lrm': 'the default settings for LGM',
    'big': 'big model with higher resolution Gaussians',
}

config_defaults: Dict[str, Options] = {
    'lrm': Options(),
    'big': Options(
        input_size=256,
        up_channels=(1024, 1024, 512, 256, 128),  # one extra decoder stage
        up_attention=(True, True, True, False, False),
        splat_size=128,
        output_size=512,  # render & supervise Gaussians at a higher resolution
        batch_size=1,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
        resume='pretrained/model_fp16_fixrot.safetensors',
    ),
}

# Union type over all presets, usable directly as a tyro CLI entry point.
AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)