# define-hf-demo: configs/papers/define/scannet_temporal_test_context_1.yaml
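# Evaluation-only configuration (max_epochs: 0) for DeFiNe depth estimation on the
# ScanNet temporal test split, using a single context frame (context: [1]) at stride 10.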
wrapper:
    seed: 42
    min_epochs: 0
    max_epochs: 0
    find_unused_parameters: True
    flip_lr_prob: 0.5
    validate_first: True
    validate_flipped: False
    sync_batch_norm: True
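# Depth evaluation protocol: predictions are resized to the ground-truth resolution,
# clipped to [0.2, 10.0] m, median-scaled against ground truth, with no flip post-processing.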
evaluation:
    rgb:
        only_first: False
    depth:
        crop: ''
        min_depth: 0.2
        max_depth: 10.0
        scale_output: resize
        median_scaling: True
        post_process: False
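# Model wrapper (perceiver/DefineGenericModel) restored from a pretrained ScanNet checkpoint;
# pose-noise and virtual-camera augmentations are disabled for this evaluation run.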
arch:
    model:
        checkpoint: /data/vidar/models/scannet_full.ckpt
        file: perceiver/DefineGenericModel
        use_pose_noise: []
        use_virtual_cameras: []
        virtual_cameras_eval: False
        use_virtual_rgb: False
        augment_canonical: False
        encode_train: all
        encode_eval: all
        decode_train: all
        decode_eval: all
        decode_encodes: False
        task_weights: [1.0,1.0]
        scale_loss: True
        sample_decoded_queries: 0.0
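    # Perceiver-style network: 1024 latents of width 512, depth-only decoding over a
    # 0.1-10 m range at 128x192 resolution, with 960-dim ResNet RGB features in the encoder.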
    network:
        tasks: [depth]
        depth_range: [0.1,10.]
        image_shape: [128,192]
        num_bands_orig: 20
        num_bands_dirs: 10
        max_resolution_orig: 60
        max_resolution_dirs: 60
        to_world: True
        d_latents: 512
        num_latents: 1024
        hidden_dropout_prob: 0.25
        num_cross_attention_heads: 1
        num_self_attends_per_block: 8
        num_self_attention_heads: 8
        decoder_num_heads: 1
        rgb_feat_dim: 960
        rgb_feat_type: resnet_all
        downsample_encoder: 4
        downsample_decoder: 4
        upsample_convex: 'convex'
        encoder_with_rgb: True
        decoder_with_rgb: False
        output_mode: inv_depth
        sample_encoding_rays: 0
        with_monodepth: False
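# Validation data: ScanNetTemporal test split with depth and pose labels, one context
# frame at stride 10, evaluated at batch size 1 on inputs resized to 128x192.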
datasets:
    validation:
        name: [ScanNetTemporal]
        path: [/data/vidar/scannet_processed]
        split: [test]
        context: [1]
        stride: [10]
        labels: [depth,pose]
        cameras: [[0]]
        dataloader:
            batch_size: 1
            pin_memory: True
            num_workers: 4
        augmentation:
            resize: [128,192]
            resize_supervision: True
            preserve_depth: True
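
# A minimal sketch of inspecting this file with plain PyYAML (assumption: the training
# codebase has its own config loader and launch script, which are not shown here):
#
#   import yaml
#   with open('configs/papers/define/scannet_temporal_test_context_1.yaml') as f:
#       cfg = yaml.safe_load(f)
#   print(cfg['arch']['network']['image_shape'])      # [128, 192]
#   print(cfg['datasets']['validation']['name'])      # ['ScanNetTemporal']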