File size: 2,405 Bytes
d1dc797 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# Global settings for this configuration.
# 512 matches the width/height values given for image_tokenizer further down.
cond_image_size: 512
# NOTE(review): presumably the grid resolution used when extracting the
# isosurface (mesh) - confirm against the code that reads this key.
isosurface_resolution: 160
# NOTE(review): units and what this radius bounds are not visible in this
# file - confirm before changing.
radius: 0.87
# Camera embedder: `camera_embedder_cls` is the dotted path of the class,
# `camera_embedder` holds the options for it.
camera_embedder_cls: sf3d.models.camera.LinearCameraEmbedder
camera_embedder:
  # NOTE(review): 25 is presumably a flattened 4x4 camera-to-world matrix (16)
  # plus a flattened 3x3 intrinsic matrix (9) - confirm against the embedder.
  in_channels: 25
  # Same value (768) as modulation_cond_dim in the image tokenizer settings.
  out_channels: 768
  # Names of the conditioning inputs listed for the embedder.
  conditions:
    - c2w_cond
    - intrinsic_normed_cond
# Image tokenizer: `image_tokenizer_cls` is the dotted path of the class,
# `image_tokenizer` holds the options for it.
image_tokenizer_cls: sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer
image_tokenizer:
  # Quoted because the value contains a slash-separated model identifier.
  pretrained_model_name_or_path: "facebook/dinov2-large"
  # Both dimensions equal cond_image_size (512).
  width: 512
  height: 512
  # NOTE(review): placed under image_tokenizer because it follows that
  # section's keys; 768 equals the camera embedder's out_channels. Confirm
  # this key is read by the tokenizer and not at the top level.
  modulation_cond_dim: 768
# Triplane tokenizer: `tokenizer_cls` is the dotted path of the class,
# `tokenizer` holds the options for it.
tokenizer_cls: sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding
tokenizer:
  # NOTE(review): presumably the side length of each plane - confirm.
  plane_size: 96
  # Same value (1024) as raw_triplane_channels in the backbone settings.
  num_channels: 1024
# Transformer backbone: `backbone_cls` is the dotted path of the class,
# `backbone` holds the options for it.
backbone_cls: sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer
backbone:
  # 16 heads x 64 dims per head = 1024, the same value as triplane_channels.
  num_attention_heads: 16
  attention_head_dim: 64
  # Same value (1024) as the triplane tokenizer's num_channels.
  raw_triplane_channels: 1024
  triplane_channels: 1024
  raw_image_channels: 1024  # DINO features
  num_latents: 1792
  num_blocks: 4
  num_basic_blocks: 3
# Post-processor: `post_processor_cls` is the dotted path of the class,
# `post_processor` holds the options for it.
post_processor_cls: sf3d.models.network.PixelShuffleUpsampleNetwork
post_processor:
  # Same value (1024) as the backbone's triplane_channels.
  in_channels: 1024
  # NOTE(review): the decoder's in_channels is 120 = 3 x 40, presumably the
  # three planes' features concatenated - confirm.
  out_channels: 40
  scale_factor: 4
  conv_layers: 4
# Decoder: `decoder_cls` is the dotted path of the class, `decoder` holds the
# options for it, including one list entry per output head.
decoder_cls: sf3d.models.network.MaterialMLP
decoder:
  # 120 = 3 x 40, where 40 is the post-processor's out_channels.
  in_channels: 120
  n_neurons: 64
  activation: silu
  # Each entry describes one output head. A head without output_activation
  # (vertex_offset) simply omits the key.
  # NOTE(review): the bias key here is `out_bias`, while the estimator heads
  # elsewhere in this file use `output_bias`; kept as written - confirm each
  # name against the class that reads it.
  heads:
    - name: density
      out_channels: 1
      out_bias: -1.0
      n_hidden_layers: 2
      output_activation: trunc_exp
    - name: features
      out_channels: 3
      n_hidden_layers: 3
      output_activation: sigmoid
    - name: perturb_normal
      out_channels: 3
      n_hidden_layers: 3
      output_activation: normalize_channel_last
    - name: vertex_offset
      out_channels: 3
      n_hidden_layers: 2
# Image-level estimator: `image_estimator_cls` is the dotted path of the
# class, `image_estimator` holds the options for it.
image_estimator_cls: sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator
image_estimator:
  distribution: beta
  # NOTE(review): presumably selects the distribution's mode as the value
  # used at evaluation time - confirm against the estimator.
  distribution_eval: mode
  # Two heads with identical settings apart from their names.
  heads:
    - name: roughness
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      # Short leaf list kept in flow style.
      shape: [-1, 1, 1]
    - name: metallic
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      shape: [-1, 1, 1]
# Global estimator: `global_estimator_cls` is the dotted path of the class,
# `global_estimator` holds the options for it.
global_estimator_cls: sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator
global_estimator:
  # Same value (1024) as the backbone's triplane_channels.
  triplane_features: 1024
  heads:
    - name: sg_amplitudes
      # 24 output channels, matching the middle dimension of `shape` below.
      out_channels: 24
      n_hidden_layers: 3
      output_activation: softplus
      output_bias: 1.0
      shape: [-1, 24, 1]
|