# Source: RAR/configs/infer/titok_s128.yaml
# Last commit: "upload" by yucornetto (51ce47d, verified)
# Run-level settings: which checkpoint files to use and where results go.
# NOTE(review): whether these names are resolved relative to a local
# directory or a model hub is not visible here -- confirm against the loader.
experiment:
  # File name of the tokenizer weights.
  tokenizer_checkpoint: "tokenizer_titok_s128.bin"
  # File name of the generator weights.
  generator_checkpoint: "generator_titok_s128.bin"
  # Name of the output directory for this configuration.
  output_dir: "titok_s_128"
# Model hyper-parameters, grouped into the vector-quantized tokenizer
# (`vq_model`) and the token generator (`generator`).
model:
  # Tokenizer settings. These must stay nested under `model.vq_model`:
  # `generator.image_seq_len` below references
  # `model.vq_model.num_latent_tokens` by that exact path.
  vq_model:
    codebook_size: 4096
    token_size: 12
    use_l2_norm: true
    commitment_cost: 0.25
    # ViT architecture (encoder and decoder use the same size / patch size).
    vit_enc_model_size: "small"
    vit_dec_model_size: "small"
    vit_enc_patch_size: 16
    vit_dec_patch_size: 16
    num_latent_tokens: 128
    finetune_decoder: true

  # Generator settings.
  generator:
    model_type: "UViT"
    hidden_size: 1024
    num_hidden_layers: 20
    num_attention_heads: 16
    intermediate_size: 4096
    dropout: 0.1
    attn_drop: 0.1
    num_steps: 64
    class_label_dropout: 0.1
    # Interpolated from the tokenizer's latent token count (128 above), so
    # the two values cannot drift apart.
    # NOTE(review): the `${...}` syntax looks like OmegaConf interpolation --
    # confirm the loader supports it.
    image_seq_len: ${model.vq_model.num_latent_tokens}
    condition_num_classes: 1000

    # Sampling hyper-parameters.
    randomize_temperature: 2.8
    guidance_scale: 6.9
    guidance_decay: "power-cosine"
# Dataset settings.
dataset:
  preprocessing:
    # Crop size applied during preprocessing.
    # NOTE(review): presumably pixels per side -- confirm against the
    # preprocessing code.
    crop_size: 256