reza-alipour committed
Commit 9936b82
1 Parent(s): e9f8b9c

Upload folder using huggingface_hub

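The commit message names huggingface_hub's folder-upload helper. A minimal sketch of that workflow; the repo id is hypothetical, since the target repo is not shown in this diff:

```python
# Push a local training output directory to the Hub in one commit.
# The repo_id below is hypothetical; the folder_path matches
# experiment.output_dir in config.yaml.
from huggingface_hub import upload_folder

upload_folder(
    repo_id="reza-alipour/muse-multi",  # hypothetical
    folder_path="output/",
    commit_message="Upload folder using huggingface_hub",
)
```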
checkpoint-17000/ema_model/config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_class_name": "MaskGiTUViT_v2",
+  "_version": "0.0.1",
+  "add_cond_embeds": true,
+  "add_micro_cond_embeds": true,
+  "attention_dropout": 0.0,
+  "block_num_heads": 12,
+  "block_out_channels": [
+    768
+  ],
+  "codebook_size": 8192,
+  "cond_embed_dim": 768,
+  "decay": 0.9999,
+  "encoder_hidden_size": 768,
+  "force_down_up_sample": true,
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "in_channels": 768,
+  "intermediate_size": 2816,
+  "inv_gamma": 1.0,
+  "layer_norm_eps": 1e-06,
+  "ln_elementwise_affine": true,
+  "mask_token_id": 8255,
+  "micro_cond_embed_dim": 1280,
+  "micro_cond_encode_dim": 256,
+  "min_decay": 0.0,
+  "norm_type": "rmsnorm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 22,
+  "num_res_blocks": 3,
+  "optimization_step": 8000,
+  "power": 0.6666666666666666,
+  "update_after_step": 0,
+  "use_bias": false,
+  "use_ema_warmup": false,
+  "use_fused_mlp": false,
+  "use_fused_residual_norm": false,
+  "vocab_size": 8256
+}
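Relative to the unwrapped model config further down, this file carries extra EMA bookkeeping fields (`decay`, `min_decay`, `inv_gamma`, `power`, `update_after_step`, `use_ema_warmup`, `optimization_step`). A sketch of the effective decay they imply, assuming the schedule follows the diffusers `EMAModel` convention (an assumption; the training code is not part of this diff):

```python
# Effective EMA decay at a given step, assuming the diffusers EMAModel
# schedule (an assumption; not confirmed by this commit).
def effective_decay(step, decay=0.9999, min_decay=0.0, inv_gamma=1.0,
                    power=0.6666666666666666, use_ema_warmup=False):
    if step <= 0:
        return 0.0
    if use_ema_warmup:
        value = 1 - (1 + step / inv_gamma) ** -power  # warmup ramp
    else:
        value = (1 + step) / (10 + step)              # default ramp
    return max(min_decay, min(value, decay))          # clamp to [min_decay, decay]

# At optimization_step 8000 with use_ema_warmup false, the default ramp
# gives (1 + 8000) / (10 + 8000), about 0.99888, still below the 0.9999 cap.
print(effective_decay(8000))
```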
checkpoint-17000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aec69ccd04fba0d4f3eb9e5821acaa0f435118c54918cc42c38a8807f31b0792
+size 2433247453
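This and the other `.bin` entries in the commit are Git LFS pointer files (spec version, sha256 `oid`, byte size), not the weights themselves. A stdlib-only sketch that parses such a pointer and checks a downloaded blob against it; paths are illustrative:

```python
# Verify a downloaded blob against its Git LFS pointer (size, then sha256).
import hashlib
import os

def parse_lfs_pointer(path):
    fields = dict(line.split(" ", 1) for line in open(path).read().splitlines())
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify_blob(blob_path, pointer_path):
    oid, size = parse_lfs_pointer(pointer_path)
    if os.path.getsize(blob_path) != size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == oid
```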
checkpoint-17000/metadata.json ADDED
@@ -0,0 +1 @@
+{"global_step": 17000}
checkpoint-17000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bfe144b120084821b26c0b83f7f96d98d9f6331d06e578c9c8988f6d6fe9f08
+size 4866486533
checkpoint-17000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b40d6d634d074eb6a20944a25d9a9b516349721258a39c97fa077d84d4abf74b
+size 2433254429
checkpoint-17000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76b347b906ddaec8c955e47f4a42fce503774fa719cbb7c0f1f50a8f69dc604e
+size 14599
checkpoint-17000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:927cb1e3c51f59387f003f7128d398b83c764d5386962b2ff194e10e35375b8a
+size 627
checkpoint-17000/unwrapped_model/config.json ADDED
@@ -0,0 +1,32 @@
+{
+  "_class_name": "MaskGiTUViT_v2",
+  "_version": "0.0.1",
+  "add_cond_embeds": true,
+  "add_micro_cond_embeds": true,
+  "attention_dropout": 0.0,
+  "block_num_heads": 12,
+  "block_out_channels": [
+    768
+  ],
+  "codebook_size": 8192,
+  "cond_embed_dim": 768,
+  "encoder_hidden_size": 768,
+  "force_down_up_sample": true,
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "in_channels": 768,
+  "intermediate_size": 2816,
+  "layer_norm_eps": 1e-06,
+  "ln_elementwise_affine": true,
+  "mask_token_id": 8255,
+  "micro_cond_embed_dim": 1280,
+  "micro_cond_encode_dim": 256,
+  "norm_type": "rmsnorm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 22,
+  "num_res_blocks": 3,
+  "use_bias": false,
+  "use_fused_mlp": false,
+  "use_fused_residual_norm": false,
+  "vocab_size": 8256
+}
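This unwrapped config is the EMA config above minus the averaging bookkeeping. A quick sketch verifying that from the checkpoint files:

```python
# Compare the two configs in this checkpoint: the EMA copy should only add
# bookkeeping fields on top of the unwrapped architecture.
import json

with open("checkpoint-17000/ema_model/config.json") as f:
    ema = json.load(f)
with open("checkpoint-17000/unwrapped_model/config.json") as f:
    raw = json.load(f)

print(sorted(set(ema) - set(raw)))
# ['decay', 'inv_gamma', 'min_decay', 'optimization_step',
#  'power', 'update_after_step', 'use_ema_warmup']
assert all(ema[k] == raw[k] for k in raw)  # shared keys agree
```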
checkpoint-17000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b40d6d634d074eb6a20944a25d9a9b516349721258a39c97fa077d84d4abf74b
+size 2433254429
config.yaml ADDED
@@ -0,0 +1,103 @@
+wandb:
+  entity: r-ap
+  run_id: tx78bpg1
+experiment:
+  name: muse-multi
+  project: muse-prod
+  output_dir: output/
+  max_train_examples: 28500
+  max_eval_examples: 1000
+  save_every: 1000
+  eval_every: 1000
+  generate_every: 400
+  log_every: 50
+  log_grad_norm_every: 100000000
+  resume_from_checkpoint: latest
+  resume_lr_scheduler: true
+  checkpoints_total_limit: 4
+  logging_dir: output/logs
+model:
+  vq_model:
+    type: vqgan
+  text_encoder:
+    type: clip
+  transformer:
+    vocab_size: 8256
+    hidden_size: 1024
+    intermediate_size: 2816
+    num_hidden_layers: 22
+    num_attention_heads: 16
+    in_channels: 768
+    block_out_channels:
+      - 768
+    block_has_attention:
+      - true
+    block_num_heads: 12
+    num_res_blocks: 3
+    res_ffn_factor: 4
+    patch_size: 1
+    encoder_hidden_size: 768
+    add_cross_attention: true
+    project_encoder_hidden_states: true
+    codebook_size: 8192
+    num_vq_tokens: 256
+    initializer_range: 0.02
+    norm_type: rmsnorm
+    layer_norm_eps: 1.0e-06
+    ln_elementwise_affine: true
+    use_encoder_layernorm: false
+    use_bias: false
+    hidden_dropout: 0.0
+    attention_dropout: 0.0
+    use_codebook_size_for_output: true
+    use_empty_embeds_for_uncond: true
+    add_cond_embeds: true
+    cond_embed_dim: 768
+    add_micro_cond_embeds: true
+    micro_cond_encode_dim: 256
+    micro_cond_embed_dim: 1280
+    force_down_up_sample: true
+  architecture: uvit
+  enable_xformers_memory_efficient_attention: true
+dataset:
+  preprocessing:
+    max_seq_length: 77
+    resolution: 256
+optimizer:
+  name: adamw
+  params:
+    learning_rate: 0.0001
+    scale_lr: false
+    beta1: 0.9
+    beta2: 0.999
+    weight_decay: 0.01
+    epsilon: 1.0e-08
+lr_scheduler:
+  scheduler: constant_with_warmup
+  params:
+    learning_rate: ${optimizer.params.learning_rate}
+    warmup_steps: 100
+training:
+  gradient_accumulation_steps: 1
+  batch_size: 20
+  mixed_precision: 'no'
+  enable_tf32: true
+  use_ema: true
+  ema_decay: 0.9999
+  ema_update_after_step: 0
+  ema_update_every: 1
+  seed: 13399
+  max_train_steps: 20000
+  overfit_one_batch: false
+  cond_dropout_prob: 0.1
+  min_masking_rate: 0.0
+  label_smoothing: 0.1
+  max_grad_norm: null
+  guidance_scale: 8
+  generation_timesteps: 16
+  use_soft_code_target: false
+  use_stochastic_code: false
+  soft_code_temp: 1.0
+  mask_schedule: cosine
+  mask_contiguous_region_prob: 0.15
+config: configs/segmentation.yaml
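The `${optimizer.params.learning_rate}` entry uses OmegaConf-style interpolation, so the training code presumably loads this file with OmegaConf (an assumption; the loader is not part of this commit). A minimal sketch:

```python
# Load the training config; OmegaConf resolves the ${...} interpolation
# under lr_scheduler.params on access.
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
print(cfg.lr_scheduler.params.learning_rate)  # 0.0001, mirrored from optimizer.params
print(cfg.training.max_train_steps)           # 20000
```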