reza-alipour committed on
Commit
48f31c9
Parent: 3c12aea

Upload folder using huggingface_hub

Files changed (37)
  1. checkpoint-16000/ema_model/config.json +39 -0
  2. checkpoint-16000/ema_model/pytorch_model.bin +3 -0
  3. checkpoint-16000/metadata.json +1 -0
  4. checkpoint-16000/optimizer.bin +3 -0
  5. checkpoint-16000/pytorch_model.bin +3 -0
  6. checkpoint-16000/random_states_0.pkl +3 -0
  7. checkpoint-16000/scheduler.bin +3 -0
  8. checkpoint-16000/unwrapped_model/config.json +32 -0
  9. checkpoint-16000/unwrapped_model/pytorch_model.bin +3 -0
  10. checkpoint-17000/ema_model/config.json +39 -0
  11. checkpoint-17000/ema_model/pytorch_model.bin +3 -0
  12. checkpoint-17000/metadata.json +1 -0
  13. checkpoint-17000/optimizer.bin +3 -0
  14. checkpoint-17000/pytorch_model.bin +3 -0
  15. checkpoint-17000/random_states_0.pkl +3 -0
  16. checkpoint-17000/scheduler.bin +3 -0
  17. checkpoint-17000/unwrapped_model/config.json +32 -0
  18. checkpoint-17000/unwrapped_model/pytorch_model.bin +3 -0
  19. checkpoint-18000/ema_model/config.json +39 -0
  20. checkpoint-18000/ema_model/pytorch_model.bin +3 -0
  21. checkpoint-18000/metadata.json +1 -0
  22. checkpoint-18000/optimizer.bin +3 -0
  23. checkpoint-18000/pytorch_model.bin +3 -0
  24. checkpoint-18000/random_states_0.pkl +3 -0
  25. checkpoint-18000/scheduler.bin +3 -0
  26. checkpoint-18000/unwrapped_model/config.json +32 -0
  27. checkpoint-18000/unwrapped_model/pytorch_model.bin +3 -0
  28. checkpoint-19000/ema_model/config.json +39 -0
  29. checkpoint-19000/ema_model/pytorch_model.bin +3 -0
  30. checkpoint-19000/metadata.json +1 -0
  31. checkpoint-19000/optimizer.bin +3 -0
  32. checkpoint-19000/pytorch_model.bin +3 -0
  33. checkpoint-19000/random_states_0.pkl +3 -0
  34. checkpoint-19000/scheduler.bin +3 -0
  35. checkpoint-19000/unwrapped_model/config.json +32 -0
  36. checkpoint-19000/unwrapped_model/pytorch_model.bin +3 -0
  37. config.yaml +104 -0
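
These files can be mirrored locally with huggingface_hub, matching the commit message above. A minimal sketch; "<user>/<repo>" is a placeholder for this repository's actual id:

# Sketch: download one checkpoint folder plus the training config.
# repo_id is a placeholder -- substitute the real repository id.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="<user>/<repo>",
    allow_patterns=["checkpoint-19000/*", "config.yaml"],
)
print(local_dir)  # root of the downloaded snapshot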
checkpoint-16000/ema_model/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "decay": 0.9999,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "inv_gamma": 1.0,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "min_decay": 0.0,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "optimization_step": 16000,
+ "power": 0.6666666666666666,
+ "update_after_step": 0,
+ "use_bias": false,
+ "use_ema_warmup": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
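
Relative to unwrapped_model/config.json below, this EMA config adds the averaging hyperparameters (decay, min_decay, inv_gamma, power, update_after_step, use_ema_warmup) and records the optimization_step reached. A minimal sketch of the effective decay these values imply, assuming the schedule follows the diffusers-style EMAModel.get_decay logic (an assumption, not confirmed by this commit):

# Sketch of the EMA decay schedule implied by the config above.
def effective_decay(optimization_step, decay=0.9999, min_decay=0.0,
                    inv_gamma=1.0, power=2 / 3,
                    update_after_step=0, use_ema_warmup=False):
    step = max(0, optimization_step - update_after_step - 1)
    if step <= 0:
        return 0.0  # no averaging before the first update
    if use_ema_warmup:
        value = 1 - (1 + step / inv_gamma) ** -power  # unused here (warmup off)
    else:
        value = (1 + step) / (10 + step)
    return max(min(value, decay), min_decay)

print(effective_decay(16000))  # ~0.99944, still below the 0.9999 cap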
checkpoint-16000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9442c377fd7f403432cf274e442ebbf9cd5a9d77039ef1ad553b705fa7fb0d5
+ size 2433247453
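
This entry, like the other .bin/.pkl files in the commit, is a Git LFS pointer: the repository stores only the spec version, the sha256 oid, and the byte size, while the payload lives in LFS storage. A minimal integrity check of a downloaded file against its pointer:

# Sketch: verify a downloaded weight file against the pointer's sha256 oid.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "f9442c377fd7f403432cf274e442ebbf9cd5a9d77039ef1ad553b705fa7fb0d5"
assert sha256_of("checkpoint-16000/ema_model/pytorch_model.bin") == expected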
checkpoint-16000/metadata.json ADDED
@@ -0,0 +1 @@
+ {"global_step": 16000}
checkpoint-16000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fac61908bf95a9fbb0fce9a67f1651fc15bb7d6a882a21671405345db656b0e7
+ size 4866486533
checkpoint-16000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:824b778a4bfa117fcaeea2bbe95593d11853f871f7e0e398c8031c5483cc0715
+ size 2433254429
checkpoint-16000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccae0f99cb4b7afd849d0f2cf73f7f72e68a7b0e26a839bdfbdaaec351fb1a6d
+ size 14663
checkpoint-16000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61e7811b299e3751399e44fb63e5aa649eda39379a7c5656abcb05e077fa23de
+ size 627
checkpoint-16000/unwrapped_model/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "use_bias": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
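
The unwrapped_model config drops the EMA bookkeeping keys and describes the raw transformer. A sketch of instantiating it, assuming the open-muse package exposes MaskGiTUViT_v2 with a diffusers-style from_pretrained (the import path is an assumption, not confirmed by this commit):

# Sketch: load the non-EMA transformer from this checkpoint folder.
# Assumes open-muse provides MaskGiTUViT_v2.from_pretrained.
from muse import MaskGiTUViT_v2

model = MaskGiTUViT_v2.from_pretrained("checkpoint-16000/unwrapped_model")
print(sum(p.numel() for p in model.parameters()))  # parameter count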
checkpoint-16000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:824b778a4bfa117fcaeea2bbe95593d11853f871f7e0e398c8031c5483cc0715
+ size 2433254429
checkpoint-17000/ema_model/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "decay": 0.9999,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "inv_gamma": 1.0,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "min_decay": 0.0,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "optimization_step": 17000,
+ "power": 0.6666666666666666,
+ "update_after_step": 0,
+ "use_bias": false,
+ "use_ema_warmup": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-17000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c49970ad1419ceb322abcbf1a9c4a688f01fdf1c372b5dc2c45b83a1d7cd3907
+ size 2433247453
checkpoint-17000/metadata.json ADDED
@@ -0,0 +1 @@
+ {"global_step": 17000}
checkpoint-17000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d06214b9da04bb3402f04a05ff7bf28e679ea568efc8840137895b6ae948f63e
+ size 4866486533
checkpoint-17000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0711d3091631d614ecd49ec7267fa3aecbcd63fa173ba630069d5e2114791f21
+ size 2433254429
checkpoint-17000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8429c96e11c465ee4ac12c19a4d6c2d5df410c4f011c47c22476bc5ea55555eb
+ size 14663
checkpoint-17000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:927cb1e3c51f59387f003f7128d398b83c764d5386962b2ff194e10e35375b8a
+ size 627
checkpoint-17000/unwrapped_model/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "use_bias": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-17000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0711d3091631d614ecd49ec7267fa3aecbcd63fa173ba630069d5e2114791f21
+ size 2433254429
checkpoint-18000/ema_model/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "decay": 0.9999,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "inv_gamma": 1.0,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "min_decay": 0.0,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "optimization_step": 18000,
+ "power": 0.6666666666666666,
+ "update_after_step": 0,
+ "use_bias": false,
+ "use_ema_warmup": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-18000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dc543cf2ac968d4002802669098abb4a1c4e97505e61b99088fbf81ab9b477c
+ size 2433247453
checkpoint-18000/metadata.json ADDED
@@ -0,0 +1 @@
+ {"global_step": 18000}
checkpoint-18000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb5734aaeee42598b647908b68220386aa0c8a5ec66f343734919bc1ab127d13
+ size 4866486533
checkpoint-18000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f8794e5fb15d54d66180b19be7dd4c6c9e93f738f37c250a3f414283ac62ae2
+ size 2433254429
checkpoint-18000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da53af8ef43be89c1c05c1dbb3366f3db6e11d359c79936788812e16c618f801
+ size 14599
checkpoint-18000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:106c03c9203bf87eacbb68b21e440e6322338d101f1083b7c57a201947e76bcd
+ size 627
checkpoint-18000/unwrapped_model/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "use_bias": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-18000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f8794e5fb15d54d66180b19be7dd4c6c9e93f738f37c250a3f414283ac62ae2
+ size 2433254429
checkpoint-19000/ema_model/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "decay": 0.9999,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "inv_gamma": 1.0,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "min_decay": 0.0,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "optimization_step": 19000,
+ "power": 0.6666666666666666,
+ "update_after_step": 0,
+ "use_bias": false,
+ "use_ema_warmup": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-19000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd1c2c51f3326fe29e5b1c6ff228b3953e7db3853b417ba72acc0ed21d0b70a4
+ size 2433247453
checkpoint-19000/metadata.json ADDED
@@ -0,0 +1 @@
+ {"global_step": 19000}
checkpoint-19000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee1dd5b0fdb53b14c9e5fea404b8747d2b9be4834822e6fcf5c45de0837f3ae1
+ size 4866486533
checkpoint-19000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fff2ba7f92d6ebfd7f8e9321c3a9c884cf37e634fc46e90f8ab42636d211bb4
+ size 2433254429
checkpoint-19000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc79935ca199f1c08e936bdcece8113d5133a45e7416fed4eb90961d2bf14a92
+ size 14663
checkpoint-19000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01f80899eef09d4382fd001aa8e09cb281ec6d0a2f35143b7d598b51ceb1d5c5
+ size 627
checkpoint-19000/unwrapped_model/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_class_name": "MaskGiTUViT_v2",
+ "_version": "0.0.1",
+ "add_cond_embeds": true,
+ "add_micro_cond_embeds": true,
+ "attention_dropout": 0.0,
+ "block_num_heads": 12,
+ "block_out_channels": [
+ 768
+ ],
+ "codebook_size": 8192,
+ "cond_embed_dim": 768,
+ "encoder_hidden_size": 768,
+ "force_down_up_sample": true,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1024,
+ "in_channels": 768,
+ "intermediate_size": 2816,
+ "layer_norm_eps": 1e-06,
+ "ln_elementwise_affine": true,
+ "mask_token_id": 8255,
+ "micro_cond_embed_dim": 1280,
+ "micro_cond_encode_dim": 256,
+ "norm_type": "rmsnorm",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 22,
+ "num_res_blocks": 3,
+ "use_bias": false,
+ "use_fused_mlp": false,
+ "use_fused_residual_norm": false,
+ "vocab_size": 8256
+ }
checkpoint-19000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fff2ba7f92d6ebfd7f8e9321c3a9c884cf37e634fc46e90f8ab42636d211bb4
+ size 2433254429
config.yaml ADDED
@@ -0,0 +1,104 @@
+ wandb:
+ entity: r-ap
+ run_id: xvom3pxa
+ experiment:
+ name: muse-multi
+ project: muse-prod
+ output_dir: output/
+ max_train_examples: 28500
+ max_eval_examples: 1000
+ save_every: 1000
+ eval_every: 700
+ generate_every: 200
+ log_every: 50
+ log_grad_norm_every: 100000000
+ resume_from_checkpoint: latest
+ resume_lr_scheduler: true
+ checkpoints_total_limit: 4
+ logging_dir: output/logs
+ model:
+ vq_model:
+ type: vqgan
+ text_encoder:
+ type: clip
+ pretrained: openMUSE/clip-vit-large-patch14-text-enc
+ transformer:
+ vocab_size: 8256
+ hidden_size: 1024
+ intermediate_size: 2816
+ num_hidden_layers: 22
+ num_attention_heads: 16
+ in_channels: 768
+ block_out_channels:
+ - 768
+ block_has_attention:
+ - true
+ block_num_heads: 12
+ num_res_blocks: 3
+ res_ffn_factor: 4
+ patch_size: 1
+ encoder_hidden_size: 768
+ add_cross_attention: true
+ project_encoder_hidden_states: true
+ codebook_size: 8192
+ num_vq_tokens: 256
+ initializer_range: 0.02
+ norm_type: rmsnorm
+ layer_norm_eps: 1.0e-06
+ ln_elementwise_affine: true
+ use_encoder_layernorm: false
+ use_bias: false
+ hidden_dropout: 0.0
+ attention_dropout: 0.0
+ use_codebook_size_for_output: true
+ use_empty_embeds_for_uncond: true
+ add_cond_embeds: true
+ cond_embed_dim: 768
+ add_micro_cond_embeds: true
+ micro_cond_encode_dim: 256
+ micro_cond_embed_dim: 1280
+ force_down_up_sample: true
+ architecture: uvit
+ enable_xformers_memory_efficient_attention: true
+ dataset:
+ preprocessing:
+ max_seq_length: 77
+ resolution: 256
+ optimizer:
+ name: adamw
+ params:
+ learning_rate: 0.0001
+ scale_lr: false
+ beta1: 0.9
+ beta2: 0.999
+ weight_decay: 0.01
+ epsilon: 1.0e-08
+ lr_scheduler:
+ scheduler: constant_with_warmup
+ params:
+ learning_rate: ${optimizer.params.learning_rate}
+ warmup_steps: 100
+ training:
+ gradient_accumulation_steps: 1
+ batch_size: 20
+ mixed_precision: 'no'
+ enable_tf32: true
+ use_ema: true
+ ema_decay: 0.9999
+ ema_update_after_step: 0
+ ema_update_every: 1
+ seed: 13399
+ max_train_steps: 20000
+ overfit_one_batch: false
+ cond_dropout_prob: 0.1
+ min_masking_rate: 0.0
+ label_smoothing: 0.1
+ max_grad_norm: null
+ guidance_scale: 8
+ generation_timesteps: 16
+ use_soft_code_target: false
+ use_stochastic_code: false
+ soft_code_temp: 1.0
+ mask_schedule: cosine
+ mask_contiguous_region_prob: 0.15
+ config: configs/segmentation.yaml
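
The ${optimizer.params.learning_rate} entry is OmegaConf interpolation syntax, so the file is most naturally loaded with OmegaConf, which resolves the reference on access. A minimal sketch:

# Sketch: load the training config and let OmegaConf resolve interpolations.
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
assert cfg.lr_scheduler.params.learning_rate == cfg.optimizer.params.learning_rate
print(cfg.training.batch_size, cfg.training.max_train_steps)  # 20 20000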