AbstractPhil commited on
Commit
fcc7d54
·
verified ·
1 Parent(s): 6c1eac8

Upload models/vit_zana_nano_thicc/20250916_221953/best_epoch040_acc52.21_training_config.json with huggingface_hub

Browse files
models/vit_zana_nano_thicc/20250916_221953/best_epoch040_acc52.21_training_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_margin_train_only": false,
3
+ "attn_dropout": 0.0,
4
+ "batch_size": 256,
5
+ "checkpoint_path": null,
6
+ "continue_last": false,
7
+ "continue_optimizer": true,
8
+ "continue_scheduler": true,
9
+ "cutmix_alpha": 1.0,
10
+ "depth": 4,
11
+ "dropout": 0.0,
12
+ "embed_dim": 128,
13
+ "epochs": 200,
14
+ "head_type": "roseface",
15
+ "hub_repo": "AbstractPhil/penta-vit-experiments",
16
+ "hub_token": null,
17
+ "img_size": 32,
18
+ "label_smoothing": 0.1,
19
+ "log_dir": "./runs",
20
+ "lr": 0.001,
21
+ "margin_m": 0.3,
22
+ "margin_type": "cosface",
23
+ "mix_final_alpha": 0.5,
24
+ "mix_final_prob": 0.25,
25
+ "mix_prob": 0.01,
26
+ "mix_schedule": "cosine",
27
+ "mix_schedule_end": null,
28
+ "mix_schedule_start": 0,
29
+ "mixup_alpha": 0.8,
30
+ "mlp_ratio": 4.0,
31
+ "model_variant": "vit_zana_nano_thicc",
32
+ "norm_type": "l1",
33
+ "num_heads": 8,
34
+ "patch_size": 2,
35
+ "prototype_mode": "centroid",
36
+ "push_every": 20,
37
+ "push_to_hub": true,
38
+ "save_dir": "./checkpoints/pentachora",
39
+ "save_every": 10,
40
+ "scale_s": 30.0,
41
+ "session_timestamp": "20250916_221953",
42
+ "similarity_mode": "rose",
43
+ "vocab_dim": 128,
44
+ "warmup_epochs": 10,
45
+ "weight_decay": 1e-05
46
+ }