KilianFt committed on
Commit
cb40fbe
·
verified ·
1 Parent(s): 94c2467

Upload FlashJudge artifacts from run_20260505_225612

Browse files
flash_judge_checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d022887bca2e346a8eb836c1484caf22a55b4853744ad12c61e6a06a817927f
3
+ size 1543637939
training_config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "google/siglip2-base-patch16-naflex",
3
+ "dataset": "apartments",
4
+ "apartments_jsonl": "data/apartments/question-image-dataset_labeled_expanded.jsonl",
5
+ "apartments_images_dir": "data/apartments/images",
6
+ "fmnist_root": "data",
7
+ "output_root": "models",
8
+ "run_name": null,
9
+ "batch_size": 20,
10
+ "gradient_accumulation_steps": 1,
11
+ "stack_size": 1,
12
+ "epochs": 2,
13
+ "max_steps": null,
14
+ "eval_interval": 1,
15
+ "eval_batches": 2,
16
+ "checkpoint_interval": 1000,
17
+ "checkpoint_mode": "max_val_acc",
18
+ "val_ratio": 0.1,
19
+ "seed": 42,
20
+ "apartments_train_group_limit": null,
21
+ "apartments_train_question_limit": null,
22
+ "apartments_question_split_mode": "disjoint",
23
+ "apartments_hard_negative_strategy": "within_group_overlap",
24
+ "apartments_hard_negative_topk": 0,
25
+ "apartments_hard_negative_oversample_factor": 0,
26
+ "apartments_augment_images": true,
27
+ "apartments_augment_add_prob": 0.35,
28
+ "apartments_augment_max_added_images": 3,
29
+ "apartments_augment_shuffle_prob": 0.5,
30
+ "apartments_augment_repeat_prob": 0.25,
31
+ "apartments_augment_repeat_max_images": 2,
32
+ "apartments_augment_rotate_prob": 0.5,
33
+ "apartments_augment_rotate_degrees": 5.0,
34
+ "apartments_augment_color_prob": 0.8,
35
+ "apartments_augment_brightness_delta": 0.1,
36
+ "apartments_augment_saturation_delta": 0.1,
37
+ "num_workers_train": 4,
38
+ "num_workers_val": 2,
39
+ "text_max_length": 64,
40
+ "image_max_num_patches": null,
41
+ "lr_backbone": 1e-6,
42
+ "lr_head": 0.0002,
43
+ "weight_decay": 0.01,
44
+ "freeze_backbone": false,
45
+ "freeze_backbone_epochs": 0,
46
+ "freeze_backbone_steps": null,
47
+ "backbone_trainable_scope": "full",
48
+ "attention_heads": 2,
49
+ "attention_dropout": 0.3,
50
+ "image_self_attention_layers": 0,
51
+ "image_dropout": 0.15,
52
+ "normalize_embeddings": true,
53
+ "set_context_mode": "mean",
54
+ "aggregation_mode": "attention",
55
+ "head_feature_mode": "matching",
56
+ "late_interaction_topk": 8,
57
+ "head_hidden_dim": 768,
58
+ "head_num_layers": 2,
59
+ "head_dropout": 0.2,
60
+ "head_activation": "swiglu",
61
+ "label_smoothing": 0.05,
62
+ "alignment_loss_weight": 0.05,
63
+ "alignment_loss_logit_scale": 18.0,
64
+ "alignment_loss_hard_negative_topk": 1,
65
+ "compile_model": false,
66
+ "compile_fallback_to_eager": true,
67
+ "backbone_torch_dtype": "auto",
68
+ "attn_implementation": null,
69
+ "resume_path": null,
70
+ "min_free_space_gb": 1.0,
71
+ "use_wandb": true,
72
+ "wandb_project": "flash-judge",
73
+ "wandb_entity": null,
74
+ "wandb_run_name": null,
75
+ "save_artifacts": true,
76
+ "push_to_hub": true,
77
+ "hub_org": "FuncAI",
78
+ "hub_repo_name": "FlashJudge7",
79
+ "hub_private": false,
80
+ "hub_token": null,
81
+ "device": null,
82
+ "use_amp": true
83
+ }
training_logs.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epoch,train_loss,train_acc,val_loss,val_acc,max_val_acc
2
+ 1,0.3626155336048879,0.8760223448383815,0.36394935452234245,0.8483606597530793,0.8483606597530793
3
+ 2,0.20038580257494265,0.969442161696799,0.39457498462947055,0.8397541029806264,0.8483606597530793