KilianFt committed on
Commit
e6982a0
·
verified ·
1 Parent(s): ec8ac94

Upload FlashJudge artifacts from run_20260409_104527

Browse files
flash_judge_checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dbb8025770dbee67e53c5c048787f36a1625c1f2962a8077cdd8bd16d6edd92
3
+ size 1534188467
training_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "google/siglip2-base-patch16-naflex",
3
+ "dataset": "apartments",
4
+ "apartments_jsonl": "data/apartments/question-image-dataset_labeled.jsonl",
5
+ "apartments_images_dir": "data/apartments/images",
6
+ "fmnist_root": "data",
7
+ "output_root": "models",
8
+ "run_name": null,
9
+ "batch_size": 8,
10
+ "gradient_accumulation_steps": 1,
11
+ "stack_size": 1,
12
+ "epochs": 10,
13
+ "max_steps": null,
14
+ "eval_interval": 1,
15
+ "eval_batches": 2,
16
+ "checkpoint_interval": 1000,
17
+ "checkpoint_mode": "max_val_acc",
18
+ "val_ratio": 0.1,
19
+ "seed": 42,
20
+ "apartments_train_group_limit": null,
21
+ "apartments_train_question_limit": null,
22
+ "apartments_hard_negative_strategy": "within_group_overlap",
23
+ "apartments_hard_negative_topk": 0,
24
+ "apartments_hard_negative_oversample_factor": 0,
25
+ "num_workers_train": 4,
26
+ "num_workers_val": 2,
27
+ "text_max_length": 64,
28
+ "image_max_num_patches": null,
29
+ "lr_backbone": 1e-6,
30
+ "lr_head": 0.0002,
31
+ "weight_decay": 0.01,
32
+ "freeze_backbone": false,
33
+ "freeze_backbone_epochs": 0,
34
+ "freeze_backbone_steps": null,
35
+ "backbone_trainable_scope": "full",
36
+ "attention_heads": 2,
37
+ "attention_dropout": 0.3,
38
+ "image_self_attention_layers": 0,
39
+ "aggregation_mode": "attention",
40
+ "head_feature_mode": "matching",
41
+ "late_interaction_topk": 8,
42
+ "head_hidden_dim": 768,
43
+ "head_num_layers": 2,
44
+ "head_dropout": 0.2,
45
+ "head_activation": "swiglu",
46
+ "label_smoothing": 0.05,
47
+ "alignment_loss_weight": 0.05,
48
+ "alignment_loss_logit_scale": 18.0,
49
+ "alignment_loss_hard_negative_topk": 1,
50
+ "compile_model": false,
51
+ "compile_fallback_to_eager": true,
52
+ "backbone_torch_dtype": "auto",
53
+ "attn_implementation": null,
54
+ "resume_path": null,
55
+ "min_free_space_gb": 1.0,
56
+ "use_wandb": true,
57
+ "wandb_project": "flash-judge",
58
+ "wandb_entity": null,
59
+ "wandb_run_name": null,
60
+ "save_artifacts": true,
61
+ "push_to_hub": true,
62
+ "hub_org": "FuncAI",
63
+ "hub_repo_name": "FlashJudge2",
64
+ "hub_private": false,
65
+ "hub_token": null,
66
+ "device": null,
67
+ "use_amp": true
68
+ }
training_logs.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,train_loss,train_acc,val_loss,val_acc,max_val_acc
2
+ 1,0.5727274777736026,0.7228306655433867,0.41435598860226985,0.8224248927038627,0.8224248927038627
3
+ 2,0.42275050729028957,0.8384883860873751,0.356161927940252,0.8406652360515021,0.8406652360515021
4
+ 3,0.3362218457988015,0.8893368636418342,0.3536079549610359,0.8406652360515021,0.8406652360515021
5
+ 4,0.2799612180874485,0.9225538572632086,0.35294658706679366,0.8492489270386266,0.8492489270386266
6
+ 5,0.23606232431143226,0.9468046696353352,0.3613514193921258,0.8476394849785408,0.8492489270386266
7
+ 6,0.19861953134807328,0.9676856420748586,0.3673862542621312,0.8492489270386266,0.8492489270386266
8
+ 7,0.17411077068104241,0.9820676375015044,0.41156055256244706,0.8395922746781116,0.8492489270386266
9
+ 8,0.15878004032411447,0.9883860873751354,0.39233391980196913,0.8524678111587983,0.8524678111587983
10
+ 9,0.1485047506396916,0.9938620772656156,0.40588728695820075,0.8497854077253219,0.8524678111587983
11
+ 10,0.14366990868896623,0.995787700084246,0.394929265251679,0.8540772532188842,0.8540772532188842