Upload FlashJudge artifacts from run_20260413_171923

Browse files

Files changed (3) hide show

flash_judge_checkpoint.pth +3 -0
training_config.json +68 -0
training_logs.csv +11 -0

flash_judge_checkpoint.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f3a6d84a547d8a2bec5590f77db687a6d3153d3693b850e4d65ecfc67ef2b4a
+size 1534188467

training_config.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "model_id": "google/siglip2-base-patch16-naflex",
+  "dataset": "apartments",
+  "apartments_jsonl": "data/apartments/question-image-dataset_labeled.jsonl",
+  "apartments_images_dir": "data/apartments/images",
+  "fmnist_root": "data",
+  "output_root": "models",
+  "run_name": null,
+  "batch_size": 12,
+  "gradient_accumulation_steps": 1,
+  "stack_size": 1,
+  "epochs": 10,
+  "max_steps": null,
+  "eval_interval": 1,
+  "eval_batches": 2,
+  "checkpoint_interval": 1000,
+  "checkpoint_mode": "max_val_acc",
+  "val_ratio": 0.1,
+  "seed": 42,
+  "apartments_train_group_limit": null,
+  "apartments_train_question_limit": null,
+  "apartments_hard_negative_strategy": "within_group_overlap",
+  "apartments_hard_negative_topk": 0,
+  "apartments_hard_negative_oversample_factor": 0,
+  "num_workers_train": 4,
+  "num_workers_val": 2,
+  "text_max_length": 64,
+  "image_max_num_patches": null,
+  "lr_backbone": 1e-6,
+  "lr_head": 0.0002,
+  "weight_decay": 0.01,
+  "freeze_backbone": false,
+  "freeze_backbone_epochs": 0,
+  "freeze_backbone_steps": null,
+  "backbone_trainable_scope": "full",
+  "attention_heads": 2,
+  "attention_dropout": 0.3,
+  "image_self_attention_layers": 0,
+  "aggregation_mode": "attention",
+  "head_feature_mode": "matching",
+  "late_interaction_topk": 8,
+  "head_hidden_dim": 768,
+  "head_num_layers": 2,
+  "head_dropout": 0.2,
+  "head_activation": "swiglu",
+  "label_smoothing": 0.05,
+  "alignment_loss_weight": 0.05,
+  "alignment_loss_logit_scale": 18.0,
+  "alignment_loss_hard_negative_topk": 1,
+  "compile_model": false,
+  "compile_fallback_to_eager": true,
+  "backbone_torch_dtype": "auto",
+  "attn_implementation": null,
+  "resume_path": null,
+  "min_free_space_gb": 1.0,
+  "use_wandb": true,
+  "wandb_project": "flash-judge",
+  "wandb_entity": null,
+  "wandb_run_name": null,
+  "save_artifacts": true,
+  "push_to_hub": true,
+  "hub_org": "FuncAI",
+  "hub_repo_name": "FlashJudge3",
+  "hub_private": false,
+  "hub_token": null,
+  "device": null,
+  "use_amp": true
+}

training_logs.csv ADDED Viewed

	@@ -0,0 +1,11 @@

+epoch,train_loss,train_acc,val_loss,val_acc,max_val_acc
+1,0.5494948535324894,0.745762051361989,0.482269128884068,0.7690839911917694,0.7690839911917694
+2,0.4104818215007268,0.8483301508125171,0.3898940207507774,0.8222010383624156,0.8222010383624156
+3,0.3310342863324621,0.8946650559214372,0.36742849753216933,0.8365140163716469,0.8365140163716469
+4,0.27506209598548376,0.9268539807489965,0.3869491647928953,0.8377862802443613,0.8377862802443613
+5,0.22913343823026697,0.9516776431160127,0.4359181334925744,0.8260178318005482,0.8377862802443613
+6,0.19417412566400494,0.9703441019445852,0.43097968814709714,0.829834624328686,0.8377862802443613
+7,0.16807112935970414,0.984490082225946,0.45283080663048586,0.8298346256936779,0.8377862802443613
+8,0.15529710228082838,0.9917384357585441,0.46923461431310376,0.8342875517506636,0.8377862802443613
+9,0.14645108295381964,0.9958302482366237,0.4757098777731525,0.832697220658528,0.8377862802443613
+10,0.14203419984618063,0.997311094657262,0.4732470480255953,0.8298346254661793,0.8377862802443613