Upload folder using huggingface_hub
Browse files- checkpoint-104/config.json +48 -0
- checkpoint-104/model.safetensors +3 -0
- checkpoint-104/optimizer.pt +3 -0
- checkpoint-104/preprocessor_config.json +24 -0
- checkpoint-104/rng_state.pth +3 -0
- checkpoint-104/scheduler.pt +3 -0
- checkpoint-104/trainer_state.json +54 -0
- checkpoint-104/training_args.bin +3 -0
- checkpoint-156/config.json +48 -0
- checkpoint-156/model.safetensors +3 -0
- checkpoint-156/optimizer.pt +3 -0
- checkpoint-156/preprocessor_config.json +24 -0
- checkpoint-156/rng_state.pth +3 -0
- checkpoint-156/scheduler.pt +3 -0
- checkpoint-156/trainer_state.json +64 -0
- checkpoint-156/training_args.bin +3 -0
- checkpoint-52/config.json +48 -0
- checkpoint-52/model.safetensors +3 -0
- checkpoint-52/optimizer.pt +3 -0
- checkpoint-52/preprocessor_config.json +24 -0
- checkpoint-52/rng_state.pth +3 -0
- checkpoint-52/scheduler.pt +3 -0
- checkpoint-52/trainer_state.json +44 -0
- checkpoint-52/training_args.bin +3 -0
- config.json +48 -0
- model.safetensors +3 -0
- preprocessor_config.json +24 -0
- training_args.bin +3 -0
checkpoint-104/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SiglipForImageClassification"
|
| 4 |
+
],
|
| 5 |
+
"id2label": {
|
| 6 |
+
"0": "agentbrowse",
|
| 7 |
+
"1": "calendars",
|
| 8 |
+
"2": "humanbrowse"
|
| 9 |
+
},
|
| 10 |
+
"initializer_factor": 1.0,
|
| 11 |
+
"label2id": {
|
| 12 |
+
"agentbrowse": 0,
|
| 13 |
+
"calendars": 1,
|
| 14 |
+
"humanbrowse": 2
|
| 15 |
+
},
|
| 16 |
+
"model_type": "siglip",
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"text_config": {
|
| 19 |
+
"attention_dropout": 0.0,
|
| 20 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 21 |
+
"hidden_size": 768,
|
| 22 |
+
"intermediate_size": 3072,
|
| 23 |
+
"layer_norm_eps": 1e-06,
|
| 24 |
+
"max_position_embeddings": 64,
|
| 25 |
+
"model_type": "siglip_text_model",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"projection_size": 768,
|
| 29 |
+
"torch_dtype": "float32",
|
| 30 |
+
"vocab_size": 256000
|
| 31 |
+
},
|
| 32 |
+
"torch_dtype": "float32",
|
| 33 |
+
"transformers_version": "4.50.0",
|
| 34 |
+
"vision_config": {
|
| 35 |
+
"attention_dropout": 0.0,
|
| 36 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 37 |
+
"hidden_size": 768,
|
| 38 |
+
"image_size": 224,
|
| 39 |
+
"intermediate_size": 3072,
|
| 40 |
+
"layer_norm_eps": 1e-06,
|
| 41 |
+
"model_type": "siglip_vision_model",
|
| 42 |
+
"num_attention_heads": 12,
|
| 43 |
+
"num_channels": 3,
|
| 44 |
+
"num_hidden_layers": 12,
|
| 45 |
+
"patch_size": 16,
|
| 46 |
+
"torch_dtype": "float32"
|
| 47 |
+
}
|
| 48 |
+
}
|
checkpoint-104/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55aaa265df5026b3b75250328940bf31121c04189547507c61cfd85d2da990e7
|
| 3 |
+
size 371571068
|
checkpoint-104/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e97918f18a9f4e76d6399120631a309d58977bacdbace2a33d9d05f4e9d43735
|
| 3 |
+
size 686561914
|
checkpoint-104/preprocessor_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.5,
|
| 8 |
+
0.5,
|
| 9 |
+
0.5
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "SiglipImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.5,
|
| 14 |
+
0.5,
|
| 15 |
+
0.5
|
| 16 |
+
],
|
| 17 |
+
"processor_class": "SiglipProcessor",
|
| 18 |
+
"resample": 2,
|
| 19 |
+
"rescale_factor": 0.00392156862745098,
|
| 20 |
+
"size": {
|
| 21 |
+
"height": 224,
|
| 22 |
+
"width": 224
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-104/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2442234b6cef5822ea66560689d66cc0d70d7f4cadf176cba9d8bb3573042003
|
| 3 |
+
size 14244
|
checkpoint-104/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d2fffc59e047d2588299685ccf9bfa75c9f6c1287706f94a4a69b2295228b87
|
| 3 |
+
size 1064
|
checkpoint-104/trainer_state.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 104,
|
| 3 |
+
"best_metric": 0.3692067861557007,
|
| 4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-104",
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 104,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.7046979865771812,
|
| 15 |
+
"eval_loss": 0.7991482615470886,
|
| 16 |
+
"eval_model_preparation_time": 0.0022,
|
| 17 |
+
"eval_runtime": 59.2892,
|
| 18 |
+
"eval_samples_per_second": 27.644,
|
| 19 |
+
"eval_steps_per_second": 3.458,
|
| 20 |
+
"step": 52
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"epoch": 2.0,
|
| 24 |
+
"eval_accuracy": 0.9115314215985357,
|
| 25 |
+
"eval_loss": 0.3692067861557007,
|
| 26 |
+
"eval_model_preparation_time": 0.0022,
|
| 27 |
+
"eval_runtime": 59.2955,
|
| 28 |
+
"eval_samples_per_second": 27.641,
|
| 29 |
+
"eval_steps_per_second": 3.457,
|
| 30 |
+
"step": 104
|
| 31 |
+
}
|
| 32 |
+
],
|
| 33 |
+
"logging_steps": 500,
|
| 34 |
+
"max_steps": 156,
|
| 35 |
+
"num_input_tokens_seen": 0,
|
| 36 |
+
"num_train_epochs": 3,
|
| 37 |
+
"save_steps": 500,
|
| 38 |
+
"stateful_callbacks": {
|
| 39 |
+
"TrainerControl": {
|
| 40 |
+
"args": {
|
| 41 |
+
"should_epoch_stop": false,
|
| 42 |
+
"should_evaluate": false,
|
| 43 |
+
"should_log": false,
|
| 44 |
+
"should_save": true,
|
| 45 |
+
"should_training_stop": false
|
| 46 |
+
},
|
| 47 |
+
"attributes": {}
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"total_flos": 2.745527919918981e+17,
|
| 51 |
+
"train_batch_size": 32,
|
| 52 |
+
"trial_name": null,
|
| 53 |
+
"trial_params": null
|
| 54 |
+
}
|
checkpoint-104/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a2f3a6230c65213829acec640610e4b338ff8aece404c702ffad97d6f293f5
|
| 3 |
+
size 5304
|
checkpoint-156/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SiglipForImageClassification"
|
| 4 |
+
],
|
| 5 |
+
"id2label": {
|
| 6 |
+
"0": "agentbrowse",
|
| 7 |
+
"1": "calendars",
|
| 8 |
+
"2": "humanbrowse"
|
| 9 |
+
},
|
| 10 |
+
"initializer_factor": 1.0,
|
| 11 |
+
"label2id": {
|
| 12 |
+
"agentbrowse": 0,
|
| 13 |
+
"calendars": 1,
|
| 14 |
+
"humanbrowse": 2
|
| 15 |
+
},
|
| 16 |
+
"model_type": "siglip",
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"text_config": {
|
| 19 |
+
"attention_dropout": 0.0,
|
| 20 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 21 |
+
"hidden_size": 768,
|
| 22 |
+
"intermediate_size": 3072,
|
| 23 |
+
"layer_norm_eps": 1e-06,
|
| 24 |
+
"max_position_embeddings": 64,
|
| 25 |
+
"model_type": "siglip_text_model",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"projection_size": 768,
|
| 29 |
+
"torch_dtype": "float32",
|
| 30 |
+
"vocab_size": 256000
|
| 31 |
+
},
|
| 32 |
+
"torch_dtype": "float32",
|
| 33 |
+
"transformers_version": "4.50.0",
|
| 34 |
+
"vision_config": {
|
| 35 |
+
"attention_dropout": 0.0,
|
| 36 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 37 |
+
"hidden_size": 768,
|
| 38 |
+
"image_size": 224,
|
| 39 |
+
"intermediate_size": 3072,
|
| 40 |
+
"layer_norm_eps": 1e-06,
|
| 41 |
+
"model_type": "siglip_vision_model",
|
| 42 |
+
"num_attention_heads": 12,
|
| 43 |
+
"num_channels": 3,
|
| 44 |
+
"num_hidden_layers": 12,
|
| 45 |
+
"patch_size": 16,
|
| 46 |
+
"torch_dtype": "float32"
|
| 47 |
+
}
|
| 48 |
+
}
|
checkpoint-156/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:045e8e60277801c108b4a4624c22973b325c0fee913d8abf4e24f15986bd0c5d
|
| 3 |
+
size 371571068
|
checkpoint-156/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:031bf7a41ebb9c14fed5d8be4c4a44b4ebc9d3dbf8137d9ae7327d1f5ed9d068
|
| 3 |
+
size 686561914
|
checkpoint-156/preprocessor_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.5,
|
| 8 |
+
0.5,
|
| 9 |
+
0.5
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "SiglipImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.5,
|
| 14 |
+
0.5,
|
| 15 |
+
0.5
|
| 16 |
+
],
|
| 17 |
+
"processor_class": "SiglipProcessor",
|
| 18 |
+
"resample": 2,
|
| 19 |
+
"rescale_factor": 0.00392156862745098,
|
| 20 |
+
"size": {
|
| 21 |
+
"height": 224,
|
| 22 |
+
"width": 224
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-156/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c3cbb4bb20d96ee20e79c1b1972589e8f42d8a7c09d10fcf8acc31933ccf1c
|
| 3 |
+
size 14244
|
checkpoint-156/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f152cf6fb763356526487f8bdbb4b53fb4e3d563f3781d29a5d447baf32fa596
|
| 3 |
+
size 1064
|
checkpoint-156/trainer_state.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 156,
|
| 3 |
+
"best_metric": 0.28030630946159363,
|
| 4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-156",
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 156,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.7046979865771812,
|
| 15 |
+
"eval_loss": 0.7991482615470886,
|
| 16 |
+
"eval_model_preparation_time": 0.0022,
|
| 17 |
+
"eval_runtime": 59.2892,
|
| 18 |
+
"eval_samples_per_second": 27.644,
|
| 19 |
+
"eval_steps_per_second": 3.458,
|
| 20 |
+
"step": 52
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"epoch": 2.0,
|
| 24 |
+
"eval_accuracy": 0.9115314215985357,
|
| 25 |
+
"eval_loss": 0.3692067861557007,
|
| 26 |
+
"eval_model_preparation_time": 0.0022,
|
| 27 |
+
"eval_runtime": 59.2955,
|
| 28 |
+
"eval_samples_per_second": 27.641,
|
| 29 |
+
"eval_steps_per_second": 3.457,
|
| 30 |
+
"step": 104
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 3.0,
|
| 34 |
+
"eval_accuracy": 0.9219035997559487,
|
| 35 |
+
"eval_loss": 0.28030630946159363,
|
| 36 |
+
"eval_model_preparation_time": 0.0022,
|
| 37 |
+
"eval_runtime": 59.3053,
|
| 38 |
+
"eval_samples_per_second": 27.637,
|
| 39 |
+
"eval_steps_per_second": 3.457,
|
| 40 |
+
"step": 156
|
| 41 |
+
}
|
| 42 |
+
],
|
| 43 |
+
"logging_steps": 500,
|
| 44 |
+
"max_steps": 156,
|
| 45 |
+
"num_input_tokens_seen": 0,
|
| 46 |
+
"num_train_epochs": 3,
|
| 47 |
+
"save_steps": 500,
|
| 48 |
+
"stateful_callbacks": {
|
| 49 |
+
"TrainerControl": {
|
| 50 |
+
"args": {
|
| 51 |
+
"should_epoch_stop": false,
|
| 52 |
+
"should_evaluate": false,
|
| 53 |
+
"should_log": false,
|
| 54 |
+
"should_save": true,
|
| 55 |
+
"should_training_stop": true
|
| 56 |
+
},
|
| 57 |
+
"attributes": {}
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"total_flos": 4.118291879878472e+17,
|
| 61 |
+
"train_batch_size": 32,
|
| 62 |
+
"trial_name": null,
|
| 63 |
+
"trial_params": null
|
| 64 |
+
}
|
checkpoint-156/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a2f3a6230c65213829acec640610e4b338ff8aece404c702ffad97d6f293f5
|
| 3 |
+
size 5304
|
checkpoint-52/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SiglipForImageClassification"
|
| 4 |
+
],
|
| 5 |
+
"id2label": {
|
| 6 |
+
"0": "agentbrowse",
|
| 7 |
+
"1": "calendars",
|
| 8 |
+
"2": "humanbrowse"
|
| 9 |
+
},
|
| 10 |
+
"initializer_factor": 1.0,
|
| 11 |
+
"label2id": {
|
| 12 |
+
"agentbrowse": 0,
|
| 13 |
+
"calendars": 1,
|
| 14 |
+
"humanbrowse": 2
|
| 15 |
+
},
|
| 16 |
+
"model_type": "siglip",
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"text_config": {
|
| 19 |
+
"attention_dropout": 0.0,
|
| 20 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 21 |
+
"hidden_size": 768,
|
| 22 |
+
"intermediate_size": 3072,
|
| 23 |
+
"layer_norm_eps": 1e-06,
|
| 24 |
+
"max_position_embeddings": 64,
|
| 25 |
+
"model_type": "siglip_text_model",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"projection_size": 768,
|
| 29 |
+
"torch_dtype": "float32",
|
| 30 |
+
"vocab_size": 256000
|
| 31 |
+
},
|
| 32 |
+
"torch_dtype": "float32",
|
| 33 |
+
"transformers_version": "4.50.0",
|
| 34 |
+
"vision_config": {
|
| 35 |
+
"attention_dropout": 0.0,
|
| 36 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 37 |
+
"hidden_size": 768,
|
| 38 |
+
"image_size": 224,
|
| 39 |
+
"intermediate_size": 3072,
|
| 40 |
+
"layer_norm_eps": 1e-06,
|
| 41 |
+
"model_type": "siglip_vision_model",
|
| 42 |
+
"num_attention_heads": 12,
|
| 43 |
+
"num_channels": 3,
|
| 44 |
+
"num_hidden_layers": 12,
|
| 45 |
+
"patch_size": 16,
|
| 46 |
+
"torch_dtype": "float32"
|
| 47 |
+
}
|
| 48 |
+
}
|
checkpoint-52/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed4b4fabb254577b59a78ba117d3e52858d7b4f204472a105c5e249e597af418
|
| 3 |
+
size 371571068
|
checkpoint-52/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a74893215cd2da2d37828f778e7f666194cb4865be2b5178ea9adf608f0ebdfc
|
| 3 |
+
size 686561914
|
checkpoint-52/preprocessor_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.5,
|
| 8 |
+
0.5,
|
| 9 |
+
0.5
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "SiglipImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.5,
|
| 14 |
+
0.5,
|
| 15 |
+
0.5
|
| 16 |
+
],
|
| 17 |
+
"processor_class": "SiglipProcessor",
|
| 18 |
+
"resample": 2,
|
| 19 |
+
"rescale_factor": 0.00392156862745098,
|
| 20 |
+
"size": {
|
| 21 |
+
"height": 224,
|
| 22 |
+
"width": 224
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-52/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c97012cdd1978ec7e0344b6300bf910a42d56a4d245aef0ef930a04619a5374f
|
| 3 |
+
size 14244
|
checkpoint-52/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3d9cf8f10aba8a3302952a360abdcfb1363cfc0f35d165f9ec2a8f5c11921d3
|
| 3 |
+
size 1064
|
checkpoint-52/trainer_state.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 52,
|
| 3 |
+
"best_metric": 0.7991482615470886,
|
| 4 |
+
"best_model_checkpoint": "siglip2-finetune-full/checkpoint-52",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 52,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.7046979865771812,
|
| 15 |
+
"eval_loss": 0.7991482615470886,
|
| 16 |
+
"eval_model_preparation_time": 0.0022,
|
| 17 |
+
"eval_runtime": 59.2892,
|
| 18 |
+
"eval_samples_per_second": 27.644,
|
| 19 |
+
"eval_steps_per_second": 3.458,
|
| 20 |
+
"step": 52
|
| 21 |
+
}
|
| 22 |
+
],
|
| 23 |
+
"logging_steps": 500,
|
| 24 |
+
"max_steps": 156,
|
| 25 |
+
"num_input_tokens_seen": 0,
|
| 26 |
+
"num_train_epochs": 3,
|
| 27 |
+
"save_steps": 500,
|
| 28 |
+
"stateful_callbacks": {
|
| 29 |
+
"TrainerControl": {
|
| 30 |
+
"args": {
|
| 31 |
+
"should_epoch_stop": false,
|
| 32 |
+
"should_evaluate": false,
|
| 33 |
+
"should_log": false,
|
| 34 |
+
"should_save": true,
|
| 35 |
+
"should_training_stop": false
|
| 36 |
+
},
|
| 37 |
+
"attributes": {}
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"total_flos": 1.3727639599594906e+17,
|
| 41 |
+
"train_batch_size": 32,
|
| 42 |
+
"trial_name": null,
|
| 43 |
+
"trial_params": null
|
| 44 |
+
}
|
checkpoint-52/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a2f3a6230c65213829acec640610e4b338ff8aece404c702ffad97d6f293f5
|
| 3 |
+
size 5304
|
config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SiglipForImageClassification"
|
| 4 |
+
],
|
| 5 |
+
"id2label": {
|
| 6 |
+
"0": "agentbrowse",
|
| 7 |
+
"1": "calendars",
|
| 8 |
+
"2": "humanbrowse"
|
| 9 |
+
},
|
| 10 |
+
"initializer_factor": 1.0,
|
| 11 |
+
"label2id": {
|
| 12 |
+
"agentbrowse": 0,
|
| 13 |
+
"calendars": 1,
|
| 14 |
+
"humanbrowse": 2
|
| 15 |
+
},
|
| 16 |
+
"model_type": "siglip",
|
| 17 |
+
"problem_type": "single_label_classification",
|
| 18 |
+
"text_config": {
|
| 19 |
+
"attention_dropout": 0.0,
|
| 20 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 21 |
+
"hidden_size": 768,
|
| 22 |
+
"intermediate_size": 3072,
|
| 23 |
+
"layer_norm_eps": 1e-06,
|
| 24 |
+
"max_position_embeddings": 64,
|
| 25 |
+
"model_type": "siglip_text_model",
|
| 26 |
+
"num_attention_heads": 12,
|
| 27 |
+
"num_hidden_layers": 12,
|
| 28 |
+
"projection_size": 768,
|
| 29 |
+
"torch_dtype": "float32",
|
| 30 |
+
"vocab_size": 256000
|
| 31 |
+
},
|
| 32 |
+
"torch_dtype": "float32",
|
| 33 |
+
"transformers_version": "4.50.0",
|
| 34 |
+
"vision_config": {
|
| 35 |
+
"attention_dropout": 0.0,
|
| 36 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 37 |
+
"hidden_size": 768,
|
| 38 |
+
"image_size": 224,
|
| 39 |
+
"intermediate_size": 3072,
|
| 40 |
+
"layer_norm_eps": 1e-06,
|
| 41 |
+
"model_type": "siglip_vision_model",
|
| 42 |
+
"num_attention_heads": 12,
|
| 43 |
+
"num_channels": 3,
|
| 44 |
+
"num_hidden_layers": 12,
|
| 45 |
+
"patch_size": 16,
|
| 46 |
+
"torch_dtype": "float32"
|
| 47 |
+
}
|
| 48 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:045e8e60277801c108b4a4624c22973b325c0fee913d8abf4e24f15986bd0c5d
|
| 3 |
+
size 371571068
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.5,
|
| 8 |
+
0.5,
|
| 9 |
+
0.5
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "SiglipImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.5,
|
| 14 |
+
0.5,
|
| 15 |
+
0.5
|
| 16 |
+
],
|
| 17 |
+
"processor_class": "SiglipProcessor",
|
| 18 |
+
"resample": 2,
|
| 19 |
+
"rescale_factor": 0.00392156862745098,
|
| 20 |
+
"size": {
|
| 21 |
+
"height": 224,
|
| 22 |
+
"width": 224
|
| 23 |
+
}
|
| 24 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a2f3a6230c65213829acec640610e4b338ff8aece404c702ffad97d6f293f5
|
| 3 |
+
size 5304
|