Training in progress, step 10, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/config.json +10 -12
last-checkpoint/model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +9 -44
last-checkpoint/training_args.bin +1 -1

last-checkpoint/config.json CHANGED Viewed

@@ -8,11 +8,9 @@
   "model_type": "vision-text-dual-encoder",
   "projection_dim": 512,
   "text_config": {
-    "_name_or_path": "roberta-base",
     "add_cross_attention": false,
-    "architectures": [
-      "RobertaForMaskedLM"
-    ],
     "attention_probs_dropout_prob": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
@@ -90,7 +88,7 @@
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
   "vision_config": {
-    "_name_or_path": "openai/clip-vit-base-patch32",
     "add_cross_attention": false,
     "architectures": null,
     "attention_dropout": 0.0,
@@ -111,15 +109,15 @@
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
     "hidden_act": "quick_gelu",
-    "hidden_size": 768,
     "id2label": {
       "0": "LABEL_0",
       "1": "LABEL_1"
     },
-    "image_size": 224,
     "initializer_factor": 1.0,
     "initializer_range": 0.02,
-    "intermediate_size": 3072,
     "is_decoder": false,
     "is_encoder_decoder": false,
     "label2id": {
@@ -132,20 +130,20 @@
     "min_length": 0,
     "model_type": "clip_vision_model",
     "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_channels": 3,
-    "num_hidden_layers": 12,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
-    "patch_size": 32,
     "prefix": null,
     "problem_type": null,
-    "projection_dim": 512,
     "pruned_heads": {},
     "remove_invalid_values": false,
     "repetition_penalty": 1.0,

   "model_type": "vision-text-dual-encoder",
   "projection_dim": 512,
   "text_config": {
+    "_name_or_path": "allenai/biomed_roberta_base",
     "add_cross_attention": false,
+    "architectures": null,
     "attention_probs_dropout_prob": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
   "vision_config": {
+    "_name_or_path": "openai/clip-vit-large-patch14-336",
     "add_cross_attention": false,
     "architectures": null,
     "attention_dropout": 0.0,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
     "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
     "id2label": {
       "0": "LABEL_0",
       "1": "LABEL_1"
     },
+    "image_size": 336,
     "initializer_factor": 1.0,
     "initializer_range": 0.02,
+    "intermediate_size": 4096,
     "is_decoder": false,
     "is_encoder_decoder": false,
     "label2id": {
     "min_length": 0,
     "model_type": "clip_vision_model",
     "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_channels": 3,
+    "num_hidden_layers": 24,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
+    "patch_size": 14,
     "prefix": null,
     "problem_type": null,
+    "projection_dim": 768,
     "pruned_heads": {},
     "remove_invalid_values": false,
     "repetition_penalty": 1.0,

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa1cfee1a3f3a86f86c4ccb5f265543a870a49cead43e0806e7e141372f3f4ed
-size 851603588

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d93e1447df338cb82425c676ee327e29e6ccdf6b0fcb1e6b1e8bcc1b9790a47
+size 1716360444

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acf262e3379c8e37d1c1a0ad4d14462dcd2a7d95af8c23d0d4b516558414521e
-size 1703440133

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9a2844302fb6bb266b84b60df234bff01243f6e34895e273c410eeacbbc97e6
+size 3433062021

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d5ea662e81c1b00c09c9c41f57f4e4cf6842421258bbe30ba99a30f79b98eaf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7775cfb857ea8d5dad4e55d621fe6b664e7f7e01b9a13925f47757ea28a71912
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bd0c58b23312c237ad42ee3260ade34ab0ee49f394461ab2981236675264909
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b32c228988998aa7f8c86d4c26da16835ba6d8d022fa594dfdc2c5ed5b7a666
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,58 +1,23 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.4036697247706424,
   "eval_steps": 100,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.7339449541284404,
-      "grad_norm": 0.2602804899215698,
-      "learning_rate": 4.914814565722671e-05,
-      "loss": 2.7791,
       "step": 10
-    },
-    {
-      "epoch": 1.4678899082568808,
-      "grad_norm": 0.1586851328611374,
-      "learning_rate": 4.267766952966369e-05,
-      "loss": 2.763,
-      "step": 20
-    },
-    {
-      "epoch": 2.2018348623853212,
-      "grad_norm": 0.08846019953489304,
-      "learning_rate": 3.147047612756302e-05,
-      "loss": 2.7628,
-      "step": 30
-    },
-    {
-      "epoch": 2.9357798165137616,
-      "grad_norm": 0.03923465684056282,
-      "learning_rate": 1.852952387243698e-05,
-      "loss": 2.773,
-      "step": 40
-    },
-    {
-      "epoch": 3.669724770642202,
-      "grad_norm": 0.045462485402822495,
-      "learning_rate": 7.3223304703363135e-06,
-      "loss": 2.7624,
-      "step": 50
-    },
-    {
-      "epoch": 4.4036697247706424,
-      "grad_norm": 0.0461534820497036,
-      "learning_rate": 8.51854342773295e-07,
-      "loss": 2.7624,
-      "step": 60
     }
   ],
   "logging_steps": 10,
-  "max_steps": 65,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 10,
@@ -68,8 +33,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 1020606088320000.0,
-  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04608294930875576,
   "eval_steps": 100,
+  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.04608294930875576,
+      "grad_norm": 3.232759714126587,
+      "learning_rate": 4.999735579817769e-05,
+      "loss": 1.4008,
       "step": 10
     }
   ],
   "logging_steps": 10,
+  "max_steps": 1085,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 23929694515200.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea249cd364526eab937ad6487a7f81bc8fe5346a1416e3119d05e564ae6f62c3
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:43d91b1496178ac406352964cea26801514f7d8531618943d63c80852c83fa37
 size 5112