End of training

Browse files

Files changed (7) hide show

README.md +7 -2
all_results.json +14 -0
test_results.json +10 -0
tokenizer.json +2 -16
train_results.json +10 -0
trainer_state.json +148 -0
validation_results.json +10 -0

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ base_model: BridgeTower/bridgetower-large-itm-mlm-itc
 tags:
 - generated_from_trainer
 datasets:
-- newyorker_caption_contest
 model-index:
 - name: test-bridgetower
   results: []
@@ -15,7 +15,12 @@ should probably proofread and complete it, then remove this comment. -->
 # test-bridgetower
-This model is a fine-tuned version of [BridgeTower/bridgetower-large-itm-mlm-itc](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc) on the newyorker_caption_contest dataset.
 ## Model description

 tags:
 - generated_from_trainer
 datasets:
+- jmhessel/newyorker_caption_contest
 model-index:
 - name: test-bridgetower
   results: []
 # test-bridgetower
+This model is a fine-tuned version of [BridgeTower/bridgetower-large-itm-mlm-itc](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc) on the jmhessel/newyorker_caption_contest matching dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1163
+- Memory Allocated (gb): 17.72
+- Max Memory Allocated (gb): 94.43
+- Total Memory Available (gb): 94.61
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "epoch": 5.0,
+    "eval_loss": 0.11634299904108047,
+    "eval_runtime": 0.4543,
+    "eval_samples_per_second": 1162.27,
+    "eval_steps_per_second": 11.006,
+    "max_memory_allocated (GB)": 94.43,
+    "memory_allocated (GB)": 17.72,
+    "total_memory_available (GB)": 94.61,
+    "train_loss": 0.08891021746855515,
+    "train_runtime": 341.352,
+    "train_samples_per_second": 429.544,
+    "train_steps_per_second": 1.141
+}

test_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 5.0,
+    "eval_loss": 0.11634299904108047,
+    "eval_runtime": 0.4543,
+    "eval_samples_per_second": 1162.27,
+    "eval_steps_per_second": 11.006,
+    "max_memory_allocated (GB)": 94.43,
+    "memory_allocated (GB)": 17.72,
+    "total_memory_available (GB)": 94.61
+}

tokenizer.json CHANGED Viewed

@@ -1,21 +1,7 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 128,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
-  "padding": {
-    "strategy": {
-      "Fixed": 128
-    },
-    "direction": "Right",
-    "pad_to_multiple_of": null,
-    "pad_id": 1,
-    "pad_type_id": 0,
-    "pad_token": "<pad>"
-  },
   "added_tokens": [
     {
       "id": 0,

 {
   "version": "1.0",
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,

train_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 5.0,
+    "max_memory_allocated (GB)": 94.43,
+    "memory_allocated (GB)": 17.74,
+    "total_memory_available (GB)": 94.61,
+    "train_loss": 0.08891021746855515,
+    "train_runtime": 341.352,
+    "train_samples_per_second": 429.544,
+    "train_steps_per_second": 1.141
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,148 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.38,
+      "learning_rate": 9.230769230769232e-06,
+      "loss": 0.2471,
+      "max_memory_allocated (GB)": 94.4,
+      "memory_allocated (GB)": 17.88,
+      "step": 10,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 8.461538461538462e-06,
+      "loss": 0.1034,
+      "max_memory_allocated (GB)": 94.42,
+      "memory_allocated (GB)": 17.88,
+      "step": 20,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.692307692307694e-06,
+      "loss": 0.0834,
+      "max_memory_allocated (GB)": 94.42,
+      "memory_allocated (GB)": 17.88,
+      "step": 30,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 6.923076923076923e-06,
+      "loss": 0.0813,
+      "max_memory_allocated (GB)": 94.42,
+      "memory_allocated (GB)": 17.88,
+      "step": 40,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 6.153846153846155e-06,
+      "loss": 0.0771,
+      "max_memory_allocated (GB)": 94.42,
+      "memory_allocated (GB)": 17.83,
+      "step": 50,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 5.384615384615385e-06,
+      "loss": 0.0732,
+      "max_memory_allocated (GB)": 94.42,
+      "memory_allocated (GB)": 17.88,
+      "step": 60,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 4.615384615384616e-06,
+      "loss": 0.0714,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.89,
+      "step": 70,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 3.08,
+      "learning_rate": 3.846153846153847e-06,
+      "loss": 0.0715,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.89,
+      "step": 80,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 3.0769230769230774e-06,
+      "loss": 0.0693,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.88,
+      "step": 90,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 3.85,
+      "learning_rate": 2.307692307692308e-06,
+      "loss": 0.0691,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.88,
+      "step": 100,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 4.23,
+      "learning_rate": 1.5384615384615387e-06,
+      "loss": 0.0691,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.88,
+      "step": 110,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 4.62,
+      "learning_rate": 7.692307692307694e-07,
+      "loss": 0.0702,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.88,
+      "step": 120,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 0.0,
+      "loss": 0.0698,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.74,
+      "step": 130,
+      "total_memory_available (GB)": 94.61
+    },
+    {
+      "epoch": 5.0,
+      "max_memory_allocated (GB)": 94.43,
+      "memory_allocated (GB)": 17.74,
+      "step": 130,
+      "total_flos": 3.117345829342413e+16,
+      "total_memory_available (GB)": 94.61,
+      "train_loss": 0.08891021746855515,
+      "train_runtime": 341.352,
+      "train_samples_per_second": 429.544,
+      "train_steps_per_second": 1.141
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 130,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 3.117345829342413e+16,
+  "trial_name": null,
+  "trial_params": null
+}

validation_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 5.0,
+    "eval_loss": 0.11108700931072235,
+    "eval_runtime": 7.8437,
+    "eval_samples_per_second": 67.697,
+    "eval_steps_per_second": 0.637,
+    "max_memory_allocated (GB)": 94.43,
+    "memory_allocated (GB)": 17.71,
+    "total_memory_available (GB)": 94.61
+}