Model save

Files changed (10) hide show

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8959
 ## Model description
@@ -43,14 +43,14 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 40
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- num_epochs: 2
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
 | 0.9354        | 0.22  | 92   | 0.9211          |
-| 0.8646        | 1.22  | 185  | 0.8959          |
 ### Framework versions

 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.9013
 ## Model description
 - total_train_batch_size: 40
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
+- num_epochs: 4
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
 | 0.9354        | 0.22  | 92   | 0.9211          |
+| 0.8752        | 1.16  | 160  | 0.9013          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 1.22,
-    "eval_loss": 0.8958835601806641,
-    "eval_runtime": 48.8992,
     "eval_samples": 956,
-    "eval_samples_per_second": 19.55,
-    "eval_steps_per_second": 9.775,
-    "train_loss": 0.9214719901213775,
-    "train_runtime": 5680.6096,
     "train_samples": 16676,
-    "train_samples_per_second": 5.871,
-    "train_steps_per_second": 0.146
 }

 {
+    "epoch": 1.16,
+    "eval_loss": 0.9012895226478577,
+    "eval_runtime": 48.5375,
     "eval_samples": 956,
+    "eval_samples_per_second": 19.696,
+    "eval_steps_per_second": 9.848,
+    "train_loss": 0.0,
+    "train_runtime": 80.9781,
     "train_samples": 16676,
+    "train_samples_per_second": 823.729,
+    "train_steps_per_second": 20.549
 }

config.json CHANGED Viewed

@@ -22,6 +22,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.35.0",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.35.0",
+  "use_cache": false,
   "vocab_size": 32000
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.22,
-    "eval_loss": 0.8958835601806641,
-    "eval_runtime": 48.8992,
     "eval_samples": 956,
-    "eval_samples_per_second": 19.55,
-    "eval_steps_per_second": 9.775
 }

 {
+    "epoch": 1.16,
+    "eval_loss": 0.9012895226478577,
+    "eval_runtime": 48.5375,
     "eval_samples": 956,
+    "eval_samples_per_second": 19.696,
+    "eval_steps_per_second": 9.848
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edfe5952a27c911cfcc36d4ba07c5433974e54140263c4f1aaba3f9835aeaf0b
 size 2200119864

 version https://git-lfs.github.com/spec/v1
+oid sha256:82a6abe0a7f5694698206db580f116073d1e3e745697ffc511a101ea321c73bd
 size 2200119864

runs/Jan03_16-36-16_ml-xtx-machine/events.out.tfevents.1704296223.ml-xtx-machine.50946.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:23b4820243a873e84a6fcb08bf2f029fc5f3920ba8f182f67d6b2b4607044f5e
+size 5049

runs/Jan03_16-36-16_ml-xtx-machine/events.out.tfevents.1704296353.ml-xtx-machine.50946.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:45842fb8aaada45bd3b0f272146efaf3c2e2aaa2b371825a5b16dcdac514384b
+size 359

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.22,
-    "train_loss": 0.9214719901213775,
-    "train_runtime": 5680.6096,
     "train_samples": 16676,
-    "train_samples_per_second": 5.871,
-    "train_steps_per_second": 0.146
 }

 {
+    "epoch": 1.16,
+    "train_loss": 0.0,
+    "train_runtime": 80.9781,
     "train_samples": 16676,
+    "train_samples_per_second": 823.729,
+    "train_steps_per_second": 20.549
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.2213960182297914,
   "eval_steps": 500,
-  "global_step": 185,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -71,34 +71,28 @@
       "step": 160
     },
     {
-      "epoch": 1.21,
-      "learning_rate": 1.7777792431226384e-05,
-      "loss": 0.8646,
-      "step": 180
-    },
-    {
-      "epoch": 1.22,
-      "eval_loss": 0.8958887457847595,
-      "eval_runtime": 48.9138,
-      "eval_samples_per_second": 19.545,
-      "eval_steps_per_second": 9.772,
-      "step": 185
     },
     {
-      "epoch": 1.22,
-      "step": 185,
-      "total_flos": 9.424742439400243e+16,
-      "train_loss": 0.9214719901213775,
-      "train_runtime": 5680.6096,
-      "train_samples_per_second": 5.871,
-      "train_steps_per_second": 0.146
     }
   ],
   "logging_steps": 20,
-  "max_steps": 832,
-  "num_train_epochs": 2,
   "save_steps": 40,
-  "total_flos": 9.424742439400243e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1614295994243224,
   "eval_steps": 500,
+  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 160
     },
     {
+      "epoch": 1.16,
+      "eval_loss": 0.9012895226478577,
+      "eval_runtime": 48.6193,
+      "eval_samples_per_second": 19.663,
+      "eval_steps_per_second": 9.831,
+      "step": 160
     },
     {
+      "epoch": 1.16,
+      "step": 160,
+      "total_flos": 8.13446546367447e+16,
+      "train_loss": 0.0,
+      "train_runtime": 80.9781,
+      "train_samples_per_second": 823.729,
+      "train_steps_per_second": 20.549
     }
   ],
   "logging_steps": 20,
+  "max_steps": 1664,
+  "num_train_epochs": 4,
   "save_steps": 40,
+  "total_flos": 8.13446546367447e+16,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1aa50007fd840cca58fdd03dcd8502b2c5a82b34eb9b5546d8b2fa653a4498b5
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:62daaa1ca184febf366ffa9c4dc9832e7b1feee79b4945c2ec3fbdbb5baf799d
 size 4728