Model save

Files changed (7) hide show

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 11.5514
 ## Model description
@@ -41,7 +41,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.2
-- num_epochs: 3
 ### Training results

 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 11.5637
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.2
+- num_epochs: 10
 ### Training results

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 3.0,
-    "eval_loss": 11.551432609558105,
-    "eval_runtime": 50.9651,
     "eval_samples": 615,
-    "eval_samples_per_second": 12.067,
-    "eval_steps_per_second": 6.043,
     "total_flos": 8536543131303936.0,
-    "train_loss": 12.036643933186408,
-    "train_runtime": 6501.8144,
-    "train_samples_per_second": 2.552,
-    "train_steps_per_second": 1.276
 }

 {
+    "epoch": 10.0,
+    "eval_loss": 11.56369686126709,
+    "eval_runtime": 44.4292,
     "eval_samples": 615,
+    "eval_samples_per_second": 13.842,
+    "eval_steps_per_second": 6.932,
     "total_flos": 8536543131303936.0,
+    "train_loss": 11.932003958565849,
+    "train_runtime": 24504.6855,
+    "train_samples_per_second": 2.257,
+    "train_steps_per_second": 1.128
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 3.0,
-    "eval_loss": 11.551432609558105,
-    "eval_runtime": 50.9651,
     "eval_samples": 615,
-    "eval_samples_per_second": 12.067,
-    "eval_steps_per_second": 6.043
 }

 {
+    "epoch": 10.0,
+    "eval_loss": 11.56369686126709,
+    "eval_runtime": 44.4292,
     "eval_samples": 615,
+    "eval_samples_per_second": 13.842,
+    "eval_steps_per_second": 6.932
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f0580adf7aa0c65ed7cfdd4857ad1dca5fcda8a74ab2db93ac6c2aa34bf13fd
 size 4018095352

 version https://git-lfs.github.com/spec/v1
+oid sha256:f212a975eba9ef3984476f2bac53838949bdf491cdd81ecf0ecd6f51b7b93543
 size 4018095352

tokenizer_config.json CHANGED Viewed

@@ -122,7 +122,7 @@
   "legacy": false,
   "model_max_length": 4096,
   "pad_token": "<unk>",
-  "padding_side": "right",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",

   "legacy": false,
   "model_max_length": 4096,
   "pad_token": "<unk>",
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "epoch": 3.0,
-    "train_loss": 12.036643933186408,
-    "train_runtime": 6501.8144,
-    "train_samples_per_second": 2.552,
-    "train_steps_per_second": 1.276
 }

 {
+    "epoch": 10.0,
+    "train_loss": 11.932003958565849,
+    "train_runtime": 24504.6855,
+    "train_samples_per_second": 2.257,
+    "train_steps_per_second": 1.128
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff