update model

Browse files

Files changed (6) hide show

all_results.json +4 -4
model.safetensors +1 -1
tokenizer.json +6 -1
train_results.json +4 -4
trainer_state.json +18 -18
training_args.bin +1 -1

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "train_loss": 0.21816863666881214,
-    "train_runtime": 391.9097,
     "train_samples": 17598,
-    "train_samples_per_second": 134.71,
-    "train_steps_per_second": 8.42
 }

 {
     "epoch": 3.0,
+    "train_loss": 0.27864328904585406,
+    "train_runtime": 413.6435,
     "train_samples": 17598,
+    "train_samples_per_second": 127.632,
+    "train_steps_per_second": 7.978
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66f4a81fd111458f0c5c0299dab1abfe4ce42e61648d5d3fbaa00ae3cce0de7c
 size 709090132

 version https://git-lfs.github.com/spec/v1
+oid sha256:b94a2b7dc1b945a12c97f126d710acab219fcdb45dd98e053089eeb5cb23ba9f
 size 709090132

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 256,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "train_loss": 0.21816863666881214,
-    "train_runtime": 391.9097,
     "train_samples": 17598,
-    "train_samples_per_second": 134.71,
-    "train_steps_per_second": 8.42
 }

 {
     "epoch": 3.0,
+    "train_loss": 0.27864328904585406,
+    "train_runtime": 413.6435,
     "train_samples": 17598,
+    "train_samples_per_second": 127.632,
+    "train_steps_per_second": 7.978
 }

trainer_state.json CHANGED Viewed

@@ -10,54 +10,54 @@
   "log_history": [
     {
       "epoch": 0.45,
-      "grad_norm": 4.369685173034668,
       "learning_rate": 4.242424242424243e-05,
-      "loss": 0.4661,
       "step": 500
     },
     {
       "epoch": 0.91,
-      "grad_norm": 3.9891207218170166,
       "learning_rate": 3.484848484848485e-05,
-      "loss": 0.3327,
       "step": 1000
     },
     {
       "epoch": 1.36,
-      "grad_norm": 9.819967269897461,
       "learning_rate": 2.7272727272727273e-05,
-      "loss": 0.2258,
       "step": 1500
     },
     {
       "epoch": 1.82,
-      "grad_norm": 1.7621924877166748,
       "learning_rate": 1.9696969696969697e-05,
-      "loss": 0.1772,
       "step": 2000
     },
     {
       "epoch": 2.27,
-      "grad_norm": 21.205034255981445,
       "learning_rate": 1.2121212121212122e-05,
-      "loss": 0.1106,
       "step": 2500
     },
     {
       "epoch": 2.73,
-      "grad_norm": 8.332528114318848,
       "learning_rate": 4.5454545454545455e-06,
-      "loss": 0.0867,
       "step": 3000
     },
     {
       "epoch": 3.0,
       "step": 3300,
-      "total_flos": 2669084030031120.0,
-      "train_loss": 0.21816863666881214,
-      "train_runtime": 391.9097,
-      "train_samples_per_second": 134.71,
-      "train_steps_per_second": 8.42
     }
   ],
   "logging_steps": 500,
@@ -65,7 +65,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 2669084030031120.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "log_history": [
     {
       "epoch": 0.45,
+      "grad_norm": 3.782728910446167,
       "learning_rate": 4.242424242424243e-05,
+      "loss": 0.532,
       "step": 500
     },
     {
       "epoch": 0.91,
+      "grad_norm": 3.925285816192627,
       "learning_rate": 3.484848484848485e-05,
+      "loss": 0.4013,
       "step": 1000
     },
     {
       "epoch": 1.36,
+      "grad_norm": 7.782624244689941,
       "learning_rate": 2.7272727272727273e-05,
+      "loss": 0.281,
       "step": 1500
     },
     {
       "epoch": 1.82,
+      "grad_norm": 3.6547534465789795,
       "learning_rate": 1.9696969696969697e-05,
+      "loss": 0.2454,
       "step": 2000
     },
     {
       "epoch": 2.27,
+      "grad_norm": 0.9583206176757812,
       "learning_rate": 1.2121212121212122e-05,
+      "loss": 0.1696,
       "step": 2500
     },
     {
       "epoch": 2.73,
+      "grad_norm": 11.225361824035645,
       "learning_rate": 4.5454545454545455e-06,
+      "loss": 0.1393,
       "step": 3000
     },
     {
       "epoch": 3.0,
       "step": 3300,
+      "total_flos": 2871828253461180.0,
+      "train_loss": 0.27864328904585406,
+      "train_runtime": 413.6435,
+      "train_samples_per_second": 127.632,
+      "train_steps_per_second": 7.978
     }
   ],
   "logging_steps": 500,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
+  "total_flos": 2871828253461180.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ce0ec1d3c14f032ca50e426364a62c0adbd65c3b9fdc8fd4787ebbcb999f0a3
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:33f26567c9db33a95d0e6f1ccfebacde8b5cba77cf3ea0f0c2923835aa7ee837
 size 4984