mikhail-panzo commited on
Commit
3745690
1 Parent(s): 2dbbdeb

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d76fb25ac74cbe823a725be4343d2b07424a8edf844909a7000abe7d69111912
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63511d44cdc914119f5c885f1c739e0023041a9ad236fc9a57f6ff9c0333eb16
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f7c629776fb459ffe00693d1167430337e7d74e730089d7e33ee908588b005f
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd67f665ee333d5cacfa0b6159c217e72831fae2f2d447f9381dc2659128ce4
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b694d3ca006a86a8090922c7030bb02dd33e4641d640c040f8086a25b51174
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3395cc0c92099e3a41f5095fd540270dd8b496efba101003c928717ffc8eedfe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a56375003d29c70f770a5d9b0b3090653def3595de353abeabbdb559dcca8724
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5cdfe5ed4f14bdadfee62402701e9c3c91a7e1b8246c7c7f0be536b67574fb3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.4066200256347656,
3
  "best_model_checkpoint": "mikhail_panzo/fil_b32_le4_s8000/checkpoint-5500",
4
- "epoch": 153.84615384615384,
5
  "eval_steps": 500,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1099,6 +1099,84 @@
1099
  "eval_samples_per_second": 12.259,
1100
  "eval_steps_per_second": 1.599,
1101
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
  }
1103
  ],
1104
  "logging_steps": 50,
@@ -1118,7 +1196,7 @@
1118
  "attributes": {}
1119
  }
1120
  },
1121
- "total_flos": 4.150908851832547e+16,
1122
  "train_batch_size": 16,
1123
  "trial_name": null,
1124
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4066200256347656,
3
  "best_model_checkpoint": "mikhail_panzo/fil_b32_le4_s8000/checkpoint-5500",
4
+ "epoch": 164.83516483516485,
5
  "eval_steps": 500,
6
+ "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1099
  "eval_samples_per_second": 12.259,
1100
  "eval_steps_per_second": 1.599,
1101
  "step": 7000
1102
+ },
1103
+ {
1104
+ "epoch": 154.94505494505495,
1105
+ "grad_norm": 0.9426752328872681,
1106
+ "learning_rate": 1.5883333333333334e-05,
1107
+ "loss": 0.3567,
1108
+ "step": 7050
1109
+ },
1110
+ {
1111
+ "epoch": 156.04395604395606,
1112
+ "grad_norm": 1.0722122192382812,
1113
+ "learning_rate": 1.505e-05,
1114
+ "loss": 0.3559,
1115
+ "step": 7100
1116
+ },
1117
+ {
1118
+ "epoch": 157.14285714285714,
1119
+ "grad_norm": 1.1290276050567627,
1120
+ "learning_rate": 1.4216666666666667e-05,
1121
+ "loss": 0.3654,
1122
+ "step": 7150
1123
+ },
1124
+ {
1125
+ "epoch": 158.24175824175825,
1126
+ "grad_norm": 1.0128904581069946,
1127
+ "learning_rate": 1.3383333333333335e-05,
1128
+ "loss": 0.357,
1129
+ "step": 7200
1130
+ },
1131
+ {
1132
+ "epoch": 159.34065934065933,
1133
+ "grad_norm": 1.2913401126861572,
1134
+ "learning_rate": 1.255e-05,
1135
+ "loss": 0.3556,
1136
+ "step": 7250
1137
+ },
1138
+ {
1139
+ "epoch": 160.43956043956044,
1140
+ "grad_norm": 1.0247690677642822,
1141
+ "learning_rate": 1.1716666666666667e-05,
1142
+ "loss": 0.3512,
1143
+ "step": 7300
1144
+ },
1145
+ {
1146
+ "epoch": 161.53846153846155,
1147
+ "grad_norm": 1.1389926671981812,
1148
+ "learning_rate": 1.0883333333333335e-05,
1149
+ "loss": 0.3553,
1150
+ "step": 7350
1151
+ },
1152
+ {
1153
+ "epoch": 162.63736263736263,
1154
+ "grad_norm": 0.853523850440979,
1155
+ "learning_rate": 1.005e-05,
1156
+ "loss": 0.357,
1157
+ "step": 7400
1158
+ },
1159
+ {
1160
+ "epoch": 163.73626373626374,
1161
+ "grad_norm": 0.8105210065841675,
1162
+ "learning_rate": 9.216666666666666e-06,
1163
+ "loss": 0.3512,
1164
+ "step": 7450
1165
+ },
1166
+ {
1167
+ "epoch": 164.83516483516485,
1168
+ "grad_norm": 0.891890287399292,
1169
+ "learning_rate": 8.383333333333333e-06,
1170
+ "loss": 0.3581,
1171
+ "step": 7500
1172
+ },
1173
+ {
1174
+ "epoch": 164.83516483516485,
1175
+ "eval_loss": 0.4096794128417969,
1176
+ "eval_runtime": 12.3085,
1177
+ "eval_samples_per_second": 13.08,
1178
+ "eval_steps_per_second": 1.706,
1179
+ "step": 7500
1180
  }
1181
  ],
1182
  "logging_steps": 50,
 
1196
  "attributes": {}
1197
  }
1198
  },
1199
+ "total_flos": 4.446529244284723e+16,
1200
  "train_batch_size": 16,
1201
  "trial_name": null,
1202
  "trial_params": null