Shresthadev403 committed on
Commit
0b6365d
1 Parent(s): 2932d93

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 0.8792
17
- - eval_runtime: 1985.6931
18
- - eval_samples_per_second: 112.361
19
- - eval_steps_per_second: 1.756
20
- - epoch: 22.31
21
- - step: 1400000
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 0.8800
17
+ - eval_runtime: 2021.1167
18
+ - eval_samples_per_second: 110.392
19
+ - eval_steps_per_second: 1.725
20
+ - epoch: 23.11
21
+ - step: 1450000
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1705980498.4c96a83a2518.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd5a93fd1005b8776d729e04eacaecc520b2121f9fac4ca951575131c0090a2
3
+ size 5099
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a51da105fccd05ac3ab0eab61c0e0cc154cfc86fdfafd085fc53bab26b3187b
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbddb86f969282280a65ed4dd2012219eb863ac99138ee0331a62ace913f664
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.310401427865692,
5
  "eval_steps": 50000,
6
- "global_step": 1400000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -399,6 +399,20 @@
399
  "eval_samples_per_second": 112.361,
400
  "eval_steps_per_second": 1.756,
401
  "step": 1400000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  }
403
  ],
404
  "logging_steps": 50000,
@@ -406,7 +420,7 @@
406
  "num_input_tokens_seen": 0,
407
  "num_train_epochs": 500,
408
  "save_steps": 50000,
409
- "total_flos": 2.92646357286912e+18,
410
  "train_batch_size": 32,
411
  "trial_name": null,
412
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.107201478860894,
5
  "eval_steps": 50000,
6
+ "global_step": 1450000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
399
  "eval_samples_per_second": 112.361,
400
  "eval_steps_per_second": 1.756,
401
  "step": 1400000
402
+ },
403
+ {
404
+ "epoch": 23.11,
405
+ "learning_rate": 4.9920348679702315e-05,
406
+ "loss": 0.8294,
407
+ "step": 1450000
408
+ },
409
+ {
410
+ "epoch": 23.11,
411
+ "eval_loss": 0.8799900412559509,
412
+ "eval_runtime": 2021.1167,
413
+ "eval_samples_per_second": 110.392,
414
+ "eval_steps_per_second": 1.725,
415
+ "step": 1450000
416
  }
417
  ],
418
  "logging_steps": 50000,
 
420
  "num_input_tokens_seen": 0,
421
  "num_train_epochs": 500,
422
  "save_steps": 50000,
423
+ "total_flos": 3.03098005905408e+18,
424
  "train_batch_size": 32,
425
  "trial_name": null,
426
  "trial_params": null