Shresthadev403 committed on
Commit
b8b248d
1 Parent(s): 33d1ab1

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 0.8800
17
- - eval_runtime: 2021.1167
18
- - eval_samples_per_second: 110.392
19
- - eval_steps_per_second: 1.725
20
- - epoch: 23.11
21
- - step: 1450000
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 0.8771
17
+ - eval_runtime: 1995.2079
18
+ - eval_samples_per_second: 111.825
19
+ - eval_steps_per_second: 1.748
20
+ - epoch: 23.9
21
+ - step: 1500000
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1706094708.9715be663057.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde1941b10e53f17b7eb8c8bf5fba2d595a6f7a7d12df24f282efe7dacfb185b
3
+ size 5099
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dbddb86f969282280a65ed4dd2012219eb863ac99138ee0331a62ace913f664
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f0541783e22d8ff3ac5569ac1df2fbf083c4b29926687a32b871ac59ce20dc7
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.107201478860894,
5
  "eval_steps": 50000,
6
- "global_step": 1450000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -413,6 +413,20 @@
413
  "eval_samples_per_second": 110.392,
414
  "eval_steps_per_second": 1.725,
415
  "step": 1450000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  }
417
  ],
418
  "logging_steps": 50000,
@@ -420,7 +434,7 @@
420
  "num_input_tokens_seen": 0,
421
  "num_train_epochs": 500,
422
  "save_steps": 50000,
423
- "total_flos": 3.03098005905408e+18,
424
  "train_batch_size": 32,
425
  "trial_name": null,
426
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.9040015298561,
5
  "eval_steps": 50000,
6
+ "global_step": 1500000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
413
  "eval_samples_per_second": 110.392,
414
  "eval_steps_per_second": 1.725,
415
  "step": 1450000
416
+ },
417
+ {
418
+ "epoch": 23.9,
419
+ "learning_rate": 4.9920348679702315e-05,
420
+ "loss": 0.8277,
421
+ "step": 1500000
422
+ },
423
+ {
424
+ "epoch": 23.9,
425
+ "eval_loss": 0.8771235346794128,
426
+ "eval_runtime": 1995.2079,
427
+ "eval_samples_per_second": 111.825,
428
+ "eval_steps_per_second": 1.748,
429
+ "step": 1500000
430
  }
431
  ],
432
  "logging_steps": 50000,
 
434
  "num_input_tokens_seen": 0,
435
  "num_train_epochs": 500,
436
  "save_steps": 50000,
437
+ "total_flos": 3.13549687185408e+18,
438
  "train_batch_size": 32,
439
  "trial_name": null,
440
  "trial_params": null