Shresthadev403 committed on
Commit
4e0326f
1 Parent(s): 56731cd

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 0.8771
17
- - eval_runtime: 1995.2079
18
- - eval_samples_per_second: 111.825
19
  - eval_steps_per_second: 1.748
20
- - epoch: 23.9
21
- - step: 1500000
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 0.8785
17
+ - eval_runtime: 1994.3392
18
+ - eval_samples_per_second: 111.874
19
  - eval_steps_per_second: 1.748
20
+ - epoch: 24.7
21
+ - step: 1550000
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1706414275.bae337cb6ff3.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbbe8e4efb24b0fe86bc3844d93e9007d685c16aba6b387189e3812a7bb4bd5
3
+ size 5099
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f0541783e22d8ff3ac5569ac1df2fbf083c4b29926687a32b871ac59ce20dc7
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7c2e9b269aec002f777f3596e10788d6443bfda502ad3980d9dd7d4f4cc9d0c
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.9040015298561,
5
  "eval_steps": 50000,
6
- "global_step": 1500000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,20 @@
427
  "eval_samples_per_second": 111.825,
428
  "eval_steps_per_second": 1.748,
429
  "step": 1500000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 50000,
@@ -434,7 +448,7 @@
434
  "num_input_tokens_seen": 0,
435
  "num_train_epochs": 500,
436
  "save_steps": 50000,
437
- "total_flos": 3.13549687185408e+18,
438
  "train_batch_size": 32,
439
  "trial_name": null,
440
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 24.7008015808513,
5
  "eval_steps": 50000,
6
+ "global_step": 1550000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "eval_samples_per_second": 111.825,
428
  "eval_steps_per_second": 1.748,
429
  "step": 1500000
430
+ },
431
+ {
432
+ "epoch": 24.7,
433
+ "learning_rate": 4.9920348679702315e-05,
434
+ "loss": 0.8254,
435
+ "step": 1550000
436
+ },
437
+ {
438
+ "epoch": 24.7,
439
+ "eval_loss": 0.8784825205802917,
440
+ "eval_runtime": 1994.3392,
441
+ "eval_samples_per_second": 111.874,
442
+ "eval_steps_per_second": 1.748,
443
+ "step": 1550000
444
  }
445
  ],
446
  "logging_steps": 50000,
 
448
  "num_input_tokens_seen": 0,
449
  "num_train_epochs": 500,
450
  "save_steps": 50000,
451
+ "total_flos": 3.24001335803904e+18,
452
  "train_batch_size": 32,
453
  "trial_name": null,
454
  "trial_params": null