Shresthadev403 committed on
Commit
ecc733f
1 Parent(s): ed1c350

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 0.8804
17
- - eval_runtime: 2001.1642
18
- - eval_samples_per_second: 111.493
19
- - eval_steps_per_second: 1.742
20
- - epoch: 27.09
21
- - step: 1700000
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 0.8755
17
+ - eval_runtime: 2011.2204
18
+ - eval_samples_per_second: 110.935
19
+ - eval_steps_per_second: 1.734
20
+ - epoch: 27.89
21
+ - step: 1750000
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1706959201.da32439aada9.27.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78170dcdced1e0eb4f4cc7998c6687cb55b9d9832697924e5109cdff08996864
3
+ size 5099
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da84264f61b81ecbeaa737abc94c8f7f2c8eaa12e6750a0475674fbc27a33a05
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d01d405f1fd949e31599c2d9dcedc77bbf8a8dd66e8e7243f43e7b70e2c97dd7
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 27.09120173383691,
5
  "eval_steps": 50000,
6
- "global_step": 1700000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -483,6 +483,20 @@
483
  "eval_samples_per_second": 111.493,
484
  "eval_steps_per_second": 1.742,
485
  "step": 1700000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  }
487
  ],
488
  "logging_steps": 50000,
@@ -490,7 +504,7 @@
490
  "num_input_tokens_seen": 0,
491
  "num_train_epochs": 500,
492
  "save_steps": 50000,
493
- "total_flos": 3.55356281659392e+18,
494
  "train_batch_size": 32,
495
  "trial_name": null,
496
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 27.888001784832113,
5
  "eval_steps": 50000,
6
+ "global_step": 1750000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
483
  "eval_samples_per_second": 111.493,
484
  "eval_steps_per_second": 1.742,
485
  "step": 1700000
486
+ },
487
+ {
488
+ "epoch": 27.89,
489
+ "learning_rate": 4.992035186690252e-05,
490
+ "loss": 0.8188,
491
+ "step": 1750000
492
+ },
493
+ {
494
+ "epoch": 27.89,
495
+ "eval_loss": 0.875482976436615,
496
+ "eval_runtime": 2011.2204,
497
+ "eval_samples_per_second": 110.935,
498
+ "eval_steps_per_second": 1.734,
499
+ "step": 1750000
500
  }
501
  ],
502
  "logging_steps": 50000,
 
504
  "num_input_tokens_seen": 0,
505
  "num_train_epochs": 500,
506
  "save_steps": 50000,
507
+ "total_flos": 3.65807962939392e+18,
508
  "train_batch_size": 32,
509
  "trial_name": null,
510
  "trial_params": null