Shresthadev403 committed on
Commit
e273453
1 Parent(s): d0ed784

End of training

Browse files
README.md CHANGED
@@ -13,13 +13,13 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.6239
17
- - eval_accuracy: 0.889
18
- - eval_runtime: 15.4204
19
- - eval_samples_per_second: 64.849
20
- - eval_steps_per_second: 4.085
21
- - epoch: 0.16
22
- - step: 10
23
 
24
  ## Model description
25
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.5642
17
+ - eval_accuracy: 0.896
18
+ - eval_runtime: 15.4357
19
+ - eval_samples_per_second: 64.785
20
+ - eval_steps_per_second: 4.081
21
+ - epoch: 0.32
22
+ - step: 20
23
 
24
  ## Model description
25
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61232a5c671227f482ab55cfa03487da624786f4fb0cabe828f1ae2287d73e38
3
  size 343528508
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dd65022906a0ab5f82a88756f55cc79380928935ba4c8d864da8c5b57a3acff
3
  size 343528508
runs/Feb04_07-22-55_0bc3247a45bd/events.out.tfevents.1707031376.0bc3247a45bd.819.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8068d2aa1650743e6fb7f978142028a3034f52513763f6780cbe989e40d12c89
3
- size 9699
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87fcdb11e956e547ff64bf76406cffc727915dbe9e408587b61aa557e13e4440
3
+ size 10170
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16,
5
  "eval_steps": 10,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -22,13 +22,28 @@
22
  "eval_samples_per_second": 64.849,
23
  "eval_steps_per_second": 4.085,
24
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 10,
28
  "max_steps": 310,
29
  "num_train_epochs": 5,
30
  "save_steps": 500,
31
- "total_flos": 4.9638879166464e+16,
32
  "trial_name": null,
33
  "trial_params": null
34
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.32,
5
  "eval_steps": 10,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
22
  "eval_samples_per_second": 64.849,
23
  "eval_steps_per_second": 4.085,
24
  "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.32,
28
+ "learning_rate": 3.2258064516129034e-05,
29
+ "loss": 1.5408,
30
+ "step": 20
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "eval_accuracy": 0.896,
35
+ "eval_loss": 1.56419038772583,
36
+ "eval_runtime": 15.4357,
37
+ "eval_samples_per_second": 64.785,
38
+ "eval_steps_per_second": 4.081,
39
+ "step": 20
40
  }
41
  ],
42
  "logging_steps": 10,
43
  "max_steps": 310,
44
  "num_train_epochs": 5,
45
  "save_steps": 500,
46
+ "total_flos": 9.9277758332928e+16,
47
  "trial_name": null,
48
  "trial_params": null
49
  }