AlekseyKorshuk commited on
Commit
2f6f192
1 Parent(s): fcd73b6

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +15 -0
  2. eval_results.json +10 -0
  3. train_results.json +8 -0
  4. trainer_state.json +61 -0
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.23,
3
+ "eval_accuracy": 0.986989308918276,
4
+ "eval_loss": 0.0699462890625,
5
+ "eval_runtime": 192.7346,
6
+ "eval_samples": 11583,
7
+ "eval_samples_per_second": 60.098,
8
+ "eval_steps_per_second": 1.878,
9
+ "perplexity": 1.0724505773813178,
10
+ "train_loss": 0.12210027694702148,
11
+ "train_runtime": 1976.8574,
12
+ "train_samples": 220074,
13
+ "train_samples_per_second": 25.9,
14
+ "train_steps_per_second": 0.202
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.23,
3
+ "eval_accuracy": 0.986989308918276,
4
+ "eval_loss": 0.0699462890625,
5
+ "eval_runtime": 192.7346,
6
+ "eval_samples": 11583,
7
+ "eval_samples_per_second": 60.098,
8
+ "eval_steps_per_second": 1.878,
9
+ "perplexity": 1.0724505773813178
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.23,
3
+ "train_loss": 0.12210027694702148,
4
+ "train_runtime": 1976.8574,
5
+ "train_samples": 220074,
6
+ "train_samples_per_second": 25.9,
7
+ "train_steps_per_second": 0.202
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.23262576330328583,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "eval_accuracy": 0.9866142691167129,
13
+ "eval_loss": 0.07293701171875,
14
+ "eval_runtime": 192.7185,
15
+ "eval_samples_per_second": 60.103,
16
+ "eval_steps_per_second": 1.878,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.12,
21
+ "eval_accuracy": 0.9868109877885048,
22
+ "eval_loss": 0.07159423828125,
23
+ "eval_runtime": 192.6928,
24
+ "eval_samples_per_second": 60.111,
25
+ "eval_steps_per_second": 1.879,
26
+ "step": 200
27
+ },
28
+ {
29
+ "epoch": 0.17,
30
+ "eval_accuracy": 0.9869265209767002,
31
+ "eval_loss": 0.07049560546875,
32
+ "eval_runtime": 193.5043,
33
+ "eval_samples_per_second": 59.859,
34
+ "eval_steps_per_second": 1.871,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 0.23,
39
+ "eval_accuracy": 0.986989308918276,
40
+ "eval_loss": 0.0699462890625,
41
+ "eval_runtime": 192.6707,
42
+ "eval_samples_per_second": 60.118,
43
+ "eval_steps_per_second": 1.879,
44
+ "step": 400
45
+ },
46
+ {
47
+ "epoch": 0.23,
48
+ "step": 400,
49
+ "total_flos": 4.12601633927594e+17,
50
+ "train_loss": 0.12210027694702148,
51
+ "train_runtime": 1976.8574,
52
+ "train_samples_per_second": 25.9,
53
+ "train_steps_per_second": 0.202
54
+ }
55
+ ],
56
+ "max_steps": 400,
57
+ "num_train_epochs": 1,
58
+ "total_flos": 4.12601633927594e+17,
59
+ "trial_name": null,
60
+ "trial_params": null
61
+ }