farleyknight committed on
Commit
18b68f8
1 Parent(s): 81f1609

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -0
  2. eval_results.json +8 -0
  3. train_results.json +7 -0
  4. trainer_state.json +154 -0
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.8333333333333334,
4
+ "eval_loss": 0.701846718788147,
5
+ "eval_runtime": 5.0813,
6
+ "eval_samples_per_second": 80.295,
7
+ "eval_steps_per_second": 10.037,
8
+ "train_loss": 1.276922034557303,
9
+ "train_runtime": 342.3245,
10
+ "train_samples_per_second": 33.725,
11
+ "train_steps_per_second": 4.221
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.8333333333333334,
4
+ "eval_loss": 0.701846718788147,
5
+ "eval_runtime": 5.0813,
6
+ "eval_samples_per_second": 80.295,
7
+ "eval_steps_per_second": 10.037
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "train_loss": 1.276922034557303,
4
+ "train_runtime": 342.3245,
5
+ "train_samples_per_second": 33.725,
6
+ "train_steps_per_second": 4.221
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 1445,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.35,
12
+ "learning_rate": 1.8615916955017305e-05,
13
+ "loss": 2.2312,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.69,
18
+ "learning_rate": 1.7231833910034604e-05,
19
+ "loss": 1.9053,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.7132352941176471,
25
+ "eval_loss": 1.368030309677124,
26
+ "eval_runtime": 5.1707,
27
+ "eval_samples_per_second": 78.907,
28
+ "eval_steps_per_second": 9.863,
29
+ "step": 289
30
+ },
31
+ {
32
+ "epoch": 1.04,
33
+ "learning_rate": 1.5847750865051904e-05,
34
+ "loss": 1.6154,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 1.38,
39
+ "learning_rate": 1.4463667820069205e-05,
40
+ "loss": 1.4013,
41
+ "step": 400
42
+ },
43
+ {
44
+ "epoch": 1.73,
45
+ "learning_rate": 1.3079584775086506e-05,
46
+ "loss": 1.2788,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.7965686274509803,
52
+ "eval_loss": 0.949878990650177,
53
+ "eval_runtime": 5.1726,
54
+ "eval_samples_per_second": 78.878,
55
+ "eval_steps_per_second": 9.86,
56
+ "step": 578
57
+ },
58
+ {
59
+ "epoch": 2.08,
60
+ "learning_rate": 1.1695501730103806e-05,
61
+ "loss": 1.2022,
62
+ "step": 600
63
+ },
64
+ {
65
+ "epoch": 2.42,
66
+ "learning_rate": 1.0311418685121109e-05,
67
+ "loss": 1.1687,
68
+ "step": 700
69
+ },
70
+ {
71
+ "epoch": 2.77,
72
+ "learning_rate": 8.92733564013841e-06,
73
+ "loss": 1.1232,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 3.0,
78
+ "eval_accuracy": 0.7279411764705882,
79
+ "eval_loss": 0.8679393529891968,
80
+ "eval_runtime": 5.1694,
81
+ "eval_samples_per_second": 78.925,
82
+ "eval_steps_per_second": 9.866,
83
+ "step": 867
84
+ },
85
+ {
86
+ "epoch": 3.11,
87
+ "learning_rate": 7.5432525951557104e-06,
88
+ "loss": 1.0408,
89
+ "step": 900
90
+ },
91
+ {
92
+ "epoch": 3.46,
93
+ "learning_rate": 6.159169550173011e-06,
94
+ "loss": 1.0277,
95
+ "step": 1000
96
+ },
97
+ {
98
+ "epoch": 3.81,
99
+ "learning_rate": 4.775086505190312e-06,
100
+ "loss": 1.0373,
101
+ "step": 1100
102
+ },
103
+ {
104
+ "epoch": 4.0,
105
+ "eval_accuracy": 0.8088235294117647,
106
+ "eval_loss": 0.732408344745636,
107
+ "eval_runtime": 5.1695,
108
+ "eval_samples_per_second": 78.924,
109
+ "eval_steps_per_second": 9.865,
110
+ "step": 1156
111
+ },
112
+ {
113
+ "epoch": 4.15,
114
+ "learning_rate": 3.3910034602076125e-06,
115
+ "loss": 1.0172,
116
+ "step": 1200
117
+ },
118
+ {
119
+ "epoch": 4.5,
120
+ "learning_rate": 2.0069204152249138e-06,
121
+ "loss": 0.9715,
122
+ "step": 1300
123
+ },
124
+ {
125
+ "epoch": 4.84,
126
+ "learning_rate": 6.228373702422146e-07,
127
+ "loss": 0.9658,
128
+ "step": 1400
129
+ },
130
+ {
131
+ "epoch": 5.0,
132
+ "eval_accuracy": 0.8333333333333334,
133
+ "eval_loss": 0.701846718788147,
134
+ "eval_runtime": 5.1658,
135
+ "eval_samples_per_second": 78.982,
136
+ "eval_steps_per_second": 9.873,
137
+ "step": 1445
138
+ },
139
+ {
140
+ "epoch": 5.0,
141
+ "step": 1445,
142
+ "total_flos": 8.947091674631578e+17,
143
+ "train_loss": 1.276922034557303,
144
+ "train_runtime": 342.3245,
145
+ "train_samples_per_second": 33.725,
146
+ "train_steps_per_second": 4.221
147
+ }
148
+ ],
149
+ "max_steps": 1445,
150
+ "num_train_epochs": 5,
151
+ "total_flos": 8.947091674631578e+17,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }