AlekseyKorshuk commited on
Commit
7b40374
1 Parent(s): c7086bd

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +175 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.1025146484375,
4
+ "train_runtime": 54.0138,
5
+ "train_samples": 303,
6
+ "train_samples_per_second": 5.61,
7
+ "train_steps_per_second": 0.185
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.1025146484375,
4
+ "train_runtime": 54.0138,
5
+ "train_samples": 303,
6
+ "train_samples_per_second": 5.61,
7
+ "train_steps_per_second": 0.185
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 10,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "learning_rate": 3e-05,
13
+ "loss": 2.0841,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "eval_accuracy": 0.07586472641552201,
19
+ "eval_loss": 2.025390625,
20
+ "eval_runtime": 1.7457,
21
+ "eval_samples_per_second": 32.651,
22
+ "eval_steps_per_second": 1.146,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.2,
27
+ "learning_rate": 3e-05,
28
+ "loss": 2.062,
29
+ "step": 2
30
+ },
31
+ {
32
+ "epoch": 0.2,
33
+ "eval_accuracy": 0.07586472641552201,
34
+ "eval_loss": 2.025390625,
35
+ "eval_runtime": 2.5631,
36
+ "eval_samples_per_second": 22.239,
37
+ "eval_steps_per_second": 0.78,
38
+ "step": 2
39
+ },
40
+ {
41
+ "epoch": 0.3,
42
+ "learning_rate": 2.9265847744427305e-05,
43
+ "loss": 2.1509,
44
+ "step": 3
45
+ },
46
+ {
47
+ "epoch": 0.3,
48
+ "eval_accuracy": 0.07613672423953942,
49
+ "eval_loss": 1.994140625,
50
+ "eval_runtime": 1.9455,
51
+ "eval_samples_per_second": 29.298,
52
+ "eval_steps_per_second": 1.028,
53
+ "step": 3
54
+ },
55
+ {
56
+ "epoch": 0.4,
57
+ "learning_rate": 2.7135254915624213e-05,
58
+ "loss": 2.1206,
59
+ "step": 4
60
+ },
61
+ {
62
+ "epoch": 0.4,
63
+ "eval_accuracy": 0.07563806156217417,
64
+ "eval_loss": 1.994140625,
65
+ "eval_runtime": 2.5486,
66
+ "eval_samples_per_second": 22.365,
67
+ "eval_steps_per_second": 0.785,
68
+ "step": 4
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "learning_rate": 2.3816778784387097e-05,
73
+ "loss": 2.2087,
74
+ "step": 5
75
+ },
76
+ {
77
+ "epoch": 0.5,
78
+ "eval_accuracy": 0.07568339453284374,
79
+ "eval_loss": 1.994140625,
80
+ "eval_runtime": 2.1509,
81
+ "eval_samples_per_second": 26.501,
82
+ "eval_steps_per_second": 0.93,
83
+ "step": 5
84
+ },
85
+ {
86
+ "epoch": 0.6,
87
+ "learning_rate": 1.963525491562421e-05,
88
+ "loss": 2.0337,
89
+ "step": 6
90
+ },
91
+ {
92
+ "epoch": 0.6,
93
+ "eval_accuracy": 0.07550206265016547,
94
+ "eval_loss": 1.990234375,
95
+ "eval_runtime": 1.7684,
96
+ "eval_samples_per_second": 32.232,
97
+ "eval_steps_per_second": 1.131,
98
+ "step": 6
99
+ },
100
+ {
101
+ "epoch": 0.7,
102
+ "learning_rate": 1.5e-05,
103
+ "loss": 2.026,
104
+ "step": 7
105
+ },
106
+ {
107
+ "epoch": 0.7,
108
+ "eval_accuracy": 0.07552472913550025,
109
+ "eval_loss": 1.9853515625,
110
+ "eval_runtime": 1.9602,
111
+ "eval_samples_per_second": 29.079,
112
+ "eval_steps_per_second": 1.02,
113
+ "step": 7
114
+ },
115
+ {
116
+ "epoch": 0.8,
117
+ "learning_rate": 1.036474508437579e-05,
118
+ "loss": 2.1879,
119
+ "step": 8
120
+ },
121
+ {
122
+ "epoch": 0.8,
123
+ "eval_accuracy": 0.07557006210616982,
124
+ "eval_loss": 1.9833984375,
125
+ "eval_runtime": 1.7437,
126
+ "eval_samples_per_second": 32.69,
127
+ "eval_steps_per_second": 1.147,
128
+ "step": 8
129
+ },
130
+ {
131
+ "epoch": 0.9,
132
+ "learning_rate": 6.1832212156129045e-06,
133
+ "loss": 2.1052,
134
+ "step": 9
135
+ },
136
+ {
137
+ "epoch": 0.9,
138
+ "eval_accuracy": 0.07538873022349155,
139
+ "eval_loss": 1.982421875,
140
+ "eval_runtime": 2.3577,
141
+ "eval_samples_per_second": 24.177,
142
+ "eval_steps_per_second": 0.848,
143
+ "step": 9
144
+ },
145
+ {
146
+ "epoch": 1.0,
147
+ "learning_rate": 2.86474508437579e-06,
148
+ "loss": 2.046,
149
+ "step": 10
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.07541139670882632,
154
+ "eval_loss": 1.98046875,
155
+ "eval_runtime": 2.5554,
156
+ "eval_samples_per_second": 22.306,
157
+ "eval_steps_per_second": 0.783,
158
+ "step": 10
159
+ },
160
+ {
161
+ "epoch": 1.0,
162
+ "step": 10,
163
+ "total_flos": 945593647104.0,
164
+ "train_loss": 2.1025146484375,
165
+ "train_runtime": 54.0138,
166
+ "train_samples_per_second": 5.61,
167
+ "train_steps_per_second": 0.185
168
+ }
169
+ ],
170
+ "max_steps": 10,
171
+ "num_train_epochs": 1,
172
+ "total_flos": 945593647104.0,
173
+ "trial_name": null,
174
+ "trial_params": null
175
+ }