stray-light committed on
Commit
29a6b65
1 Parent(s): dec1c8d

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9949109414758269,
4
+ "eval_loss": 0.05667030066251755,
5
+ "eval_runtime": 109.2692,
6
+ "eval_samples_per_second": 21.58,
7
+ "eval_steps_per_second": 0.677,
8
+ "total_flos": 2.349294757953454e+18,
9
+ "train_loss": 0.41549516755317334,
10
+ "train_runtime": 2328.1847,
11
+ "train_samples_per_second": 13.02,
12
+ "train_steps_per_second": 0.102
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9949109414758269,
4
+ "eval_loss": 0.05667030066251755,
5
+ "eval_runtime": 109.2692,
6
+ "eval_samples_per_second": 21.58,
7
+ "eval_steps_per_second": 0.677
8
+ }
runs/Feb08_08-44-49_fea166a63734/events.out.tfevents.1707384389.fea166a63734.313.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff07aee4f71a48fd4a1b1af3331928fcdf729b48527553cc8b15775aae688240
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 2.349294757953454e+18,
4
+ "train_loss": 0.41549516755317334,
5
+ "train_runtime": 2328.1847,
6
+ "train_samples_per_second": 13.02,
7
+ "train_steps_per_second": 0.102
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9949109414758269,
3
+ "best_model_checkpoint": "dit-base-finetuned-rvlcdip-finetuned-custom-first/checkpoint-237",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 237,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13,
13
+ "learning_rate": 2.0833333333333336e-05,
14
+ "loss": 1.9058,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "learning_rate": 4.166666666666667e-05,
20
+ "loss": 1.6887,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.38,
25
+ "learning_rate": 4.8591549295774653e-05,
26
+ "loss": 1.4773,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.51,
31
+ "learning_rate": 4.624413145539906e-05,
32
+ "loss": 1.1597,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.63,
37
+ "learning_rate": 4.389671361502348e-05,
38
+ "loss": 0.792,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.76,
43
+ "learning_rate": 4.154929577464789e-05,
44
+ "loss": 0.5423,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.89,
49
+ "learning_rate": 3.9201877934272305e-05,
50
+ "loss": 0.3686,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 1.0,
55
+ "eval_accuracy": 0.9745547073791349,
56
+ "eval_loss": 0.2355559766292572,
57
+ "eval_runtime": 120.4299,
58
+ "eval_samples_per_second": 19.58,
59
+ "eval_steps_per_second": 0.614,
60
+ "step": 79
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "learning_rate": 3.6854460093896714e-05,
65
+ "loss": 0.2791,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 1.14,
70
+ "learning_rate": 3.450704225352113e-05,
71
+ "loss": 0.2155,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 1.27,
76
+ "learning_rate": 3.215962441314554e-05,
77
+ "loss": 0.1837,
78
+ "step": 100
79
+ },
80
+ {
81
+ "epoch": 1.39,
82
+ "learning_rate": 2.9812206572769952e-05,
83
+ "loss": 0.1594,
84
+ "step": 110
85
+ },
86
+ {
87
+ "epoch": 1.52,
88
+ "learning_rate": 2.746478873239437e-05,
89
+ "loss": 0.1428,
90
+ "step": 120
91
+ },
92
+ {
93
+ "epoch": 1.65,
94
+ "learning_rate": 2.511737089201878e-05,
95
+ "loss": 0.1049,
96
+ "step": 130
97
+ },
98
+ {
99
+ "epoch": 1.77,
100
+ "learning_rate": 2.2769953051643194e-05,
101
+ "loss": 0.0977,
102
+ "step": 140
103
+ },
104
+ {
105
+ "epoch": 1.9,
106
+ "learning_rate": 2.0422535211267607e-05,
107
+ "loss": 0.0891,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 2.0,
112
+ "eval_accuracy": 0.9936386768447837,
113
+ "eval_loss": 0.07920637726783752,
114
+ "eval_runtime": 116.9591,
115
+ "eval_samples_per_second": 20.161,
116
+ "eval_steps_per_second": 0.633,
117
+ "step": 158
118
+ },
119
+ {
120
+ "epoch": 2.03,
121
+ "learning_rate": 1.807511737089202e-05,
122
+ "loss": 0.0908,
123
+ "step": 160
124
+ },
125
+ {
126
+ "epoch": 2.15,
127
+ "learning_rate": 1.5727699530516433e-05,
128
+ "loss": 0.0788,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 2.28,
133
+ "learning_rate": 1.3380281690140845e-05,
134
+ "loss": 0.0866,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 2.41,
139
+ "learning_rate": 1.1032863849765258e-05,
140
+ "loss": 0.0754,
141
+ "step": 190
142
+ },
143
+ {
144
+ "epoch": 2.53,
145
+ "learning_rate": 8.685446009389673e-06,
146
+ "loss": 0.0684,
147
+ "step": 200
148
+ },
149
+ {
150
+ "epoch": 2.66,
151
+ "learning_rate": 6.338028169014085e-06,
152
+ "loss": 0.0585,
153
+ "step": 210
154
+ },
155
+ {
156
+ "epoch": 2.78,
157
+ "learning_rate": 3.990610328638498e-06,
158
+ "loss": 0.0715,
159
+ "step": 220
160
+ },
161
+ {
162
+ "epoch": 2.91,
163
+ "learning_rate": 1.643192488262911e-06,
164
+ "loss": 0.0652,
165
+ "step": 230
166
+ },
167
+ {
168
+ "epoch": 3.0,
169
+ "eval_accuracy": 0.9949109414758269,
170
+ "eval_loss": 0.05667030066251755,
171
+ "eval_runtime": 116.045,
172
+ "eval_samples_per_second": 20.32,
173
+ "eval_steps_per_second": 0.638,
174
+ "step": 237
175
+ },
176
+ {
177
+ "epoch": 3.0,
178
+ "step": 237,
179
+ "total_flos": 2.349294757953454e+18,
180
+ "train_loss": 0.41549516755317334,
181
+ "train_runtime": 2328.1847,
182
+ "train_samples_per_second": 13.02,
183
+ "train_steps_per_second": 0.102
184
+ }
185
+ ],
186
+ "logging_steps": 10,
187
+ "max_steps": 237,
188
+ "num_input_tokens_seen": 0,
189
+ "num_train_epochs": 3,
190
+ "save_steps": 500,
191
+ "total_flos": 2.349294757953454e+18,
192
+ "train_batch_size": 32,
193
+ "trial_name": null,
194
+ "trial_params": null
195
+ }