w11wo commited on
Commit
c36424d
·
1 Parent(s): e4fecd6

added logs

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.19193305201784394,
4
+ "train_runtime": 3656.7872,
5
+ "train_samples": 108132,
6
+ "train_samples_per_second": 295.702,
7
+ "train_steps_per_second": 2.311
8
+ }
logs/byt5-small-cmudict/log.json ADDED
The diff for this file is too large to render. See raw diff
 
logs/byt5-small-cmudict/metrics.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ PER: 0.0665
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.19193305201784394,
4
+ "train_runtime": 3656.7872,
5
+ "train_samples": 108132,
6
+ "train_samples_per_second": 295.702,
7
+ "train_steps_per_second": 2.311
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 8450,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 0.0002,
13
+ "loss": 1.1569,
14
+ "step": 845
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_gen_len": 14.5711,
19
+ "eval_loss": 0.14331915974617004,
20
+ "eval_per": 0.1751,
21
+ "eval_runtime": 93.1351,
22
+ "eval_samples_per_second": 58.485,
23
+ "eval_steps_per_second": 1.836,
24
+ "step": 845
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "learning_rate": 0.00017777777777777779,
29
+ "loss": 0.1409,
30
+ "step": 1690
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_gen_len": 14.5844,
35
+ "eval_loss": 0.10491351783275604,
36
+ "eval_per": 0.1383,
37
+ "eval_runtime": 92.5515,
38
+ "eval_samples_per_second": 58.854,
39
+ "eval_steps_per_second": 1.848,
40
+ "step": 1690
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "learning_rate": 0.00015555555555555556,
45
+ "loss": 0.1073,
46
+ "step": 2535
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_gen_len": 14.6058,
51
+ "eval_loss": 0.09005340933799744,
52
+ "eval_per": 0.1281,
53
+ "eval_runtime": 91.8497,
54
+ "eval_samples_per_second": 59.303,
55
+ "eval_steps_per_second": 1.862,
56
+ "step": 2535
57
+ },
58
+ {
59
+ "epoch": 4.0,
60
+ "learning_rate": 0.00013333333333333334,
61
+ "loss": 0.0927,
62
+ "step": 3380
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_gen_len": 14.5798,
67
+ "eval_loss": 0.08551742881536484,
68
+ "eval_per": 0.1237,
69
+ "eval_runtime": 92.071,
70
+ "eval_samples_per_second": 59.161,
71
+ "eval_steps_per_second": 1.857,
72
+ "step": 3380
73
+ },
74
+ {
75
+ "epoch": 5.0,
76
+ "learning_rate": 0.00011111111111111112,
77
+ "loss": 0.0833,
78
+ "step": 4225
79
+ },
80
+ {
81
+ "epoch": 5.0,
82
+ "eval_gen_len": 14.6139,
83
+ "eval_loss": 0.08394311368465424,
84
+ "eval_per": 0.1207,
85
+ "eval_runtime": 92.0384,
86
+ "eval_samples_per_second": 59.182,
87
+ "eval_steps_per_second": 1.858,
88
+ "step": 4225
89
+ },
90
+ {
91
+ "epoch": 6.0,
92
+ "learning_rate": 8.888888888888889e-05,
93
+ "loss": 0.0762,
94
+ "step": 5070
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "eval_gen_len": 14.5913,
99
+ "eval_loss": 0.08085508644580841,
100
+ "eval_per": 0.118,
101
+ "eval_runtime": 91.0725,
102
+ "eval_samples_per_second": 59.809,
103
+ "eval_steps_per_second": 1.878,
104
+ "step": 5070
105
+ },
106
+ {
107
+ "epoch": 7.0,
108
+ "learning_rate": 6.666666666666667e-05,
109
+ "loss": 0.0707,
110
+ "step": 5915
111
+ },
112
+ {
113
+ "epoch": 7.0,
114
+ "eval_gen_len": 14.6055,
115
+ "eval_loss": 0.08100058883428574,
116
+ "eval_per": 0.1171,
117
+ "eval_runtime": 90.5081,
118
+ "eval_samples_per_second": 60.182,
119
+ "eval_steps_per_second": 1.889,
120
+ "step": 5915
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "learning_rate": 4.4444444444444447e-05,
125
+ "loss": 0.0667,
126
+ "step": 6760
127
+ },
128
+ {
129
+ "epoch": 8.0,
130
+ "eval_gen_len": 14.6025,
131
+ "eval_loss": 0.07957806438207626,
132
+ "eval_per": 0.1166,
133
+ "eval_runtime": 91.0929,
134
+ "eval_samples_per_second": 59.796,
135
+ "eval_steps_per_second": 1.877,
136
+ "step": 6760
137
+ },
138
+ {
139
+ "epoch": 9.0,
140
+ "learning_rate": 2.2222222222222223e-05,
141
+ "loss": 0.0638,
142
+ "step": 7605
143
+ },
144
+ {
145
+ "epoch": 9.0,
146
+ "eval_gen_len": 14.6115,
147
+ "eval_loss": 0.07928925007581711,
148
+ "eval_per": 0.115,
149
+ "eval_runtime": 90.6785,
150
+ "eval_samples_per_second": 60.069,
151
+ "eval_steps_per_second": 1.886,
152
+ "step": 7605
153
+ },
154
+ {
155
+ "epoch": 10.0,
156
+ "learning_rate": 0.0,
157
+ "loss": 0.0609,
158
+ "step": 8450
159
+ },
160
+ {
161
+ "epoch": 10.0,
162
+ "eval_gen_len": 14.6046,
163
+ "eval_loss": 0.0802011638879776,
164
+ "eval_per": 0.1151,
165
+ "eval_runtime": 90.409,
166
+ "eval_samples_per_second": 60.248,
167
+ "eval_steps_per_second": 1.891,
168
+ "step": 8450
169
+ },
170
+ {
171
+ "epoch": 10.0,
172
+ "step": 8450,
173
+ "total_flos": 1.2418278660440064e+17,
174
+ "train_loss": 0.19193305201784394,
175
+ "train_runtime": 3656.7872,
176
+ "train_samples_per_second": 295.702,
177
+ "train_steps_per_second": 2.311
178
+ }
179
+ ],
180
+ "max_steps": 8450,
181
+ "num_train_epochs": 10,
182
+ "total_flos": 1.2418278660440064e+17,
183
+ "trial_name": null,
184
+ "trial_params": null
185
+ }