kaisugi commited on
Commit
59bfbd2
1 Parent(s): 00a8e14

delete unnecessary files

Browse files
all_results.json DELETED
@@ -1,34 +0,0 @@
1
- {
2
- "epoch": 10.0,
3
- "eval_bleu": 20.563,
4
- "eval_gen_len": 28.4174,
5
- "eval_loss": 1.523200511932373,
6
- "eval_mem_cpu_alloc_delta": 11807807,
7
- "eval_mem_cpu_peaked_delta": 8618611,
8
- "eval_mem_gpu_alloc_delta": 0,
9
- "eval_mem_gpu_peaked_delta": 2386463232,
10
- "eval_runtime": 222.6162,
11
- "eval_samples": 2753,
12
- "eval_samples_per_second": 12.367,
13
- "init_mem_cpu_alloc_delta": 2595413,
14
- "init_mem_cpu_peaked_delta": 18258,
15
- "init_mem_gpu_alloc_delta": 242026496,
16
- "init_mem_gpu_peaked_delta": 0,
17
- "test_bleu": 37.3827,
18
- "test_gen_len": 30.7292,
19
- "test_loss": 1.712681531906128,
20
- "test_mem_cpu_alloc_delta": 9661242,
21
- "test_mem_cpu_peaked_delta": 7664493,
22
- "test_mem_gpu_alloc_delta": 0,
23
- "test_mem_gpu_peaked_delta": 2097349120,
24
- "test_runtime": 191.4724,
25
- "test_samples": 2345,
26
- "test_samples_per_second": 12.247,
27
- "train_mem_cpu_alloc_delta": 1526492,
28
- "train_mem_cpu_peaked_delta": 67831945,
29
- "train_mem_gpu_alloc_delta": 968303104,
30
- "train_mem_gpu_peaked_delta": 2622328832,
31
- "train_runtime": 352.0036,
32
- "train_samples": 28883,
33
- "train_samples_per_second": 51.306
34
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "epoch": 10.0,
3
- "eval_bleu": 20.563,
4
- "eval_gen_len": 28.4174,
5
- "eval_loss": 1.523200511932373,
6
- "eval_mem_cpu_alloc_delta": 11807807,
7
- "eval_mem_cpu_peaked_delta": 8618611,
8
- "eval_mem_gpu_alloc_delta": 0,
9
- "eval_mem_gpu_peaked_delta": 2386463232,
10
- "eval_runtime": 222.6162,
11
- "eval_samples": 2753,
12
- "eval_samples_per_second": 12.367
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_generations.txt DELETED
The diff for this file is too large to render. See raw diff
 
test_results.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "test_bleu": 37.3827,
3
- "test_gen_len": 30.7292,
4
- "test_loss": 1.712681531906128,
5
- "test_mem_cpu_alloc_delta": 9661242,
6
- "test_mem_cpu_peaked_delta": 7664493,
7
- "test_mem_gpu_alloc_delta": 0,
8
- "test_mem_gpu_peaked_delta": 2097349120,
9
- "test_runtime": 191.4724,
10
- "test_samples": 2345,
11
- "test_samples_per_second": 12.247
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
train_results.json DELETED
@@ -1,14 +0,0 @@
1
- {
2
- "epoch": 10.0,
3
- "init_mem_cpu_alloc_delta": 2595413,
4
- "init_mem_cpu_peaked_delta": 18258,
5
- "init_mem_gpu_alloc_delta": 242026496,
6
- "init_mem_gpu_peaked_delta": 0,
7
- "train_mem_cpu_alloc_delta": 1526492,
8
- "train_mem_cpu_peaked_delta": 67831945,
9
- "train_mem_gpu_alloc_delta": 968303104,
10
- "train_mem_gpu_peaked_delta": 2622328832,
11
- "train_runtime": 352.0036,
12
- "train_samples": 28883,
13
- "train_samples_per_second": 51.306
14
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trainer_state.json DELETED
@@ -1,239 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 18060,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.28,
12
- "learning_rate": 4.8615725359911405e-05,
13
- "loss": 2.4674,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.55,
18
- "learning_rate": 4.7231450719822815e-05,
19
- "loss": 2.2643,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.83,
24
- "learning_rate": 4.5847176079734225e-05,
25
- "loss": 2.1852,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 1.11,
30
- "learning_rate": 4.446290143964563e-05,
31
- "loss": 2.1008,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 1.38,
36
- "learning_rate": 4.307862679955704e-05,
37
- "loss": 2.0497,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 1.66,
42
- "learning_rate": 4.169435215946844e-05,
43
- "loss": 2.0143,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 1.94,
48
- "learning_rate": 4.0310077519379843e-05,
49
- "loss": 1.9799,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 2.21,
54
- "learning_rate": 3.892580287929125e-05,
55
- "loss": 1.9401,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 2.49,
60
- "learning_rate": 3.754152823920266e-05,
61
- "loss": 1.9205,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 2.77,
66
- "learning_rate": 3.6157253599114066e-05,
67
- "loss": 1.9005,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 3.05,
72
- "learning_rate": 3.477297895902547e-05,
73
- "loss": 1.8781,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 3.32,
78
- "learning_rate": 3.338870431893688e-05,
79
- "loss": 1.8538,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 3.6,
84
- "learning_rate": 3.200442967884829e-05,
85
- "loss": 1.8398,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 3.88,
90
- "learning_rate": 3.062015503875969e-05,
91
- "loss": 1.822,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 4.15,
96
- "learning_rate": 2.92358803986711e-05,
97
- "loss": 1.8072,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 4.43,
102
- "learning_rate": 2.7851605758582504e-05,
103
- "loss": 1.8134,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 4.71,
108
- "learning_rate": 2.646733111849391e-05,
109
- "loss": 1.7785,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 4.98,
114
- "learning_rate": 2.5083056478405313e-05,
115
- "loss": 1.7787,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 5.26,
120
- "learning_rate": 2.3698781838316723e-05,
121
- "loss": 1.7587,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 5.54,
126
- "learning_rate": 2.231450719822813e-05,
127
- "loss": 1.7583,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 5.81,
132
- "learning_rate": 2.0930232558139536e-05,
133
- "loss": 1.7558,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 6.09,
138
- "learning_rate": 1.9545957918050942e-05,
139
- "loss": 1.7438,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 6.37,
144
- "learning_rate": 1.816168327796235e-05,
145
- "loss": 1.7299,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 6.64,
150
- "learning_rate": 1.6777408637873755e-05,
151
- "loss": 1.753,
152
- "step": 12000
153
- },
154
- {
155
- "epoch": 6.92,
156
- "learning_rate": 1.539313399778516e-05,
157
- "loss": 1.6899,
158
- "step": 12500
159
- },
160
- {
161
- "epoch": 7.2,
162
- "learning_rate": 1.4008859357696569e-05,
163
- "loss": 1.7248,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 7.48,
168
- "learning_rate": 1.2624584717607974e-05,
169
- "loss": 1.6908,
170
- "step": 13500
171
- },
172
- {
173
- "epoch": 7.75,
174
- "learning_rate": 1.1240310077519382e-05,
175
- "loss": 1.6977,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 8.03,
180
- "learning_rate": 9.856035437430786e-06,
181
- "loss": 1.6908,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 8.31,
186
- "learning_rate": 8.471760797342193e-06,
187
- "loss": 1.7079,
188
- "step": 15000
189
- },
190
- {
191
- "epoch": 8.58,
192
- "learning_rate": 7.087486157253599e-06,
193
- "loss": 1.6921,
194
- "step": 15500
195
- },
196
- {
197
- "epoch": 8.86,
198
- "learning_rate": 5.703211517165006e-06,
199
- "loss": 1.6902,
200
- "step": 16000
201
- },
202
- {
203
- "epoch": 9.14,
204
- "learning_rate": 4.318936877076412e-06,
205
- "loss": 1.6771,
206
- "step": 16500
207
- },
208
- {
209
- "epoch": 9.41,
210
- "learning_rate": 2.9346622369878186e-06,
211
- "loss": 1.6853,
212
- "step": 17000
213
- },
214
- {
215
- "epoch": 9.69,
216
- "learning_rate": 1.550387596899225e-06,
217
- "loss": 1.6893,
218
- "step": 17500
219
- },
220
- {
221
- "epoch": 9.97,
222
- "learning_rate": 1.6611295681063123e-07,
223
- "loss": 1.6818,
224
- "step": 18000
225
- },
226
- {
227
- "epoch": 10.0,
228
- "step": 18060,
229
- "total_flos": 1.1559596046910464e+16,
230
- "train_runtime": 352.0036,
231
- "train_samples_per_second": 51.306
232
- }
233
- ],
234
- "max_steps": 18060,
235
- "num_train_epochs": 10,
236
- "total_flos": 1.1559596046910464e+16,
237
- "trial_name": null,
238
- "trial_params": null
239
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1890a229aea27e81568a5b8407044e323a2e668222918f7d20c8b288bbc676e
3
- size 2351