winddude committed
Commit f3a74a7
1 parent: a4f9e40

Upload 3 files


add trained lora

Files changed (3)
  1. adapter_config.json +17 -0
  2. adapter_model.bin +3 -0
  3. trainer_state.json +341 -0
adapter_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "base_model_name_or_path": "/home/llmadmin/models/my_llama_hf/llama_hf_7B",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "lora_alpha": 64,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 32,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
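
For context, this config describes a PEFT LoRA adapter with rank r=32 and lora_alpha=64 (a scaling factor of 2) applied to the q_proj and v_proj projections of a LLaMA-7B base model, for causal language modeling. A minimal sketch of how such an adapter could be loaded is below; the base-model path is copied from this config and points to the author's local checkout, and the adapter directory name is an assumption for illustration, not part of this commit.

# Minimal sketch of loading this LoRA adapter with PEFT.
# Assumptions: the files from this commit sit in ./lora_adapter, and the
# base-model path from adapter_config.json is replaced with a local
# LLaMA-7B checkpoint available to the reader.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_path = "/home/llmadmin/models/my_llama_hf/llama_hf_7B"  # from adapter_config.json
adapter_dir = "./lora_adapter"  # hypothetical local path to this repo's files

tokenizer = AutoTokenizer.from_pretrained(base_model_path)
base = AutoModelForCausalLM.from_pretrained(base_model_path)

# Wrap the base model with the trained q_proj/v_proj LoRA weights (r=32, alpha=64).
model = PeftModel.from_pretrained(base, adapter_dir)
model.eval()
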
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a74aee00df877f98d1fa126b04fd13f89db5f962a1c4b80d95a9f7ffea87bf9
+ size 67154893
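
The entry above is a Git LFS pointer rather than the binary adapter weights; the real file is fetched separately (for example with git lfs pull). A small sketch for checking a downloaded copy against the digest and size recorded in the pointer; the local filename is assumed.

# Verify a downloaded adapter_model.bin against the LFS pointer above.
# The expected SHA-256 and size come straight from the pointer file;
# the local path "adapter_model.bin" is an assumption.
import hashlib

EXPECTED_SHA256 = "9a74aee00df877f98d1fa126b04fd13f89db5f962a1c4b80d95a9f7ffea87bf9"
EXPECTED_SIZE = 67154893  # bytes

with open("adapter_model.bin", "rb") as f:
    data = f.read()

assert len(data) == EXPECTED_SIZE, f"unexpected size: {len(data)} bytes"
assert hashlib.sha256(data).hexdigest() == EXPECTED_SHA256, "SHA-256 mismatch"
print("adapter_model.bin matches the LFS pointer")
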
trainer_state.json ADDED
@@ -0,0 +1,341 @@
+ {
+   "best_metric": 1.8454276323318481,
+   "best_model_checkpoint": "/home/llmadmin/lawrence/autoWSB/data/tunned_v4/checkpoint-1000",
+   "epoch": 2.9994666666666667,
+   "global_step": 4218,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.07,
+       "learning_rate": 8.333333333333333e-05,
+       "loss": 1.9887,
+       "step": 100
+     },
+     {
+       "epoch": 0.14,
+       "learning_rate": 0.00016666666666666666,
+       "loss": 1.7819,
+       "step": 200
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 0.00025,
+       "loss": 1.7572,
+       "step": 300
+     },
+     {
+       "epoch": 0.28,
+       "learning_rate": 0.00024400444104134763,
+       "loss": 3.1654,
+       "step": 400
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 0.00023762618683001533,
+       "loss": 2.402,
+       "step": 500
+     },
+     {
+       "epoch": 0.36,
+       "eval_loss": 2.0042166709899902,
+       "eval_runtime": 1078.6167,
+       "eval_samples_per_second": 23.178,
+       "eval_steps_per_second": 0.579,
+       "step": 500
+     },
+     {
+       "epoch": 0.43,
+       "learning_rate": 0.000231247932618683,
+       "loss": 1.9543,
+       "step": 600
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 0.0002248696784073507,
+       "loss": 1.905,
+       "step": 700
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 0.00021849142419601836,
+       "loss": 1.8835,
+       "step": 800
+     },
+     {
+       "epoch": 0.64,
+       "learning_rate": 0.00021211316998468607,
+       "loss": 1.8628,
+       "step": 900
+     },
+     {
+       "epoch": 0.71,
+       "learning_rate": 0.00020573491577335375,
+       "loss": 1.8463,
+       "step": 1000
+     },
+     {
+       "epoch": 0.71,
+       "eval_loss": 1.8454276323318481,
+       "eval_runtime": 1080.8608,
+       "eval_samples_per_second": 23.13,
+       "eval_steps_per_second": 0.578,
+       "step": 1000
+     },
+     {
+       "epoch": 0.78,
+       "learning_rate": 0.00019935666156202145,
+       "loss": 1.8654,
+       "step": 1100
+     },
+     {
+       "epoch": 0.85,
+       "learning_rate": 0.00019297840735068913,
+       "loss": 1.8511,
+       "step": 1200
+     },
+     {
+       "epoch": 0.92,
+       "learning_rate": 0.00018660015313935684,
+       "loss": 1.8424,
+       "step": 1300
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 0.0001802218989280245,
+       "loss": 1.8579,
+       "step": 1400
+     },
+     {
+       "epoch": 1.07,
+       "learning_rate": 0.00017843598774885147,
+       "loss": 23.4765,
+       "step": 1500
+     },
+     {
+       "epoch": 1.07,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1165.3618,
+       "eval_samples_per_second": 21.453,
+       "eval_steps_per_second": 0.536,
+       "step": 1500
+     },
+     {
+       "epoch": 1.14,
+       "learning_rate": 0.00017767059724349159,
+       "loss": 26.8324,
+       "step": 1600
+     },
+     {
+       "epoch": 1.21,
+       "learning_rate": 0.00017129234303215926,
+       "loss": 33.5803,
+       "step": 1700
+     },
+     {
+       "epoch": 1.28,
+       "learning_rate": 0.00016491408882082697,
+       "loss": 75831.04,
+       "step": 1800
+     },
+     {
+       "epoch": 1.35,
+       "learning_rate": 0.00015853583460949465,
+       "loss": 112434.02,
+       "step": 1900
+     },
+     {
+       "epoch": 1.42,
+       "learning_rate": 0.00015215758039816232,
+       "loss": 9892.82,
+       "step": 2000
+     },
+     {
+       "epoch": 1.42,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1166.8445,
+       "eval_samples_per_second": 21.425,
+       "eval_steps_per_second": 0.536,
+       "step": 2000
+     },
+     {
+       "epoch": 1.49,
+       "learning_rate": 0.00014577932618683,
+       "loss": 9040.6219,
+       "step": 2100
+     },
+     {
+       "epoch": 1.56,
+       "learning_rate": 0.0001394010719754977,
+       "loss": 2015.4547,
+       "step": 2200
+     },
+     {
+       "epoch": 1.64,
+       "learning_rate": 0.0001330228177641654,
+       "loss": 16854.6437,
+       "step": 2300
+     },
+     {
+       "epoch": 1.71,
+       "learning_rate": 0.0001266445635528331,
+       "loss": 795.047,
+       "step": 2400
+     },
+     {
+       "epoch": 1.78,
+       "learning_rate": 0.00012026630934150074,
+       "loss": 38857.9575,
+       "step": 2500
+     },
+     {
+       "epoch": 1.78,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1167.9826,
+       "eval_samples_per_second": 21.404,
+       "eval_steps_per_second": 0.535,
+       "step": 2500
+     },
+     {
+       "epoch": 1.85,
+       "learning_rate": 0.00011388805513016846,
+       "loss": 185.0979,
+       "step": 2600
+     },
+     {
+       "epoch": 1.92,
+       "learning_rate": 0.00010750980091883614,
+       "loss": 12796.2138,
+       "step": 2700
+     },
+     {
+       "epoch": 1.99,
+       "learning_rate": 0.00010113154670750384,
+       "loss": 14625.28,
+       "step": 2800
+     },
+     {
+       "epoch": 2.06,
+       "learning_rate": 9.475329249617151e-05,
+       "loss": 271.5516,
+       "step": 2900
+     },
+     {
+       "epoch": 2.13,
+       "learning_rate": 8.837503828483921e-05,
+       "loss": 5358.3219,
+       "step": 3000
+     },
+     {
+       "epoch": 2.13,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1167.2296,
+       "eval_samples_per_second": 21.418,
+       "eval_steps_per_second": 0.535,
+       "step": 3000
+     },
+     {
+       "epoch": 2.2,
+       "learning_rate": 8.199678407350689e-05,
+       "loss": 106711.13,
+       "step": 3100
+     },
+     {
+       "epoch": 2.28,
+       "learning_rate": 7.561852986217458e-05,
+       "loss": 141536.53,
+       "step": 3200
+     },
+     {
+       "epoch": 2.35,
+       "learning_rate": 6.924027565084226e-05,
+       "loss": 784.2875,
+       "step": 3300
+     },
+     {
+       "epoch": 2.42,
+       "learning_rate": 6.286202143950997e-05,
+       "loss": 95102.5,
+       "step": 3400
+     },
+     {
+       "epoch": 2.49,
+       "learning_rate": 5.648376722817764e-05,
+       "loss": 2327.5258,
+       "step": 3500
+     },
+     {
+       "epoch": 2.49,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1166.8332,
+       "eval_samples_per_second": 21.426,
+       "eval_steps_per_second": 0.536,
+       "step": 3500
+     },
+     {
+       "epoch": 2.56,
+       "learning_rate": 5.010551301684534e-05,
+       "loss": 59017.795,
+       "step": 3600
+     },
+     {
+       "epoch": 2.63,
+       "learning_rate": 4.3727258805513014e-05,
+       "loss": 680382.64,
+       "step": 3700
+     },
+     {
+       "epoch": 2.7,
+       "learning_rate": 3.734900459418072e-05,
+       "loss": 1483.1705,
+       "step": 3800
+     },
+     {
+       "epoch": 2.77,
+       "learning_rate": 3.097075038284839e-05,
+       "loss": 30906.0775,
+       "step": 3900
+     },
+     {
+       "epoch": 2.84,
+       "learning_rate": 2.459249617151609e-05,
+       "loss": 46132.635,
+       "step": 4000
+     },
+     {
+       "epoch": 2.84,
+       "eval_loss": 10.320993423461914,
+       "eval_runtime": 1166.5367,
+       "eval_samples_per_second": 21.431,
+       "eval_steps_per_second": 0.536,
+       "step": 4000
+     },
+     {
+       "epoch": 2.92,
+       "learning_rate": 1.8214241960183767e-05,
+       "loss": 31202.95,
+       "step": 4100
+     },
+     {
+       "epoch": 2.99,
+       "learning_rate": 1.1835987748851469e-05,
+       "loss": 2477.5823,
+       "step": 4200
+     },
+     {
+       "epoch": 3.0,
+       "step": 4218,
+       "total_flos": 2.7466670484779696e+19,
+       "train_loss": 35493.948473290504,
+       "train_runtime": 121039.6951,
+       "train_samples_per_second": 5.577,
+       "train_steps_per_second": 0.035
+     }
+   ],
+   "max_steps": 4218,
+   "num_train_epochs": 3,
+   "total_flos": 2.7466670484779696e+19,
+   "trial_name": null,
+   "trial_params": null
+ }
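
trainer_state.json records three epochs (4218 steps) of training. The logged training loss falls to about 1.85 through epoch 1, and the best eval_loss of 1.845 is saved at checkpoint-1000; after that the training loss diverges to very large values while eval_loss stays fixed at 10.32. A minimal sketch for pulling those numbers out of the uploaded file, assuming it is read from the current directory:

# Minimal sketch: summarise the logged losses in trainer_state.json.
# Assumes the file from this commit is in the current directory.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("best eval_loss:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:>5}  eval_loss  {entry['eval_loss']:.4f}")
    elif "loss" in entry:
        print(f"step {entry['step']:>5}  train_loss {entry['loss']:.4f}")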