Leptok committed on
Commit
ba4dec7
1 Parent(s): 46b11a3

Upload 6 files

Browse files
Files changed (4) hide show
  1. README.md +1 -0
  2. adapter_config.json +4 -4
  3. adapter_model.bin +1 -1
  4. trainer_state.json +188 -26
README.md CHANGED
@@ -5,5 +5,6 @@ library_name: peft
5
 
6
  ### Framework versions
7
 
 
8
 
9
  - PEFT 0.4.0
 
5
 
6
  ### Framework versions
7
 
8
+ - PEFT 0.4.0
9
 
10
  - PEFT 0.4.0
adapter_config.json CHANGED
@@ -14,14 +14,14 @@
14
  "r": 64,
15
  "revision": null,
16
  "target_modules": [
17
- "up_proj",
18
  "v_proj",
 
19
  "k_proj",
20
- "mm_projector",
21
- "gate_proj",
22
  "q_proj",
 
23
  "down_proj",
24
- "o_proj"
 
25
  ],
26
  "task_type": "CAUSAL_LM"
27
  }
 
14
  "r": 64,
15
  "revision": null,
16
  "target_modules": [
 
17
  "v_proj",
18
+ "o_proj",
19
  "k_proj",
 
 
20
  "q_proj",
21
+ "gate_proj",
22
  "down_proj",
23
+ "mm_projector",
24
+ "up_proj"
25
  ],
26
  "task_type": "CAUSAL_LM"
27
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0f528aa4754c5d1a84d815ff787bda6f2bb285401e1c368c20e1707eb5e7de7
3
  size 501676373
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32806b169c3c4e18fdb96cf2afc8b4379a06750c87c2b6043dc199eeeaf4ccf2
3
  size 501676373
trainer_state.json CHANGED
@@ -1,79 +1,241 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 9,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.33,
12
- "learning_rate": 2e-05,
13
  "loss": 2.0078,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.67,
18
- "learning_rate": 1.9238795325112867e-05,
19
  "loss": 1.8555,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 1.0,
24
- "learning_rate": 1.7071067811865477e-05,
25
- "loss": 1.9688,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 1.33,
30
- "learning_rate": 1.3826834323650899e-05,
31
  "loss": 1.9648,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 1.67,
36
- "learning_rate": 1e-05,
37
  "loss": 1.9531,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 2.0,
42
- "learning_rate": 6.173165676349103e-06,
43
- "loss": 1.8594,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 2.33,
48
- "learning_rate": 2.9289321881345257e-06,
49
- "loss": 1.8164,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 2.67,
54
- "learning_rate": 7.612046748871327e-07,
55
- "loss": 2.0547,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 3.0,
60
- "learning_rate": 0.0,
61
- "loss": 1.8125,
62
  "step": 9
63
  },
64
  {
65
- "epoch": 3.0,
66
- "step": 9,
67
- "total_flos": 3615157490024448.0,
68
- "train_loss": 1.9214409722222223,
69
- "train_runtime": 539.9369,
70
- "train_samples_per_second": 0.533,
71
- "train_steps_per_second": 0.017
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
73
  ],
74
- "max_steps": 9,
75
- "num_train_epochs": 3,
76
- "total_flos": 3615157490024448.0,
77
  "trial_name": null,
78
  "trial_params": null
79
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "global_step": 36,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.33,
12
+ "learning_rate": 1e-05,
13
  "loss": 2.0078,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.67,
18
+ "learning_rate": 2e-05,
19
  "loss": 1.8555,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "learning_rate": 1.9957341762950346e-05,
25
+ "loss": 1.9727,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 1.33,
30
+ "learning_rate": 1.982973099683902e-05,
31
  "loss": 1.9648,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 1.67,
36
+ "learning_rate": 1.961825643172819e-05,
37
  "loss": 1.9531,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 2.0,
42
+ "learning_rate": 1.932472229404356e-05,
43
+ "loss": 1.8516,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 2.33,
48
+ "learning_rate": 1.8951632913550625e-05,
49
+ "loss": 1.8047,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 2.67,
54
+ "learning_rate": 1.8502171357296144e-05,
55
+ "loss": 2.0195,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 3.0,
60
+ "learning_rate": 1.7980172272802398e-05,
61
+ "loss": 1.7578,
62
  "step": 9
63
  },
64
  {
65
+ "epoch": 3.33,
66
+ "learning_rate": 1.7390089172206594e-05,
67
+ "loss": 1.7539,
68
+ "step": 10
69
+ },
70
+ {
71
+ "epoch": 3.67,
72
+ "learning_rate": 1.6736956436465573e-05,
73
+ "loss": 1.7461,
74
+ "step": 11
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "learning_rate": 1.6026346363792565e-05,
79
+ "loss": 1.7109,
80
+ "step": 12
81
+ },
82
+ {
83
+ "epoch": 4.33,
84
+ "learning_rate": 1.526432162877356e-05,
85
+ "loss": 1.7227,
86
+ "step": 13
87
+ },
88
+ {
89
+ "epoch": 4.67,
90
+ "learning_rate": 1.4457383557765385e-05,
91
+ "loss": 1.4961,
92
+ "step": 14
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "learning_rate": 1.3612416661871532e-05,
97
+ "loss": 1.4961,
98
+ "step": 15
99
+ },
100
+ {
101
+ "epoch": 5.33,
102
+ "learning_rate": 1.2736629900720832e-05,
103
+ "loss": 1.5039,
104
+ "step": 16
105
+ },
106
+ {
107
+ "epoch": 5.67,
108
+ "learning_rate": 1.1837495178165706e-05,
109
+ "loss": 1.2852,
110
+ "step": 17
111
+ },
112
+ {
113
+ "epoch": 6.0,
114
+ "learning_rate": 1.092268359463302e-05,
115
+ "loss": 1.4375,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 6.33,
120
+ "learning_rate": 1e-05,
121
+ "loss": 1.3555,
122
+ "step": 19
123
+ },
124
+ {
125
+ "epoch": 6.67,
126
+ "learning_rate": 9.07731640536698e-06,
127
+ "loss": 1.2539,
128
+ "step": 20
129
+ },
130
+ {
131
+ "epoch": 7.0,
132
+ "learning_rate": 8.162504821834296e-06,
133
+ "loss": 1.2578,
134
+ "step": 21
135
+ },
136
+ {
137
+ "epoch": 7.33,
138
+ "learning_rate": 7.263370099279173e-06,
139
+ "loss": 1.1641,
140
+ "step": 22
141
+ },
142
+ {
143
+ "epoch": 7.67,
144
+ "learning_rate": 6.387583338128471e-06,
145
+ "loss": 1.2461,
146
+ "step": 23
147
+ },
148
+ {
149
+ "epoch": 8.0,
150
+ "learning_rate": 5.542616442234618e-06,
151
+ "loss": 1.1445,
152
+ "step": 24
153
+ },
154
+ {
155
+ "epoch": 8.33,
156
+ "learning_rate": 4.7356783712264405e-06,
157
+ "loss": 1.2344,
158
+ "step": 25
159
+ },
160
+ {
161
+ "epoch": 8.67,
162
+ "learning_rate": 3.973653636207437e-06,
163
+ "loss": 1.1367,
164
+ "step": 26
165
+ },
166
+ {
167
+ "epoch": 9.0,
168
+ "learning_rate": 3.2630435635344283e-06,
169
+ "loss": 1.0488,
170
+ "step": 27
171
+ },
172
+ {
173
+ "epoch": 9.33,
174
+ "learning_rate": 2.6099108277934105e-06,
175
+ "loss": 1.043,
176
+ "step": 28
177
+ },
178
+ {
179
+ "epoch": 9.67,
180
+ "learning_rate": 2.019827727197605e-06,
181
+ "loss": 1.1328,
182
+ "step": 29
183
+ },
184
+ {
185
+ "epoch": 10.0,
186
+ "learning_rate": 1.4978286427038602e-06,
187
+ "loss": 1.1758,
188
+ "step": 30
189
+ },
190
+ {
191
+ "epoch": 10.33,
192
+ "learning_rate": 1.0483670864493777e-06,
193
+ "loss": 1.0723,
194
+ "step": 31
195
+ },
196
+ {
197
+ "epoch": 10.67,
198
+ "learning_rate": 6.752777059564431e-07,
199
+ "loss": 1.1367,
200
+ "step": 32
201
+ },
202
+ {
203
+ "epoch": 11.0,
204
+ "learning_rate": 3.817435682718096e-07,
205
+ "loss": 1.082,
206
+ "step": 33
207
+ },
208
+ {
209
+ "epoch": 11.33,
210
+ "learning_rate": 1.7026900316098217e-07,
211
+ "loss": 1.0742,
212
+ "step": 34
213
+ },
214
+ {
215
+ "epoch": 11.67,
216
+ "learning_rate": 4.2658237049655325e-08,
217
+ "loss": 1.1172,
218
+ "step": 35
219
+ },
220
+ {
221
+ "epoch": 12.0,
222
+ "learning_rate": 0.0,
223
+ "loss": 1.0938,
224
+ "step": 36
225
+ },
226
+ {
227
+ "epoch": 12.0,
228
+ "step": 36,
229
+ "total_flos": 1.4460629960097792e+16,
230
+ "train_loss": 1.4474826388888888,
231
+ "train_runtime": 1003.5962,
232
+ "train_samples_per_second": 1.148,
233
+ "train_steps_per_second": 0.036
234
  }
235
  ],
236
+ "max_steps": 36,
237
+ "num_train_epochs": 12,
238
+ "total_flos": 1.4460629960097792e+16,
239
  "trial_name": null,
240
  "trial_params": null
241
  }