Krish356 committed on
Commit
0a4b9d7
·
verified ·
1 Parent(s): 7161c4b

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -28,18 +28,18 @@
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
31
- "up_proj",
32
  "gate_proj",
33
- "k_proj",
34
- "o_proj",
35
  "q_proj",
36
  "v_proj",
37
- "down_proj"
 
 
38
  ],
39
  "target_parameters": null,
40
  "task_type": null,
41
  "trainable_token_indices": null,
42
  "use_dora": false,
43
  "use_qalora": false,
44
- "use_rslora": true
45
  }
 
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
31
+ "down_proj",
32
  "gate_proj",
 
 
33
  "q_proj",
34
  "v_proj",
35
+ "up_proj",
36
+ "o_proj",
37
+ "k_proj"
38
  ],
39
  "target_parameters": null,
40
  "task_type": null,
41
  "trainable_token_indices": null,
42
  "use_dora": false,
43
  "use_qalora": false,
44
+ "use_rslora": false
45
  }
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42ef19ac7448b71ef2c7f0ad912c2ee3e37fb7f8f2595fa93636a91b62132912
3
  size 3380768360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517e89777cd0acc23adc7844c9140c49862b632bbe55e0d37e4bbc779c613a97
3
  size 3380768360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ceba1a0a25a3089f0fbe4ad7b2ef2a6b51ebe56c5f49c6415186e4df7e28e94b
3
- size 1757899449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf20b7786740e08a40ec488c2846b5a881d156067618ab1e19e46158812a7a62
3
+ size 1855337587
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:117f5f7ecb8bf188570856b720dd33d2d7c01cd9b95297de35cf83f3e9fc739e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb481d43b196067ad8292b807ce1f5b4d4770f3d66564b6ee5dd045f67b57cbc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.2328767123287672,
6
  "eval_steps": 500,
7
- "global_step": 90,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -141,10 +141,52 @@
141
  "learning_rate": 2.0223024531515985e-05,
142
  "loss": 0.1956,
143
  "step": 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  }
145
  ],
146
  "logging_steps": 5,
147
- "max_steps": 219,
148
  "num_input_tokens_seen": 0,
149
  "num_train_epochs": 3,
150
  "save_steps": 30,
@@ -160,7 +202,7 @@
160
  "attributes": {}
161
  }
162
  },
163
- "total_flos": 1.6231690613367767e+18,
164
  "train_batch_size": 16,
165
  "trial_name": null,
166
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.28753993610223644,
6
  "eval_steps": 500,
7
+ "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
141
  "learning_rate": 2.0223024531515985e-05,
142
  "loss": 0.1956,
143
  "step": 90
144
+ },
145
+ {
146
+ "epoch": 0.04792332268370607,
147
+ "grad_norm": 0.2704693078994751,
148
+ "learning_rate": 2.4727026880991426e-05,
149
+ "loss": 0.3087,
150
+ "step": 95
151
+ },
152
+ {
153
+ "epoch": 0.09584664536741214,
154
+ "grad_norm": 0.15272189676761627,
155
+ "learning_rate": 2.4126313231549726e-05,
156
+ "loss": 0.2705,
157
+ "step": 100
158
+ },
159
+ {
160
+ "epoch": 0.14376996805111822,
161
+ "grad_norm": 0.08888361603021622,
162
+ "learning_rate": 2.3501398240052193e-05,
163
+ "loss": 0.266,
164
+ "step": 105
165
+ },
166
+ {
167
+ "epoch": 0.19169329073482427,
168
+ "grad_norm": 0.06617572158575058,
169
+ "learning_rate": 2.2853939068715516e-05,
170
+ "loss": 0.2504,
171
+ "step": 110
172
+ },
173
+ {
174
+ "epoch": 0.23961661341853036,
175
+ "grad_norm": 0.07254050672054291,
176
+ "learning_rate": 2.2185652662868573e-05,
177
+ "loss": 0.2381,
178
+ "step": 115
179
+ },
180
+ {
181
+ "epoch": 0.28753993610223644,
182
+ "grad_norm": 0.06296419352293015,
183
+ "learning_rate": 2.1498311197922046e-05,
184
+ "loss": 0.252,
185
+ "step": 120
186
  }
187
  ],
188
  "logging_steps": 5,
189
+ "max_steps": 315,
190
  "num_input_tokens_seen": 0,
191
  "num_train_epochs": 3,
192
  "save_steps": 30,
 
202
  "attributes": {}
203
  }
204
  },
205
+ "total_flos": 1.8551401685858673e+18,
206
  "train_batch_size": 16,
207
  "trial_name": null,
208
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8855ef24f42f0e1ba1145f6cd4d570d92cdd85350340e9302a34e4eb24aa4b7e
3
  size 6353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c47959f4c81dcb5cc476e8d02fd692e443b4d45c58fa790a42d9629281c83403
3
  size 6353