ZeroUniqueness
committed on
Commit
·
896b733
1
Parent(s):
5a016fe
Training in progress, step 5300
Browse files
- adapter_config.json +5 -5
- adapter_model.bin +1 -1
- {checkpoint-4900 → checkpoint-5200/adapter_model}/README.md +0 -0
- {checkpoint-4900 → checkpoint-5200}/adapter_model/adapter_config.json +4 -4
- {checkpoint-4900 → checkpoint-5200/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-4900/adapter_model → checkpoint-5300}/README.md +0 -0
- {checkpoint-4900 → checkpoint-5300}/adapter_config.json +3 -3
- {checkpoint-4900/adapter_model → checkpoint-5300}/adapter_model.bin +1 -1
- {checkpoint-4900 → checkpoint-5300}/optimizer.pt +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_0.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_1.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_10.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_11.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_12.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_13.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_2.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_3.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_4.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_5.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_6.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_7.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_8.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/rng_state_9.pth +1 -1
- {checkpoint-4900 → checkpoint-5300}/scheduler.pt +1 -1
- {checkpoint-4900 → checkpoint-5300}/trainer_state.json +107 -3
- {checkpoint-4900 → checkpoint-5300}/training_args.bin +1 -1
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"v_proj",
|
18 |
-
"up_proj",
|
19 |
-
"k_proj",
|
20 |
"q_proj",
|
21 |
-
"down_proj",
|
22 |
"gate_proj",
|
23 |
-
"o_proj"
|
|
|
|
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"q_proj",
|
|
|
18 |
"gate_proj",
|
19 |
+
"o_proj",
|
20 |
+
"down_proj",
|
21 |
+
"k_proj",
|
22 |
+
"v_proj",
|
23 |
+
"up_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14ca48bd88a31002dc05f9274bc978bc8b5b8ae3e51470e74ee7bc0bb12284e9
|
3 |
size 500897101
|
{checkpoint-4900 → checkpoint-5200/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-4900 → checkpoint-5200}/adapter_model/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
17 |
"k_proj",
|
18 |
-
"
|
19 |
"down_proj",
|
20 |
"gate_proj",
|
21 |
-
"
|
22 |
-
"q_proj",
|
23 |
-
"up_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"v_proj",
|
18 |
+
"up_proj",
|
19 |
"k_proj",
|
20 |
+
"q_proj",
|
21 |
"down_proj",
|
22 |
"gate_proj",
|
23 |
+
"o_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-4900 → checkpoint-5200/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28c2d0c51dd5ec8f66ff0bbdb8ae6185bd1eca75e6b3b3409367c09446351f5a
|
3 |
size 500897101
|
{checkpoint-4900/adapter_model → checkpoint-5300}/README.md
RENAMED
File without changes
|
{checkpoint-4900 → checkpoint-5300}/adapter_config.json
RENAMED
@@ -14,12 +14,12 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"
|
|
|
18 |
"o_proj",
|
19 |
"down_proj",
|
20 |
-
"
|
21 |
"v_proj",
|
22 |
-
"q_proj",
|
23 |
"up_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"q_proj",
|
18 |
+
"gate_proj",
|
19 |
"o_proj",
|
20 |
"down_proj",
|
21 |
+
"k_proj",
|
22 |
"v_proj",
|
|
|
23 |
"up_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
{checkpoint-4900/adapter_model → checkpoint-5300}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14ca48bd88a31002dc05f9274bc978bc8b5b8ae3e51470e74ee7bc0bb12284e9
|
3 |
size 500897101
|
{checkpoint-4900 → checkpoint-5300}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:869a51ca719bee8b6aab465be97fa4ac5e228c769321c2712f644067ceeca076
|
3 |
size 1001752701
|
{checkpoint-4900 → checkpoint-5300}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9267467db4bfc0b62f4b2992b98c478568d6b740025cdb5016f4102da1504163
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea0db0e586ec1c1e547243b2e02d8b0d760f3d113e51420b567322fb80b86283
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcfb12512c0be896f0842434c14afaa23ede7d6aadbd83bfd18570859fcdecdd
|
3 |
size 27789
|
{checkpoint-4900 → checkpoint-5300}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcd57397386863534e25c2052e0eb5a3986965528e8332bead4e6da6c8a52a60
|
3 |
size 27789
|
{checkpoint-4900 → checkpoint-5300}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b494220b086ab33c457cd9cb3298d5707ae27861e1a1ea15cf7b2846c34edb3
|
3 |
size 27789
|
{checkpoint-4900 → checkpoint-5300}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66ddbbc44b73707a46dbe70a821775b9215bfb3188b97867dfd5d788f2c4bd1b
|
3 |
size 27789
|
{checkpoint-4900 → checkpoint-5300}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e09271dc29724c758580e8e6864e76555b7a1dd95ca5d0036f08cdd9f67fbaec
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65b0c519b8283902066db24d39bebbf0ec0f57d632779c56be9c9d58f7cdb78e
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:868a6a26d82f017652ee4b9e21c96ec36ab5f96be4eeedf65e27eda71ff93f25
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d82c47d40cdc9cbed8bc1b1f8fdcb354fb8b6ac525cfce07fab2d1af5791c195
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64d8ec328ebab19d4fa69241bbb5de66af7b09aa5f0d71d9e37be38a5170d24e
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf9d89caef032992b8f5efcdaf7ab987e37e4fb52ec8d47520f814092e1e7ab3
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8913ebfea32419c486eaa9895c49a7962a44933cd37f4965710a3560fe737c25
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:685f43d3417ce77333bd61cb9e79c3d6e0421bb659bae8689caafc6c6b0d8752
|
3 |
size 27772
|
{checkpoint-4900 → checkpoint-5300}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d904fb502ced275ef5e7925781f477f4c89d48daa60e9f45b9339adf1aaa77d
|
3 |
size 627
|
{checkpoint-4900 → checkpoint-5300}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1190,11 +1190,115 @@
|
|
1190 |
"learning_rate": 5.946628610250484e-05,
|
1191 |
"loss": 0.7918,
|
1192 |
"step": 4900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1193 |
}
|
1194 |
],
|
1195 |
"max_steps": 7737,
|
1196 |
"num_train_epochs": 3,
|
1197 |
-
"total_flos": 2.
|
1198 |
"trial_name": null,
|
1199 |
"trial_params": null
|
1200 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.055060100814269,
|
5 |
+
"global_step": 5300,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1190 |
"learning_rate": 5.946628610250484e-05,
|
1191 |
"loss": 0.7918,
|
1192 |
"step": 4900
|
1193 |
+
},
|
1194 |
+
{
|
1195 |
+
"epoch": 1.91,
|
1196 |
+
"learning_rate": 5.853920568929996e-05,
|
1197 |
+
"loss": 0.7921,
|
1198 |
+
"step": 4925
|
1199 |
+
},
|
1200 |
+
{
|
1201 |
+
"epoch": 1.92,
|
1202 |
+
"learning_rate": 5.761640870750799e-05,
|
1203 |
+
"loss": 0.7878,
|
1204 |
+
"step": 4950
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"epoch": 1.93,
|
1208 |
+
"learning_rate": 5.669799049388375e-05,
|
1209 |
+
"loss": 0.7901,
|
1210 |
+
"step": 4975
|
1211 |
+
},
|
1212 |
+
{
|
1213 |
+
"epoch": 1.94,
|
1214 |
+
"learning_rate": 5.578404593279911e-05,
|
1215 |
+
"loss": 0.7858,
|
1216 |
+
"step": 5000
|
1217 |
+
},
|
1218 |
+
{
|
1219 |
+
"epoch": 1.94,
|
1220 |
+
"eval_loss": 0.807844877243042,
|
1221 |
+
"eval_runtime": 59.586,
|
1222 |
+
"eval_samples_per_second": 12.251,
|
1223 |
+
"eval_steps_per_second": 0.889,
|
1224 |
+
"step": 5000
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 1.95,
|
1228 |
+
"learning_rate": 5.487466944644033e-05,
|
1229 |
+
"loss": 0.7902,
|
1230 |
+
"step": 5025
|
1231 |
+
},
|
1232 |
+
{
|
1233 |
+
"epoch": 1.96,
|
1234 |
+
"learning_rate": 5.3969954985052996e-05,
|
1235 |
+
"loss": 0.7979,
|
1236 |
+
"step": 5050
|
1237 |
+
},
|
1238 |
+
{
|
1239 |
+
"epoch": 1.97,
|
1240 |
+
"learning_rate": 5.306999601723579e-05,
|
1241 |
+
"loss": 0.7931,
|
1242 |
+
"step": 5075
|
1243 |
+
},
|
1244 |
+
{
|
1245 |
+
"epoch": 1.98,
|
1246 |
+
"learning_rate": 5.21748855202839e-05,
|
1247 |
+
"loss": 0.7868,
|
1248 |
+
"step": 5100
|
1249 |
+
},
|
1250 |
+
{
|
1251 |
+
"epoch": 1.99,
|
1252 |
+
"learning_rate": 5.128471597058342e-05,
|
1253 |
+
"loss": 0.7993,
|
1254 |
+
"step": 5125
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 2.0,
|
1258 |
+
"learning_rate": 5.03995793340572e-05,
|
1259 |
+
"loss": 0.7892,
|
1260 |
+
"step": 5150
|
1261 |
+
},
|
1262 |
+
{
|
1263 |
+
"epoch": 2.01,
|
1264 |
+
"learning_rate": 4.9519567056663694e-05,
|
1265 |
+
"loss": 0.7788,
|
1266 |
+
"step": 5175
|
1267 |
+
},
|
1268 |
+
{
|
1269 |
+
"epoch": 2.02,
|
1270 |
+
"learning_rate": 4.864477005494938e-05,
|
1271 |
+
"loss": 0.7654,
|
1272 |
+
"step": 5200
|
1273 |
+
},
|
1274 |
+
{
|
1275 |
+
"epoch": 2.03,
|
1276 |
+
"learning_rate": 4.777527870665592e-05,
|
1277 |
+
"loss": 0.7468,
|
1278 |
+
"step": 5225
|
1279 |
+
},
|
1280 |
+
{
|
1281 |
+
"epoch": 2.04,
|
1282 |
+
"learning_rate": 4.691118284138296e-05,
|
1283 |
+
"loss": 0.7359,
|
1284 |
+
"step": 5250
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 2.05,
|
1288 |
+
"learning_rate": 4.605257173130763e-05,
|
1289 |
+
"loss": 0.7422,
|
1290 |
+
"step": 5275
|
1291 |
+
},
|
1292 |
+
{
|
1293 |
+
"epoch": 2.06,
|
1294 |
+
"learning_rate": 4.519953408196152e-05,
|
1295 |
+
"loss": 0.7424,
|
1296 |
+
"step": 5300
|
1297 |
}
|
1298 |
],
|
1299 |
"max_steps": 7737,
|
1300 |
"num_train_epochs": 3,
|
1301 |
+
"total_flos": 2.282482987033428e+19,
|
1302 |
"trial_name": null,
|
1303 |
"trial_params": null
|
1304 |
}
|
{checkpoint-4900 → checkpoint-5300}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:689d52379bcc7c50e04c40b22a97b473b8de3f17b4096bebf81eb9f37e1dafa6
|
3 |
size 4027
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:689d52379bcc7c50e04c40b22a97b473b8de3f17b4096bebf81eb9f37e1dafa6
|
3 |
size 4027
|