bhuvanmdev
committed on
Commit
•
1b41654
1
Parent(s):
7238d0f
Training in progress, step 1680, checkpoint
Browse files
last-checkpoint/adapter_config.json
CHANGED
@@ -20,10 +20,10 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"o_proj",
|
24 |
"qkv_proj",
|
25 |
-
"down_proj",
|
26 |
-
"gate_up_proj"
|
|
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
23 |
"qkv_proj",
|
24 |
+
"o_proj",
|
25 |
+
"gate_up_proj",
|
26 |
+
"down_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100697728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd9525a4c9388eba206599ad1083c9f04830f7c0993ba71f7249c50d3c13d194
|
3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201541754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc517b5ef5e5fb65e842afb1ff46711a33890ca508092071a4522eec551c4860
|
3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2111ebc063da3a4cacd27c844d6c3c4ea5d6c8b4612b9ccfbb113e1022b25f1b
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6f33bde628cfaa59f6d1470858dd0b46ed4d4b40c45bae4e5eb4771b5d21f15
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step": 1660,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1335,14 +1335,30 @@
|
|
1335 |
"loss": 0.3903,
|
1336 |
"num_input_tokens_seen": 1115112,
|
1337 |
"step": 1660
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1338 |
}
|
1339 |
],
|
1340 |
"logging_steps": 10,
|
1341 |
"max_steps": 2795,
|
1342 |
-
"num_input_tokens_seen": 1115112,
|
1343 |
"num_train_epochs": 1,
|
1344 |
"save_steps": 20,
|
1345 |
-
"total_flos": 2.
|
1346 |
"train_batch_size": 1,
|
1347 |
"trial_name": null,
|
1348 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6010733452593918,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1680,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1335 |
"loss": 0.3903,
|
1336 |
"num_input_tokens_seen": 1115112,
|
1337 |
"step": 1660
|
1338 |
+
},
|
1339 |
+
{
|
1340 |
+
"epoch": 0.5974955277280859,
|
1341 |
+
"grad_norm": 0.39852896332740784,
|
1342 |
+
"learning_rate": 8.050089445438284e-05,
|
1343 |
+
"loss": 0.4128,
|
1344 |
+
"num_input_tokens_seen": 1121451,
|
1345 |
+
"step": 1670
|
1346 |
+
},
|
1347 |
+
{
|
1348 |
+
"epoch": 0.6010733452593918,
|
1349 |
+
"grad_norm": 0.37065914273262024,
|
1350 |
+
"learning_rate": 7.978533094812165e-05,
|
1351 |
+
"loss": 0.4015,
|
1352 |
+
"num_input_tokens_seen": 1129374,
|
1353 |
+
"step": 1680
|
1354 |
}
|
1355 |
],
|
1356 |
"logging_steps": 10,
|
1357 |
"max_steps": 2795,
|
1358 |
+
"num_input_tokens_seen": 1129374,
|
1359 |
"num_train_epochs": 1,
|
1360 |
"save_steps": 20,
|
1361 |
+
"total_flos": 2.539563299769139e+16,
|
1362 |
"train_batch_size": 1,
|
1363 |
"trial_name": null,
|
1364 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6627f5a0391650843e3b66a870c0689d2522bb2ec40028b633e06c7aaaaff6d
|
3 |
size 5048
|