bhuvanmdev committed
Commit 1b41654
1 Parent(s): 7238d0f

Training in progress, step 1680, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "o_proj",
  "qkv_proj",
- "down_proj",
- "gate_up_proj"
+ "o_proj",
+ "gate_up_proj",
+ "down_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee715368403bed92f0597eabc44753fd00d49356f11cef1b3c8fb8c0804ae158
+ oid sha256:bd9525a4c9388eba206599ad1083c9f04830f7c0993ba71f7249c50d3c13d194
  size 100697728
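adapter_model.safetensors, like the other binary files below, is tracked with Git LFS, so the diff only touches the pointer file: the sha256 oid changes at every saved step, while the size stays at 100697728 bytes because the adapter weights keep the same shape. A small sketch for checking a downloaded file against the oid in its pointer; the local path assumes the repository has been cloned.

```python
# Sketch: verify a downloaded checkpoint file against the sha256 oid recorded
# in its Git LFS pointer. The path assumes the repo was cloned locally.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in 1 MiB chunks and return its hex sha256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "bd9525a4c9388eba206599ad1083c9f04830f7c0993ba71f7249c50d3c13d194"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
assert actual == expected, f"hash mismatch: {actual}"
```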
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:208b124f35b0f1bb89c24aa5e442cf97e514521b89c0892c0bc2dd2ca24dcc7c
+ oid sha256:dc517b5ef5e5fb65e842afb1ff46711a33890ca508092071a4522eec551c4860
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ba8cf0f8aa521d1f1829b6d195f2417168669d9a3f3f5d0131830f8caf9a53ed
+ oid sha256:2111ebc063da3a4cacd27c844d6c3c4ea5d6c8b4612b9ccfbb113e1022b25f1b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f76a6b8811ac3433e0a8dcf8c698d49602a1ab3ba3e9478d0943f2dd2665d0e5
+ oid sha256:f6f33bde628cfaa59f6d1470858dd0b46ed4d4b40c45bae4e5eb4771b5d21f15
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.59391771019678,
+ "epoch": 0.6010733452593918,
  "eval_steps": 500,
- "global_step": 1660,
+ "global_step": 1680,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1335,14 +1335,30 @@
  "loss": 0.3903,
  "num_input_tokens_seen": 1115112,
  "step": 1660
+ },
+ {
+ "epoch": 0.5974955277280859,
+ "grad_norm": 0.39852896332740784,
+ "learning_rate": 8.050089445438284e-05,
+ "loss": 0.4128,
+ "num_input_tokens_seen": 1121451,
+ "step": 1670
+ },
+ {
+ "epoch": 0.6010733452593918,
+ "grad_norm": 0.37065914273262024,
+ "learning_rate": 7.978533094812165e-05,
+ "loss": 0.4015,
+ "num_input_tokens_seen": 1129374,
+ "step": 1680
  }
  ],
  "logging_steps": 10,
  "max_steps": 2795,
- "num_input_tokens_seen": 1115112,
+ "num_input_tokens_seen": 1129374,
  "num_train_epochs": 1,
  "save_steps": 20,
- "total_flos": 2.5074930982404096e+16,
+ "total_flos": 2.539563299769139e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:77d0a7fc1510ab3970debab54d5361ccc4e46d17c58f6086837651cc376b2329
+ oid sha256:b6627f5a0391650843e3b66a870c0689d2522bb2ec40028b633e06c7aaaaff6d
  size 5048
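Taken together, the files in last-checkpoint/ (adapter weights, optimizer and scheduler state, RNG state, trainer state, and training args) are what the Hugging Face Trainer needs to continue this run. A minimal sketch of resuming, assuming a transformers.Trainer has already been rebuilt with the same model, dataset, and TrainingArguments as the original run:

```python
# Sketch: resume this run from the saved checkpoint directory. `trainer` is
# assumed to be a transformers.Trainer constructed with the same model, data
# collator, dataset, and TrainingArguments used to produce the checkpoint.
trainer.train(resume_from_checkpoint="last-checkpoint")
```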