MohamedAhmedAE committed on
Commit
92ddb7d
·
verified ·
1 Parent(s): 21ed5d7

Training in progress, step 247200, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "down_proj",
27
- "q_proj",
28
- "k_proj",
29
  "up_proj",
30
- "gate_proj",
31
  "v_proj",
32
- "o_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "gate_proj",
27
  "down_proj",
 
 
28
  "up_proj",
29
+ "q_proj",
30
  "v_proj",
31
+ "o_proj",
32
+ "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad2c04e4c9d9778549e502f8f4d5e5c7678fc1dcb6dbaa7898e81a74d789ffe6
3
  size 1342238560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26cdd869c2b4ef51426ac8e4543dd00178c3d7969b70ed9815e4cdd56536de38
3
  size 1342238560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7abfc3edc0f50712b10a6abd65117145ee7a61c6581f5ef5f334f78cac8b278
3
  size 683268498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fed46b67619b196bd11398b24691e1dc344bc786624293d833d81496772323ba
3
  size 683268498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdf2e2cd9b38a414a8e4dfabaffbbc3519d4a60f21cea7d14c955d0395100be6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b46cf230fb1dcfff937d34e6fba4e48cdf99fb6180c75deb7bd7cfc11bd65f9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5c2f6216cdb6e05487f89478e270dd1586ad361fb78787891ff688d5d1f5b80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6758d545d7403ec126f36e0141cd2cb64797d86a13dce36859bd0cdff2824ef9
3
  size 1064
last-checkpoint/tokenizer_config.json CHANGED
@@ -2064,7 +2064,7 @@
2064
  "pad_token": "<|eot_id|>",
2065
  "padding_side": "left",
2066
  "stride": 0,
2067
- "tokenizer_class": "PreTrainedTokenizerFast",
2068
  "truncation_side": "right",
2069
  "truncation_strategy": "longest_first"
2070
  }
 
2064
  "pad_token": "<|eot_id|>",
2065
  "padding_side": "left",
2066
  "stride": 0,
2067
+ "tokenizer_class": "PreTrainedTokenizer",
2068
  "truncation_side": "right",
2069
  "truncation_strategy": "longest_first"
2070
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16733305467673631,
5
  "eval_steps": 500,
6
- "global_step": 240600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8428,6 +8428,237 @@
8428
  "learning_rate": 1.9759580630399218e-05,
8429
  "loss": 1.6874,
8430
  "step": 240600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8431
  }
8432
  ],
8433
  "logging_steps": 200,
@@ -8447,7 +8678,7 @@
8447
  "attributes": {}
8448
  }
8449
  },
8450
- "total_flos": 3.2024253058097725e+18,
8451
  "train_batch_size": 1,
8452
  "trial_name": null,
8453
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1719232382214847,
5
  "eval_steps": 500,
6
+ "global_step": 247200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8428
  "learning_rate": 1.9759580630399218e-05,
8429
  "loss": 1.6874,
8430
  "step": 240600
8431
+ },
8432
+ {
8433
+ "epoch": 0.1674721511477893,
8434
+ "grad_norm": 5.881978511810303,
8435
+ "learning_rate": 1.9759183073216768e-05,
8436
+ "loss": 1.6754,
8437
+ "step": 240800
8438
+ },
8439
+ {
8440
+ "epoch": 0.16761124761884227,
8441
+ "grad_norm": 4.917115688323975,
8442
+ "learning_rate": 1.9758785193358672e-05,
8443
+ "loss": 1.6594,
8444
+ "step": 241000
8445
+ },
8446
+ {
8447
+ "epoch": 0.16775034408989525,
8448
+ "grad_norm": 7.00810432434082,
8449
+ "learning_rate": 1.9758386990843928e-05,
8450
+ "loss": 1.6253,
8451
+ "step": 241200
8452
+ },
8453
+ {
8454
+ "epoch": 0.16788944056094826,
8455
+ "grad_norm": 3.8624985218048096,
8456
+ "learning_rate": 1.9757988465691542e-05,
8457
+ "loss": 1.6543,
8458
+ "step": 241400
8459
+ },
8460
+ {
8461
+ "epoch": 0.16802853703200124,
8462
+ "grad_norm": 6.821996212005615,
8463
+ "learning_rate": 1.9757589617920542e-05,
8464
+ "loss": 1.674,
8465
+ "step": 241600
8466
+ },
8467
+ {
8468
+ "epoch": 0.16816763350305422,
8469
+ "grad_norm": 7.101013660430908,
8470
+ "learning_rate": 1.9757190447549967e-05,
8471
+ "loss": 1.6478,
8472
+ "step": 241800
8473
+ },
8474
+ {
8475
+ "epoch": 0.1683067299741072,
8476
+ "grad_norm": 3.3569910526275635,
8477
+ "learning_rate": 1.9756790954598874e-05,
8478
+ "loss": 1.6584,
8479
+ "step": 242000
8480
+ },
8481
+ {
8482
+ "epoch": 0.16844582644516018,
8483
+ "grad_norm": 3.9595654010772705,
8484
+ "learning_rate": 1.9756391139086332e-05,
8485
+ "loss": 1.6638,
8486
+ "step": 242200
8487
+ },
8488
+ {
8489
+ "epoch": 0.16858492291621316,
8490
+ "grad_norm": 7.490050315856934,
8491
+ "learning_rate": 1.9755991001031433e-05,
8492
+ "loss": 1.6073,
8493
+ "step": 242400
8494
+ },
8495
+ {
8496
+ "epoch": 0.16872401938726614,
8497
+ "grad_norm": 6.08933162689209,
8498
+ "learning_rate": 1.9755590540453275e-05,
8499
+ "loss": 1.6431,
8500
+ "step": 242600
8501
+ },
8502
+ {
8503
+ "epoch": 0.16886311585831912,
8504
+ "grad_norm": 5.067729473114014,
8505
+ "learning_rate": 1.9755189757370973e-05,
8506
+ "loss": 1.6102,
8507
+ "step": 242800
8508
+ },
8509
+ {
8510
+ "epoch": 0.1690022123293721,
8511
+ "grad_norm": 5.07871150970459,
8512
+ "learning_rate": 1.9754788651803664e-05,
8513
+ "loss": 1.6425,
8514
+ "step": 243000
8515
+ },
8516
+ {
8517
+ "epoch": 0.16914130880042508,
8518
+ "grad_norm": 4.212064743041992,
8519
+ "learning_rate": 1.97543872237705e-05,
8520
+ "loss": 1.6382,
8521
+ "step": 243200
8522
+ },
8523
+ {
8524
+ "epoch": 0.16928040527147806,
8525
+ "grad_norm": 5.310381889343262,
8526
+ "learning_rate": 1.9753985473290637e-05,
8527
+ "loss": 1.649,
8528
+ "step": 243400
8529
+ },
8530
+ {
8531
+ "epoch": 0.16941950174253104,
8532
+ "grad_norm": 5.563880443572998,
8533
+ "learning_rate": 1.9753583400383262e-05,
8534
+ "loss": 1.68,
8535
+ "step": 243600
8536
+ },
8537
+ {
8538
+ "epoch": 0.16955859821358402,
8539
+ "grad_norm": 3.346017837524414,
8540
+ "learning_rate": 1.975318100506756e-05,
8541
+ "loss": 1.6314,
8542
+ "step": 243800
8543
+ },
8544
+ {
8545
+ "epoch": 0.169697694684637,
8546
+ "grad_norm": 3.5837533473968506,
8547
+ "learning_rate": 1.9752778287362746e-05,
8548
+ "loss": 1.6668,
8549
+ "step": 244000
8550
+ },
8551
+ {
8552
+ "epoch": 0.16983679115568998,
8553
+ "grad_norm": 8.021364212036133,
8554
+ "learning_rate": 1.9752375247288046e-05,
8555
+ "loss": 1.6824,
8556
+ "step": 244200
8557
+ },
8558
+ {
8559
+ "epoch": 0.16997588762674296,
8560
+ "grad_norm": 6.544102191925049,
8561
+ "learning_rate": 1.97519718848627e-05,
8562
+ "loss": 1.6961,
8563
+ "step": 244400
8564
+ },
8565
+ {
8566
+ "epoch": 0.17011498409779593,
8567
+ "grad_norm": 6.292764663696289,
8568
+ "learning_rate": 1.9751568200105962e-05,
8569
+ "loss": 1.6397,
8570
+ "step": 244600
8571
+ },
8572
+ {
8573
+ "epoch": 0.17025408056884891,
8574
+ "grad_norm": 7.250925064086914,
8575
+ "learning_rate": 1.9751164193037104e-05,
8576
+ "loss": 1.7036,
8577
+ "step": 244800
8578
+ },
8579
+ {
8580
+ "epoch": 0.17039317703990192,
8581
+ "grad_norm": 4.996527194976807,
8582
+ "learning_rate": 1.975075986367542e-05,
8583
+ "loss": 1.609,
8584
+ "step": 245000
8585
+ },
8586
+ {
8587
+ "epoch": 0.1705322735109549,
8588
+ "grad_norm": 5.131301403045654,
8589
+ "learning_rate": 1.97503552120402e-05,
8590
+ "loss": 1.7069,
8591
+ "step": 245200
8592
+ },
8593
+ {
8594
+ "epoch": 0.17067136998200788,
8595
+ "grad_norm": 8.829029083251953,
8596
+ "learning_rate": 1.9749950238150776e-05,
8597
+ "loss": 1.6709,
8598
+ "step": 245400
8599
+ },
8600
+ {
8601
+ "epoch": 0.17081046645306086,
8602
+ "grad_norm": 5.728978633880615,
8603
+ "learning_rate": 1.9749544942026467e-05,
8604
+ "loss": 1.672,
8605
+ "step": 245600
8606
+ },
8607
+ {
8608
+ "epoch": 0.17094956292411384,
8609
+ "grad_norm": 5.395960330963135,
8610
+ "learning_rate": 1.9749139323686628e-05,
8611
+ "loss": 1.6404,
8612
+ "step": 245800
8613
+ },
8614
+ {
8615
+ "epoch": 0.17108865939516682,
8616
+ "grad_norm": 3.34220027923584,
8617
+ "learning_rate": 1.9748733383150624e-05,
8618
+ "loss": 1.6915,
8619
+ "step": 246000
8620
+ },
8621
+ {
8622
+ "epoch": 0.1712277558662198,
8623
+ "grad_norm": 2.9125590324401855,
8624
+ "learning_rate": 1.974832712043783e-05,
8625
+ "loss": 1.6057,
8626
+ "step": 246200
8627
+ },
8628
+ {
8629
+ "epoch": 0.17136685233727278,
8630
+ "grad_norm": 5.961441516876221,
8631
+ "learning_rate": 1.974792053556764e-05,
8632
+ "loss": 1.6639,
8633
+ "step": 246400
8634
+ },
8635
+ {
8636
+ "epoch": 0.17150594880832576,
8637
+ "grad_norm": 3.4587485790252686,
8638
+ "learning_rate": 1.9747513628559473e-05,
8639
+ "loss": 1.667,
8640
+ "step": 246600
8641
+ },
8642
+ {
8643
+ "epoch": 0.17164504527937874,
8644
+ "grad_norm": 3.578892946243286,
8645
+ "learning_rate": 1.974710639943274e-05,
8646
+ "loss": 1.6889,
8647
+ "step": 246800
8648
+ },
8649
+ {
8650
+ "epoch": 0.17178414175043172,
8651
+ "grad_norm": 4.567336082458496,
8652
+ "learning_rate": 1.9746698848206897e-05,
8653
+ "loss": 1.6884,
8654
+ "step": 247000
8655
+ },
8656
+ {
8657
+ "epoch": 0.1719232382214847,
8658
+ "grad_norm": 4.0480523109436035,
8659
+ "learning_rate": 1.974629097490139e-05,
8660
+ "loss": 1.6463,
8661
+ "step": 247200
8662
  }
8663
  ],
8664
  "logging_steps": 200,
 
8678
  "attributes": {}
8679
  }
8680
  },
8681
+ "total_flos": 3.2903444675753165e+18,
8682
  "train_batch_size": 1,
8683
  "trial_name": null,
8684
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31159c9e3ece420d10b679508751f56bfb33866580a857e3a293714f0a805ecb
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d05fe2fc1f3ea3381f9d45772e347485be6cf70ca63fca95b95759ca5f4677ea
3
  size 6840