MohamedAhmedAE committed on
Commit
db5d5c7
1 Parent(s): 25fa7d7

Training in progress, step 22400, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "v_proj",
24
  "q_proj",
25
- "gate_proj",
26
- "down_proj",
27
  "up_proj",
28
- "k_proj",
29
- "o_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "q_proj",
 
 
24
  "up_proj",
25
+ "v_proj",
26
+ "down_proj",
27
+ "o_proj",
28
+ "gate_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb5d4a8e80692092cbb6ea69a8cd6902d7306436bd2ef4db3d268beb6d254345
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e56293ea86924617db1d72bddea10585e4be98a582ddca170c081dd3d642c9
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ed3c0ef76d1963f5e466fac078e7bdfa634f48c1b91aaad0c42e6ae39ac1315
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7c5ec127b3617a18b3d72d14dceb0ba197088d23e289ef92a6837ce8b5e15f
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adc80d060b3614ce93cf171cfa62b9a17755ba93a67b6a802cd87b68b0a907bb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4762419da8cab351088827e4869a7225442c8f1f7484efb21afc6c2799818eb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ba296f12eae4fc0511f08bee2c4fc9bf546a7ddf44e3e343ade2d2cae2ed71
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47e872382e84dc07d00209e557644b4bf1503898fe7aa404af9c426170e40ad4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.02974652244561034,
5
  "eval_steps": 2000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -707,6 +707,90 @@
707
  "learning_rate": 1.99982565504712e-05,
708
  "loss": 1.5135,
709
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  }
711
  ],
712
  "logging_steps": 200,
@@ -714,7 +798,7 @@
714
  "num_input_tokens_seen": 0,
715
  "num_train_epochs": 5,
716
  "save_steps": 200,
717
- "total_flos": 3.6983529369430426e+17,
718
  "train_batch_size": 1,
719
  "trial_name": null,
720
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.033316105139083584,
5
  "eval_steps": 2000,
6
+ "global_step": 22400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
707
  "learning_rate": 1.99982565504712e-05,
708
  "loss": 1.5135,
709
  "step": 20000
710
+ },
711
+ {
712
+ "epoch": 0.03,
713
+ "grad_norm": 2.2242326736450195,
714
+ "learning_rate": 1.9998221652689703e-05,
715
+ "loss": 1.5325,
716
+ "step": 20200
717
+ },
718
+ {
719
+ "epoch": 0.03,
720
+ "grad_norm": 1.6685123443603516,
721
+ "learning_rate": 1.9998186409125715e-05,
722
+ "loss": 1.5765,
723
+ "step": 20400
724
+ },
725
+ {
726
+ "epoch": 0.03,
727
+ "grad_norm": 1.8478649854660034,
728
+ "learning_rate": 1.999815064006636e-05,
729
+ "loss": 1.5597,
730
+ "step": 20600
731
+ },
732
+ {
733
+ "epoch": 0.03,
734
+ "grad_norm": 2.6228203773498535,
735
+ "learning_rate": 1.999811452174307e-05,
736
+ "loss": 1.5312,
737
+ "step": 20800
738
+ },
739
+ {
740
+ "epoch": 0.03,
741
+ "grad_norm": 1.2979694604873657,
742
+ "learning_rate": 1.9998078054157092e-05,
743
+ "loss": 1.5863,
744
+ "step": 21000
745
+ },
746
+ {
747
+ "epoch": 0.03,
748
+ "grad_norm": 1.4286555051803589,
749
+ "learning_rate": 1.999804123730971e-05,
750
+ "loss": 1.5265,
751
+ "step": 21200
752
+ },
753
+ {
754
+ "epoch": 0.03,
755
+ "grad_norm": 2.2393202781677246,
756
+ "learning_rate": 1.999800407120221e-05,
757
+ "loss": 1.5599,
758
+ "step": 21400
759
+ },
760
+ {
761
+ "epoch": 0.03,
762
+ "grad_norm": 8.066116333007812,
763
+ "learning_rate": 1.9997966555835886e-05,
764
+ "loss": 1.5345,
765
+ "step": 21600
766
+ },
767
+ {
768
+ "epoch": 0.03,
769
+ "grad_norm": 2.866185426712036,
770
+ "learning_rate": 1.9997928691212052e-05,
771
+ "loss": 1.5141,
772
+ "step": 21800
773
+ },
774
+ {
775
+ "epoch": 0.03,
776
+ "grad_norm": 2.5764899253845215,
777
+ "learning_rate": 1.9997890477332027e-05,
778
+ "loss": 1.5189,
779
+ "step": 22000
780
+ },
781
+ {
782
+ "epoch": 0.03,
783
+ "grad_norm": 3.597501039505005,
784
+ "learning_rate": 1.9997851914197147e-05,
785
+ "loss": 1.5368,
786
+ "step": 22200
787
+ },
788
+ {
789
+ "epoch": 0.03,
790
+ "grad_norm": 2.5888760089874268,
791
+ "learning_rate": 1.9997813001808763e-05,
792
+ "loss": 1.5603,
793
+ "step": 22400
794
  }
795
  ],
796
  "logging_steps": 200,
 
798
  "num_input_tokens_seen": 0,
799
  "num_train_epochs": 5,
800
  "save_steps": 200,
801
+ "total_flos": 4.327171237194056e+17,
802
  "train_batch_size": 1,
803
  "trial_name": null,
804
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7da6c64555cac2ee9fc43605b134636b4610ed6cd244e07ab7cb4ce3c058548c
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7800ffb29813f89bbd542587a923598ba056a5186a554421f6b09a6d22b374
3
  size 4920