MohamedAhmedAE commited on
Commit
25fa7d7
1 Parent(s): e9a8d7c

Training in progress, step 22400

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad5df223fcbfdd3ad46ed054bfdef283483890238b3a6ac51915a4695682e23
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e56293ea86924617db1d72bddea10585e4be98a582ddca170c081dd3d642c9
3
  size 167832240
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "up_proj",
25
  "v_proj",
26
- "down_proj",
27
- "o_proj",
28
  "gate_proj",
29
- "k_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "v_proj",
24
+ "q_proj",
 
25
  "gate_proj",
26
+ "down_proj",
27
+ "up_proj",
28
+ "k_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad5df223fcbfdd3ad46ed054bfdef283483890238b3a6ac51915a4695682e23
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5d4a8e80692092cbb6ea69a8cd6902d7306436bd2ef4db3d268beb6d254345
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6982f8b3fe44594c163fc5ab0e300f13004e4862a415b5b03ac8da4472123835
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed3c0ef76d1963f5e466fac078e7bdfa634f48c1b91aaad0c42e6ae39ac1315
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd8f2420f5ce27e32ef99ce3a23532d1502953314af1258b35a9d3e59a9189b5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adc80d060b3614ce93cf171cfa62b9a17755ba93a67b6a802cd87b68b0a907bb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3636d6407e959c81f7c73cc51cbd4d3f955a426c0e115359ea4e27b55c546296
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ba296f12eae4fc0511f08bee2c4fc9bf546a7ddf44e3e343ade2d2cae2ed71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03301863991462748,
5
  "eval_steps": 2000,
6
- "global_step": 22200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -707,83 +707,6 @@
707
  "learning_rate": 1.99982565504712e-05,
708
  "loss": 1.5135,
709
  "step": 20000
710
- },
711
- {
712
- "epoch": 0.03,
713
- "grad_norm": 2.2242326736450195,
714
- "learning_rate": 1.9998221652689703e-05,
715
- "loss": 1.5325,
716
- "step": 20200
717
- },
718
- {
719
- "epoch": 0.03,
720
- "grad_norm": 1.6685123443603516,
721
- "learning_rate": 1.9998186409125715e-05,
722
- "loss": 1.5765,
723
- "step": 20400
724
- },
725
- {
726
- "epoch": 0.03,
727
- "grad_norm": 1.8478649854660034,
728
- "learning_rate": 1.999815064006636e-05,
729
- "loss": 1.5597,
730
- "step": 20600
731
- },
732
- {
733
- "epoch": 0.03,
734
- "grad_norm": 2.6228203773498535,
735
- "learning_rate": 1.999811452174307e-05,
736
- "loss": 1.5312,
737
- "step": 20800
738
- },
739
- {
740
- "epoch": 0.03,
741
- "grad_norm": 1.2979694604873657,
742
- "learning_rate": 1.9998078054157092e-05,
743
- "loss": 1.5863,
744
- "step": 21000
745
- },
746
- {
747
- "epoch": 0.03,
748
- "grad_norm": 1.4286555051803589,
749
- "learning_rate": 1.999804123730971e-05,
750
- "loss": 1.5265,
751
- "step": 21200
752
- },
753
- {
754
- "epoch": 0.03,
755
- "grad_norm": 2.2393202781677246,
756
- "learning_rate": 1.999800407120221e-05,
757
- "loss": 1.5599,
758
- "step": 21400
759
- },
760
- {
761
- "epoch": 0.03,
762
- "grad_norm": 8.066116333007812,
763
- "learning_rate": 1.9997966555835886e-05,
764
- "loss": 1.5345,
765
- "step": 21600
766
- },
767
- {
768
- "epoch": 0.03,
769
- "grad_norm": 2.866185426712036,
770
- "learning_rate": 1.9997928691212052e-05,
771
- "loss": 1.5141,
772
- "step": 21800
773
- },
774
- {
775
- "epoch": 0.03,
776
- "grad_norm": 2.5764899253845215,
777
- "learning_rate": 1.9997890477332027e-05,
778
- "loss": 1.5189,
779
- "step": 22000
780
- },
781
- {
782
- "epoch": 0.03,
783
- "grad_norm": 3.597501039505005,
784
- "learning_rate": 1.9997851914197147e-05,
785
- "loss": 1.5368,
786
- "step": 22200
787
  }
788
  ],
789
  "logging_steps": 200,
@@ -791,7 +714,7 @@
791
  "num_input_tokens_seen": 0,
792
  "num_train_epochs": 5,
793
  "save_steps": 200,
794
- "total_flos": 4.275111434343137e+17,
795
  "train_batch_size": 1,
796
  "trial_name": null,
797
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02974652244561034,
5
  "eval_steps": 2000,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
707
  "learning_rate": 1.99982565504712e-05,
708
  "loss": 1.5135,
709
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  }
711
  ],
712
  "logging_steps": 200,
 
714
  "num_input_tokens_seen": 0,
715
  "num_train_epochs": 5,
716
  "save_steps": 200,
717
+ "total_flos": 3.6983529369430426e+17,
718
  "train_batch_size": 1,
719
  "trial_name": null,
720
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a7800ffb29813f89bbd542587a923598ba056a5186a554421f6b09a6d22b374
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da6c64555cac2ee9fc43605b134636b4610ed6cd244e07ab7cb4ce3c058548c
3
  size 4920