MohamedAhmedAE committed on
Commit
6cd612b
1 Parent(s): 3f107a6

Training in progress, step 25800

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0d94edbec8149dbe77b5714eec5b67014b9ae262c0869f1aa19043d8ecf1190
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e7569231f9d48775234e444dc3002609a2aaf23f9fbb03afc31dd08b174acd
3
  size 167832240
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "k_proj",
24
- "down_proj",
25
  "up_proj",
 
26
  "q_proj",
27
  "gate_proj",
28
- "o_proj",
29
- "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "o_proj",
24
  "k_proj",
 
25
  "up_proj",
26
+ "v_proj",
27
  "q_proj",
28
  "gate_proj",
29
+ "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0d94edbec8149dbe77b5714eec5b67014b9ae262c0869f1aa19043d8ecf1190
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c7f93eabe6ac3c54d66eaa201fd02227487a26f7778737a9f254ff462e973e4
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a14c1f59df5a9c6088d17c4ae86f9dd5081681dd4f24842cc845731cd5c6bd83
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73914a91d2c85155b01b949bf4bca1b47684ea4ff1764db4a597aed9516fd5ab
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d8aea77898e9b91f9eb081b5eb89090ec0b6f85c7c2b88fe32a844a809dee9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8c4f9076fe893e7a53debd026211a9b9066658d86f31864434230c495759f3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e12782ff1de906d4f9075c6686b1b4fec69203509c9a4e2f4c19898a0d5f4d8e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8114cada636c19d5007acc44352f3b0449c4b5ebb7cb27bcb702507137d58166
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07615109746076248,
5
  "eval_steps": 2000,
6
- "global_step": 25600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -826,83 +826,6 @@
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
829
- },
830
- {
831
- "epoch": 0.07,
832
- "grad_norm": 1.7328152656555176,
833
- "learning_rate": 1.9990292364225084e-05,
834
- "loss": 1.5166,
835
- "step": 23600
836
- },
837
- {
838
- "epoch": 0.07,
839
- "grad_norm": 1.3295519351959229,
840
- "learning_rate": 1.9990126996188935e-05,
841
- "loss": 1.5488,
842
- "step": 23800
843
- },
844
- {
845
- "epoch": 0.07,
846
- "grad_norm": 2.1913652420043945,
847
- "learning_rate": 1.998996023220988e-05,
848
- "loss": 1.5219,
849
- "step": 24000
850
- },
851
- {
852
- "epoch": 0.07,
853
- "grad_norm": 1.8065701723098755,
854
- "learning_rate": 1.998979207231122e-05,
855
- "loss": 1.5181,
856
- "step": 24200
857
- },
858
- {
859
- "epoch": 0.07,
860
- "grad_norm": 1.8158023357391357,
861
- "learning_rate": 1.998962336776768e-05,
862
- "loss": 1.5176,
863
- "step": 24400
864
- },
865
- {
866
- "epoch": 0.07,
867
- "grad_norm": 1.4990816116333008,
868
- "learning_rate": 1.9989452423079802e-05,
869
- "loss": 1.4998,
870
- "step": 24600
871
- },
872
- {
873
- "epoch": 0.07,
874
- "grad_norm": 1.655572533607483,
875
- "learning_rate": 1.9989280082543273e-05,
876
- "loss": 1.5426,
877
- "step": 24800
878
- },
879
- {
880
- "epoch": 0.07,
881
- "grad_norm": 1.9679639339447021,
882
- "learning_rate": 1.9989106346182187e-05,
883
- "loss": 1.5603,
884
- "step": 25000
885
- },
886
- {
887
- "epoch": 0.07,
888
- "grad_norm": 1.2155619859695435,
889
- "learning_rate": 1.9988931214020803e-05,
890
- "loss": 1.5368,
891
- "step": 25200
892
- },
893
- {
894
- "epoch": 0.08,
895
- "grad_norm": 1.3557419776916504,
896
- "learning_rate": 1.9988754686083607e-05,
897
- "loss": 1.531,
898
- "step": 25400
899
- },
900
- {
901
- "epoch": 0.08,
902
- "grad_norm": 1.433875322341919,
903
- "learning_rate": 1.998857676239526e-05,
904
- "loss": 1.5502,
905
- "step": 25600
906
  }
907
  ],
908
  "logging_steps": 200,
@@ -910,7 +833,7 @@
910
  "num_input_tokens_seen": 0,
911
  "num_train_epochs": 5,
912
  "save_steps": 200,
913
- "total_flos": 5.89264804322771e+17,
914
  "train_batch_size": 1,
915
  "trial_name": null,
916
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0696068625227282,
5
  "eval_steps": 2000,
6
+ "global_step": 23400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
  }
830
  ],
831
  "logging_steps": 200,
 
833
  "num_input_tokens_seen": 0,
834
  "num_train_epochs": 5,
835
  "save_steps": 200,
836
+ "total_flos": 4.745472228553851e+17,
837
  "train_batch_size": 1,
838
  "trial_name": null,
839
  "trial_params": null