MohamedAhmedAE committed on
Commit
3f107a6
1 Parent(s): 3a45916

Training in progress, step 25600, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "k_proj",
 
25
  "up_proj",
26
- "v_proj",
27
  "q_proj",
28
  "gate_proj",
29
- "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "k_proj",
24
+ "down_proj",
25
  "up_proj",
 
26
  "q_proj",
27
  "gate_proj",
28
+ "o_proj",
29
+ "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7f93eabe6ac3c54d66eaa201fd02227487a26f7778737a9f254ff462e973e4
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d94edbec8149dbe77b5714eec5b67014b9ae262c0869f1aa19043d8ecf1190
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73914a91d2c85155b01b949bf4bca1b47684ea4ff1764db4a597aed9516fd5ab
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a14c1f59df5a9c6088d17c4ae86f9dd5081681dd4f24842cc845731cd5c6bd83
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8c4f9076fe893e7a53debd026211a9b9066658d86f31864434230c495759f3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78d8aea77898e9b91f9eb081b5eb89090ec0b6f85c7c2b88fe32a844a809dee9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8114cada636c19d5007acc44352f3b0449c4b5ebb7cb27bcb702507137d58166
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e12782ff1de906d4f9075c6686b1b4fec69203509c9a4e2f4c19898a0d5f4d8e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0696068625227282,
5
  "eval_steps": 2000,
6
- "global_step": 23400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -826,6 +826,83 @@
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
  }
830
  ],
831
  "logging_steps": 200,
@@ -833,7 +910,7 @@
833
  "num_input_tokens_seen": 0,
834
  "num_train_epochs": 5,
835
  "save_steps": 200,
836
- "total_flos": 4.745472228553851e+17,
837
  "train_batch_size": 1,
838
  "trial_name": null,
839
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07615109746076248,
5
  "eval_steps": 2000,
6
+ "global_step": 25600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
829
+ },
830
+ {
831
+ "epoch": 0.07,
832
+ "grad_norm": 1.7328152656555176,
833
+ "learning_rate": 1.9990292364225084e-05,
834
+ "loss": 1.5166,
835
+ "step": 23600
836
+ },
837
+ {
838
+ "epoch": 0.07,
839
+ "grad_norm": 1.3295519351959229,
840
+ "learning_rate": 1.9990126996188935e-05,
841
+ "loss": 1.5488,
842
+ "step": 23800
843
+ },
844
+ {
845
+ "epoch": 0.07,
846
+ "grad_norm": 2.1913652420043945,
847
+ "learning_rate": 1.998996023220988e-05,
848
+ "loss": 1.5219,
849
+ "step": 24000
850
+ },
851
+ {
852
+ "epoch": 0.07,
853
+ "grad_norm": 1.8065701723098755,
854
+ "learning_rate": 1.998979207231122e-05,
855
+ "loss": 1.5181,
856
+ "step": 24200
857
+ },
858
+ {
859
+ "epoch": 0.07,
860
+ "grad_norm": 1.8158023357391357,
861
+ "learning_rate": 1.998962336776768e-05,
862
+ "loss": 1.5176,
863
+ "step": 24400
864
+ },
865
+ {
866
+ "epoch": 0.07,
867
+ "grad_norm": 1.4990816116333008,
868
+ "learning_rate": 1.9989452423079802e-05,
869
+ "loss": 1.4998,
870
+ "step": 24600
871
+ },
872
+ {
873
+ "epoch": 0.07,
874
+ "grad_norm": 1.655572533607483,
875
+ "learning_rate": 1.9989280082543273e-05,
876
+ "loss": 1.5426,
877
+ "step": 24800
878
+ },
879
+ {
880
+ "epoch": 0.07,
881
+ "grad_norm": 1.9679639339447021,
882
+ "learning_rate": 1.9989106346182187e-05,
883
+ "loss": 1.5603,
884
+ "step": 25000
885
+ },
886
+ {
887
+ "epoch": 0.07,
888
+ "grad_norm": 1.2155619859695435,
889
+ "learning_rate": 1.9988931214020803e-05,
890
+ "loss": 1.5368,
891
+ "step": 25200
892
+ },
893
+ {
894
+ "epoch": 0.08,
895
+ "grad_norm": 1.3557419776916504,
896
+ "learning_rate": 1.9988754686083607e-05,
897
+ "loss": 1.531,
898
+ "step": 25400
899
+ },
900
+ {
901
+ "epoch": 0.08,
902
+ "grad_norm": 1.433875322341919,
903
+ "learning_rate": 1.998857676239526e-05,
904
+ "loss": 1.5502,
905
+ "step": 25600
906
  }
907
  ],
908
  "logging_steps": 200,
 
910
  "num_input_tokens_seen": 0,
911
  "num_train_epochs": 5,
912
  "save_steps": 200,
913
+ "total_flos": 5.89264804322771e+17,
914
  "train_batch_size": 1,
915
  "trial_name": null,
916
  "trial_params": null