MohamedAhmedAE commited on
Commit
faf38e1
1 Parent(s): 6cd612b

Training in progress, step 25800, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "k_proj",
 
25
  "up_proj",
26
- "v_proj",
27
  "q_proj",
28
  "gate_proj",
29
- "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "k_proj",
24
+ "down_proj",
25
  "up_proj",
 
26
  "q_proj",
27
  "gate_proj",
28
+ "o_proj",
29
+ "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7f93eabe6ac3c54d66eaa201fd02227487a26f7778737a9f254ff462e973e4
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e7569231f9d48775234e444dc3002609a2aaf23f9fbb03afc31dd08b174acd
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73914a91d2c85155b01b949bf4bca1b47684ea4ff1764db4a597aed9516fd5ab
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1795d0f5907365f50ee8bf592e588c51cf0c1336607a8958053b6df1866c85ef
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8c4f9076fe893e7a53debd026211a9b9066658d86f31864434230c495759f3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e0be0fbdc67baa64bde42b3f62fb0cbfa6b616dea815616465446e745a61cd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8114cada636c19d5007acc44352f3b0449c4b5ebb7cb27bcb702507137d58166
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afd4eb6473181bbf6da6b1613c063360450d1f8ec75384d51547bd12664e8b3a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0696068625227282,
5
  "eval_steps": 2000,
6
- "global_step": 23400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -826,6 +826,90 @@
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
  }
830
  ],
831
  "logging_steps": 200,
@@ -833,7 +917,7 @@
833
  "num_input_tokens_seen": 0,
834
  "num_train_epochs": 5,
835
  "save_steps": 200,
836
- "total_flos": 4.745472228553851e+17,
837
  "train_batch_size": 1,
838
  "trial_name": null,
839
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07674602790967469,
5
  "eval_steps": 2000,
6
+ "global_step": 25800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
826
  "learning_rate": 1.999045551990737e-05,
827
  "loss": 1.536,
828
  "step": 23400
829
+ },
830
+ {
831
+ "epoch": 0.07,
832
+ "grad_norm": 1.7328152656555176,
833
+ "learning_rate": 1.9990292364225084e-05,
834
+ "loss": 1.5166,
835
+ "step": 23600
836
+ },
837
+ {
838
+ "epoch": 0.07,
839
+ "grad_norm": 1.3295519351959229,
840
+ "learning_rate": 1.9990126996188935e-05,
841
+ "loss": 1.5488,
842
+ "step": 23800
843
+ },
844
+ {
845
+ "epoch": 0.07,
846
+ "grad_norm": 2.1913652420043945,
847
+ "learning_rate": 1.998996023220988e-05,
848
+ "loss": 1.5219,
849
+ "step": 24000
850
+ },
851
+ {
852
+ "epoch": 0.07,
853
+ "grad_norm": 1.8065701723098755,
854
+ "learning_rate": 1.998979207231122e-05,
855
+ "loss": 1.5181,
856
+ "step": 24200
857
+ },
858
+ {
859
+ "epoch": 0.07,
860
+ "grad_norm": 1.8158023357391357,
861
+ "learning_rate": 1.998962336776768e-05,
862
+ "loss": 1.5176,
863
+ "step": 24400
864
+ },
865
+ {
866
+ "epoch": 0.07,
867
+ "grad_norm": 1.4990816116333008,
868
+ "learning_rate": 1.9989452423079802e-05,
869
+ "loss": 1.4998,
870
+ "step": 24600
871
+ },
872
+ {
873
+ "epoch": 0.07,
874
+ "grad_norm": 1.655572533607483,
875
+ "learning_rate": 1.9989280082543273e-05,
876
+ "loss": 1.5426,
877
+ "step": 24800
878
+ },
879
+ {
880
+ "epoch": 0.07,
881
+ "grad_norm": 1.9679639339447021,
882
+ "learning_rate": 1.9989106346182187e-05,
883
+ "loss": 1.5603,
884
+ "step": 25000
885
+ },
886
+ {
887
+ "epoch": 0.07,
888
+ "grad_norm": 1.2155619859695435,
889
+ "learning_rate": 1.9988931214020803e-05,
890
+ "loss": 1.5368,
891
+ "step": 25200
892
+ },
893
+ {
894
+ "epoch": 0.08,
895
+ "grad_norm": 1.3557419776916504,
896
+ "learning_rate": 1.9988754686083607e-05,
897
+ "loss": 1.531,
898
+ "step": 25400
899
+ },
900
+ {
901
+ "epoch": 0.08,
902
+ "grad_norm": 1.433875322341919,
903
+ "learning_rate": 1.998857676239526e-05,
904
+ "loss": 1.5502,
905
+ "step": 25600
906
+ },
907
+ {
908
+ "epoch": 0.08,
909
+ "grad_norm": 1.2107449769973755,
910
+ "learning_rate": 1.998839744298062e-05,
911
+ "loss": 1.5509,
912
+ "step": 25800
913
  }
914
  ],
915
  "logging_steps": 200,
 
917
  "num_input_tokens_seen": 0,
918
  "num_train_epochs": 5,
919
  "save_steps": 200,
920
+ "total_flos": 5.996940170325443e+17,
921
  "train_batch_size": 1,
922
  "trial_name": null,
923
  "trial_params": null