MohamedAhmedAE committed on
Commit
9fb38bb
1 Parent(s): 879581f

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "q_proj",
24
- "o_proj",
25
- "k_proj",
26
  "gate_proj",
27
  "down_proj",
28
  "up_proj",
29
- "v_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
  "q_proj",
 
 
25
  "gate_proj",
26
  "down_proj",
27
  "up_proj",
28
+ "k_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30377e3553033d932e69daa7064c259e8a7335e5cc0b78f6a1f4f648bae4b97e
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5d4a8e80692092cbb6ea69a8cd6902d7306436bd2ef4db3d268beb6d254345
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727033a6eb5dc35337334fe86877b4a8995db5404205b07aeda22261d1e1f288
3
  size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed3c0ef76d1963f5e466fac078e7bdfa634f48c1b91aaad0c42e6ae39ac1315
3
  size 85736914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b7de94d4e3bf0d0cab47bcde24f41cd7eb97ed0885fd609023fab02c6a16228
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adc80d060b3614ce93cf171cfa62b9a17755ba93a67b6a802cd87b68b0a907bb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf318569a9b4769e81e4790eccd85435a6e6935dc6ee58b1ecc55958c1b71bd0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ba296f12eae4fc0511f08bee2c4fc9bf546a7ddf44e3e343ade2d2cae2ed71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.028259196323329825,
5
  "eval_steps": 2000,
6
- "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -672,6 +672,41 @@
672
  "learning_rate": 1.9998426349986698e-05,
673
  "loss": 1.5367,
674
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  }
676
  ],
677
  "logging_steps": 200,
@@ -679,7 +714,7 @@
679
  "num_input_tokens_seen": 0,
680
  "num_train_epochs": 5,
681
  "save_steps": 200,
682
- "total_flos": 3.4393054952298086e+17,
683
  "train_batch_size": 1,
684
  "trial_name": null,
685
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02974652244561034,
5
  "eval_steps": 2000,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
672
  "learning_rate": 1.9998426349986698e-05,
673
  "loss": 1.5367,
674
  "step": 19000
675
+ },
676
+ {
677
+ "epoch": 0.03,
678
+ "grad_norm": 3.7834692001342773,
679
+ "learning_rate": 1.9998393186348416e-05,
680
+ "loss": 1.5619,
681
+ "step": 19200
682
+ },
683
+ {
684
+ "epoch": 0.03,
685
+ "grad_norm": 2.51110577583313,
686
+ "learning_rate": 1.9998359507659452e-05,
687
+ "loss": 1.5688,
688
+ "step": 19400
689
+ },
690
+ {
691
+ "epoch": 0.03,
692
+ "grad_norm": 2.6746368408203125,
693
+ "learning_rate": 1.999832547969925e-05,
694
+ "loss": 1.5216,
695
+ "step": 19600
696
+ },
697
+ {
698
+ "epoch": 0.03,
699
+ "grad_norm": 2.6558778285980225,
700
+ "learning_rate": 1.9998291102469e-05,
701
+ "loss": 1.5335,
702
+ "step": 19800
703
+ },
704
+ {
705
+ "epoch": 0.03,
706
+ "grad_norm": 2.4056808948516846,
707
+ "learning_rate": 1.99982565504712e-05,
708
+ "loss": 1.5135,
709
+ "step": 20000
710
  }
711
  ],
712
  "logging_steps": 200,
 
714
  "num_input_tokens_seen": 0,
715
  "num_train_epochs": 5,
716
  "save_steps": 200,
717
+ "total_flos": 3.6983529369430426e+17,
718
  "train_batch_size": 1,
719
  "trial_name": null,
720
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc9e75826f834526adf57daa4ee7a58f88bf2ec9679f7599af2037d01589eb4f
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da6c64555cac2ee9fc43605b134636b4610ed6cd244e07ab7cb4ce3c058548c
3
  size 4920