LoMicha commited on
Commit
e5836e4
1 Parent(s): 147e5f9

Training in progress, step 1716, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0930d6e64875820c22c7cffca09ca1acaf6cdcc1e7968d0c5a856968a87824e8
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12fb4cb11627ad284c1d37770a6aacfa220bc7356007fd91be56ae9b13cfa7e1
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b9d593632762fa95cd51142f1adb4c560e59f1f9d92ecb7de05d6485c887205
3
  size 81735892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7084c316ee634e4748f581b2bd6c1841fa446bb7314925c6856b0c688d9d59
3
  size 81735892
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b283d316b0c499174401fc8457651f1fb183c6003c46a4d25e29dfecd151147
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfdfe6864f5def6fb115e8ca14ab15e350070dafd726a524cb7ea2d2792031bc
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a6797f0f81e1d80bc4d2d6295ad3c421b4b433370ca9e0c209b11267f3ef64f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddc0a7b6e0502913c27e2140efcb7b2ae87f20e67eabbf1a4c9eb14a1147911a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bd17fa23f67ef7fbf6e377f7e0c23474bf385755bb96f63949a2752039f1de4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3972fa5d27ce600e42d2f7fa59c3c380c3f284988e6b7b9d744cfc80bdadb18e
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:212929e3bfad92319ef54b8b509922f96991c6c7d7791e9983b6f58b96c35aff
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31938e2b87878021814bafc01027edb09402ab03f9b4018907cc3c40a6a45630
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:671918de7ffc87d6187292033f79bb1cacaa6a7d5996a986d5989df4cdad43d1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aa591540982b0ed0561ae043be4daaa0300d3947c5a8b6265d20fe06871fc8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9906759906759907,
5
  "eval_steps": 500,
6
- "global_step": 1700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11907,6 +11907,118 @@
11907
  "learning_rate": 9.324009324009324e-06,
11908
  "loss": 0.003,
11909
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11910
  }
11911
  ],
11912
  "logging_steps": 1,
@@ -11921,12 +12033,12 @@
11921
  "should_evaluate": false,
11922
  "should_log": false,
11923
  "should_save": true,
11924
- "should_training_stop": false
11925
  },
11926
  "attributes": {}
11927
  }
11928
  },
11929
- "total_flos": 7.109695330572042e+19,
11930
  "train_batch_size": 4,
11931
  "trial_name": null,
11932
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1716,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11907
  "learning_rate": 9.324009324009324e-06,
11908
  "loss": 0.003,
11909
  "step": 1700
11910
+ },
11911
+ {
11912
+ "epoch": 0.9912587412587412,
11913
+ "grad_norm": 0.0011362357763573527,
11914
+ "learning_rate": 8.741258741258741e-06,
11915
+ "loss": 0.0032,
11916
+ "step": 1701
11917
+ },
11918
+ {
11919
+ "epoch": 0.9918414918414918,
11920
+ "grad_norm": 0.0010037495521828532,
11921
+ "learning_rate": 8.158508158508159e-06,
11922
+ "loss": 0.0033,
11923
+ "step": 1702
11924
+ },
11925
+ {
11926
+ "epoch": 0.9924242424242424,
11927
+ "grad_norm": 0.0010420752223581076,
11928
+ "learning_rate": 7.5757575757575764e-06,
11929
+ "loss": 0.004,
11930
+ "step": 1703
11931
+ },
11932
+ {
11933
+ "epoch": 0.993006993006993,
11934
+ "grad_norm": 0.0010835586581379175,
11935
+ "learning_rate": 6.993006993006993e-06,
11936
+ "loss": 0.0033,
11937
+ "step": 1704
11938
+ },
11939
+ {
11940
+ "epoch": 0.9935897435897436,
11941
+ "grad_norm": 0.0009612834546715021,
11942
+ "learning_rate": 6.41025641025641e-06,
11943
+ "loss": 0.0046,
11944
+ "step": 1705
11945
+ },
11946
+ {
11947
+ "epoch": 0.9941724941724942,
11948
+ "grad_norm": 0.0007791322423145175,
11949
+ "learning_rate": 5.827505827505828e-06,
11950
+ "loss": 0.003,
11951
+ "step": 1706
11952
+ },
11953
+ {
11954
+ "epoch": 0.9947552447552448,
11955
+ "grad_norm": 0.0017918848898261786,
11956
+ "learning_rate": 5.244755244755245e-06,
11957
+ "loss": 0.0038,
11958
+ "step": 1707
11959
+ },
11960
+ {
11961
+ "epoch": 0.9953379953379954,
11962
+ "grad_norm": 0.0009001931175589561,
11963
+ "learning_rate": 4.662004662004662e-06,
11964
+ "loss": 0.0037,
11965
+ "step": 1708
11966
+ },
11967
+ {
11968
+ "epoch": 0.995920745920746,
11969
+ "grad_norm": 0.0008824478718452156,
11970
+ "learning_rate": 4.079254079254079e-06,
11971
+ "loss": 0.004,
11972
+ "step": 1709
11973
+ },
11974
+ {
11975
+ "epoch": 0.9965034965034965,
11976
+ "grad_norm": 0.0010619634995236993,
11977
+ "learning_rate": 3.4965034965034966e-06,
11978
+ "loss": 0.003,
11979
+ "step": 1710
11980
+ },
11981
+ {
11982
+ "epoch": 0.997086247086247,
11983
+ "grad_norm": 0.0007909830892458558,
11984
+ "learning_rate": 2.913752913752914e-06,
11985
+ "loss": 0.0023,
11986
+ "step": 1711
11987
+ },
11988
+ {
11989
+ "epoch": 0.9976689976689976,
11990
+ "grad_norm": 0.0019757202826440334,
11991
+ "learning_rate": 2.331002331002331e-06,
11992
+ "loss": 0.0065,
11993
+ "step": 1712
11994
+ },
11995
+ {
11996
+ "epoch": 0.9982517482517482,
11997
+ "grad_norm": 0.0008470152388326824,
11998
+ "learning_rate": 1.7482517482517483e-06,
11999
+ "loss": 0.0027,
12000
+ "step": 1713
12001
+ },
12002
+ {
12003
+ "epoch": 0.9988344988344988,
12004
+ "grad_norm": 0.0009145813528448343,
12005
+ "learning_rate": 1.1655011655011655e-06,
12006
+ "loss": 0.0035,
12007
+ "step": 1714
12008
+ },
12009
+ {
12010
+ "epoch": 0.9994172494172494,
12011
+ "grad_norm": 0.0007738119456917048,
12012
+ "learning_rate": 5.827505827505827e-07,
12013
+ "loss": 0.0038,
12014
+ "step": 1715
12015
+ },
12016
+ {
12017
+ "epoch": 1.0,
12018
+ "grad_norm": 0.0009543611668050289,
12019
+ "learning_rate": 0.0,
12020
+ "loss": 0.0042,
12021
+ "step": 1716
12022
  }
12023
  ],
12024
  "logging_steps": 1,
 
12033
  "should_evaluate": false,
12034
  "should_log": false,
12035
  "should_save": true,
12036
+ "should_training_stop": true
12037
  },
12038
  "attributes": {}
12039
  }
12040
  },
12041
+ "total_flos": 7.176610110153897e+19,
12042
  "train_batch_size": 4,
12043
  "trial_name": null,
12044
  "trial_params": null