Federic committed on
Commit
967f59d
1 Parent(s): 80e028a

Training in progress, step 175, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3d1b036ff3cb5ac02c5acf6005e92e48b05c61587c49579854c2529afea41f
3
  size 838904832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd8ec9656022c898911f6dde9c5bdf31b5301c7a36712dae2f6856e081f4e0e8
3
  size 838904832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59b42bcd4eadba12555b9589f81e0afdd6e7d134c2b0fbdedc77c7bced004ef9
3
  size 420633876
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f267c5c997b91bae465e5945e55f2e9cbce1c7604e3b32a103e233640ddbdd3
3
  size 420633876
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:145f1a4da4a46bf2b53df9260772ce418443341bad1fbda0361575d00cb23c5d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91dd0dd4a17e2da0a18b127f9e2d433fb98198a1880740f0dd81deeb9cdafd3e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d31dc31a119769737d72f3df4c8cdf99522596cafc12bf2eea05a4ff374f599c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17c000dd3ad474c8365253b4464489310fed0d13c745556138280174b380deb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6,
5
  "eval_steps": 500,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -907,13 +907,163 @@
907
  "learning_rate": 0.0002,
908
  "loss": 0.3719,
909
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 250,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
- "total_flos": 1.78966730207232e+16,
917
  "trial_name": null,
918
  "trial_params": null
919
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7,
5
  "eval_steps": 500,
6
+ "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
907
  "learning_rate": 0.0002,
908
  "loss": 0.3719,
909
  "step": 150
910
+ },
911
+ {
912
+ "epoch": 0.6,
913
+ "learning_rate": 0.0002,
914
+ "loss": 0.7191,
915
+ "step": 151
916
+ },
917
+ {
918
+ "epoch": 0.61,
919
+ "learning_rate": 0.0002,
920
+ "loss": 0.6457,
921
+ "step": 152
922
+ },
923
+ {
924
+ "epoch": 0.61,
925
+ "learning_rate": 0.0002,
926
+ "loss": 0.7385,
927
+ "step": 153
928
+ },
929
+ {
930
+ "epoch": 0.62,
931
+ "learning_rate": 0.0002,
932
+ "loss": 0.5479,
933
+ "step": 154
934
+ },
935
+ {
936
+ "epoch": 0.62,
937
+ "learning_rate": 0.0002,
938
+ "loss": 0.6008,
939
+ "step": 155
940
+ },
941
+ {
942
+ "epoch": 0.62,
943
+ "learning_rate": 0.0002,
944
+ "loss": 0.669,
945
+ "step": 156
946
+ },
947
+ {
948
+ "epoch": 0.63,
949
+ "learning_rate": 0.0002,
950
+ "loss": 0.5624,
951
+ "step": 157
952
+ },
953
+ {
954
+ "epoch": 0.63,
955
+ "learning_rate": 0.0002,
956
+ "loss": 0.669,
957
+ "step": 158
958
+ },
959
+ {
960
+ "epoch": 0.64,
961
+ "learning_rate": 0.0002,
962
+ "loss": 0.6167,
963
+ "step": 159
964
+ },
965
+ {
966
+ "epoch": 0.64,
967
+ "learning_rate": 0.0002,
968
+ "loss": 0.5228,
969
+ "step": 160
970
+ },
971
+ {
972
+ "epoch": 0.64,
973
+ "learning_rate": 0.0002,
974
+ "loss": 0.5331,
975
+ "step": 161
976
+ },
977
+ {
978
+ "epoch": 0.65,
979
+ "learning_rate": 0.0002,
980
+ "loss": 0.5176,
981
+ "step": 162
982
+ },
983
+ {
984
+ "epoch": 0.65,
985
+ "learning_rate": 0.0002,
986
+ "loss": 0.5237,
987
+ "step": 163
988
+ },
989
+ {
990
+ "epoch": 0.66,
991
+ "learning_rate": 0.0002,
992
+ "loss": 0.5622,
993
+ "step": 164
994
+ },
995
+ {
996
+ "epoch": 0.66,
997
+ "learning_rate": 0.0002,
998
+ "loss": 0.5641,
999
+ "step": 165
1000
+ },
1001
+ {
1002
+ "epoch": 0.66,
1003
+ "learning_rate": 0.0002,
1004
+ "loss": 0.5955,
1005
+ "step": 166
1006
+ },
1007
+ {
1008
+ "epoch": 0.67,
1009
+ "learning_rate": 0.0002,
1010
+ "loss": 0.5265,
1011
+ "step": 167
1012
+ },
1013
+ {
1014
+ "epoch": 0.67,
1015
+ "learning_rate": 0.0002,
1016
+ "loss": 0.5471,
1017
+ "step": 168
1018
+ },
1019
+ {
1020
+ "epoch": 0.68,
1021
+ "learning_rate": 0.0002,
1022
+ "loss": 0.5106,
1023
+ "step": 169
1024
+ },
1025
+ {
1026
+ "epoch": 0.68,
1027
+ "learning_rate": 0.0002,
1028
+ "loss": 0.54,
1029
+ "step": 170
1030
+ },
1031
+ {
1032
+ "epoch": 0.68,
1033
+ "learning_rate": 0.0002,
1034
+ "loss": 0.5257,
1035
+ "step": 171
1036
+ },
1037
+ {
1038
+ "epoch": 0.69,
1039
+ "learning_rate": 0.0002,
1040
+ "loss": 0.4932,
1041
+ "step": 172
1042
+ },
1043
+ {
1044
+ "epoch": 0.69,
1045
+ "learning_rate": 0.0002,
1046
+ "loss": 0.5014,
1047
+ "step": 173
1048
+ },
1049
+ {
1050
+ "epoch": 0.7,
1051
+ "learning_rate": 0.0002,
1052
+ "loss": 0.5609,
1053
+ "step": 174
1054
+ },
1055
+ {
1056
+ "epoch": 0.7,
1057
+ "learning_rate": 0.0002,
1058
+ "loss": 0.4734,
1059
+ "step": 175
1060
  }
1061
  ],
1062
  "logging_steps": 1,
1063
  "max_steps": 250,
1064
  "num_train_epochs": 1,
1065
  "save_steps": 25,
1066
+ "total_flos": 2.123744801427456e+16,
1067
  "trial_name": null,
1068
  "trial_params": null
1069
  }