azherali committed
Commit e2811e0 · verified · 1 Parent(s): f5b65df

Training in progress, step 16000, checkpoint
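The files below carry everything needed to continue this run: the LoRA adapter weights plus the optimizer, LR scheduler, RNG, and grad-scaler state, with trainer_state.json tracking progress (global_step 16000, best F1 ≈ 0.982). A minimal, hypothetical sketch of resuming such a checkpoint with transformers and peft follows; the base model, LoRA settings, and toy dataset are illustrative assumptions and are not part of this commit, while the batch size, logging cadence, and save cadence mirror trainer_state.json.

from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

# Assumed base model; the adapter file is small (~3.5 MB), consistent with LoRA on CodeBERT.
BASE = "microsoft/codebert-base"
tok = AutoTokenizer.from_pretrained(BASE)
base = AutoModelForSequenceClassification.from_pretrained(BASE, num_labels=2)
model = get_peft_model(base, LoraConfig(task_type="SEQ_CLS", r=8, lora_alpha=16))

# Placeholder data; the real CodeGenDetect dataset is not part of this commit.
def encode(batch):
    return tok(batch["text"], truncation=True, padding="max_length", max_length=128)

train = Dataset.from_dict({"text": ["def f(): pass", "int main() {}"],
                           "label": [0, 1]}).map(encode, batched=True)

args = TrainingArguments(
    output_dir="CodeGenDetect-CodeBert_Lora",
    per_device_train_batch_size=16,  # matches "train_batch_size" in trainer_state.json
    logging_steps=100,               # matches "logging_steps"
    save_steps=4000,                 # checkpoint every 4000 steps, as in this run
)

trainer = Trainer(model=model, args=args, train_dataset=train)
# Restores the adapter weights plus optimizer.pt, scheduler.pt, rng_state.pth and
# scaler.pt, so training continues from step 16000 instead of restarting.
trainer.train(resume_from_checkpoint="CodeGenDetect-CodeBert_Lora/checkpoint-16000")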

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:22bd38c8f37046bd62018eb7378b2b711b978047511760c0db69629ca9b301ad
+ oid sha256:dd57384594eb425dd0bbabba65317a1b5777b6c5b289479078bfbc0a2b10c7cd
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:118eb527e3f3d95f6bbf2dc9c4c0579763f722fb5753842653cc84555c706dc2
+ oid sha256:6a90fca018f105f0c4de5bd49a9f37f48eb4343bbd82fa5c86a766904ed07780
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4ae035518883f7d805914f164f0380793c8a3934e907b1f7143d4918513187a8
+ oid sha256:60e040819bef00bdfb5631475cda110717053ec4d4c4c67d6781ad7edccde1fd
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2a1d6e49e1244c2136fab7f3074345ca5593ed632d4d0d6d129c6fe8e480d66c
+ oid sha256:9569b0daf1a454e36aca0fe6fc85ec984df4b90957450731328ceedec7505da8
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:645e796050c981f4589900a1facddd99613e38baf853710ef8a2a7fa1c39977d
+ oid sha256:8187ef3352672be0a48a06b17757282db7b3ef79dad63ef57d1187dc8f56fd82
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
  {
- "best_global_step": 12000,
- "best_metric": 0.97856556986665,
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-12000",
- "epoch": 0.384,
+ "best_global_step": 16000,
+ "best_metric": 0.9820134202589396,
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-16000",
+ "epoch": 0.512,
  "eval_steps": 4000,
- "global_step": 12000,
+ "global_step": 16000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -884,6 +884,298 @@
  "eval_samples_per_second": 121.628,
  "eval_steps_per_second": 7.602,
  "step": 12000
+ },
+ {
+ "epoch": 0.3872,
+ "grad_norm": 11.900330543518066,
+ "learning_rate": 1.851056179775281e-05,
+ "loss": 0.0958,
+ "step": 12100
+ },
+ {
+ "epoch": 0.3904,
+ "grad_norm": 18.242124557495117,
+ "learning_rate": 1.8497720706260034e-05,
+ "loss": 0.075,
+ "step": 12200
+ },
+ {
+ "epoch": 0.3936,
+ "grad_norm": 0.0790882408618927,
+ "learning_rate": 1.8484879614767256e-05,
+ "loss": 0.071,
+ "step": 12300
+ },
+ {
+ "epoch": 0.3968,
+ "grad_norm": 16.233280181884766,
+ "learning_rate": 1.847203852327448e-05,
+ "loss": 0.1109,
+ "step": 12400
+ },
+ {
+ "epoch": 0.4,
+ "grad_norm": 12.048758506774902,
+ "learning_rate": 1.8459197431781702e-05,
+ "loss": 0.0703,
+ "step": 12500
+ },
+ {
+ "epoch": 0.4032,
+ "grad_norm": 9.086562156677246,
+ "learning_rate": 1.8446356340288927e-05,
+ "loss": 0.0706,
+ "step": 12600
+ },
+ {
+ "epoch": 0.4064,
+ "grad_norm": 9.871477127075195,
+ "learning_rate": 1.843351524879615e-05,
+ "loss": 0.0745,
+ "step": 12700
+ },
+ {
+ "epoch": 0.4096,
+ "grad_norm": 0.7136130928993225,
+ "learning_rate": 1.8420674157303373e-05,
+ "loss": 0.0544,
+ "step": 12800
+ },
+ {
+ "epoch": 0.4128,
+ "grad_norm": 10.892882347106934,
+ "learning_rate": 1.8407833065810598e-05,
+ "loss": 0.1109,
+ "step": 12900
+ },
+ {
+ "epoch": 0.416,
+ "grad_norm": 1.3350844383239746,
+ "learning_rate": 1.839499197431782e-05,
+ "loss": 0.0898,
+ "step": 13000
+ },
+ {
+ "epoch": 0.4192,
+ "grad_norm": 11.215353012084961,
+ "learning_rate": 1.838215088282504e-05,
+ "loss": 0.0963,
+ "step": 13100
+ },
+ {
+ "epoch": 0.4224,
+ "grad_norm": 0.2309681475162506,
+ "learning_rate": 1.8369309791332262e-05,
+ "loss": 0.0785,
+ "step": 13200
+ },
+ {
+ "epoch": 0.4256,
+ "grad_norm": 0.0822586640715599,
+ "learning_rate": 1.8356468699839487e-05,
+ "loss": 0.0736,
+ "step": 13300
+ },
+ {
+ "epoch": 0.4288,
+ "grad_norm": 0.6296452283859253,
+ "learning_rate": 1.834362760834671e-05,
+ "loss": 0.1064,
+ "step": 13400
+ },
+ {
+ "epoch": 0.432,
+ "grad_norm": 7.989764213562012,
+ "learning_rate": 1.8330786516853933e-05,
+ "loss": 0.0885,
+ "step": 13500
+ },
+ {
+ "epoch": 0.4352,
+ "grad_norm": 0.04016461223363876,
+ "learning_rate": 1.8317945425361158e-05,
+ "loss": 0.0574,
+ "step": 13600
+ },
+ {
+ "epoch": 0.4384,
+ "grad_norm": 0.03219222649931908,
+ "learning_rate": 1.830510433386838e-05,
+ "loss": 0.0742,
+ "step": 13700
+ },
+ {
+ "epoch": 0.4416,
+ "grad_norm": 11.071674346923828,
+ "learning_rate": 1.8292263242375605e-05,
+ "loss": 0.0968,
+ "step": 13800
+ },
+ {
+ "epoch": 0.4448,
+ "grad_norm": 22.920804977416992,
+ "learning_rate": 1.8279422150882826e-05,
+ "loss": 0.0782,
+ "step": 13900
+ },
+ {
+ "epoch": 0.448,
+ "grad_norm": 0.05693759024143219,
+ "learning_rate": 1.826658105939005e-05,
+ "loss": 0.0538,
+ "step": 14000
+ },
+ {
+ "epoch": 0.4512,
+ "grad_norm": 0.08659256994724274,
+ "learning_rate": 1.8253739967897272e-05,
+ "loss": 0.0699,
+ "step": 14100
+ },
+ {
+ "epoch": 0.4544,
+ "grad_norm": 3.1322948932647705,
+ "learning_rate": 1.8240898876404497e-05,
+ "loss": 0.0841,
+ "step": 14200
+ },
+ {
+ "epoch": 0.4576,
+ "grad_norm": 0.025594089180231094,
+ "learning_rate": 1.822805778491172e-05,
+ "loss": 0.1108,
+ "step": 14300
+ },
+ {
+ "epoch": 0.4608,
+ "grad_norm": 0.5694848299026489,
+ "learning_rate": 1.8215216693418943e-05,
+ "loss": 0.0705,
+ "step": 14400
+ },
+ {
+ "epoch": 0.464,
+ "grad_norm": 5.643801212310791,
+ "learning_rate": 1.8202375601926165e-05,
+ "loss": 0.0773,
+ "step": 14500
+ },
+ {
+ "epoch": 0.4672,
+ "grad_norm": 1.6325165033340454,
+ "learning_rate": 1.818953451043339e-05,
+ "loss": 0.1244,
+ "step": 14600
+ },
+ {
+ "epoch": 0.4704,
+ "grad_norm": 0.04731294512748718,
+ "learning_rate": 1.817669341894061e-05,
+ "loss": 0.0523,
+ "step": 14700
+ },
+ {
+ "epoch": 0.4736,
+ "grad_norm": 9.385772705078125,
+ "learning_rate": 1.8163852327447836e-05,
+ "loss": 0.0739,
+ "step": 14800
+ },
+ {
+ "epoch": 0.4768,
+ "grad_norm": 0.04752274602651596,
+ "learning_rate": 1.8151011235955057e-05,
+ "loss": 0.0636,
+ "step": 14900
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 0.4523526430130005,
+ "learning_rate": 1.8138170144462282e-05,
+ "loss": 0.0907,
+ "step": 15000
+ },
+ {
+ "epoch": 0.4832,
+ "grad_norm": 0.10660147666931152,
+ "learning_rate": 1.8125329052969504e-05,
+ "loss": 0.1093,
+ "step": 15100
+ },
+ {
+ "epoch": 0.4864,
+ "grad_norm": 0.024508710950613022,
+ "learning_rate": 1.811248796147673e-05,
+ "loss": 0.0562,
+ "step": 15200
+ },
+ {
+ "epoch": 0.4896,
+ "grad_norm": 4.832937240600586,
+ "learning_rate": 1.809964686998395e-05,
+ "loss": 0.0694,
+ "step": 15300
+ },
+ {
+ "epoch": 0.4928,
+ "grad_norm": 12.76455020904541,
+ "learning_rate": 1.808680577849117e-05,
+ "loss": 0.0525,
+ "step": 15400
+ },
+ {
+ "epoch": 0.496,
+ "grad_norm": 0.244754359126091,
+ "learning_rate": 1.8073964686998396e-05,
+ "loss": 0.0632,
+ "step": 15500
+ },
+ {
+ "epoch": 0.4992,
+ "grad_norm": 19.315397262573242,
+ "learning_rate": 1.8061123595505618e-05,
+ "loss": 0.0794,
+ "step": 15600
+ },
+ {
+ "epoch": 0.5024,
+ "grad_norm": 0.05077612027525902,
+ "learning_rate": 1.8048282504012842e-05,
+ "loss": 0.0848,
+ "step": 15700
+ },
+ {
+ "epoch": 0.5056,
+ "grad_norm": 0.33186858892440796,
+ "learning_rate": 1.8035441412520064e-05,
+ "loss": 0.0894,
+ "step": 15800
+ },
+ {
+ "epoch": 0.5088,
+ "grad_norm": 0.26919984817504883,
+ "learning_rate": 1.802260032102729e-05,
+ "loss": 0.0801,
+ "step": 15900
+ },
+ {
+ "epoch": 0.512,
+ "grad_norm": 3.566136121749878,
+ "learning_rate": 1.800975922953451e-05,
+ "loss": 0.0754,
+ "step": 16000
+ },
+ {
+ "epoch": 0.512,
+ "eval_accuracy": 0.98201,
+ "eval_f1": 0.9820134202589396,
+ "eval_loss": 0.07791993767023087,
+ "eval_precision": 0.9820579125315673,
+ "eval_recall": 0.98201,
+ "eval_runtime": 823.0886,
+ "eval_samples_per_second": 121.494,
+ "eval_steps_per_second": 7.593,
+ "step": 16000
  }
  ],
  "logging_steps": 100,
@@ -912,7 +1204,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.096626165108723e+16,
+ "total_flos": 6.79459660326359e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null