Plim committed on
Commit
495992b
•
1 Parent(s): 39a2094

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 4.0,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 982.1257,
5
  "eval_samples": 16021,
6
- "eval_samples_per_second": 16.313,
7
- "eval_steps_per_second": 1.02,
8
  "eval_wer": 0.2936861897030796,
9
- "train_loss": 0.9901066277552089,
10
- "train_runtime": 182144.7146,
11
  "train_samples": 446485,
12
- "train_samples_per_second": 9.805,
13
- "train_steps_per_second": 0.077
14
  }
1
  {
2
+ "epoch": 6.0,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 970.1291,
5
  "eval_samples": 16021,
6
+ "eval_samples_per_second": 16.514,
7
+ "eval_steps_per_second": 1.033,
8
  "eval_wer": 0.2936861897030796,
9
+ "train_loss": 0.30566064197718185,
10
+ "train_runtime": 111829.698,
11
  "train_samples": 446485,
12
+ "train_samples_per_second": 23.955,
13
+ "train_steps_per_second": 0.187
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 4.0,
3
  "eval_loss": Infinity,
4
- "eval_runtime": 982.1257,
5
  "eval_samples": 16021,
6
- "eval_samples_per_second": 16.313,
7
- "eval_steps_per_second": 1.02,
8
  "eval_wer": 0.2936861897030796
9
  }
1
  {
2
+ "epoch": 6.0,
3
  "eval_loss": Infinity,
4
+ "eval_runtime": 970.1291,
5
  "eval_samples": 16021,
6
+ "eval_samples_per_second": 16.514,
7
+ "eval_steps_per_second": 1.033,
8
  "eval_wer": 0.2936861897030796
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "train_loss": 0.9901066277552089,
4
- "train_runtime": 182144.7146,
5
  "train_samples": 446485,
6
- "train_samples_per_second": 9.805,
7
- "train_steps_per_second": 0.077
8
  }
1
  {
2
+ "epoch": 6.0,
3
+ "train_loss": 0.30566064197718185,
4
+ "train_runtime": 111829.698,
5
  "train_samples": 446485,
6
+ "train_samples_per_second": 23.955,
7
+ "train_steps_per_second": 0.187
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": Infinity,
3
  "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 3.999928330824912,
5
- "global_step": 13952,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -906,71 +906,554 @@
906
  },
907
  {
908
  "epoch": 3.76,
909
- "learning_rate": 5.390311244979919e-06,
910
- "loss": 0.7832,
911
  "step": 13100
912
  },
913
  {
914
  "epoch": 3.78,
915
- "learning_rate": 4.762801204819277e-06,
916
- "loss": 0.7755,
917
  "step": 13200
918
  },
919
  {
920
  "epoch": 3.81,
921
- "learning_rate": 4.135291164658634e-06,
922
- "loss": 0.7608,
923
  "step": 13300
924
  },
925
  {
926
  "epoch": 3.84,
927
- "learning_rate": 3.514056224899598e-06,
928
- "loss": 0.7738,
929
  "step": 13400
930
  },
931
  {
932
  "epoch": 3.87,
933
- "learning_rate": 2.8865461847389554e-06,
934
- "loss": 0.7732,
935
  "step": 13500
936
  },
937
  {
938
  "epoch": 3.9,
939
- "learning_rate": 2.259036144578313e-06,
940
- "loss": 0.7756,
941
  "step": 13600
942
  },
943
  {
944
  "epoch": 3.93,
945
- "learning_rate": 1.637801204819277e-06,
946
- "loss": 0.7675,
947
  "step": 13700
948
  },
949
  {
950
  "epoch": 3.96,
951
- "learning_rate": 1.0102911646586344e-06,
952
- "loss": 0.7653,
953
  "step": 13800
954
  },
955
  {
956
  "epoch": 3.99,
957
- "learning_rate": 3.8278112449799194e-07,
958
- "loss": 0.7627,
959
  "step": 13900
960
  },
961
  {
962
- "epoch": 4.0,
963
- "step": 13952,
964
- "total_flos": 8.511794559686228e+20,
965
- "train_loss": 0.9901066277552089,
966
- "train_runtime": 182144.7146,
967
- "train_samples_per_second": 9.805,
968
- "train_steps_per_second": 0.077
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
  }
970
  ],
971
- "max_steps": 13952,
972
- "num_train_epochs": 4,
973
- "total_flos": 8.511794559686228e+20,
974
  "trial_name": null,
975
  "trial_params": null
976
  }
1
  {
2
  "best_metric": Infinity,
3
  "best_model_checkpoint": "./checkpoint-1000",
4
+ "epoch": 5.999928330824912,
5
+ "global_step": 20928,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
906
  },
907
  {
908
  "epoch": 3.76,
909
+ "learning_rate": 3.1045276838546063e-05,
910
+ "loss": 0.805,
911
  "step": 13100
912
  },
913
  {
914
  "epoch": 3.78,
915
+ "learning_rate": 3.0649038461538456e-05,
916
+ "loss": 0.816,
917
  "step": 13200
918
  },
919
  {
920
  "epoch": 3.81,
921
+ "learning_rate": 3.025280008453085e-05,
922
+ "loss": 0.8112,
923
  "step": 13300
924
  },
925
  {
926
  "epoch": 3.84,
927
+ "learning_rate": 2.9860524091293318e-05,
928
+ "loss": 0.8274,
929
  "step": 13400
930
  },
931
  {
932
  "epoch": 3.87,
933
+ "learning_rate": 2.946428571428571e-05,
934
+ "loss": 0.8249,
935
  "step": 13500
936
  },
937
  {
938
  "epoch": 3.9,
939
+ "learning_rate": 2.9068047337278103e-05,
940
+ "loss": 0.8276,
941
  "step": 13600
942
  },
943
  {
944
  "epoch": 3.93,
945
+ "learning_rate": 2.8675771344040573e-05,
946
+ "loss": 0.8227,
947
  "step": 13700
948
  },
949
  {
950
  "epoch": 3.96,
951
+ "learning_rate": 2.8279532967032966e-05,
952
+ "loss": 0.8246,
953
  "step": 13800
954
  },
955
  {
956
  "epoch": 3.99,
957
+ "learning_rate": 2.7883294590025355e-05,
958
+ "loss": 0.8198,
959
  "step": 13900
960
  },
961
  {
962
+ "epoch": 4.01,
963
+ "learning_rate": 2.748705621301775e-05,
964
+ "loss": 0.8372,
965
+ "step": 14000
966
+ },
967
+ {
968
+ "epoch": 4.01,
969
+ "eval_loss": Infinity,
970
+ "eval_runtime": 973.1623,
971
+ "eval_samples_per_second": 16.463,
972
+ "eval_steps_per_second": 1.03,
973
+ "eval_wer": 0.19336725853959047,
974
+ "step": 14000
975
+ },
976
+ {
977
+ "epoch": 4.04,
978
+ "learning_rate": 2.7090817836010143e-05,
979
+ "loss": 0.8228,
980
+ "step": 14100
981
+ },
982
+ {
983
+ "epoch": 4.07,
984
+ "learning_rate": 2.6694579459002535e-05,
985
+ "loss": 0.8197,
986
+ "step": 14200
987
+ },
988
+ {
989
+ "epoch": 4.1,
990
+ "learning_rate": 2.6302303465765005e-05,
991
+ "loss": 0.8295,
992
+ "step": 14300
993
+ },
994
+ {
995
+ "epoch": 4.13,
996
+ "learning_rate": 2.5906065088757394e-05,
997
+ "loss": 0.8147,
998
+ "step": 14400
999
+ },
1000
+ {
1001
+ "epoch": 4.16,
1002
+ "learning_rate": 2.5509826711749787e-05,
1003
+ "loss": 0.8153,
1004
+ "step": 14500
1005
+ },
1006
+ {
1007
+ "epoch": 4.19,
1008
+ "learning_rate": 2.511358833474218e-05,
1009
+ "loss": 0.8155,
1010
+ "step": 14600
1011
+ },
1012
+ {
1013
+ "epoch": 4.21,
1014
+ "learning_rate": 2.471734995773457e-05,
1015
+ "loss": 0.8215,
1016
+ "step": 14700
1017
+ },
1018
+ {
1019
+ "epoch": 4.24,
1020
+ "learning_rate": 2.4321111580726964e-05,
1021
+ "loss": 0.8214,
1022
+ "step": 14800
1023
+ },
1024
+ {
1025
+ "epoch": 4.27,
1026
+ "learning_rate": 2.3924873203719357e-05,
1027
+ "loss": 0.811,
1028
+ "step": 14900
1029
+ },
1030
+ {
1031
+ "epoch": 4.3,
1032
+ "learning_rate": 2.352863482671175e-05,
1033
+ "loss": 0.8075,
1034
+ "step": 15000
1035
+ },
1036
+ {
1037
+ "epoch": 4.3,
1038
+ "eval_loss": Infinity,
1039
+ "eval_runtime": 973.424,
1040
+ "eval_samples_per_second": 16.458,
1041
+ "eval_steps_per_second": 1.029,
1042
+ "eval_wer": 0.1923276841233885,
1043
+ "step": 15000
1044
+ },
1045
+ {
1046
+ "epoch": 4.33,
1047
+ "learning_rate": 2.313239644970414e-05,
1048
+ "loss": 0.8085,
1049
+ "step": 15100
1050
+ },
1051
+ {
1052
+ "epoch": 4.36,
1053
+ "learning_rate": 2.273615807269653e-05,
1054
+ "loss": 0.8149,
1055
+ "step": 15200
1056
+ },
1057
+ {
1058
+ "epoch": 4.39,
1059
+ "learning_rate": 2.2339919695688927e-05,
1060
+ "loss": 0.8103,
1061
+ "step": 15300
1062
+ },
1063
+ {
1064
+ "epoch": 4.42,
1065
+ "learning_rate": 2.194368131868132e-05,
1066
+ "loss": 0.8053,
1067
+ "step": 15400
1068
+ },
1069
+ {
1070
+ "epoch": 4.44,
1071
+ "learning_rate": 2.154744294167371e-05,
1072
+ "loss": 0.8038,
1073
+ "step": 15500
1074
+ },
1075
+ {
1076
+ "epoch": 4.47,
1077
+ "learning_rate": 2.11512045646661e-05,
1078
+ "loss": 0.8062,
1079
+ "step": 15600
1080
+ },
1081
+ {
1082
+ "epoch": 4.5,
1083
+ "learning_rate": 2.075892857142857e-05,
1084
+ "loss": 0.8074,
1085
+ "step": 15700
1086
+ },
1087
+ {
1088
+ "epoch": 4.53,
1089
+ "learning_rate": 2.0362690194420963e-05,
1090
+ "loss": 0.8082,
1091
+ "step": 15800
1092
+ },
1093
+ {
1094
+ "epoch": 4.56,
1095
+ "learning_rate": 1.9966451817413352e-05,
1096
+ "loss": 0.8093,
1097
+ "step": 15900
1098
+ },
1099
+ {
1100
+ "epoch": 4.59,
1101
+ "learning_rate": 1.9570213440405745e-05,
1102
+ "loss": 0.8069,
1103
+ "step": 16000
1104
+ },
1105
+ {
1106
+ "epoch": 4.59,
1107
+ "eval_loss": Infinity,
1108
+ "eval_runtime": 966.1996,
1109
+ "eval_samples_per_second": 16.581,
1110
+ "eval_steps_per_second": 1.037,
1111
+ "eval_wer": 0.18768168488060938,
1112
+ "step": 16000
1113
+ },
1114
+ {
1115
+ "epoch": 4.62,
1116
+ "learning_rate": 1.917397506339814e-05,
1117
+ "loss": 0.8039,
1118
+ "step": 16100
1119
+ },
1120
+ {
1121
+ "epoch": 4.64,
1122
+ "learning_rate": 1.8777736686390533e-05,
1123
+ "loss": 0.8069,
1124
+ "step": 16200
1125
+ },
1126
+ {
1127
+ "epoch": 4.67,
1128
+ "learning_rate": 1.8381498309382922e-05,
1129
+ "loss": 0.8108,
1130
+ "step": 16300
1131
+ },
1132
+ {
1133
+ "epoch": 4.7,
1134
+ "learning_rate": 1.7985259932375315e-05,
1135
+ "loss": 0.7968,
1136
+ "step": 16400
1137
+ },
1138
+ {
1139
+ "epoch": 4.73,
1140
+ "learning_rate": 1.7589021555367707e-05,
1141
+ "loss": 0.8013,
1142
+ "step": 16500
1143
+ },
1144
+ {
1145
+ "epoch": 4.76,
1146
+ "learning_rate": 1.71927831783601e-05,
1147
+ "loss": 0.8059,
1148
+ "step": 16600
1149
+ },
1150
+ {
1151
+ "epoch": 4.79,
1152
+ "learning_rate": 1.6796544801352492e-05,
1153
+ "loss": 0.8076,
1154
+ "step": 16700
1155
+ },
1156
+ {
1157
+ "epoch": 4.82,
1158
+ "learning_rate": 1.6400306424344885e-05,
1159
+ "loss": 0.808,
1160
+ "step": 16800
1161
+ },
1162
+ {
1163
+ "epoch": 4.85,
1164
+ "learning_rate": 1.6004068047337277e-05,
1165
+ "loss": 0.8053,
1166
+ "step": 16900
1167
+ },
1168
+ {
1169
+ "epoch": 4.87,
1170
+ "learning_rate": 1.560782967032967e-05,
1171
+ "loss": 0.8064,
1172
+ "step": 17000
1173
+ },
1174
+ {
1175
+ "epoch": 4.87,
1176
+ "eval_loss": Infinity,
1177
+ "eval_runtime": 964.7985,
1178
+ "eval_samples_per_second": 16.606,
1179
+ "eval_steps_per_second": 1.039,
1180
+ "eval_wer": 0.19554266426238345,
1181
+ "step": 17000
1182
+ },
1183
+ {
1184
+ "epoch": 4.9,
1185
+ "learning_rate": 1.521159129332206e-05,
1186
+ "loss": 0.8192,
1187
+ "step": 17100
1188
+ },
1189
+ {
1190
+ "epoch": 4.93,
1191
+ "learning_rate": 1.4815352916314453e-05,
1192
+ "loss": 0.8165,
1193
+ "step": 17200
1194
+ },
1195
+ {
1196
+ "epoch": 4.96,
1197
+ "learning_rate": 1.4419114539306845e-05,
1198
+ "loss": 0.8164,
1199
+ "step": 17300
1200
+ },
1201
+ {
1202
+ "epoch": 4.99,
1203
+ "learning_rate": 1.4022876162299238e-05,
1204
+ "loss": 0.8161,
1205
+ "step": 17400
1206
+ },
1207
+ {
1208
+ "epoch": 5.02,
1209
+ "learning_rate": 1.362663778529163e-05,
1210
+ "loss": 0.8215,
1211
+ "step": 17500
1212
+ },
1213
+ {
1214
+ "epoch": 5.05,
1215
+ "learning_rate": 1.3230399408284023e-05,
1216
+ "loss": 0.8304,
1217
+ "step": 17600
1218
+ },
1219
+ {
1220
+ "epoch": 5.07,
1221
+ "learning_rate": 1.2834161031276414e-05,
1222
+ "loss": 0.8087,
1223
+ "step": 17700
1224
+ },
1225
+ {
1226
+ "epoch": 5.1,
1227
+ "learning_rate": 1.2437922654268808e-05,
1228
+ "loss": 0.8133,
1229
+ "step": 17800
1230
+ },
1231
+ {
1232
+ "epoch": 5.13,
1233
+ "learning_rate": 1.2041684277261199e-05,
1234
+ "loss": 0.7987,
1235
+ "step": 17900
1236
+ },
1237
+ {
1238
+ "epoch": 5.16,
1239
+ "learning_rate": 1.1645445900253593e-05,
1240
+ "loss": 0.801,
1241
+ "step": 18000
1242
+ },
1243
+ {
1244
+ "epoch": 5.16,
1245
+ "eval_loss": Infinity,
1246
+ "eval_runtime": 968.3005,
1247
+ "eval_samples_per_second": 16.545,
1248
+ "eval_steps_per_second": 1.035,
1249
+ "eval_wer": 0.18908061835426385,
1250
+ "step": 18000
1251
+ },
1252
+ {
1253
+ "epoch": 5.19,
1254
+ "learning_rate": 1.1249207523245984e-05,
1255
+ "loss": 0.8075,
1256
+ "step": 18100
1257
+ },
1258
+ {
1259
+ "epoch": 5.22,
1260
+ "learning_rate": 1.0852969146238374e-05,
1261
+ "loss": 0.8069,
1262
+ "step": 18200
1263
+ },
1264
+ {
1265
+ "epoch": 5.25,
1266
+ "learning_rate": 1.0456730769230769e-05,
1267
+ "loss": 0.8095,
1268
+ "step": 18300
1269
+ },
1270
+ {
1271
+ "epoch": 5.28,
1272
+ "learning_rate": 1.006049239222316e-05,
1273
+ "loss": 0.8069,
1274
+ "step": 18400
1275
+ },
1276
+ {
1277
+ "epoch": 5.3,
1278
+ "learning_rate": 9.664254015215553e-06,
1279
+ "loss": 0.8127,
1280
+ "step": 18500
1281
+ },
1282
+ {
1283
+ "epoch": 5.33,
1284
+ "learning_rate": 9.268015638207944e-06,
1285
+ "loss": 0.8132,
1286
+ "step": 18600
1287
+ },
1288
+ {
1289
+ "epoch": 5.36,
1290
+ "learning_rate": 8.871777261200337e-06,
1291
+ "loss": 0.8086,
1292
+ "step": 18700
1293
+ },
1294
+ {
1295
+ "epoch": 5.39,
1296
+ "learning_rate": 8.47553888419273e-06,
1297
+ "loss": 0.8018,
1298
+ "step": 18800
1299
+ },
1300
+ {
1301
+ "epoch": 5.42,
1302
+ "learning_rate": 8.079300507185122e-06,
1303
+ "loss": 0.8102,
1304
+ "step": 18900
1305
+ },
1306
+ {
1307
+ "epoch": 5.45,
1308
+ "learning_rate": 7.683062130177514e-06,
1309
+ "loss": 0.8022,
1310
+ "step": 19000
1311
+ },
1312
+ {
1313
+ "epoch": 5.45,
1314
+ "eval_loss": Infinity,
1315
+ "eval_runtime": 968.9288,
1316
+ "eval_samples_per_second": 16.535,
1317
+ "eval_steps_per_second": 1.034,
1318
+ "eval_wer": 0.18952981717607953,
1319
+ "step": 19000
1320
+ },
1321
+ {
1322
+ "epoch": 5.48,
1323
+ "learning_rate": 7.286823753169907e-06,
1324
+ "loss": 0.7979,
1325
+ "step": 19100
1326
+ },
1327
+ {
1328
+ "epoch": 5.5,
1329
+ "learning_rate": 6.890585376162298e-06,
1330
+ "loss": 0.797,
1331
+ "step": 19200
1332
+ },
1333
+ {
1334
+ "epoch": 5.53,
1335
+ "learning_rate": 6.494346999154691e-06,
1336
+ "loss": 0.7969,
1337
+ "step": 19300
1338
+ },
1339
+ {
1340
+ "epoch": 5.56,
1341
+ "learning_rate": 6.098108622147083e-06,
1342
+ "loss": 0.8034,
1343
+ "step": 19400
1344
+ },
1345
+ {
1346
+ "epoch": 5.59,
1347
+ "learning_rate": 5.701870245139475e-06,
1348
+ "loss": 0.8025,
1349
+ "step": 19500
1350
+ },
1351
+ {
1352
+ "epoch": 5.62,
1353
+ "learning_rate": 5.305631868131867e-06,
1354
+ "loss": 0.7938,
1355
+ "step": 19600
1356
+ },
1357
+ {
1358
+ "epoch": 5.65,
1359
+ "learning_rate": 4.90939349112426e-06,
1360
+ "loss": 0.7942,
1361
+ "step": 19700
1362
+ },
1363
+ {
1364
+ "epoch": 5.68,
1365
+ "learning_rate": 4.513155114116652e-06,
1366
+ "loss": 0.7855,
1367
+ "step": 19800
1368
+ },
1369
+ {
1370
+ "epoch": 5.71,
1371
+ "learning_rate": 4.116916737109044e-06,
1372
+ "loss": 0.7798,
1373
+ "step": 19900
1374
+ },
1375
+ {
1376
+ "epoch": 5.73,
1377
+ "learning_rate": 3.724640743871513e-06,
1378
+ "loss": 0.792,
1379
+ "step": 20000
1380
+ },
1381
+ {
1382
+ "epoch": 5.73,
1383
+ "eval_loss": Infinity,
1384
+ "eval_runtime": 967.729,
1385
+ "eval_samples_per_second": 16.555,
1386
+ "eval_steps_per_second": 1.035,
1387
+ "eval_wer": 0.1853586852592198,
1388
+ "step": 20000
1389
+ },
1390
+ {
1391
+ "epoch": 5.76,
1392
+ "learning_rate": 3.3284023668639047e-06,
1393
+ "loss": 0.786,
1394
+ "step": 20100
1395
+ },
1396
+ {
1397
+ "epoch": 5.79,
1398
+ "learning_rate": 2.932163989856297e-06,
1399
+ "loss": 0.7907,
1400
+ "step": 20200
1401
+ },
1402
+ {
1403
+ "epoch": 5.82,
1404
+ "learning_rate": 2.5359256128486896e-06,
1405
+ "loss": 0.7837,
1406
+ "step": 20300
1407
+ },
1408
+ {
1409
+ "epoch": 5.85,
1410
+ "learning_rate": 2.1396872358410817e-06,
1411
+ "loss": 0.7769,
1412
+ "step": 20400
1413
+ },
1414
+ {
1415
+ "epoch": 5.88,
1416
+ "learning_rate": 1.743448858833474e-06,
1417
+ "loss": 0.7821,
1418
+ "step": 20500
1419
+ },
1420
+ {
1421
+ "epoch": 5.91,
1422
+ "learning_rate": 1.3511728655959423e-06,
1423
+ "loss": 0.7837,
1424
+ "step": 20600
1425
+ },
1426
+ {
1427
+ "epoch": 5.93,
1428
+ "learning_rate": 9.549344885883346e-07,
1429
+ "loss": 0.7856,
1430
+ "step": 20700
1431
+ },
1432
+ {
1433
+ "epoch": 5.96,
1434
+ "learning_rate": 5.586961115807269e-07,
1435
+ "loss": 0.7895,
1436
+ "step": 20800
1437
+ },
1438
+ {
1439
+ "epoch": 5.99,
1440
+ "learning_rate": 1.6245773457311917e-07,
1441
+ "loss": 0.7739,
1442
+ "step": 20900
1443
+ },
1444
+ {
1445
+ "epoch": 6.0,
1446
+ "step": 20928,
1447
+ "total_flos": 1.2767684631907514e+21,
1448
+ "train_loss": 0.30566064197718185,
1449
+ "train_runtime": 111829.698,
1450
+ "train_samples_per_second": 23.955,
1451
+ "train_steps_per_second": 0.187
1452
  }
1453
  ],
1454
+ "max_steps": 20928,
1455
+ "num_train_epochs": 6,
1456
+ "total_flos": 1.2767684631907514e+21,
1457
  "trial_name": null,
1458
  "trial_params": null
1459
  }
wandb/run-20220206_201634-uhiy9e2t/files/output.log CHANGED
@@ -11369,3 +11369,514 @@ Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 98%|██
11369
  89ae304..5c2523c main -> main
11370
  Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 117M/117M [00:08<00:00, 15.1MB/s]
11371
  Dropping the following result as it does not have all the necessary fields:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11369
  89ae304..5c2523c main -> main
11370
  Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 117M/117M [00:08<00:00, 15.1MB/s]
11371
  Dropping the following result as it does not have all the necessary fields:
11372
+ {}
11373
+
11374
+
11375
+
11376
+
11377
+
11378
+
11379
+
11380
+
11381
+
11382
+
11383
+
11384
+
11385
+ Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 73.4M/117M [00:24<00:05, 7.97MB/s]
11386
+ 02/08/2022 03:25:43 - WARNING - huggingface_hub.repository - To https://huggingface.co/Plim/xls-r-1b-cv_8-fr
11387
+ Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 104M/117M [00:26<00:01, 12.1MB/s]To https://huggingface.co/Plim/xls-r-1b-cv_8-fr
11388
+ 5c2523c..39a2094 main -> main
11389
+ Upload file wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 117M/117M [00:27<00:00, 4.51MB/s]
11390
+ The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.
11391
+ ***** Running Evaluation *****
11392
+ Num examples = 16021
11393
+ Batch size = 16
11394
+ ***** train metrics *****
11395
+ epoch = 6.0
11396
+ train_loss = 0.3057
11397
+ train_runtime = 1 day, 7:03:49.69
11398
+ train_samples = 446485
11399
+ train_samples_per_second = 23.955
11400
+ train_steps_per_second = 0.187
11401
+ 02/08/2022 03:25:47 - INFO - __main__ - *** Evaluate ***
11402
+
11403
+
11404
+
11405
+
11406
+
11407
+
11408
+
11409
+
11410
+
11411
+
11412
+
11413
+
11414
+
11415
+
11416
+
11417
+
11418
+
11419
+
11420
+
11421
+
11422
+
11423
+
11424
+
11425
+
11426
+
11427
+
11428
+
11429
+
11430
+
11431
+
11432
+
11433
+
11434
+
11435
+
11436
+
11437
+
11438
+
11439
+
11440
+
11441
+
11442
+
11443
+
11444
+
11445
+
11446
+
11447
+
11448
+
11449
+
11450
+
11451
+
11452
+
11453
+
11454
+
11455
+
11456
+
11457
+
11458
+
11459
+
11460
+
11461
+
11462
+
11463
+
11464
+
11465
+
11466
+
11467
+
11468
+
11469
+
11470
+
11471
+
11472
+
11473
+
11474
+
11475
+
11476
+
11477
+
11478
+
11479
+
11480
+
11481
+
11482
+
11483
+
11484
+
11485
+
11486
+
11487
+
11488
+
11489
+
11490
+
11491
+
11492
+
11493
+
11494
+
11495
+
11496
+
11497
+
11498
+
11499
+
11500
+
11501
+
11502
+
11503
+
11504
+
11505
+
11506
+
11507
+
11508
+
11509
+
11510
+
11511
+
11512
+
11513
+
11514
+
11515
+
11516
+
11517
+
11518
+
11519
+
11520
+
11521
+
11522
+
11523
+
11524
+
11525
+
11526
+
11527
+
11528
+
11529
+
11530
+
11531
+
11532
+
11533
+
11534
+
11535
+
11536
+
11537
+
11538
+
11539
+
11540
+
11541
+
11542
+
11543
+
11544
+
11545
+
11546
+
11547
+
11548
+
11549
+
11550
+
11551
+
11552
+
11553
+
11554
+
11555
+
11556
+
11557
+
11558
+
11559
+
11560
+
11561
+
11562
+
11563
+
11564
+
11565
+
11566
+
11567
+
11568
+
11569
+
11570
+
11571
+
11572
+
11573
+
11574
+
11575
+
11576
+
11577
+
11578
+
11579
+
11580
+
11581
+
11582
+
11583
+
11584
+
11585
+
11586
+
11587
+
11588
+
11589
+
11590
+
11591
+
11592
+
11593
+
11594
+
11595
+
11596
+
11597
+
11598
+
11599
+
11600
+
11601
+
11602
+
11603
+
11604
+
11605
+
11606
+
11607
+
11608
+
11609
+
11610
+
11611
+
11612
+
11613
+
11614
+
11615
+
11616
+
11617
+
11618
+
11619
+
11620
+
11621
+
11622
+
11623
+
11624
+
11625
+
11626
+
11627
+
11628
+
11629
+
11630
+
11631
+
11632
+
11633
+
11634
+
11635
+
11636
+
11637
+
11638
+
11639
+
11640
+
11641
+
11642
+
11643
+
11644
+
11645
+
11646
+
11647
+
11648
+
11649
+
11650
+
11651
+
11652
+
11653
+
11654
+
11655
+
11656
+
11657
+
11658
+
11659
+
11660
+
11661
+
11662
+
11663
+
11664
+
11665
+
11666
+
11667
+
11668
+
11669
+
11670
+
11671
+
11672
+
11673
+
11674
+
11675
+
11676
+
11677
+
11678
+
11679
+
11680
+
11681
+
11682
+
11683
+
11684
+
11685
+
11686
+
11687
+
11688
+
11689
+
11690
+
11691
+
11692
+
11693
+
11694
+
11695
+
11696
+
11697
+
11698
+
11699
+
11700
+
11701
+
11702
+
11703
+
11704
+
11705
+
11706
+
11707
+
11708
+
11709
+
11710
+
11711
+
11712
+
11713
+
11714
+
11715
+
11716
+
11717
+
11718
+
11719
+
11720
+
11721
+
11722
+
11723
+
11724
+
11725
+
11726
+
11727
+
11728
+
11729
+
11730
+
11731
+
11732
+
11733
+
11734
+
11735
+
11736
+
11737
+
11738
+
11739
+
11740
+
11741
+
11742
+
11743
+
11744
+
11745
+
11746
+
11747
+
11748
+
11749
+
11750
+
11751
+
11752
+
11753
+
11754
+
11755
+
11756
+
11757
+
11758
+
11759
+
11760
+
11761
+
11762
+
11763
+
11764
+
11765
+
11766
+
11767
+
11768
+
11769
+
11770
+
11771
+
11772
+
11773
+
11774
+
11775
+
11776
+
11777
+
11778
+
11779
+
11780
+
11781
+
11782
+
11783
+
11784
+
11785
+
11786
+
11787
+
11788
+
11789
+
11790
+
11791
+
11792
+
11793
+
11794
+
11795
+
11796
+
11797
+
11798
+
11799
+
11800
+
11801
+
11802
+
11803
+
11804
+
11805
+
11806
+
11807
+
11808
+
11809
+
11810
+
11811
+
11812
+
11813
+
11814
+
11815
+
11816
+
11817
+
11818
+
11819
+
11820
+
11821
+
11822
+
11823
+
11824
+
11825
+
11826
+
11827
+
11828
+
11829
+
11830
+
11831
+
11832
+
11833
+
11834
+
11835
+
11836
+
11837
+
11838
+
11839
+
11840
+
11841
+
11842
+
11843
+
11844
+
11845
+
11846
+
11847
+
11848
+
11849
+
11850
+
11851
+
11852
+
11853
+
11854
+
11855
+
11856
+
11857
+
11858
+
11859
+
11860
+
11861
+
11862
+
11863
+
11864
+
11865
+
11866
+
11867
+
11868
+
11869
+
11870
+
11871
+ 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1002/1002 [16:09<00:00, 1.03it/s]
11872
+ Saving model checkpoint to ./
11873
+ Configuration saved in ./config.json
11874
+ ***** eval metrics *****
11875
+ epoch = 6.0
11876
+ eval_loss = inf
11877
+ eval_runtime = 0:16:10.12
11878
+ eval_samples = 16021
11879
+ eval_samples_per_second = 16.514
11880
+ eval_steps_per_second = 1.033
11881
+ eval_wer = 0.2937
11882
+ Model weights saved in ./pytorch_model.bin
wandb/run-20220206_201634-uhiy9e2t/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
wandb/run-20220206_201634-uhiy9e2t/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
wandb/run-20220206_201634-uhiy9e2t/run-uhiy9e2t.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160673feed9ea3ddc328247490fe7d2950cd358fb40b4b827edc8123c80ab0af
3
- size 123076744
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408ae6e3f28dda1ea915dfd924f64e8ed1220e8622555b73f8a99c8961884603
3
+ size 123251777