fsicoli committed on
Commit
cc173dc
1 Parent(s): 00ffbb3

33d4cdefdfe5a65f442f0e42bc45d33d6627d35ca6548d7254f191626170f210

Browse files
README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: openai/whisper-large-v3
4
+ tags:
5
+ - generated_from_trainer
6
+ datasets:
7
+ - mozilla-foundation/common_voice_16_0
8
+ metrics:
9
+ - wer
10
+ model-index:
11
+ - name: whisper-large-v3-pt-cv16-cuda
12
+ results:
13
+ - task:
14
+ name: Automatic Speech Recognition
15
+ type: automatic-speech-recognition
16
+ dataset:
17
+ name: mozilla-foundation/common_voice_16_0 pt
18
+ type: mozilla-foundation/common_voice_16_0
19
+ split: None
20
+ args: pt
21
+ metrics:
22
+ - name: Wer
23
+ type: wer
24
+ value: 0.9998545572074984
25
+ ---
26
+
27
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
+ should probably proofread and complete it, then remove this comment. -->
29
+
30
+ # whisper-large-v3-pt-cv16-cuda
31
+
32
+ This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the Portuguese (`pt`) configuration of the mozilla-foundation/common_voice_16_0 dataset.
33
+ It achieves the following results on the evaluation set (final checkpoint, step 5000):
34
+ - Loss: 0.1325
35
+ - Wer: 0.9999
36
+
37
+ ## Model description
38
+
39
+ More information needed
40
+
41
+ ## Intended uses & limitations
42
+
43
+ More information needed
44
+
45
+ ## Training and evaluation data
46
+
47
+ More information needed
48
+
49
+ ## Training procedure
50
+
51
+ ### Training hyperparameters
52
+
53
+ The following hyperparameters were used during training:
54
+ - learning_rate: 1e-06
55
+ - train_batch_size: 8
56
+ - eval_batch_size: 8
57
+ - seed: 42
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: linear
60
+ - lr_scheduler_warmup_steps: 2000
61
+ - training_steps: 5000
62
+ - mixed_precision_training: Native AMP
63
+
64
+ ### Training results
65
+
66
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
67
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
68
+ | 0.199 | 0.26 | 1000 | 0.1563 | 0.1124 |
69
+ | 0.1654 | 0.52 | 2000 | 0.1500 | 0.1052 |
70
+ | 0.1794 | 0.77 | 3000 | 0.1379 | 0.0997 |
71
+ | 0.0821 | 1.03 | 4000 | 0.1321 | 1.0007 |
72
+ | 0.1292 | 1.29 | 5000 | 0.1325 | 0.9999 |
73
+
74
+
75
+ ### Framework versions
76
+
77
+ - Transformers 4.37.0.dev0
78
+ - Pytorch 2.2.0.dev20231212
79
+ - Datasets 2.15.1.dev0
80
+ - Tokenizers 0.15.0
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6b0edf383dba5a20a86c58366da587af2b11f57f3238809e22174428275ba2
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a3553cfea4d55a22ddc4554e797d5f41b8d4ec64eff37633af911cf87725f80
3
  size 1180663192
runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 CHANGED
Binary files a/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 and b/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 differ
 
runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 CHANGED
Binary files a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 and b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 differ
 
runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 CHANGED
Binary files a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 and b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 differ
 
runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 CHANGED
Binary files a/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 and b/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 differ
 
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.29,
3
+ "train_loss": 0.05041759390830994,
4
+ "train_runtime": 1177508.0135,
5
+ "train_samples": 30998,
6
+ "train_samples_per_second": 0.034,
7
+ "train_steps_per_second": 0.004
8
+ }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.032258064516129,
5
  "eval_steps": 1000,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1003,6 +1003,264 @@
1003
  "eval_steps_per_second": 0.006,
1004
  "eval_wer": 1.000743374272786,
1005
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  }
1007
  ],
1008
  "logging_steps": 25,
@@ -1010,7 +1268,7 @@
1010
  "num_input_tokens_seen": 0,
1011
  "num_train_epochs": 2,
1012
  "save_steps": 1000,
1013
- "total_flos": 1.0871315081330688e+20,
1014
  "train_batch_size": 8,
1015
  "trial_name": null,
1016
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2903225806451613,
5
  "eval_steps": 1000,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1003
  "eval_steps_per_second": 0.006,
1004
  "eval_wer": 1.000743374272786,
1005
  "step": 4000
1006
+ },
1007
+ {
1008
+ "epoch": 1.04,
1009
+ "learning_rate": 3.263333333333333e-07,
1010
+ "loss": 0.1092,
1011
+ "step": 4025
1012
+ },
1013
+ {
1014
+ "epoch": 1.05,
1015
+ "learning_rate": 3.18e-07,
1016
+ "loss": 0.0928,
1017
+ "step": 4050
1018
+ },
1019
+ {
1020
+ "epoch": 1.05,
1021
+ "learning_rate": 3.096666666666666e-07,
1022
+ "loss": 0.1481,
1023
+ "step": 4075
1024
+ },
1025
+ {
1026
+ "epoch": 1.06,
1027
+ "learning_rate": 3.0133333333333333e-07,
1028
+ "loss": 0.1043,
1029
+ "step": 4100
1030
+ },
1031
+ {
1032
+ "epoch": 1.06,
1033
+ "learning_rate": 2.93e-07,
1034
+ "loss": 0.1018,
1035
+ "step": 4125
1036
+ },
1037
+ {
1038
+ "epoch": 1.07,
1039
+ "learning_rate": 2.8466666666666665e-07,
1040
+ "loss": 0.104,
1041
+ "step": 4150
1042
+ },
1043
+ {
1044
+ "epoch": 1.08,
1045
+ "learning_rate": 2.763333333333333e-07,
1046
+ "loss": 0.1394,
1047
+ "step": 4175
1048
+ },
1049
+ {
1050
+ "epoch": 1.08,
1051
+ "learning_rate": 2.68e-07,
1052
+ "loss": 0.0962,
1053
+ "step": 4200
1054
+ },
1055
+ {
1056
+ "epoch": 1.09,
1057
+ "learning_rate": 2.596666666666666e-07,
1058
+ "loss": 0.1058,
1059
+ "step": 4225
1060
+ },
1061
+ {
1062
+ "epoch": 1.1,
1063
+ "learning_rate": 2.5133333333333333e-07,
1064
+ "loss": 0.0837,
1065
+ "step": 4250
1066
+ },
1067
+ {
1068
+ "epoch": 1.1,
1069
+ "learning_rate": 2.43e-07,
1070
+ "loss": 0.1396,
1071
+ "step": 4275
1072
+ },
1073
+ {
1074
+ "epoch": 1.11,
1075
+ "learning_rate": 2.3466666666666665e-07,
1076
+ "loss": 0.1005,
1077
+ "step": 4300
1078
+ },
1079
+ {
1080
+ "epoch": 1.12,
1081
+ "learning_rate": 2.263333333333333e-07,
1082
+ "loss": 0.131,
1083
+ "step": 4325
1084
+ },
1085
+ {
1086
+ "epoch": 1.12,
1087
+ "learning_rate": 2.18e-07,
1088
+ "loss": 0.1069,
1089
+ "step": 4350
1090
+ },
1091
+ {
1092
+ "epoch": 1.13,
1093
+ "learning_rate": 2.0966666666666665e-07,
1094
+ "loss": 0.136,
1095
+ "step": 4375
1096
+ },
1097
+ {
1098
+ "epoch": 1.14,
1099
+ "learning_rate": 2.0133333333333334e-07,
1100
+ "loss": 0.0954,
1101
+ "step": 4400
1102
+ },
1103
+ {
1104
+ "epoch": 1.14,
1105
+ "learning_rate": 1.93e-07,
1106
+ "loss": 0.1276,
1107
+ "step": 4425
1108
+ },
1109
+ {
1110
+ "epoch": 1.15,
1111
+ "learning_rate": 1.8466666666666665e-07,
1112
+ "loss": 0.1033,
1113
+ "step": 4450
1114
+ },
1115
+ {
1116
+ "epoch": 1.15,
1117
+ "learning_rate": 1.7633333333333334e-07,
1118
+ "loss": 0.1547,
1119
+ "step": 4475
1120
+ },
1121
+ {
1122
+ "epoch": 1.16,
1123
+ "learning_rate": 1.68e-07,
1124
+ "loss": 0.1093,
1125
+ "step": 4500
1126
+ },
1127
+ {
1128
+ "epoch": 1.17,
1129
+ "learning_rate": 1.5966666666666668e-07,
1130
+ "loss": 0.1435,
1131
+ "step": 4525
1132
+ },
1133
+ {
1134
+ "epoch": 1.17,
1135
+ "learning_rate": 1.513333333333333e-07,
1136
+ "loss": 0.1085,
1137
+ "step": 4550
1138
+ },
1139
+ {
1140
+ "epoch": 1.18,
1141
+ "learning_rate": 1.4299999999999997e-07,
1142
+ "loss": 0.1105,
1143
+ "step": 4575
1144
+ },
1145
+ {
1146
+ "epoch": 1.19,
1147
+ "learning_rate": 1.3466666666666665e-07,
1148
+ "loss": 0.0858,
1149
+ "step": 4600
1150
+ },
1151
+ {
1152
+ "epoch": 1.19,
1153
+ "learning_rate": 1.263333333333333e-07,
1154
+ "loss": 0.1336,
1155
+ "step": 4625
1156
+ },
1157
+ {
1158
+ "epoch": 1.2,
1159
+ "learning_rate": 1.1799999999999998e-07,
1160
+ "loss": 0.0857,
1161
+ "step": 4650
1162
+ },
1163
+ {
1164
+ "epoch": 1.21,
1165
+ "learning_rate": 1.0966666666666666e-07,
1166
+ "loss": 0.1146,
1167
+ "step": 4675
1168
+ },
1169
+ {
1170
+ "epoch": 1.21,
1171
+ "learning_rate": 1.0133333333333333e-07,
1172
+ "loss": 0.097,
1173
+ "step": 4700
1174
+ },
1175
+ {
1176
+ "epoch": 1.22,
1177
+ "learning_rate": 9.3e-08,
1178
+ "loss": 0.1146,
1179
+ "step": 4725
1180
+ },
1181
+ {
1182
+ "epoch": 1.23,
1183
+ "learning_rate": 8.466666666666667e-08,
1184
+ "loss": 0.0932,
1185
+ "step": 4750
1186
+ },
1187
+ {
1188
+ "epoch": 1.23,
1189
+ "learning_rate": 7.633333333333333e-08,
1190
+ "loss": 0.1245,
1191
+ "step": 4775
1192
+ },
1193
+ {
1194
+ "epoch": 1.24,
1195
+ "learning_rate": 6.8e-08,
1196
+ "loss": 0.0978,
1197
+ "step": 4800
1198
+ },
1199
+ {
1200
+ "epoch": 1.25,
1201
+ "learning_rate": 5.966666666666666e-08,
1202
+ "loss": 0.1283,
1203
+ "step": 4825
1204
+ },
1205
+ {
1206
+ "epoch": 1.25,
1207
+ "learning_rate": 5.133333333333333e-08,
1208
+ "loss": 0.1091,
1209
+ "step": 4850
1210
+ },
1211
+ {
1212
+ "epoch": 1.26,
1213
+ "learning_rate": 4.2999999999999995e-08,
1214
+ "loss": 0.1106,
1215
+ "step": 4875
1216
+ },
1217
+ {
1218
+ "epoch": 1.26,
1219
+ "learning_rate": 3.4666666666666666e-08,
1220
+ "loss": 0.1131,
1221
+ "step": 4900
1222
+ },
1223
+ {
1224
+ "epoch": 1.27,
1225
+ "learning_rate": 2.633333333333333e-08,
1226
+ "loss": 0.1219,
1227
+ "step": 4925
1228
+ },
1229
+ {
1230
+ "epoch": 1.28,
1231
+ "learning_rate": 1.8e-08,
1232
+ "loss": 0.1205,
1233
+ "step": 4950
1234
+ },
1235
+ {
1236
+ "epoch": 1.28,
1237
+ "learning_rate": 9.666666666666667e-09,
1238
+ "loss": 0.1408,
1239
+ "step": 4975
1240
+ },
1241
+ {
1242
+ "epoch": 1.29,
1243
+ "learning_rate": 1.3333333333333333e-09,
1244
+ "loss": 0.1292,
1245
+ "step": 5000
1246
+ },
1247
+ {
1248
+ "epoch": 1.29,
1249
+ "eval_loss": 0.132488414645195,
1250
+ "eval_runtime": 160662.2084,
1251
+ "eval_samples_per_second": 0.059,
1252
+ "eval_steps_per_second": 0.007,
1253
+ "eval_wer": 0.9998545572074984,
1254
+ "step": 5000
1255
+ },
1256
+ {
1257
+ "epoch": 1.29,
1258
+ "step": 5000,
1259
+ "total_flos": 1.3589313726578688e+20,
1260
+ "train_loss": 0.05041759390830994,
1261
+ "train_runtime": 1177508.0135,
1262
+ "train_samples_per_second": 0.034,
1263
+ "train_steps_per_second": 0.004
1264
  }
1265
  ],
1266
  "logging_steps": 25,
 
1268
  "num_input_tokens_seen": 0,
1269
  "num_train_epochs": 2,
1270
  "save_steps": 1000,
1271
+ "total_flos": 1.3589313726578688e+20,
1272
  "train_batch_size": 8,
1273
  "trial_name": null,
1274
  "trial_params": null
training_args.bin CHANGED
Binary files a/training_args.bin and b/training_args.bin differ