Tflatval commited on
Commit
1d6555c
1 Parent(s): fb853ec

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "NbAiLab/nb-whisper-large-verbatim",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "alignment_heads": [
6
+ [
7
+ 7,
8
+ 0
9
+ ],
10
+ [
11
+ 10,
12
+ 17
13
+ ],
14
+ [
15
+ 12,
16
+ 18
17
+ ],
18
+ [
19
+ 13,
20
+ 12
21
+ ],
22
+ [
23
+ 16,
24
+ 1
25
+ ],
26
+ [
27
+ 17,
28
+ 14
29
+ ],
30
+ [
31
+ 19,
32
+ 11
33
+ ],
34
+ [
35
+ 21,
36
+ 4
37
+ ],
38
+ [
39
+ 24,
40
+ 1
41
+ ],
42
+ [
43
+ 25,
44
+ 6
45
+ ]
46
+ ],
47
+ "apply_spec_augment": false,
48
+ "architectures": [
49
+ "WhisperForConditionalGeneration"
50
+ ],
51
+ "attention_dropout": 0,
52
+ "begin_suppress_tokens": [
53
+ 220,
54
+ 50257
55
+ ],
56
+ "bos_token_id": 50257,
57
+ "classifier_proj_size": 256,
58
+ "d_model": 1280,
59
+ "decoder_attention_heads": 20,
60
+ "decoder_ffn_dim": 5120,
61
+ "decoder_layerdrop": 0,
62
+ "decoder_layers": 32,
63
+ "decoder_start_token_id": 50258,
64
+ "dropout": 0,
65
+ "encoder_attention_heads": 20,
66
+ "encoder_ffn_dim": 5120,
67
+ "encoder_layerdrop": 0,
68
+ "encoder_layers": 32,
69
+ "eos_token_id": 50257,
70
+ "forced_decoder_ids": null,
71
+ "init_std": 0.02,
72
+ "is_encoder_decoder": true,
73
+ "lang_ids": [
74
+ 50259,
75
+ 50260,
76
+ 50261,
77
+ 50262,
78
+ 50263,
79
+ 50264,
80
+ 50265,
81
+ 50266,
82
+ 50267,
83
+ 50268,
84
+ 50269,
85
+ 50270,
86
+ 50271,
87
+ 50272,
88
+ 50273,
89
+ 50274,
90
+ 50275,
91
+ 50276,
92
+ 50277,
93
+ 50278,
94
+ 50279,
95
+ 50280,
96
+ 50281,
97
+ 50282,
98
+ 50283,
99
+ 50284,
100
+ 50285,
101
+ 50286,
102
+ 50287,
103
+ 50288,
104
+ 50289,
105
+ 50290,
106
+ 50291,
107
+ 50292,
108
+ 50293,
109
+ 50294,
110
+ 50295,
111
+ 50296,
112
+ 50297,
113
+ 50298,
114
+ 50299,
115
+ 50300,
116
+ 50301,
117
+ 50302,
118
+ 50303,
119
+ 50304,
120
+ 50305,
121
+ 50306,
122
+ 50307,
123
+ 50308,
124
+ 50309,
125
+ 50310,
126
+ 50311,
127
+ 50312,
128
+ 50313,
129
+ 50314,
130
+ 50315,
131
+ 50316,
132
+ 50317,
133
+ 50318,
134
+ 50319,
135
+ 50320,
136
+ 50321,
137
+ 50322,
138
+ 50323,
139
+ 50324,
140
+ 50325,
141
+ 50326,
142
+ 50327,
143
+ 50328,
144
+ 50329,
145
+ 50330,
146
+ 50331,
147
+ 50332,
148
+ 50333,
149
+ 50334,
150
+ 50335,
151
+ 50336,
152
+ 50337,
153
+ 50338,
154
+ 50339,
155
+ 50340,
156
+ 50341,
157
+ 50342,
158
+ 50343,
159
+ 50344,
160
+ 50345,
161
+ 50346,
162
+ 50347,
163
+ 50348,
164
+ 50349,
165
+ 50350,
166
+ 50351,
167
+ 50352,
168
+ 50353,
169
+ 50354,
170
+ 50355,
171
+ 50356,
172
+ 50357,
173
+ 50358
174
+ ],
175
+ "mask_feature_length": 10,
176
+ "mask_feature_min_masks": 0,
177
+ "mask_feature_prob": 0,
178
+ "mask_time_length": 10,
179
+ "mask_time_min_masks": 2,
180
+ "mask_time_prob": 0.05,
181
+ "max_length": 448,
182
+ "max_source_positions": 1500,
183
+ "max_target_positions": 448,
184
+ "median_filter_width": 7,
185
+ "model_type": "whisper",
186
+ "num_hidden_layers": 32,
187
+ "num_mel_bins": 128,
188
+ "pad_token_id": 50256,
189
+ "scale_embedding": false,
190
+ "suppress_ids": [
191
+ 1,
192
+ 2,
193
+ 7,
194
+ 8,
195
+ 9,
196
+ 10,
197
+ 14,
198
+ 25,
199
+ 26,
200
+ 27,
201
+ 28,
202
+ 29,
203
+ 31,
204
+ 58,
205
+ 59,
206
+ 60,
207
+ 61,
208
+ 62,
209
+ 63,
210
+ 90,
211
+ 91,
212
+ 92,
213
+ 93,
214
+ 359,
215
+ 503,
216
+ 522,
217
+ 542,
218
+ 873,
219
+ 893,
220
+ 902,
221
+ 918,
222
+ 922,
223
+ 931,
224
+ 1350,
225
+ 1853,
226
+ 1982,
227
+ 2460,
228
+ 2627,
229
+ 3246,
230
+ 3253,
231
+ 3268,
232
+ 3536,
233
+ 3846,
234
+ 3961,
235
+ 4183,
236
+ 4667,
237
+ 6585,
238
+ 6647,
239
+ 7273,
240
+ 9061,
241
+ 9383,
242
+ 10428,
243
+ 10929,
244
+ 11938,
245
+ 12033,
246
+ 12331,
247
+ 12562,
248
+ 13793,
249
+ 14157,
250
+ 14635,
251
+ 15265,
252
+ 15618,
253
+ 16553,
254
+ 16604,
255
+ 18362,
256
+ 18956,
257
+ 20075,
258
+ 21675,
259
+ 22520,
260
+ 26130,
261
+ 26161,
262
+ 26435,
263
+ 28279,
264
+ 29464,
265
+ 31650,
266
+ 32302,
267
+ 32470,
268
+ 36865,
269
+ 42863,
270
+ 47425,
271
+ 49870,
272
+ 50254,
273
+ 50258,
274
+ 50359,
275
+ 50360,
276
+ 50361,
277
+ 50362,
278
+ 50363
279
+ ],
280
+ "suppress_ids_begin": [
281
+ 220,
282
+ 50257
283
+ ],
284
+ "suppress_tokens": [],
285
+ "torch_dtype": "float32",
286
+ "transformers_version": "4.38.1",
287
+ "use_cache": true,
288
+ "use_weighted_layer_sum": false,
289
+ "vocab_size": 51866
290
+ }
generation_config.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 7,
5
+ 0
6
+ ],
7
+ [
8
+ 10,
9
+ 17
10
+ ],
11
+ [
12
+ 12,
13
+ 18
14
+ ],
15
+ [
16
+ 13,
17
+ 12
18
+ ],
19
+ [
20
+ 16,
21
+ 1
22
+ ],
23
+ [
24
+ 17,
25
+ 14
26
+ ],
27
+ [
28
+ 19,
29
+ 11
30
+ ],
31
+ [
32
+ 21,
33
+ 4
34
+ ],
35
+ [
36
+ 24,
37
+ 1
38
+ ],
39
+ [
40
+ 25,
41
+ 6
42
+ ]
43
+ ],
44
+ "begin_suppress_tokens": [
45
+ 220,
46
+ 50257
47
+ ],
48
+ "bos_token_id": 50257,
49
+ "decoder_start_token_id": 50258,
50
+ "eos_token_id": 50257,
51
+ "forced_decoder_ids": [
52
+ [
53
+ 1,
54
+ 50288
55
+ ],
56
+ [
57
+ 2,
58
+ 50360
59
+ ],
60
+ [
61
+ 3,
62
+ 50364
63
+ ]
64
+ ],
65
+ "is_multilingual": true,
66
+ "lang_to_id": {
67
+ "<|af|>": 50327,
68
+ "<|am|>": 50334,
69
+ "<|ar|>": 50272,
70
+ "<|as|>": 50350,
71
+ "<|az|>": 50304,
72
+ "<|ba|>": 50355,
73
+ "<|be|>": 50330,
74
+ "<|bg|>": 50292,
75
+ "<|bn|>": 50302,
76
+ "<|bo|>": 50347,
77
+ "<|br|>": 50309,
78
+ "<|bs|>": 50315,
79
+ "<|ca|>": 50270,
80
+ "<|cs|>": 50283,
81
+ "<|cy|>": 50297,
82
+ "<|da|>": 50285,
83
+ "<|de|>": 50261,
84
+ "<|el|>": 50281,
85
+ "<|en|>": 50259,
86
+ "<|es|>": 50262,
87
+ "<|et|>": 50307,
88
+ "<|eu|>": 50310,
89
+ "<|fa|>": 50300,
90
+ "<|fi|>": 50277,
91
+ "<|fo|>": 50338,
92
+ "<|fr|>": 50265,
93
+ "<|gl|>": 50319,
94
+ "<|gu|>": 50333,
95
+ "<|haw|>": 50352,
96
+ "<|ha|>": 50354,
97
+ "<|he|>": 50279,
98
+ "<|hi|>": 50276,
99
+ "<|hr|>": 50291,
100
+ "<|ht|>": 50339,
101
+ "<|hu|>": 50286,
102
+ "<|hy|>": 50312,
103
+ "<|id|>": 50275,
104
+ "<|is|>": 50311,
105
+ "<|it|>": 50274,
106
+ "<|ja|>": 50266,
107
+ "<|jw|>": 50356,
108
+ "<|ka|>": 50329,
109
+ "<|kk|>": 50316,
110
+ "<|km|>": 50323,
111
+ "<|kn|>": 50306,
112
+ "<|ko|>": 50264,
113
+ "<|la|>": 50294,
114
+ "<|lb|>": 50345,
115
+ "<|ln|>": 50353,
116
+ "<|lo|>": 50336,
117
+ "<|lt|>": 50293,
118
+ "<|lv|>": 50301,
119
+ "<|mg|>": 50349,
120
+ "<|mi|>": 50295,
121
+ "<|mk|>": 50308,
122
+ "<|ml|>": 50296,
123
+ "<|mn|>": 50314,
124
+ "<|mr|>": 50320,
125
+ "<|ms|>": 50282,
126
+ "<|mt|>": 50343,
127
+ "<|my|>": 50346,
128
+ "<|ne|>": 50313,
129
+ "<|nl|>": 50271,
130
+ "<|nn|>": 50342,
131
+ "<|no|>": 50288,
132
+ "<|oc|>": 50328,
133
+ "<|pa|>": 50321,
134
+ "<|pl|>": 50269,
135
+ "<|ps|>": 50340,
136
+ "<|pt|>": 50267,
137
+ "<|ro|>": 50284,
138
+ "<|ru|>": 50263,
139
+ "<|sa|>": 50344,
140
+ "<|sd|>": 50332,
141
+ "<|si|>": 50322,
142
+ "<|sk|>": 50298,
143
+ "<|sl|>": 50305,
144
+ "<|sn|>": 50324,
145
+ "<|so|>": 50326,
146
+ "<|sq|>": 50317,
147
+ "<|sr|>": 50303,
148
+ "<|su|>": 50357,
149
+ "<|sv|>": 50273,
150
+ "<|sw|>": 50318,
151
+ "<|ta|>": 50287,
152
+ "<|te|>": 50299,
153
+ "<|tg|>": 50331,
154
+ "<|th|>": 50289,
155
+ "<|tk|>": 50341,
156
+ "<|tl|>": 50348,
157
+ "<|tr|>": 50268,
158
+ "<|tt|>": 50351,
159
+ "<|uk|>": 50280,
160
+ "<|ur|>": 50290,
161
+ "<|uz|>": 50337,
162
+ "<|vi|>": 50278,
163
+ "<|yi|>": 50335,
164
+ "<|yo|>": 50325,
165
+ "<|yue|>": 50358,
166
+ "<|zh|>": 50260
167
+ },
168
+ "language": "<|no|>",
169
+ "max_initial_timestamp_index": 1,
170
+ "max_length": 448,
171
+ "no_timestamps_token_id": 50364,
172
+ "pad_token_id": 50257,
173
+ "return_timestamps": false,
174
+ "suppress_tokens": [
175
+ 1,
176
+ 2,
177
+ 7,
178
+ 8,
179
+ 9,
180
+ 10,
181
+ 14,
182
+ 25,
183
+ 26,
184
+ 27,
185
+ 28,
186
+ 29,
187
+ 31,
188
+ 58,
189
+ 59,
190
+ 60,
191
+ 61,
192
+ 62,
193
+ 63,
194
+ 90,
195
+ 91,
196
+ 92,
197
+ 93,
198
+ 359,
199
+ 503,
200
+ 522,
201
+ 542,
202
+ 873,
203
+ 893,
204
+ 902,
205
+ 918,
206
+ 922,
207
+ 931,
208
+ 1350,
209
+ 1853,
210
+ 1982,
211
+ 2460,
212
+ 2627,
213
+ 3246,
214
+ 3253,
215
+ 3268,
216
+ 3536,
217
+ 3846,
218
+ 3961,
219
+ 4183,
220
+ 4667,
221
+ 6585,
222
+ 6647,
223
+ 7273,
224
+ 9061,
225
+ 9383,
226
+ 10428,
227
+ 10929,
228
+ 11938,
229
+ 12033,
230
+ 12331,
231
+ 12562,
232
+ 13793,
233
+ 14157,
234
+ 14635,
235
+ 15265,
236
+ 15618,
237
+ 16553,
238
+ 16604,
239
+ 18362,
240
+ 18956,
241
+ 20075,
242
+ 21675,
243
+ 22520,
244
+ 26130,
245
+ 26161,
246
+ 26435,
247
+ 28279,
248
+ 29464,
249
+ 31650,
250
+ 32302,
251
+ 32470,
252
+ 36865,
253
+ 42863,
254
+ 47425,
255
+ 49870,
256
+ 50254,
257
+ 50258,
258
+ 50359,
259
+ 50360,
260
+ 50361,
261
+ 50362,
262
+ 50363
263
+ ],
264
+ "task": "transcribe",
265
+ "task_to_id": {
266
+ "transcribe": 50360,
267
+ "translate": 50359
268
+ },
269
+ "transformers_version": "4.38.1"
270
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c838a74f852dcf1a5f5c4f06252143c9279d695963f6c6ef54a4c770d65b748
3
+ size 4993448880
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f064da08c50ac29e5290657564024a4a59ffc39d6a0ae82ad33e9f3f958109
3
+ size 1180663192
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ccc7ab042538232bc6a996c863ec1ad1318d59f03b8c681586b24f26aef85a4
3
+ size 12333660476
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 128,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd57c654092f134895f518f4c8a68e3b53b679bfa87212cac8eb67a6ac2794a
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da423a39ed9647de8be7df65b0cb7b2806b048deca302384c81aa13270200849
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,758 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 8.647140864714087,
3
+ "best_model_checkpoint": "/cluster/home/torstefl/Master/saved_model/Whisper/NB/checkpoint-546",
4
+ "epoch": 6.204545454545454,
5
+ "eval_steps": 21,
6
+ "global_step": 546,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.24,
13
+ "eval_rundkast_loss": 1.4755723476409912,
14
+ "eval_rundkast_runtime": 265.7554,
15
+ "eval_rundkast_samples_per_second": 0.963,
16
+ "eval_rundkast_steps_per_second": 0.015,
17
+ "eval_rundkast_wer": 17.823479005998287,
18
+ "step": 21
19
+ },
20
+ {
21
+ "epoch": 0.24,
22
+ "eval_nb_samtale_loss": 1.2217341661453247,
23
+ "eval_nb_samtale_runtime": 132.2755,
24
+ "eval_nb_samtale_samples_per_second": 0.968,
25
+ "eval_nb_samtale_steps_per_second": 0.015,
26
+ "eval_nb_samtale_wer": 14.504881450488144,
27
+ "step": 21
28
+ },
29
+ {
30
+ "epoch": 0.24,
31
+ "eval_bigbrother_loss": 1.8927885293960571,
32
+ "eval_bigbrother_runtime": 256.5203,
33
+ "eval_bigbrother_samples_per_second": 0.998,
34
+ "eval_bigbrother_steps_per_second": 0.016,
35
+ "eval_bigbrother_wer": 29.599999999999998,
36
+ "step": 21
37
+ },
38
+ {
39
+ "epoch": 0.48,
40
+ "eval_rundkast_loss": 1.2454240322113037,
41
+ "eval_rundkast_runtime": 255.1947,
42
+ "eval_rundkast_samples_per_second": 1.003,
43
+ "eval_rundkast_steps_per_second": 0.016,
44
+ "eval_rundkast_wer": 17.13796058269066,
45
+ "step": 42
46
+ },
47
+ {
48
+ "epoch": 0.48,
49
+ "eval_nb_samtale_loss": 0.9530782103538513,
50
+ "eval_nb_samtale_runtime": 132.01,
51
+ "eval_nb_samtale_samples_per_second": 0.97,
52
+ "eval_nb_samtale_steps_per_second": 0.015,
53
+ "eval_nb_samtale_wer": 14.714086471408647,
54
+ "step": 42
55
+ },
56
+ {
57
+ "epoch": 0.48,
58
+ "eval_bigbrother_loss": 1.7297923564910889,
59
+ "eval_bigbrother_runtime": 255.4086,
60
+ "eval_bigbrother_samples_per_second": 1.002,
61
+ "eval_bigbrother_steps_per_second": 0.016,
62
+ "eval_bigbrother_wer": 29.15555555555556,
63
+ "step": 42
64
+ },
65
+ {
66
+ "epoch": 0.72,
67
+ "eval_rundkast_loss": 0.9745193719863892,
68
+ "eval_rundkast_runtime": 255.3218,
69
+ "eval_rundkast_samples_per_second": 1.003,
70
+ "eval_rundkast_steps_per_second": 0.016,
71
+ "eval_rundkast_wer": 16.36675235646958,
72
+ "step": 63
73
+ },
74
+ {
75
+ "epoch": 0.72,
76
+ "eval_nb_samtale_loss": 0.7309895157814026,
77
+ "eval_nb_samtale_runtime": 131.5521,
78
+ "eval_nb_samtale_samples_per_second": 0.973,
79
+ "eval_nb_samtale_steps_per_second": 0.015,
80
+ "eval_nb_samtale_wer": 13.772663877266389,
81
+ "step": 63
82
+ },
83
+ {
84
+ "epoch": 0.72,
85
+ "eval_bigbrother_loss": 1.4885540008544922,
86
+ "eval_bigbrother_runtime": 255.8175,
87
+ "eval_bigbrother_samples_per_second": 1.001,
88
+ "eval_bigbrother_steps_per_second": 0.016,
89
+ "eval_bigbrother_wer": 29.15555555555556,
90
+ "step": 63
91
+ },
92
+ {
93
+ "epoch": 0.95,
94
+ "eval_rundkast_loss": 0.6675350666046143,
95
+ "eval_rundkast_runtime": 258.7982,
96
+ "eval_rundkast_samples_per_second": 0.989,
97
+ "eval_rundkast_steps_per_second": 0.015,
98
+ "eval_rundkast_wer": 15.295629820051415,
99
+ "step": 84
100
+ },
101
+ {
102
+ "epoch": 0.95,
103
+ "eval_nb_samtale_loss": 0.5071679353713989,
104
+ "eval_nb_samtale_runtime": 132.2238,
105
+ "eval_nb_samtale_samples_per_second": 0.968,
106
+ "eval_nb_samtale_steps_per_second": 0.015,
107
+ "eval_nb_samtale_wer": 13.319386331938631,
108
+ "step": 84
109
+ },
110
+ {
111
+ "epoch": 0.95,
112
+ "eval_bigbrother_loss": 1.1185076236724854,
113
+ "eval_bigbrother_runtime": 255.4352,
114
+ "eval_bigbrother_samples_per_second": 1.002,
115
+ "eval_bigbrother_steps_per_second": 0.016,
116
+ "eval_bigbrother_wer": 29.333333333333332,
117
+ "step": 84
118
+ },
119
+ {
120
+ "epoch": 1.14,
121
+ "grad_norm": 2.3595402240753174,
122
+ "learning_rate": 1.0000000000000002e-06,
123
+ "loss": 0.8426,
124
+ "step": 100
125
+ },
126
+ {
127
+ "epoch": 1.19,
128
+ "eval_rundkast_loss": 0.4894542694091797,
129
+ "eval_rundkast_runtime": 257.5365,
130
+ "eval_rundkast_samples_per_second": 0.994,
131
+ "eval_rundkast_steps_per_second": 0.016,
132
+ "eval_rundkast_wer": 18.466152527849186,
133
+ "step": 105
134
+ },
135
+ {
136
+ "epoch": 1.19,
137
+ "eval_nb_samtale_loss": 0.39404886960983276,
138
+ "eval_nb_samtale_runtime": 132.7392,
139
+ "eval_nb_samtale_samples_per_second": 0.964,
140
+ "eval_nb_samtale_steps_per_second": 0.015,
141
+ "eval_nb_samtale_wer": 12.377963737796374,
142
+ "step": 105
143
+ },
144
+ {
145
+ "epoch": 1.19,
146
+ "eval_bigbrother_loss": 0.8832881450653076,
147
+ "eval_bigbrother_runtime": 255.7717,
148
+ "eval_bigbrother_samples_per_second": 1.001,
149
+ "eval_bigbrother_steps_per_second": 0.016,
150
+ "eval_bigbrother_wer": 29.42222222222222,
151
+ "step": 105
152
+ },
153
+ {
154
+ "epoch": 1.43,
155
+ "eval_rundkast_loss": 0.41296815872192383,
156
+ "eval_rundkast_runtime": 266.8919,
157
+ "eval_rundkast_samples_per_second": 0.959,
158
+ "eval_rundkast_steps_per_second": 0.015,
159
+ "eval_rundkast_wer": 17.13796058269066,
160
+ "step": 126
161
+ },
162
+ {
163
+ "epoch": 1.43,
164
+ "eval_nb_samtale_loss": 0.3332406282424927,
165
+ "eval_nb_samtale_runtime": 131.6921,
166
+ "eval_nb_samtale_samples_per_second": 0.972,
167
+ "eval_nb_samtale_steps_per_second": 0.015,
168
+ "eval_nb_samtale_wer": 12.238493723849372,
169
+ "step": 126
170
+ },
171
+ {
172
+ "epoch": 1.43,
173
+ "eval_bigbrother_loss": 0.7359280586242676,
174
+ "eval_bigbrother_runtime": 252.6466,
175
+ "eval_bigbrother_samples_per_second": 1.013,
176
+ "eval_bigbrother_steps_per_second": 0.016,
177
+ "eval_bigbrother_wer": 28.355555555555554,
178
+ "step": 126
179
+ },
180
+ {
181
+ "epoch": 1.67,
182
+ "eval_rundkast_loss": 0.3938118815422058,
183
+ "eval_rundkast_runtime": 258.8717,
184
+ "eval_rundkast_samples_per_second": 0.989,
185
+ "eval_rundkast_steps_per_second": 0.015,
186
+ "eval_rundkast_wer": 13.967437874892887,
187
+ "step": 147
188
+ },
189
+ {
190
+ "epoch": 1.67,
191
+ "eval_nb_samtale_loss": 0.3065837025642395,
192
+ "eval_nb_samtale_runtime": 131.151,
193
+ "eval_nb_samtale_samples_per_second": 0.976,
194
+ "eval_nb_samtale_steps_per_second": 0.015,
195
+ "eval_nb_samtale_wer": 11.436541143654114,
196
+ "step": 147
197
+ },
198
+ {
199
+ "epoch": 1.67,
200
+ "eval_bigbrother_loss": 0.6836585998535156,
201
+ "eval_bigbrother_runtime": 253.257,
202
+ "eval_bigbrother_samples_per_second": 1.011,
203
+ "eval_bigbrother_steps_per_second": 0.016,
204
+ "eval_bigbrother_wer": 27.644444444444442,
205
+ "step": 147
206
+ },
207
+ {
208
+ "epoch": 1.91,
209
+ "eval_rundkast_loss": 0.3802303373813629,
210
+ "eval_rundkast_runtime": 253.7316,
211
+ "eval_rundkast_samples_per_second": 1.009,
212
+ "eval_rundkast_steps_per_second": 0.016,
213
+ "eval_rundkast_wer": 13.753213367609254,
214
+ "step": 168
215
+ },
216
+ {
217
+ "epoch": 1.91,
218
+ "eval_nb_samtale_loss": 0.2869262993335724,
219
+ "eval_nb_samtale_runtime": 130.461,
220
+ "eval_nb_samtale_samples_per_second": 0.981,
221
+ "eval_nb_samtale_steps_per_second": 0.015,
222
+ "eval_nb_samtale_wer": 10.460251046025103,
223
+ "step": 168
224
+ },
225
+ {
226
+ "epoch": 1.91,
227
+ "eval_bigbrother_loss": 0.65956711769104,
228
+ "eval_bigbrother_runtime": 251.8276,
229
+ "eval_bigbrother_samples_per_second": 1.017,
230
+ "eval_bigbrother_steps_per_second": 0.016,
231
+ "eval_bigbrother_wer": 27.68888888888889,
232
+ "step": 168
233
+ },
234
+ {
235
+ "epoch": 2.15,
236
+ "eval_rundkast_loss": 0.37286698818206787,
237
+ "eval_rundkast_runtime": 252.2645,
238
+ "eval_rundkast_samples_per_second": 1.015,
239
+ "eval_rundkast_steps_per_second": 0.016,
240
+ "eval_rundkast_wer": 13.581833761782347,
241
+ "step": 189
242
+ },
243
+ {
244
+ "epoch": 2.15,
245
+ "eval_nb_samtale_loss": 0.27960070967674255,
246
+ "eval_nb_samtale_runtime": 131.5439,
247
+ "eval_nb_samtale_samples_per_second": 0.973,
248
+ "eval_nb_samtale_steps_per_second": 0.015,
249
+ "eval_nb_samtale_wer": 10.355648535564853,
250
+ "step": 189
251
+ },
252
+ {
253
+ "epoch": 2.15,
254
+ "eval_bigbrother_loss": 0.650139331817627,
255
+ "eval_bigbrother_runtime": 252.878,
256
+ "eval_bigbrother_samples_per_second": 1.012,
257
+ "eval_bigbrother_steps_per_second": 0.016,
258
+ "eval_bigbrother_wer": 27.644444444444442,
259
+ "step": 189
260
+ },
261
+ {
262
+ "epoch": 2.27,
263
+ "grad_norm": 1.7029398679733276,
264
+ "learning_rate": 2.0000000000000003e-06,
265
+ "loss": 0.3125,
266
+ "step": 200
267
+ },
268
+ {
269
+ "epoch": 2.39,
270
+ "eval_rundkast_loss": 0.37109890580177307,
271
+ "eval_rundkast_runtime": 260.7651,
272
+ "eval_rundkast_samples_per_second": 0.982,
273
+ "eval_rundkast_steps_per_second": 0.015,
274
+ "eval_rundkast_wer": 13.79605826906598,
275
+ "step": 210
276
+ },
277
+ {
278
+ "epoch": 2.39,
279
+ "eval_nb_samtale_loss": 0.2866850793361664,
280
+ "eval_nb_samtale_runtime": 134.0747,
281
+ "eval_nb_samtale_samples_per_second": 0.955,
282
+ "eval_nb_samtale_steps_per_second": 0.015,
283
+ "eval_nb_samtale_wer": 10.564853556485355,
284
+ "step": 210
285
+ },
286
+ {
287
+ "epoch": 2.39,
288
+ "eval_bigbrother_loss": 0.72154700756073,
289
+ "eval_bigbrother_runtime": 259.4669,
290
+ "eval_bigbrother_samples_per_second": 0.987,
291
+ "eval_bigbrother_steps_per_second": 0.015,
292
+ "eval_bigbrother_wer": 26.93333333333333,
293
+ "step": 210
294
+ },
295
+ {
296
+ "epoch": 2.62,
297
+ "eval_rundkast_loss": 0.3417912721633911,
298
+ "eval_rundkast_runtime": 287.8177,
299
+ "eval_rundkast_samples_per_second": 0.889,
300
+ "eval_rundkast_steps_per_second": 0.014,
301
+ "eval_rundkast_wer": 15.424164524421593,
302
+ "step": 231
303
+ },
304
+ {
305
+ "epoch": 2.62,
306
+ "eval_nb_samtale_loss": 0.25085484981536865,
307
+ "eval_nb_samtale_runtime": 133.6313,
308
+ "eval_nb_samtale_samples_per_second": 0.958,
309
+ "eval_nb_samtale_steps_per_second": 0.015,
310
+ "eval_nb_samtale_wer": 9.867503486750348,
311
+ "step": 231
312
+ },
313
+ {
314
+ "epoch": 2.62,
315
+ "eval_bigbrother_loss": 0.5950784683227539,
316
+ "eval_bigbrother_runtime": 320.7203,
317
+ "eval_bigbrother_samples_per_second": 0.798,
318
+ "eval_bigbrother_steps_per_second": 0.012,
319
+ "eval_bigbrother_wer": 27.37777777777778,
320
+ "step": 231
321
+ },
322
+ {
323
+ "epoch": 2.86,
324
+ "eval_rundkast_loss": 0.3154206871986389,
325
+ "eval_rundkast_runtime": 258.2448,
326
+ "eval_rundkast_samples_per_second": 0.991,
327
+ "eval_rundkast_steps_per_second": 0.015,
328
+ "eval_rundkast_wer": 13.453299057412169,
329
+ "step": 252
330
+ },
331
+ {
332
+ "epoch": 2.86,
333
+ "eval_nb_samtale_loss": 0.23974575102329254,
334
+ "eval_nb_samtale_runtime": 135.359,
335
+ "eval_nb_samtale_samples_per_second": 0.946,
336
+ "eval_nb_samtale_steps_per_second": 0.015,
337
+ "eval_nb_samtale_wer": 9.518828451882845,
338
+ "step": 252
339
+ },
340
+ {
341
+ "epoch": 2.86,
342
+ "eval_bigbrother_loss": 0.5906115770339966,
343
+ "eval_bigbrother_runtime": 261.1086,
344
+ "eval_bigbrother_samples_per_second": 0.98,
345
+ "eval_bigbrother_steps_per_second": 0.015,
346
+ "eval_bigbrother_wer": 26.8,
347
+ "step": 252
348
+ },
349
+ {
350
+ "epoch": 3.1,
351
+ "eval_rundkast_loss": 0.30801013112068176,
352
+ "eval_rundkast_runtime": 254.8501,
353
+ "eval_rundkast_samples_per_second": 1.005,
354
+ "eval_rundkast_steps_per_second": 0.016,
355
+ "eval_rundkast_wer": 13.410454155955442,
356
+ "step": 273
357
+ },
358
+ {
359
+ "epoch": 3.1,
360
+ "eval_nb_samtale_loss": 0.23249460756778717,
361
+ "eval_nb_samtale_runtime": 130.939,
362
+ "eval_nb_samtale_samples_per_second": 0.978,
363
+ "eval_nb_samtale_steps_per_second": 0.015,
364
+ "eval_nb_samtale_wer": 9.797768479776849,
365
+ "step": 273
366
+ },
367
+ {
368
+ "epoch": 3.1,
369
+ "eval_bigbrother_loss": 0.5573908090591431,
370
+ "eval_bigbrother_runtime": 253.2503,
371
+ "eval_bigbrother_samples_per_second": 1.011,
372
+ "eval_bigbrother_steps_per_second": 0.016,
373
+ "eval_bigbrother_wer": 27.6,
374
+ "step": 273
375
+ },
376
+ {
377
+ "epoch": 3.34,
378
+ "eval_rundkast_loss": 0.3013758659362793,
379
+ "eval_rundkast_runtime": 252.5508,
380
+ "eval_rundkast_samples_per_second": 1.014,
381
+ "eval_rundkast_steps_per_second": 0.016,
382
+ "eval_rundkast_wer": 13.110539845758353,
383
+ "step": 294
384
+ },
385
+ {
386
+ "epoch": 3.34,
387
+ "eval_nb_samtale_loss": 0.22886353731155396,
388
+ "eval_nb_samtale_runtime": 130.5994,
389
+ "eval_nb_samtale_samples_per_second": 0.98,
390
+ "eval_nb_samtale_steps_per_second": 0.015,
391
+ "eval_nb_samtale_wer": 9.483960948396094,
392
+ "step": 294
393
+ },
394
+ {
395
+ "epoch": 3.34,
396
+ "eval_bigbrother_loss": 0.5597022771835327,
397
+ "eval_bigbrother_runtime": 252.6597,
398
+ "eval_bigbrother_samples_per_second": 1.013,
399
+ "eval_bigbrother_steps_per_second": 0.016,
400
+ "eval_bigbrother_wer": 28.044444444444444,
401
+ "step": 294
402
+ },
403
+ {
404
+ "epoch": 3.41,
405
+ "grad_norm": 2.1695916652679443,
406
+ "learning_rate": 3e-06,
407
+ "loss": 0.2225,
408
+ "step": 300
409
+ },
410
+ {
411
+ "epoch": 3.58,
412
+ "eval_rundkast_loss": 0.29593202471733093,
413
+ "eval_rundkast_runtime": 265.0209,
414
+ "eval_rundkast_samples_per_second": 0.966,
415
+ "eval_rundkast_steps_per_second": 0.015,
416
+ "eval_rundkast_wer": 13.53898886032562,
417
+ "step": 315
418
+ },
419
+ {
420
+ "epoch": 3.58,
421
+ "eval_nb_samtale_loss": 0.23120403289794922,
422
+ "eval_nb_samtale_runtime": 137.2483,
423
+ "eval_nb_samtale_samples_per_second": 0.933,
424
+ "eval_nb_samtale_steps_per_second": 0.015,
425
+ "eval_nb_samtale_wer": 9.9721059972106,
426
+ "step": 315
427
+ },
428
+ {
429
+ "epoch": 3.58,
430
+ "eval_bigbrother_loss": 0.5614113807678223,
431
+ "eval_bigbrother_runtime": 257.5911,
432
+ "eval_bigbrother_samples_per_second": 0.994,
433
+ "eval_bigbrother_steps_per_second": 0.016,
434
+ "eval_bigbrother_wer": 27.73333333333333,
435
+ "step": 315
436
+ },
437
+ {
438
+ "epoch": 3.82,
439
+ "eval_rundkast_loss": 0.2934091091156006,
440
+ "eval_rundkast_runtime": 253.624,
441
+ "eval_rundkast_samples_per_second": 1.009,
442
+ "eval_rundkast_steps_per_second": 0.016,
443
+ "eval_rundkast_wer": 13.453299057412169,
444
+ "step": 336
445
+ },
446
+ {
447
+ "epoch": 3.82,
448
+ "eval_nb_samtale_loss": 0.2249545007944107,
449
+ "eval_nb_samtale_runtime": 134.2615,
450
+ "eval_nb_samtale_samples_per_second": 0.953,
451
+ "eval_nb_samtale_steps_per_second": 0.015,
452
+ "eval_nb_samtale_wer": 9.693165969316597,
453
+ "step": 336
454
+ },
455
+ {
456
+ "epoch": 3.82,
457
+ "eval_bigbrother_loss": 0.5466703176498413,
458
+ "eval_bigbrother_runtime": 258.8204,
459
+ "eval_bigbrother_samples_per_second": 0.989,
460
+ "eval_bigbrother_steps_per_second": 0.015,
461
+ "eval_bigbrother_wer": 27.68888888888889,
462
+ "step": 336
463
+ },
464
+ {
465
+ "epoch": 4.06,
466
+ "eval_rundkast_loss": 0.2969070076942444,
467
+ "eval_rundkast_runtime": 253.4479,
468
+ "eval_rundkast_samples_per_second": 1.01,
469
+ "eval_rundkast_steps_per_second": 0.016,
470
+ "eval_rundkast_wer": 13.753213367609254,
471
+ "step": 357
472
+ },
473
+ {
474
+ "epoch": 4.06,
475
+ "eval_nb_samtale_loss": 0.2252650409936905,
476
+ "eval_nb_samtale_runtime": 133.6675,
477
+ "eval_nb_samtale_samples_per_second": 0.958,
478
+ "eval_nb_samtale_steps_per_second": 0.015,
479
+ "eval_nb_samtale_wer": 9.902370990237099,
480
+ "step": 357
481
+ },
482
+ {
483
+ "epoch": 4.06,
484
+ "eval_bigbrother_loss": 0.5490403771400452,
485
+ "eval_bigbrother_runtime": 252.1606,
486
+ "eval_bigbrother_samples_per_second": 1.015,
487
+ "eval_bigbrother_steps_per_second": 0.016,
488
+ "eval_bigbrother_wer": 27.51111111111111,
489
+ "step": 357
490
+ },
491
+ {
492
+ "epoch": 4.3,
493
+ "eval_rundkast_loss": 0.29715079069137573,
494
+ "eval_rundkast_runtime": 263.3468,
495
+ "eval_rundkast_samples_per_second": 0.972,
496
+ "eval_rundkast_steps_per_second": 0.015,
497
+ "eval_rundkast_wer": 13.367609254498714,
498
+ "step": 378
499
+ },
500
+ {
501
+ "epoch": 4.3,
502
+ "eval_nb_samtale_loss": 0.22843754291534424,
503
+ "eval_nb_samtale_runtime": 136.3747,
504
+ "eval_nb_samtale_samples_per_second": 0.939,
505
+ "eval_nb_samtale_steps_per_second": 0.015,
506
+ "eval_nb_samtale_wer": 9.797768479776849,
507
+ "step": 378
508
+ },
509
+ {
510
+ "epoch": 4.3,
511
+ "eval_bigbrother_loss": 0.5757928490638733,
512
+ "eval_bigbrother_runtime": 252.9905,
513
+ "eval_bigbrother_samples_per_second": 1.012,
514
+ "eval_bigbrother_steps_per_second": 0.016,
515
+ "eval_bigbrother_wer": 27.37777777777778,
516
+ "step": 378
517
+ },
518
+ {
519
+ "epoch": 4.53,
520
+ "eval_rundkast_loss": 0.2953430414199829,
521
+ "eval_rundkast_runtime": 258.4676,
522
+ "eval_rundkast_samples_per_second": 0.99,
523
+ "eval_rundkast_steps_per_second": 0.015,
524
+ "eval_rundkast_wer": 13.453299057412169,
525
+ "step": 399
526
+ },
527
+ {
528
+ "epoch": 4.53,
529
+ "eval_nb_samtale_loss": 0.22929418087005615,
530
+ "eval_nb_samtale_runtime": 135.2614,
531
+ "eval_nb_samtale_samples_per_second": 0.946,
532
+ "eval_nb_samtale_steps_per_second": 0.015,
533
+ "eval_nb_samtale_wer": 9.588563458856346,
534
+ "step": 399
535
+ },
536
+ {
537
+ "epoch": 4.53,
538
+ "eval_bigbrother_loss": 0.5682810544967651,
539
+ "eval_bigbrother_runtime": 261.904,
540
+ "eval_bigbrother_samples_per_second": 0.977,
541
+ "eval_bigbrother_steps_per_second": 0.015,
542
+ "eval_bigbrother_wer": 27.68888888888889,
543
+ "step": 399
544
+ },
545
+ {
546
+ "epoch": 4.55,
547
+ "grad_norm": 1.9239732027053833,
548
+ "learning_rate": 4.000000000000001e-06,
549
+ "loss": 0.1597,
550
+ "step": 400
551
+ },
552
+ {
553
+ "epoch": 4.77,
554
+ "eval_rundkast_loss": 0.2963680922985077,
555
+ "eval_rundkast_runtime": 257.7303,
556
+ "eval_rundkast_samples_per_second": 0.993,
557
+ "eval_rundkast_steps_per_second": 0.016,
558
+ "eval_rundkast_wer": 13.496143958868895,
559
+ "step": 420
560
+ },
561
+ {
562
+ "epoch": 4.77,
563
+ "eval_nb_samtale_loss": 0.22047586739063263,
564
+ "eval_nb_samtale_runtime": 134.462,
565
+ "eval_nb_samtale_samples_per_second": 0.952,
566
+ "eval_nb_samtale_steps_per_second": 0.015,
567
+ "eval_nb_samtale_wer": 9.309623430962342,
568
+ "step": 420
569
+ },
570
+ {
571
+ "epoch": 4.77,
572
+ "eval_bigbrother_loss": 0.5600541830062866,
573
+ "eval_bigbrother_runtime": 259.0585,
574
+ "eval_bigbrother_samples_per_second": 0.988,
575
+ "eval_bigbrother_steps_per_second": 0.015,
576
+ "eval_bigbrother_wer": 27.200000000000003,
577
+ "step": 420
578
+ },
579
+ {
580
+ "epoch": 5.01,
581
+ "eval_rundkast_loss": 0.2924000322818756,
582
+ "eval_rundkast_runtime": 253.711,
583
+ "eval_rundkast_samples_per_second": 1.009,
584
+ "eval_rundkast_steps_per_second": 0.016,
585
+ "eval_rundkast_wer": 13.281919451585262,
586
+ "step": 441
587
+ },
588
+ {
589
+ "epoch": 5.01,
590
+ "eval_nb_samtale_loss": 0.2149951457977295,
591
+ "eval_nb_samtale_runtime": 134.7407,
592
+ "eval_nb_samtale_samples_per_second": 0.95,
593
+ "eval_nb_samtale_steps_per_second": 0.015,
594
+ "eval_nb_samtale_wer": 9.483960948396094,
595
+ "step": 441
596
+ },
597
+ {
598
+ "epoch": 5.01,
599
+ "eval_bigbrother_loss": 0.5668565630912781,
600
+ "eval_bigbrother_runtime": 259.6443,
601
+ "eval_bigbrother_samples_per_second": 0.986,
602
+ "eval_bigbrother_steps_per_second": 0.015,
603
+ "eval_bigbrother_wer": 26.8,
604
+ "step": 441
605
+ },
606
+ {
607
+ "epoch": 5.25,
608
+ "eval_rundkast_loss": 0.31433504819869995,
609
+ "eval_rundkast_runtime": 258.8641,
610
+ "eval_rundkast_samples_per_second": 0.989,
611
+ "eval_rundkast_steps_per_second": 0.015,
612
+ "eval_rundkast_wer": 13.79605826906598,
613
+ "step": 462
614
+ },
615
+ {
616
+ "epoch": 5.25,
617
+ "eval_nb_samtale_loss": 0.2276706099510193,
618
+ "eval_nb_samtale_runtime": 135.9916,
619
+ "eval_nb_samtale_samples_per_second": 0.941,
620
+ "eval_nb_samtale_steps_per_second": 0.015,
621
+ "eval_nb_samtale_wer": 9.344490934449093,
622
+ "step": 462
623
+ },
624
+ {
625
+ "epoch": 5.25,
626
+ "eval_bigbrother_loss": 0.6108298301696777,
627
+ "eval_bigbrother_runtime": 258.4975,
628
+ "eval_bigbrother_samples_per_second": 0.99,
629
+ "eval_bigbrother_steps_per_second": 0.015,
630
+ "eval_bigbrother_wer": 28.48888888888889,
631
+ "step": 462
632
+ },
633
+ {
634
+ "epoch": 5.49,
635
+ "eval_rundkast_loss": 0.31115537881851196,
636
+ "eval_rundkast_runtime": 254.7658,
637
+ "eval_rundkast_samples_per_second": 1.005,
638
+ "eval_rundkast_steps_per_second": 0.016,
639
+ "eval_rundkast_wer": 13.710368466152529,
640
+ "step": 483
641
+ },
642
+ {
643
+ "epoch": 5.49,
644
+ "eval_nb_samtale_loss": 0.2282358556985855,
645
+ "eval_nb_samtale_runtime": 134.7289,
646
+ "eval_nb_samtale_samples_per_second": 0.95,
647
+ "eval_nb_samtale_steps_per_second": 0.015,
648
+ "eval_nb_samtale_wer": 9.06555090655509,
649
+ "step": 483
650
+ },
651
+ {
652
+ "epoch": 5.49,
653
+ "eval_bigbrother_loss": 0.6029994487762451,
654
+ "eval_bigbrother_runtime": 253.4743,
655
+ "eval_bigbrother_samples_per_second": 1.01,
656
+ "eval_bigbrother_steps_per_second": 0.016,
657
+ "eval_bigbrother_wer": 27.066666666666666,
658
+ "step": 483
659
+ },
660
+ {
661
+ "epoch": 5.68,
662
+ "grad_norm": 2.697171449661255,
663
+ "learning_rate": 5e-06,
664
+ "loss": 0.1207,
665
+ "step": 500
666
+ },
667
+ {
668
+ "epoch": 5.73,
669
+ "eval_rundkast_loss": 0.31634020805358887,
670
+ "eval_rundkast_runtime": 260.8574,
671
+ "eval_rundkast_samples_per_second": 0.981,
672
+ "eval_rundkast_steps_per_second": 0.015,
673
+ "eval_rundkast_wer": 13.838903170522707,
674
+ "step": 504
675
+ },
676
+ {
677
+ "epoch": 5.73,
678
+ "eval_nb_samtale_loss": 0.22956953942775726,
679
+ "eval_nb_samtale_runtime": 134.4632,
680
+ "eval_nb_samtale_samples_per_second": 0.952,
681
+ "eval_nb_samtale_steps_per_second": 0.015,
682
+ "eval_nb_samtale_wer": 9.379358437935844,
683
+ "step": 504
684
+ },
685
+ {
686
+ "epoch": 5.73,
687
+ "eval_bigbrother_loss": 0.6007498502731323,
688
+ "eval_bigbrother_runtime": 259.5455,
689
+ "eval_bigbrother_samples_per_second": 0.986,
690
+ "eval_bigbrother_steps_per_second": 0.015,
691
+ "eval_bigbrother_wer": 28.000000000000004,
692
+ "step": 504
693
+ },
694
+ {
695
+ "epoch": 5.97,
696
+ "eval_rundkast_loss": 0.3192276954650879,
697
+ "eval_rundkast_runtime": 253.8269,
698
+ "eval_rundkast_samples_per_second": 1.009,
699
+ "eval_rundkast_steps_per_second": 0.016,
700
+ "eval_rundkast_wer": 13.53898886032562,
701
+ "step": 525
702
+ },
703
+ {
704
+ "epoch": 5.97,
705
+ "eval_nb_samtale_loss": 0.22917582094669342,
706
+ "eval_nb_samtale_runtime": 134.9907,
707
+ "eval_nb_samtale_samples_per_second": 0.948,
708
+ "eval_nb_samtale_steps_per_second": 0.015,
709
+ "eval_nb_samtale_wer": 9.06555090655509,
710
+ "step": 525
711
+ },
712
+ {
713
+ "epoch": 5.97,
714
+ "eval_bigbrother_loss": 0.6029475331306458,
715
+ "eval_bigbrother_runtime": 253.265,
716
+ "eval_bigbrother_samples_per_second": 1.011,
717
+ "eval_bigbrother_steps_per_second": 0.016,
718
+ "eval_bigbrother_wer": 26.666666666666668,
719
+ "step": 525
720
+ },
721
+ {
722
+ "epoch": 6.2,
723
+ "eval_rundkast_loss": 0.3402000665664673,
724
+ "eval_rundkast_runtime": 255.0851,
725
+ "eval_rundkast_samples_per_second": 1.004,
726
+ "eval_rundkast_steps_per_second": 0.016,
727
+ "eval_rundkast_wer": 13.367609254498714,
728
+ "step": 546
729
+ },
730
+ {
731
+ "epoch": 6.2,
732
+ "eval_nb_samtale_loss": 0.23988915979862213,
733
+ "eval_nb_samtale_runtime": 133.3224,
734
+ "eval_nb_samtale_samples_per_second": 0.96,
735
+ "eval_nb_samtale_steps_per_second": 0.015,
736
+ "eval_nb_samtale_wer": 8.647140864714087,
737
+ "step": 546
738
+ },
739
+ {
740
+ "epoch": 6.2,
741
+ "eval_bigbrother_loss": 0.6717787384986877,
742
+ "eval_bigbrother_runtime": 257.5221,
743
+ "eval_bigbrother_samples_per_second": 0.994,
744
+ "eval_bigbrother_steps_per_second": 0.016,
745
+ "eval_bigbrother_wer": 26.711111111111112,
746
+ "step": 546
747
+ }
748
+ ],
749
+ "logging_steps": 100,
750
+ "max_steps": 1320,
751
+ "num_input_tokens_seen": 0,
752
+ "num_train_epochs": 15,
753
+ "save_steps": 21,
754
+ "total_flos": 8.900086563864576e+19,
755
+ "train_batch_size": 48,
756
+ "trial_name": null,
757
+ "trial_params": null
758
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af81cda6f4e6e8ca7298eea8143a7256eb99e6fe1fe28d8f057758dad120caf
3
+ size 5112