Markus Serloth-Schwarzer committed on
Commit
76641a4
·
verified ·
1 Parent(s): 408c5e2

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - v11/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 0.4699208438396454
18
+
19
+ f1_macro: 0.8648394526320947
20
+
21
+ f1_micro: 0.8277777777777777
22
+
23
+ f1_weighted: 0.827145991318232
24
+
25
+ precision_macro: 0.8595340501792115
26
+
27
+ precision_micro: 0.8277777777777777
28
+
29
+ precision_weighted: 0.8456027479091995
30
+
31
+ recall_macro: 0.8846808510638299
32
+
33
+ recall_micro: 0.8277777777777777
34
+
35
+ recall_weighted: 0.8277777777777777
36
+
37
+ accuracy: 0.8277777777777777
checkpoint-474/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "oliverguhr/german-sentiment-bert",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "finetuning_task": "germansentiment",
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "negative",
15
+ "1": "neutral",
16
+ "2": "positive"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "negative": 0,
22
+ "neutral": 1,
23
+ "positive": 2
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 30000
38
+ }
checkpoint-474/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea75d278f283fe62fb3b9ea3bbe2fd5a2e06e533169565aac67768a4b2901bf
3
+ size 436358132
checkpoint-474/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f7c4e5ab114d689a6fa039838bcf705e08c386357161acab048f49c953ff25b
3
+ size 872837370
checkpoint-474/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a453341100571124158224ae53e2fcc4d3ef29536b469799f547c8d45dfa6526
3
+ size 14244
checkpoint-474/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97dc0e7fb4017aa9ecf672b477ad2cf43803c9f7fd521a5cc08afa46f48f7ea
3
+ size 1064
checkpoint-474/trainer_state.json ADDED
@@ -0,0 +1,901 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4699208438396454,
3
+ "best_model_checkpoint": "v11/checkpoint-474",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 474,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02531645569620253,
13
+ "grad_norm": 71.55428314208984,
14
+ "learning_rate": 2.1097046413502108e-07,
15
+ "loss": 3.1896,
16
+ "step": 4
17
+ },
18
+ {
19
+ "epoch": 0.05063291139240506,
20
+ "grad_norm": Infinity,
21
+ "learning_rate": 8.438818565400843e-07,
22
+ "loss": 2.6405,
23
+ "step": 8
24
+ },
25
+ {
26
+ "epoch": 0.0759493670886076,
27
+ "grad_norm": 37.44911575317383,
28
+ "learning_rate": 1.6877637130801686e-06,
29
+ "loss": 3.0762,
30
+ "step": 12
31
+ },
32
+ {
33
+ "epoch": 0.10126582278481013,
34
+ "grad_norm": 46.11091995239258,
35
+ "learning_rate": 2.531645569620253e-06,
36
+ "loss": 2.5418,
37
+ "step": 16
38
+ },
39
+ {
40
+ "epoch": 0.12658227848101267,
41
+ "grad_norm": 13.867141723632812,
42
+ "learning_rate": 3.3755274261603373e-06,
43
+ "loss": 2.7083,
44
+ "step": 20
45
+ },
46
+ {
47
+ "epoch": 0.1518987341772152,
48
+ "grad_norm": 44.78251647949219,
49
+ "learning_rate": 4.219409282700422e-06,
50
+ "loss": 1.6333,
51
+ "step": 24
52
+ },
53
+ {
54
+ "epoch": 0.17721518987341772,
55
+ "grad_norm": 63.29035949707031,
56
+ "learning_rate": 5.063291139240506e-06,
57
+ "loss": 2.092,
58
+ "step": 28
59
+ },
60
+ {
61
+ "epoch": 0.20253164556962025,
62
+ "grad_norm": 29.540374755859375,
63
+ "learning_rate": 5.907172995780591e-06,
64
+ "loss": 2.0659,
65
+ "step": 32
66
+ },
67
+ {
68
+ "epoch": 0.22784810126582278,
69
+ "grad_norm": 32.71324157714844,
70
+ "learning_rate": 6.7510548523206746e-06,
71
+ "loss": 2.0142,
72
+ "step": 36
73
+ },
74
+ {
75
+ "epoch": 0.25316455696202533,
76
+ "grad_norm": 45.9168701171875,
77
+ "learning_rate": 7.5949367088607605e-06,
78
+ "loss": 1.9482,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.27848101265822783,
83
+ "grad_norm": 29.264678955078125,
84
+ "learning_rate": 8.438818565400844e-06,
85
+ "loss": 1.7309,
86
+ "step": 44
87
+ },
88
+ {
89
+ "epoch": 0.3037974683544304,
90
+ "grad_norm": 35.710845947265625,
91
+ "learning_rate": 9.28270042194093e-06,
92
+ "loss": 1.0276,
93
+ "step": 48
94
+ },
95
+ {
96
+ "epoch": 0.3291139240506329,
97
+ "grad_norm": 43.064239501953125,
98
+ "learning_rate": 1.0126582278481012e-05,
99
+ "loss": 1.3045,
100
+ "step": 52
101
+ },
102
+ {
103
+ "epoch": 0.35443037974683544,
104
+ "grad_norm": 9.842476844787598,
105
+ "learning_rate": 1.0970464135021098e-05,
106
+ "loss": 1.3589,
107
+ "step": 56
108
+ },
109
+ {
110
+ "epoch": 0.379746835443038,
111
+ "grad_norm": 14.65708065032959,
112
+ "learning_rate": 1.1814345991561182e-05,
113
+ "loss": 0.9091,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.4050632911392405,
118
+ "grad_norm": 11.407841682434082,
119
+ "learning_rate": 1.2658227848101267e-05,
120
+ "loss": 0.8581,
121
+ "step": 64
122
+ },
123
+ {
124
+ "epoch": 0.43037974683544306,
125
+ "grad_norm": 35.808719635009766,
126
+ "learning_rate": 1.3502109704641349e-05,
127
+ "loss": 0.9538,
128
+ "step": 68
129
+ },
130
+ {
131
+ "epoch": 0.45569620253164556,
132
+ "grad_norm": 11.020061492919922,
133
+ "learning_rate": 1.4345991561181435e-05,
134
+ "loss": 0.9462,
135
+ "step": 72
136
+ },
137
+ {
138
+ "epoch": 0.4810126582278481,
139
+ "grad_norm": 6.515781402587891,
140
+ "learning_rate": 1.5189873417721521e-05,
141
+ "loss": 0.809,
142
+ "step": 76
143
+ },
144
+ {
145
+ "epoch": 0.5063291139240507,
146
+ "grad_norm": 9.359097480773926,
147
+ "learning_rate": 1.6033755274261607e-05,
148
+ "loss": 0.6867,
149
+ "step": 80
150
+ },
151
+ {
152
+ "epoch": 0.5316455696202531,
153
+ "grad_norm": 20.292892456054688,
154
+ "learning_rate": 1.6877637130801688e-05,
155
+ "loss": 0.6582,
156
+ "step": 84
157
+ },
158
+ {
159
+ "epoch": 0.5569620253164557,
160
+ "grad_norm": 17.644256591796875,
161
+ "learning_rate": 1.7721518987341772e-05,
162
+ "loss": 0.7018,
163
+ "step": 88
164
+ },
165
+ {
166
+ "epoch": 0.5822784810126582,
167
+ "grad_norm": 16.383758544921875,
168
+ "learning_rate": 1.856540084388186e-05,
169
+ "loss": 0.5546,
170
+ "step": 92
171
+ },
172
+ {
173
+ "epoch": 0.6075949367088608,
174
+ "grad_norm": 13.477869033813477,
175
+ "learning_rate": 1.940928270042194e-05,
176
+ "loss": 0.7188,
177
+ "step": 96
178
+ },
179
+ {
180
+ "epoch": 0.6329113924050633,
181
+ "grad_norm": 10.13427448272705,
182
+ "learning_rate": 2.0253164556962025e-05,
183
+ "loss": 0.7572,
184
+ "step": 100
185
+ },
186
+ {
187
+ "epoch": 0.6582278481012658,
188
+ "grad_norm": 14.624322891235352,
189
+ "learning_rate": 2.1097046413502112e-05,
190
+ "loss": 0.6465,
191
+ "step": 104
192
+ },
193
+ {
194
+ "epoch": 0.6835443037974683,
195
+ "grad_norm": 6.965182781219482,
196
+ "learning_rate": 2.1940928270042196e-05,
197
+ "loss": 0.5644,
198
+ "step": 108
199
+ },
200
+ {
201
+ "epoch": 0.7088607594936709,
202
+ "grad_norm": 14.612415313720703,
203
+ "learning_rate": 2.278481012658228e-05,
204
+ "loss": 0.6489,
205
+ "step": 112
206
+ },
207
+ {
208
+ "epoch": 0.7341772151898734,
209
+ "grad_norm": 10.756507873535156,
210
+ "learning_rate": 2.3628691983122365e-05,
211
+ "loss": 0.6872,
212
+ "step": 116
213
+ },
214
+ {
215
+ "epoch": 0.759493670886076,
216
+ "grad_norm": 10.897459030151367,
217
+ "learning_rate": 2.447257383966245e-05,
218
+ "loss": 0.5815,
219
+ "step": 120
220
+ },
221
+ {
222
+ "epoch": 0.7848101265822784,
223
+ "grad_norm": 5.7723188400268555,
224
+ "learning_rate": 2.5316455696202533e-05,
225
+ "loss": 0.6707,
226
+ "step": 124
227
+ },
228
+ {
229
+ "epoch": 0.810126582278481,
230
+ "grad_norm": 6.329953193664551,
231
+ "learning_rate": 2.616033755274262e-05,
232
+ "loss": 0.8948,
233
+ "step": 128
234
+ },
235
+ {
236
+ "epoch": 0.8354430379746836,
237
+ "grad_norm": 81.23648071289062,
238
+ "learning_rate": 2.7004219409282698e-05,
239
+ "loss": 0.7619,
240
+ "step": 132
241
+ },
242
+ {
243
+ "epoch": 0.8607594936708861,
244
+ "grad_norm": 27.8104248046875,
245
+ "learning_rate": 2.7848101265822786e-05,
246
+ "loss": 0.6075,
247
+ "step": 136
248
+ },
249
+ {
250
+ "epoch": 0.8860759493670886,
251
+ "grad_norm": 8.194925308227539,
252
+ "learning_rate": 2.869198312236287e-05,
253
+ "loss": 0.82,
254
+ "step": 140
255
+ },
256
+ {
257
+ "epoch": 0.9113924050632911,
258
+ "grad_norm": 9.09708309173584,
259
+ "learning_rate": 2.9535864978902954e-05,
260
+ "loss": 0.6168,
261
+ "step": 144
262
+ },
263
+ {
264
+ "epoch": 0.9367088607594937,
265
+ "grad_norm": 13.87863540649414,
266
+ "learning_rate": 3.0379746835443042e-05,
267
+ "loss": 0.5575,
268
+ "step": 148
269
+ },
270
+ {
271
+ "epoch": 0.9620253164556962,
272
+ "grad_norm": 20.190404891967773,
273
+ "learning_rate": 3.1223628691983126e-05,
274
+ "loss": 0.9551,
275
+ "step": 152
276
+ },
277
+ {
278
+ "epoch": 0.9873417721518988,
279
+ "grad_norm": 6.65585470199585,
280
+ "learning_rate": 3.2067510548523214e-05,
281
+ "loss": 0.7284,
282
+ "step": 156
283
+ },
284
+ {
285
+ "epoch": 1.0,
286
+ "eval_accuracy": 0.7611111111111111,
287
+ "eval_f1_macro": 0.8171827313856298,
288
+ "eval_f1_micro": 0.7611111111111111,
289
+ "eval_f1_weighted": 0.7610008434935971,
290
+ "eval_loss": 0.5506798028945923,
291
+ "eval_precision_macro": 0.8070455512984248,
292
+ "eval_precision_micro": 0.7611111111111111,
293
+ "eval_precision_weighted": 0.7688892436497802,
294
+ "eval_recall_macro": 0.8340425531914893,
295
+ "eval_recall_micro": 0.7611111111111111,
296
+ "eval_recall_weighted": 0.7611111111111111,
297
+ "eval_runtime": 0.4991,
298
+ "eval_samples_per_second": 360.643,
299
+ "eval_steps_per_second": 24.043,
300
+ "step": 158
301
+ },
302
+ {
303
+ "epoch": 1.0126582278481013,
304
+ "grad_norm": 6.7371063232421875,
305
+ "learning_rate": 3.291139240506329e-05,
306
+ "loss": 0.5544,
307
+ "step": 160
308
+ },
309
+ {
310
+ "epoch": 1.0379746835443038,
311
+ "grad_norm": 7.718777656555176,
312
+ "learning_rate": 3.3755274261603375e-05,
313
+ "loss": 0.5192,
314
+ "step": 164
315
+ },
316
+ {
317
+ "epoch": 1.0632911392405062,
318
+ "grad_norm": 18.91424560546875,
319
+ "learning_rate": 3.459915611814346e-05,
320
+ "loss": 0.6722,
321
+ "step": 168
322
+ },
323
+ {
324
+ "epoch": 1.0886075949367089,
325
+ "grad_norm": 14.480657577514648,
326
+ "learning_rate": 3.5443037974683544e-05,
327
+ "loss": 0.6675,
328
+ "step": 172
329
+ },
330
+ {
331
+ "epoch": 1.1139240506329113,
332
+ "grad_norm": 15.364533424377441,
333
+ "learning_rate": 3.628691983122363e-05,
334
+ "loss": 0.5339,
335
+ "step": 176
336
+ },
337
+ {
338
+ "epoch": 1.139240506329114,
339
+ "grad_norm": 11.355390548706055,
340
+ "learning_rate": 3.713080168776372e-05,
341
+ "loss": 0.4978,
342
+ "step": 180
343
+ },
344
+ {
345
+ "epoch": 1.1645569620253164,
346
+ "grad_norm": 11.57992935180664,
347
+ "learning_rate": 3.79746835443038e-05,
348
+ "loss": 0.5707,
349
+ "step": 184
350
+ },
351
+ {
352
+ "epoch": 1.189873417721519,
353
+ "grad_norm": 13.356510162353516,
354
+ "learning_rate": 3.881856540084388e-05,
355
+ "loss": 0.5443,
356
+ "step": 188
357
+ },
358
+ {
359
+ "epoch": 1.2151898734177216,
360
+ "grad_norm": 6.806561470031738,
361
+ "learning_rate": 3.966244725738397e-05,
362
+ "loss": 0.4811,
363
+ "step": 192
364
+ },
365
+ {
366
+ "epoch": 1.240506329113924,
367
+ "grad_norm": 49.87335205078125,
368
+ "learning_rate": 4.050632911392405e-05,
369
+ "loss": 0.5528,
370
+ "step": 196
371
+ },
372
+ {
373
+ "epoch": 1.2658227848101267,
374
+ "grad_norm": 29.326234817504883,
375
+ "learning_rate": 4.135021097046414e-05,
376
+ "loss": 0.6233,
377
+ "step": 200
378
+ },
379
+ {
380
+ "epoch": 1.2911392405063291,
381
+ "grad_norm": 23.68698501586914,
382
+ "learning_rate": 4.2194092827004224e-05,
383
+ "loss": 0.6821,
384
+ "step": 204
385
+ },
386
+ {
387
+ "epoch": 1.3164556962025316,
388
+ "grad_norm": 21.932170867919922,
389
+ "learning_rate": 4.3037974683544305e-05,
390
+ "loss": 0.8705,
391
+ "step": 208
392
+ },
393
+ {
394
+ "epoch": 1.3417721518987342,
395
+ "grad_norm": 16.624055862426758,
396
+ "learning_rate": 4.388185654008439e-05,
397
+ "loss": 0.4985,
398
+ "step": 212
399
+ },
400
+ {
401
+ "epoch": 1.3670886075949367,
402
+ "grad_norm": 18.823989868164062,
403
+ "learning_rate": 4.4725738396624474e-05,
404
+ "loss": 0.7091,
405
+ "step": 216
406
+ },
407
+ {
408
+ "epoch": 1.3924050632911391,
409
+ "grad_norm": 5.886724948883057,
410
+ "learning_rate": 4.556962025316456e-05,
411
+ "loss": 0.4788,
412
+ "step": 220
413
+ },
414
+ {
415
+ "epoch": 1.4177215189873418,
416
+ "grad_norm": 19.74236297607422,
417
+ "learning_rate": 4.641350210970464e-05,
418
+ "loss": 0.681,
419
+ "step": 224
420
+ },
421
+ {
422
+ "epoch": 1.4430379746835442,
423
+ "grad_norm": 5.201137542724609,
424
+ "learning_rate": 4.725738396624473e-05,
425
+ "loss": 0.6167,
426
+ "step": 228
427
+ },
428
+ {
429
+ "epoch": 1.4683544303797469,
430
+ "grad_norm": 13.234941482543945,
431
+ "learning_rate": 4.810126582278481e-05,
432
+ "loss": 0.5769,
433
+ "step": 232
434
+ },
435
+ {
436
+ "epoch": 1.4936708860759493,
437
+ "grad_norm": 17.14457893371582,
438
+ "learning_rate": 4.89451476793249e-05,
439
+ "loss": 0.5358,
440
+ "step": 236
441
+ },
442
+ {
443
+ "epoch": 1.518987341772152,
444
+ "grad_norm": 18.709863662719727,
445
+ "learning_rate": 4.9789029535864986e-05,
446
+ "loss": 0.7973,
447
+ "step": 240
448
+ },
449
+ {
450
+ "epoch": 1.5443037974683544,
451
+ "grad_norm": 12.674654006958008,
452
+ "learning_rate": 4.9929676511955e-05,
453
+ "loss": 0.6866,
454
+ "step": 244
455
+ },
456
+ {
457
+ "epoch": 1.5696202531645569,
458
+ "grad_norm": 28.57341194152832,
459
+ "learning_rate": 4.983591186122832e-05,
460
+ "loss": 0.6526,
461
+ "step": 248
462
+ },
463
+ {
464
+ "epoch": 1.5949367088607596,
465
+ "grad_norm": 10.77186393737793,
466
+ "learning_rate": 4.974214721050164e-05,
467
+ "loss": 0.5577,
468
+ "step": 252
469
+ },
470
+ {
471
+ "epoch": 1.620253164556962,
472
+ "grad_norm": 8.296313285827637,
473
+ "learning_rate": 4.964838255977497e-05,
474
+ "loss": 0.6331,
475
+ "step": 256
476
+ },
477
+ {
478
+ "epoch": 1.6455696202531644,
479
+ "grad_norm": 10.310996055603027,
480
+ "learning_rate": 4.955461790904829e-05,
481
+ "loss": 0.569,
482
+ "step": 260
483
+ },
484
+ {
485
+ "epoch": 1.6708860759493671,
486
+ "grad_norm": 13.412028312683105,
487
+ "learning_rate": 4.9460853258321614e-05,
488
+ "loss": 0.6135,
489
+ "step": 264
490
+ },
491
+ {
492
+ "epoch": 1.6962025316455698,
493
+ "grad_norm": 12.334749221801758,
494
+ "learning_rate": 4.936708860759494e-05,
495
+ "loss": 0.4672,
496
+ "step": 268
497
+ },
498
+ {
499
+ "epoch": 1.721518987341772,
500
+ "grad_norm": 10.7319974899292,
501
+ "learning_rate": 4.927332395686826e-05,
502
+ "loss": 0.5693,
503
+ "step": 272
504
+ },
505
+ {
506
+ "epoch": 1.7468354430379747,
507
+ "grad_norm": 8.58609676361084,
508
+ "learning_rate": 4.917955930614159e-05,
509
+ "loss": 0.4975,
510
+ "step": 276
511
+ },
512
+ {
513
+ "epoch": 1.7721518987341773,
514
+ "grad_norm": 10.97795581817627,
515
+ "learning_rate": 4.908579465541491e-05,
516
+ "loss": 0.6971,
517
+ "step": 280
518
+ },
519
+ {
520
+ "epoch": 1.7974683544303798,
521
+ "grad_norm": 4.033533573150635,
522
+ "learning_rate": 4.8992030004688236e-05,
523
+ "loss": 0.3827,
524
+ "step": 284
525
+ },
526
+ {
527
+ "epoch": 1.8227848101265822,
528
+ "grad_norm": 8.546496391296387,
529
+ "learning_rate": 4.889826535396156e-05,
530
+ "loss": 0.4725,
531
+ "step": 288
532
+ },
533
+ {
534
+ "epoch": 1.8481012658227849,
535
+ "grad_norm": 16.97895050048828,
536
+ "learning_rate": 4.8804500703234885e-05,
537
+ "loss": 0.6739,
538
+ "step": 292
539
+ },
540
+ {
541
+ "epoch": 1.8734177215189873,
542
+ "grad_norm": 50.98750305175781,
543
+ "learning_rate": 4.87107360525082e-05,
544
+ "loss": 0.6749,
545
+ "step": 296
546
+ },
547
+ {
548
+ "epoch": 1.8987341772151898,
549
+ "grad_norm": 6.033726215362549,
550
+ "learning_rate": 4.861697140178153e-05,
551
+ "loss": 0.6029,
552
+ "step": 300
553
+ },
554
+ {
555
+ "epoch": 1.9240506329113924,
556
+ "grad_norm": 70.4456558227539,
557
+ "learning_rate": 4.852320675105486e-05,
558
+ "loss": 0.6886,
559
+ "step": 304
560
+ },
561
+ {
562
+ "epoch": 1.9493670886075949,
563
+ "grad_norm": 15.961381912231445,
564
+ "learning_rate": 4.8429442100328175e-05,
565
+ "loss": 0.8721,
566
+ "step": 308
567
+ },
568
+ {
569
+ "epoch": 1.9746835443037973,
570
+ "grad_norm": 7.286694526672363,
571
+ "learning_rate": 4.83356774496015e-05,
572
+ "loss": 0.6044,
573
+ "step": 312
574
+ },
575
+ {
576
+ "epoch": 2.0,
577
+ "grad_norm": 17.835411071777344,
578
+ "learning_rate": 4.824191279887483e-05,
579
+ "loss": 0.6555,
580
+ "step": 316
581
+ },
582
+ {
583
+ "epoch": 2.0,
584
+ "eval_accuracy": 0.7833333333333333,
585
+ "eval_f1_macro": 0.7629566553128967,
586
+ "eval_f1_micro": 0.7833333333333333,
587
+ "eval_f1_weighted": 0.7813522674146798,
588
+ "eval_loss": 0.6177344918251038,
589
+ "eval_precision_macro": 0.7341277483549993,
590
+ "eval_precision_micro": 0.7833333333333333,
591
+ "eval_precision_weighted": 0.8182069491253411,
592
+ "eval_recall_macro": 0.8563120567375887,
593
+ "eval_recall_micro": 0.7833333333333333,
594
+ "eval_recall_weighted": 0.7833333333333333,
595
+ "eval_runtime": 0.4807,
596
+ "eval_samples_per_second": 374.457,
597
+ "eval_steps_per_second": 24.964,
598
+ "step": 316
599
+ },
600
+ {
601
+ "epoch": 2.0253164556962027,
602
+ "grad_norm": 8.706924438476562,
603
+ "learning_rate": 4.814814814814815e-05,
604
+ "loss": 0.6555,
605
+ "step": 320
606
+ },
607
+ {
608
+ "epoch": 2.050632911392405,
609
+ "grad_norm": 8.192380905151367,
610
+ "learning_rate": 4.805438349742147e-05,
611
+ "loss": 0.5678,
612
+ "step": 324
613
+ },
614
+ {
615
+ "epoch": 2.0759493670886076,
616
+ "grad_norm": 26.89942169189453,
617
+ "learning_rate": 4.79606188466948e-05,
618
+ "loss": 0.3685,
619
+ "step": 328
620
+ },
621
+ {
622
+ "epoch": 2.1012658227848102,
623
+ "grad_norm": 20.788925170898438,
624
+ "learning_rate": 4.786685419596813e-05,
625
+ "loss": 0.4393,
626
+ "step": 332
627
+ },
628
+ {
629
+ "epoch": 2.1265822784810124,
630
+ "grad_norm": 17.45987319946289,
631
+ "learning_rate": 4.7773089545241446e-05,
632
+ "loss": 0.3558,
633
+ "step": 336
634
+ },
635
+ {
636
+ "epoch": 2.151898734177215,
637
+ "grad_norm": 17.20940589904785,
638
+ "learning_rate": 4.767932489451477e-05,
639
+ "loss": 0.3101,
640
+ "step": 340
641
+ },
642
+ {
643
+ "epoch": 2.1772151898734178,
644
+ "grad_norm": 13.34177017211914,
645
+ "learning_rate": 4.7585560243788094e-05,
646
+ "loss": 0.6948,
647
+ "step": 344
648
+ },
649
+ {
650
+ "epoch": 2.2025316455696204,
651
+ "grad_norm": 8.63040542602539,
652
+ "learning_rate": 4.749179559306142e-05,
653
+ "loss": 0.4689,
654
+ "step": 348
655
+ },
656
+ {
657
+ "epoch": 2.2278481012658227,
658
+ "grad_norm": 41.929141998291016,
659
+ "learning_rate": 4.739803094233474e-05,
660
+ "loss": 0.4344,
661
+ "step": 352
662
+ },
663
+ {
664
+ "epoch": 2.2531645569620253,
665
+ "grad_norm": 26.385360717773438,
666
+ "learning_rate": 4.730426629160807e-05,
667
+ "loss": 0.6043,
668
+ "step": 356
669
+ },
670
+ {
671
+ "epoch": 2.278481012658228,
672
+ "grad_norm": 17.394569396972656,
673
+ "learning_rate": 4.7210501640881385e-05,
674
+ "loss": 0.3698,
675
+ "step": 360
676
+ },
677
+ {
678
+ "epoch": 2.3037974683544302,
679
+ "grad_norm": 1.6962029933929443,
680
+ "learning_rate": 4.7116736990154716e-05,
681
+ "loss": 0.2685,
682
+ "step": 364
683
+ },
684
+ {
685
+ "epoch": 2.329113924050633,
686
+ "grad_norm": 19.53297233581543,
687
+ "learning_rate": 4.702297233942804e-05,
688
+ "loss": 0.3477,
689
+ "step": 368
690
+ },
691
+ {
692
+ "epoch": 2.3544303797468356,
693
+ "grad_norm": 5.328404903411865,
694
+ "learning_rate": 4.692920768870136e-05,
695
+ "loss": 0.2694,
696
+ "step": 372
697
+ },
698
+ {
699
+ "epoch": 2.379746835443038,
700
+ "grad_norm": 2.5393640995025635,
701
+ "learning_rate": 4.683544303797468e-05,
702
+ "loss": 0.1929,
703
+ "step": 376
704
+ },
705
+ {
706
+ "epoch": 2.4050632911392404,
707
+ "grad_norm": 14.026851654052734,
708
+ "learning_rate": 4.6741678387248013e-05,
709
+ "loss": 0.4045,
710
+ "step": 380
711
+ },
712
+ {
713
+ "epoch": 2.430379746835443,
714
+ "grad_norm": 36.21650695800781,
715
+ "learning_rate": 4.664791373652133e-05,
716
+ "loss": 0.1961,
717
+ "step": 384
718
+ },
719
+ {
720
+ "epoch": 2.4556962025316453,
721
+ "grad_norm": 38.489322662353516,
722
+ "learning_rate": 4.6554149085794655e-05,
723
+ "loss": 0.5762,
724
+ "step": 388
725
+ },
726
+ {
727
+ "epoch": 2.481012658227848,
728
+ "grad_norm": 56.10824966430664,
729
+ "learning_rate": 4.646038443506798e-05,
730
+ "loss": 1.1768,
731
+ "step": 392
732
+ },
733
+ {
734
+ "epoch": 2.5063291139240507,
735
+ "grad_norm": 2.1792027950286865,
736
+ "learning_rate": 4.6366619784341304e-05,
737
+ "loss": 0.3245,
738
+ "step": 396
739
+ },
740
+ {
741
+ "epoch": 2.5316455696202533,
742
+ "grad_norm": 25.301136016845703,
743
+ "learning_rate": 4.627285513361463e-05,
744
+ "loss": 0.4881,
745
+ "step": 400
746
+ },
747
+ {
748
+ "epoch": 2.5569620253164556,
749
+ "grad_norm": 23.114078521728516,
750
+ "learning_rate": 4.617909048288795e-05,
751
+ "loss": 0.6298,
752
+ "step": 404
753
+ },
754
+ {
755
+ "epoch": 2.5822784810126582,
756
+ "grad_norm": 6.997201442718506,
757
+ "learning_rate": 4.608532583216128e-05,
758
+ "loss": 0.3664,
759
+ "step": 408
760
+ },
761
+ {
762
+ "epoch": 2.607594936708861,
763
+ "grad_norm": 20.35528564453125,
764
+ "learning_rate": 4.59915611814346e-05,
765
+ "loss": 0.3108,
766
+ "step": 412
767
+ },
768
+ {
769
+ "epoch": 2.632911392405063,
770
+ "grad_norm": 24.20511245727539,
771
+ "learning_rate": 4.5897796530707926e-05,
772
+ "loss": 0.3533,
773
+ "step": 416
774
+ },
775
+ {
776
+ "epoch": 2.6582278481012658,
777
+ "grad_norm": 2.4780113697052,
778
+ "learning_rate": 4.580403187998125e-05,
779
+ "loss": 0.4303,
780
+ "step": 420
781
+ },
782
+ {
783
+ "epoch": 2.6835443037974684,
784
+ "grad_norm": 49.66173553466797,
785
+ "learning_rate": 4.5710267229254575e-05,
786
+ "loss": 0.4879,
787
+ "step": 424
788
+ },
789
+ {
790
+ "epoch": 2.708860759493671,
791
+ "grad_norm": 22.363910675048828,
792
+ "learning_rate": 4.56165025785279e-05,
793
+ "loss": 0.4178,
794
+ "step": 428
795
+ },
796
+ {
797
+ "epoch": 2.7341772151898733,
798
+ "grad_norm": 16.556734085083008,
799
+ "learning_rate": 4.552273792780122e-05,
800
+ "loss": 0.2627,
801
+ "step": 432
802
+ },
803
+ {
804
+ "epoch": 2.759493670886076,
805
+ "grad_norm": 14.872969627380371,
806
+ "learning_rate": 4.542897327707454e-05,
807
+ "loss": 0.3911,
808
+ "step": 436
809
+ },
810
+ {
811
+ "epoch": 2.7848101265822782,
812
+ "grad_norm": 15.42312240600586,
813
+ "learning_rate": 4.533520862634787e-05,
814
+ "loss": 0.4543,
815
+ "step": 440
816
+ },
817
+ {
818
+ "epoch": 2.810126582278481,
819
+ "grad_norm": 19.359683990478516,
820
+ "learning_rate": 4.5241443975621196e-05,
821
+ "loss": 0.3946,
822
+ "step": 444
823
+ },
824
+ {
825
+ "epoch": 2.8354430379746836,
826
+ "grad_norm": 20.327611923217773,
827
+ "learning_rate": 4.5147679324894514e-05,
828
+ "loss": 0.3452,
829
+ "step": 448
830
+ },
831
+ {
832
+ "epoch": 2.8607594936708862,
833
+ "grad_norm": 24.550987243652344,
834
+ "learning_rate": 4.505391467416784e-05,
835
+ "loss": 0.7032,
836
+ "step": 452
837
+ },
838
+ {
839
+ "epoch": 2.8860759493670884,
840
+ "grad_norm": 13.352927207946777,
841
+ "learning_rate": 4.496015002344117e-05,
842
+ "loss": 0.4979,
843
+ "step": 456
844
+ },
845
+ {
846
+ "epoch": 2.911392405063291,
847
+ "grad_norm": 20.34762954711914,
848
+ "learning_rate": 4.486638537271449e-05,
849
+ "loss": 0.5669,
850
+ "step": 460
851
+ },
852
+ {
853
+ "epoch": 2.9367088607594938,
854
+ "grad_norm": 2.153109312057495,
855
+ "learning_rate": 4.477262072198781e-05,
856
+ "loss": 0.2976,
857
+ "step": 464
858
+ },
859
+ {
860
+ "epoch": 2.962025316455696,
861
+ "grad_norm": 21.230073928833008,
862
+ "learning_rate": 4.4678856071261136e-05,
863
+ "loss": 0.421,
864
+ "step": 468
865
+ },
866
+ {
867
+ "epoch": 2.9873417721518987,
868
+ "grad_norm": 12.1097412109375,
869
+ "learning_rate": 4.458509142053446e-05,
870
+ "loss": 0.2361,
871
+ "step": 472
872
+ },
873
+ {
874
+ "epoch": 3.0,
875
+ "eval_accuracy": 0.8277777777777777,
876
+ "eval_f1_macro": 0.8648394526320947,
877
+ "eval_f1_micro": 0.8277777777777777,
878
+ "eval_f1_weighted": 0.827145991318232,
879
+ "eval_loss": 0.4699208438396454,
880
+ "eval_precision_macro": 0.8595340501792115,
881
+ "eval_precision_micro": 0.8277777777777777,
882
+ "eval_precision_weighted": 0.8456027479091995,
883
+ "eval_recall_macro": 0.8846808510638299,
884
+ "eval_recall_micro": 0.8277777777777777,
885
+ "eval_recall_weighted": 0.8277777777777777,
886
+ "eval_runtime": 0.4998,
887
+ "eval_samples_per_second": 360.158,
888
+ "eval_steps_per_second": 24.011,
889
+ "step": 474
890
+ }
891
+ ],
892
+ "logging_steps": 4,
893
+ "max_steps": 2370,
894
+ "num_input_tokens_seen": 0,
895
+ "num_train_epochs": 15,
896
+ "save_steps": 500,
897
+ "total_flos": 248642179752960.0,
898
+ "train_batch_size": 8,
899
+ "trial_name": null,
900
+ "trial_params": null
901
+ }
checkpoint-474/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a47d0f36e50bb83bbbce81e6e853cce603794c988099db2a46794c6506d7b6
3
+ size 4984
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "oliverguhr/german-sentiment-bert",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "finetuning_task": "germansentiment",
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "negative",
15
+ "1": "neutral",
16
+ "2": "positive"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "negative": 0,
22
+ "neutral": 1,
23
+ "positive": 2
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 30000
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea75d278f283fe62fb3b9ea3bbe2fd5a2e06e533169565aac67768a4b2901bf
3
+ size 436358132
runs/Apr30_22-01-41_r-zerithas-myownmodels-v1-b6o0chv5-dbe5b-x90im/events.out.tfevents.1714514501.r-zerithas-myownmodels-v1-b6o0chv5-dbe5b-x90im.60.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8000f54ad4d5fb5064e38d1e50d28f0fb3ba555f8c72a351e641f1819ad81403
3
- size 4867
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b56fd73b0e14b4ac82e75079057b8659d5d60bc0b596250db1471b8d3f8473
3
+ size 60102
runs/Apr30_22-01-41_r-zerithas-myownmodels-v1-b6o0chv5-dbe5b-x90im/events.out.tfevents.1714514613.r-zerithas-myownmodels-v1-b6o0chv5-dbe5b-x90im.60.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78d06e99292c93463a96b7b3cee049ce93d5e6e2356a6ea71f512ac205aaadad
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "2": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a47d0f36e50bb83bbbce81e6e853cce603794c988099db2a46794c6506d7b6
3
+ size 4984
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "v11/autotrain-data",
3
+ "model": "oliverguhr/german-sentiment-bert",
4
+ "lr": 5e-05,
5
+ "epochs": 15,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "v11",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "Zerithas",
27
+ "log": "tensorboard"
28
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff