End of training
Browse files- README.md +5 -4
- all_results.json +23 -23
- eval_results.json +9 -9
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725056175.6b97e535edda.43233.1 +3 -0
- train.log +48 -0
- train_results.json +6 -6
- trainer_state.json +150 -150
README.md
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
---
|
2 |
base_model: IVN-RIN/bioBIT
|
3 |
tags:
|
|
|
4 |
- generated_from_trainer
|
5 |
datasets:
|
6 |
-
- drugtemist-it-ner
|
7 |
metrics:
|
8 |
- precision
|
9 |
- recall
|
@@ -16,8 +17,8 @@ model-index:
|
|
16 |
name: Token Classification
|
17 |
type: token-classification
|
18 |
dataset:
|
19 |
-
name: drugtemist-it-ner
|
20 |
-
type: drugtemist-it-ner
|
21 |
config: DrugTEMIST Italian NER
|
22 |
split: validation
|
23 |
args: DrugTEMIST Italian NER
|
@@ -41,7 +42,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
41 |
|
42 |
# output
|
43 |
|
44 |
-
This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the drugtemist-it-ner dataset.
|
45 |
It achieves the following results on the evaluation set:
|
46 |
- Loss: 0.0067
|
47 |
- Precision: 0.9328
|
|
|
1 |
---
|
2 |
base_model: IVN-RIN/bioBIT
|
3 |
tags:
|
4 |
+
- token-classification
|
5 |
- generated_from_trainer
|
6 |
datasets:
|
7 |
+
- Rodrigo1771/drugtemist-it-ner
|
8 |
metrics:
|
9 |
- precision
|
10 |
- recall
|
|
|
17 |
name: Token Classification
|
18 |
type: token-classification
|
19 |
dataset:
|
20 |
+
name: Rodrigo1771/drugtemist-it-ner
|
21 |
+
type: Rodrigo1771/drugtemist-it-ner
|
22 |
config: DrugTEMIST Italian NER
|
23 |
split: validation
|
24 |
args: DrugTEMIST Italian NER
|
|
|
42 |
|
43 |
# output
|
44 |
|
45 |
+
This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the Rodrigo1771/drugtemist-it-ner dataset.
|
46 |
It achieves the following results on the evaluation set:
|
47 |
- Loss: 0.0067
|
48 |
- Precision: 0.9328
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples":
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime":
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second":
|
20 |
-
"total_flos": 1.
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second":
|
26 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9988184887042326,
|
4 |
+
"eval_f1": 0.936867469879518,
|
5 |
+
"eval_loss": 0.006724909413605928,
|
6 |
+
"eval_precision": 0.9328214971209213,
|
7 |
+
"eval_recall": 0.9409486931268151,
|
8 |
+
"eval_runtime": 14.1827,
|
9 |
+
"eval_samples": 6798,
|
10 |
+
"eval_samples_per_second": 479.317,
|
11 |
+
"eval_steps_per_second": 59.932,
|
12 |
+
"predict_accuracy": 0.9981367644802958,
|
13 |
+
"predict_f1": 0.8965517241379309,
|
14 |
+
"predict_loss": 0.010722821578383446,
|
15 |
+
"predict_precision": 0.8768736616702355,
|
16 |
+
"predict_recall": 0.9171332586786114,
|
17 |
+
"predict_runtime": 27.7085,
|
18 |
+
"predict_samples_per_second": 527.095,
|
19 |
+
"predict_steps_per_second": 65.9,
|
20 |
+
"total_flos": 1.4262694978690116e+16,
|
21 |
+
"train_loss": 0.0022696754537961062,
|
22 |
+
"train_runtime": 1261.5031,
|
23 |
+
"train_samples": 27198,
|
24 |
+
"train_samples_per_second": 215.6,
|
25 |
+
"train_steps_per_second": 3.369
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples":
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9988184887042326,
|
4 |
+
"eval_f1": 0.936867469879518,
|
5 |
+
"eval_loss": 0.006724909413605928,
|
6 |
+
"eval_precision": 0.9328214971209213,
|
7 |
+
"eval_recall": 0.9409486931268151,
|
8 |
+
"eval_runtime": 14.1827,
|
9 |
+
"eval_samples": 6798,
|
10 |
+
"eval_samples_per_second": 479.317,
|
11 |
+
"eval_steps_per_second": 59.932
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime":
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second":
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9981367644802958,
|
3 |
+
"predict_f1": 0.8965517241379309,
|
4 |
+
"predict_loss": 0.010722821578383446,
|
5 |
+
"predict_precision": 0.8768736616702355,
|
6 |
+
"predict_recall": 0.9171332586786114,
|
7 |
+
"predict_runtime": 27.7085,
|
8 |
+
"predict_samples_per_second": 527.095,
|
9 |
+
"predict_steps_per_second": 65.9
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725056175.6b97e535edda.43233.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80aa2e40ff46b611632e40117a4acd4e5f3d41a9a035d30842811f76e0b6b31b
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1336,3 +1336,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1336 |
{'eval_loss': 0.006724909413605928, 'eval_precision': 0.9328214971209213, 'eval_recall': 0.9409486931268151, 'eval_f1': 0.936867469879518, 'eval_accuracy': 0.9988184887042326, 'eval_runtime': 14.3451, 'eval_samples_per_second': 473.891, 'eval_steps_per_second': 59.254, 'epoch': 10.0}
|
1337 |
{'train_runtime': 1261.5031, 'train_samples_per_second': 215.6, 'train_steps_per_second': 3.369, 'train_loss': 0.0022696754537961062, 'epoch': 10.0}
|
1338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1339 |
0%| | 0/850 [00:00<?, ?it/s]
|
1340 |
1%| | 9/850 [00:00<00:09, 88.19it/s]
|
1341 |
2%|▏ | 18/850 [00:00<00:10, 81.56it/s]
|
1342 |
3%|▎ | 27/850 [00:00<00:10, 80.93it/s]
|
1343 |
4%|▍ | 36/850 [00:00<00:09, 84.26it/s]
|
1344 |
5%|▌ | 45/850 [00:00<00:09, 85.17it/s]
|
1345 |
6%|▋ | 54/850 [00:00<00:10, 77.34it/s]
|
1346 |
7%|▋ | 63/850 [00:00<00:10, 78.32it/s]
|
1347 |
8%|▊ | 71/850 [00:00<00:10, 74.58it/s]
|
1348 |
9%|▉ | 79/850 [00:01<00:10, 75.01it/s]
|
1349 |
10%|█ | 88/850 [00:01<00:09, 76.98it/s]
|
1350 |
11%|█▏ | 97/850 [00:01<00:09, 79.71it/s]
|
1351 |
13%|█▎ | 107/850 [00:01<00:08, 83.23it/s]
|
1352 |
14%|█▎ | 116/850 [00:01<00:08, 83.47it/s]
|
1353 |
15%|█▍ | 125/850 [00:01<00:08, 83.94it/s]
|
1354 |
16%|█▌ | 134/850 [00:01<00:08, 81.91it/s]
|
1355 |
17%|█▋ | 144/850 [00:01<00:08, 84.36it/s]
|
1356 |
18%|█▊ | 153/850 [00:01<00:08, 79.26it/s]
|
1357 |
19%|█▉ | 162/850 [00:02<00:08, 81.90it/s]
|
1358 |
20%|██ | 171/850 [00:02<00:08, 83.12it/s]
|
1359 |
21%|██ | 180/850 [00:02<00:07, 84.98it/s]
|
1360 |
22%|██▏ | 189/850 [00:02<00:08, 82.59it/s]
|
1361 |
23%|██▎ | 198/850 [00:02<00:07, 83.32it/s]
|
1362 |
24%|██▍ | 207/850 [00:02<00:07, 82.06it/s]
|
1363 |
25%|██▌ | 216/850 [00:02<00:07, 80.40it/s]
|
1364 |
27%|██▋ | 226/850 [00:02<00:07, 83.55it/s]
|
1365 |
28%|██▊ | 236/850 [00:02<00:07, 85.79it/s]
|
1366 |
29%|██▉ | 245/850 [00:02<00:07, 84.04it/s]
|
1367 |
30%|██▉ | 254/850 [00:03<00:06, 85.38it/s]
|
1368 |
31%|███ | 263/850 [00:03<00:06, 85.08it/s]
|
1369 |
32%|███▏ | 272/850 [00:03<00:06, 86.46it/s]
|
1370 |
33%|███▎ | 281/850 [00:03<00:06, 87.20it/s]
|
1371 |
34%|███▍ | 290/850 [00:03<00:06, 87.65it/s]
|
1372 |
35%|███▌ | 299/850 [00:03<00:06, 85.40it/s]
|
1373 |
36%|███▌ | 308/850 [00:03<00:06, 85.40it/s]
|
1374 |
37%|███▋ | 318/850 [00:03<00:06, 87.55it/s]
|
1375 |
38%|███▊ | 327/850 [00:03<00:06, 85.57it/s]
|
1376 |
40%|███▉ | 336/850 [00:04<00:05, 86.18it/s]
|
1377 |
41%|████ | 345/850 [00:04<00:05, 85.61it/s]
|
1378 |
42%|████▏ | 355/850 [00:04<00:05, 87.78it/s]
|
1379 |
43%|████▎ | 364/850 [00:04<00:05, 84.15it/s]
|
1380 |
44%|████▍ | 373/850 [00:04<00:05, 84.84it/s]
|
1381 |
45%|████▍ | 382/850 [00:04<00:05, 80.74it/s]
|
1382 |
46%|████▌ | 391/850 [00:04<00:05, 80.97it/s]
|
1383 |
47%|████▋ | 400/850 [00:04<00:05, 78.77it/s]
|
1384 |
48%|████▊ | 410/850 [00:04<00:05, 83.43it/s]
|
1385 |
49%|████▉ | 420/850 [00:05<00:05, 85.76it/s]
|
1386 |
51%|█████ | 430/850 [00:05<00:04, 86.88it/s]
|
1387 |
52%|█████▏ | 439/850 [00:05<00:04, 85.98it/s]
|
1388 |
53%|█████▎ | 448/850 [00:05<00:04, 81.98it/s]
|
1389 |
54%|█████▍ | 457/850 [00:05<00:04, 83.43it/s]
|
1390 |
55%|█████▍ | 467/850 [00:05<00:04, 86.56it/s]
|
1391 |
56%|█████▌ | 477/850 [00:05<00:04, 88.16it/s]
|
1392 |
57%|█████▋ | 486/850 [00:05<00:04, 85.36it/s]
|
1393 |
58%|█████▊ | 496/850 [00:05<00:04, 87.52it/s]
|
1394 |
60%|█████▉ | 506/850 [00:06<00:03, 90.01it/s]
|
1395 |
61%|██████ | 516/850 [00:06<00:03, 89.02it/s]
|
1396 |
62%|██████▏ | 525/850 [00:06<00:03, 88.20it/s]
|
1397 |
63%|██████▎ | 534/850 [00:06<00:03, 85.08it/s]
|
1398 |
64%|██████▍ | 543/850 [00:06<00:03, 83.29it/s]
|
1399 |
65%|██████▍ | 552/850 [00:06<00:03, 81.94it/s]
|
1400 |
66%|██████▌ | 561/850 [00:06<00:03, 80.66it/s]
|
1401 |
67%|██████▋ | 570/850 [00:06<00:03, 79.12it/s]
|
1402 |
68%|██████▊ | 578/850 [00:06<00:03, 78.66it/s]
|
1403 |
69%|██████▉ | 587/850 [00:07<00:03, 80.62it/s]
|
1404 |
70%|███████ | 596/850 [00:07<00:03, 81.35it/s]
|
1405 |
71%|███████ | 605/850 [00:07<00:03, 81.20it/s]
|
1406 |
72%|███████▏ | 615/850 [00:07<00:02, 82.94it/s]
|
1407 |
74%|███████▎ | 625/850 [00:07<00:02, 84.80it/s]
|
1408 |
75%|███████▍ | 634/850 [00:07<00:02, 84.22it/s]
|
1409 |
76%|███████▌ | 643/850 [00:07<00:02, 84.53it/s]
|
1410 |
77%|███████▋ | 653/850 [00:07<00:02, 87.76it/s]
|
1411 |
78%|███████▊ | 662/850 [00:07<00:02, 87.53it/s]
|
1412 |
79%|███████▉ | 671/850 [00:08<00:02, 87.19it/s]
|
1413 |
80%|████████ | 680/850 [00:08<00:01, 86.07it/s]
|
1414 |
81%|████████ | 689/850 [00:08<00:01, 84.91it/s]
|
1415 |
82%|████████▏ | 698/850 [00:08<00:01, 84.44it/s]
|
1416 |
83%|████████▎ | 707/850 [00:08<00:01, 81.34it/s]
|
1417 |
84%|████████▍ | 716/850 [00:08<00:01, 81.52it/s]
|
1418 |
85%|████████▌ | 725/850 [00:08<00:01, 78.53it/s]
|
1419 |
86%|████████▋ | 734/850 [00:08<00:01, 81.34it/s]
|
1420 |
87%|████████▋ | 743/850 [00:08<00:01, 79.59it/s]
|
1421 |
88%|████████▊ | 752/850 [00:09<00:01, 78.93it/s]
|
1422 |
90%|████████▉ | 761/850 [00:09<00:01, 81.25it/s]
|
1423 |
91%|█████████ | 770/850 [00:09<00:00, 81.25it/s]
|
1424 |
92%|█████████▏| 779/850 [00:09<00:00, 80.11it/s]
|
1425 |
93%|█████████▎| 788/850 [00:09<00:00, 78.07it/s]
|
1426 |
94%|█████████▍| 798/850 [00:09<00:00, 82.72it/s]
|
1427 |
95%|█████████▍| 807/850 [00:09<00:00, 81.30it/s]
|
1428 |
96%|█████████▌| 816/850 [00:09<00:00, 80.69it/s]
|
1429 |
97%|█████████▋| 826/850 [00:09<00:00, 82.05it/s]
|
1430 |
98%|█████████▊| 835/850 [00:10<00:00, 82.91it/s]
|
1431 |
99%|█████████▉| 844/850 [00:10<00:00, 83.44it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1432 |
0%| | 0/1826 [00:00<?, ?it/s]
|
1433 |
1%| | 10/1826 [00:00<00:18, 99.46it/s]
|
1434 |
1%| | 20/1826 [00:00<00:22, 80.10it/s]
|
1435 |
2%|▏ | 29/1826 [00:00<00:21, 82.82it/s]
|
1436 |
2%|▏ | 38/1826 [00:00<00:21, 82.91it/s]
|
1437 |
3%|▎ | 47/1826 [00:00<00:20, 85.19it/s]
|
1438 |
3%|▎ | 56/1826 [00:00<00:20, 85.50it/s]
|
1439 |
4%|▎ | 65/1826 [00:00<00:21, 82.13it/s]
|
1440 |
4%|▍ | 75/1826 [00:00<00:20, 84.79it/s]
|
1441 |
5%|▍ | 84/1826 [00:00<00:20, 86.10it/s]
|
1442 |
5%|▌ | 93/1826 [00:01<00:19, 87.04it/s]
|
1443 |
6%|▌ | 103/1826 [00:01<00:19, 89.92it/s]
|
1444 |
6%|▌ | 113/1826 [00:01<00:19, 89.70it/s]
|
1445 |
7%|▋ | 122/1826 [00:01<00:19, 86.11it/s]
|
1446 |
7%|▋ | 131/1826 [00:01<00:19, 84.81it/s]
|
1447 |
8%|▊ | 140/1826 [00:01<00:19, 85.70it/s]
|
1448 |
8%|▊ | 150/1826 [00:01<00:19, 87.70it/s]
|
1449 |
9%|▊ | 159/1826 [00:01<00:20, 82.63it/s]
|
1450 |
9%|▉ | 168/1826 [00:01<00:20, 82.89it/s]
|
1451 |
10%|▉ | 178/1826 [00:02<00:19, 86.63it/s]
|
1452 |
10%|█ | 187/1826 [00:02<00:19, 86.22it/s]
|
1453 |
11%|█ | 197/1826 [00:02<00:18, 86.83it/s]
|
1454 |
11%|█▏ | 206/1826 [00:02<00:18, 85.81it/s]
|
1455 |
12%|█▏ | 215/1826 [00:02<00:19, 84.24it/s]
|
1456 |
12%|█▏ | 224/1826 [00:02<00:18, 85.72it/s]
|
1457 |
13%|█▎ | 233/1826 [00:02<00:19, 82.32it/s]
|
1458 |
13%|█▎ | 242/1826 [00:02<00:19, 81.78it/s]
|
1459 |
14%|█▎ | 251/1826 [00:02<00:19, 81.73it/s]
|
1460 |
14%|█▍ | 260/1826 [00:03<00:19, 81.82it/s]
|
1461 |
15%|█▍ | 270/1826 [00:03<00:18, 84.77it/s]
|
1462 |
15%|█▌ | 280/1826 [00:03<00:17, 88.25it/s]
|
1463 |
16%|█▌ | 289/1826 [00:03<00:17, 86.31it/s]
|
1464 |
16%|█▋ | 298/1826 [00:03<00:17, 86.81it/s]
|
1465 |
17%|█▋ | 307/1826 [00:03<00:17, 86.16it/s]
|
1466 |
17%|█▋ | 316/1826 [00:03<00:17, 85.47it/s]
|
1467 |
18%|█▊ | 326/1826 [00:03<00:17, 87.31it/s]
|
1468 |
18%|█▊ | 335/1826 [00:03<00:17, 86.45it/s]
|
1469 |
19%|█▉ | 345/1826 [00:04<00:16, 88.96it/s]
|
1470 |
19%|█▉ | 354/1826 [00:04<00:17, 84.12it/s]
|
1471 |
20%|█▉ | 364/1826 [00:04<00:16, 86.21it/s]
|
1472 |
20%|██ | 374/1826 [00:04<00:16, 87.29it/s]
|
1473 |
21%|██ | 383/1826 [00:04<00:16, 87.07it/s]
|
1474 |
21%|██▏ | 392/1826 [00:04<00:17, 82.64it/s]
|
1475 |
22%|██▏ | 401/1826 [00:04<00:16, 84.60it/s]
|
1476 |
22%|██▏ | 410/1826 [00:04<00:16, 85.41it/s]
|
1477 |
23%|██▎ | 419/1826 [00:04<00:16, 84.65it/s]
|
1478 |
23%|██▎ | 428/1826 [00:05<00:16, 83.56it/s]
|
1479 |
24%|██▍ | 437/1826 [00:05<00:16, 84.51it/s]
|
1480 |
24%|██▍ | 446/1826 [00:05<00:16, 84.92it/s]
|
1481 |
25%|██▍ | 455/1826 [00:05<00:15, 85.69it/s]
|
1482 |
25%|██▌ | 465/1826 [00:05<00:15, 88.89it/s]
|
1483 |
26%|██▌ | 474/1826 [00:05<00:16, 84.46it/s]
|
1484 |
26%|██▋ | 483/1826 [00:05<00:15, 84.49it/s]
|
1485 |
27%|██▋ | 492/1826 [00:05<00:16, 82.42it/s]
|
1486 |
27%|██▋ | 502/1826 [00:05<00:15, 84.22it/s]
|
1487 |
28%|██▊ | 512/1826 [00:05<00:15, 86.43it/s]
|
1488 |
29%|██▊ | 522/1826 [00:06<00:14, 88.67it/s]
|
1489 |
29%|██▉ | 531/1826 [00:06<00:14, 86.95it/s]
|
1490 |
30%|██▉ | 540/1826 [00:06<00:14, 87.11it/s]
|
1491 |
30%|███ | 549/1826 [00:06<00:14, 85.25it/s]
|
1492 |
31%|███ | 558/1826 [00:06<00:15, 83.01it/s]
|
1493 |
31%|███ | 568/1826 [00:06<00:14, 85.14it/s]
|
1494 |
32%|███▏ | 577/1826 [00:06<00:14, 84.98it/s]
|
1495 |
32%|███▏ | 586/1826 [00:06<00:15, 78.67it/s]
|
1496 |
33%|███▎ | 595/1826 [00:06<00:15, 80.71it/s]
|
1497 |
33%|███▎ | 604/1826 [00:07<00:14, 82.23it/s]
|
1498 |
34%|███▎ | 613/1826 [00:07<00:14, 81.40it/s]
|
1499 |
34%|███▍ | 622/1826 [00:07<00:14, 82.83it/s]
|
1500 |
35%|███▍ | 632/1826 [00:07<00:13, 86.62it/s]
|
1501 |
35%|███▌ | 642/1826 [00:07<00:13, 87.79it/s]
|
1502 |
36%|███▌ | 651/1826 [00:07<00:13, 84.88it/s]
|
1503 |
36%|███▌ | 660/1826 [00:07<00:13, 86.28it/s]
|
1504 |
37%|███▋ | 670/1826 [00:07<00:13, 87.60it/s]
|
1505 |
37%|███▋ | 679/1826 [00:07<00:13, 87.43it/s]
|
1506 |
38%|███▊ | 689/1826 [00:08<00:12, 88.58it/s]
|
1507 |
38%|███▊ | 699/1826 [00:08<00:12, 90.30it/s]
|
1508 |
39%|███▉ | 709/1826 [00:08<00:12, 91.17it/s]
|
1509 |
39%|███▉ | 719/1826 [00:08<00:11, 92.57it/s]
|
1510 |
40%|███▉ | 729/1826 [00:08<00:11, 93.00it/s]
|
1511 |
40%|████ | 739/1826 [00:08<00:12, 90.46it/s]
|
1512 |
41%|████ | 749/1826 [00:08<00:11, 91.75it/s]
|
1513 |
42%|████▏ | 759/1826 [00:08<00:11, 91.80it/s]
|
1514 |
42%|████▏ | 769/1826 [00:08<00:11, 93.48it/s]
|
1515 |
43%|████▎ | 779/1826 [00:09<00:11, 90.59it/s]
|
1516 |
43%|████▎ | 789/1826 [00:09<00:11, 89.82it/s]
|
1517 |
44%|████▍ | 799/1826 [00:09<00:11, 88.45it/s]
|
1518 |
44%|████▍ | 809/1826 [00:09<00:11, 88.97it/s]
|
1519 |
45%|████▍ | 819/1826 [00:09<00:11, 89.58it/s]
|
1520 |
45%|████▌ | 829/1826 [00:09<00:10, 90.95it/s]
|
1521 |
46%|████▌ | 839/1826 [00:09<00:11, 87.82it/s]
|
1522 |
46%|████▋ | 849/1826 [00:09<00:10, 90.77it/s]
|
1523 |
47%|████▋ | 859/1826 [00:09<00:10, 93.15it/s]
|
1524 |
48%|████▊ | 869/1826 [00:10<00:10, 90.43it/s]
|
1525 |
48%|████▊ | 879/1826 [00:10<00:10, 89.07it/s]
|
1526 |
49%|████▊ | 888/1826 [00:10<00:10, 88.03it/s]
|
1527 |
49%|████▉ | 898/1826 [00:10<00:10, 89.92it/s]
|
1528 |
50%|████▉ | 908/1826 [00:10<00:10, 91.01it/s]
|
1529 |
50%|█████ | 918/1826 [00:10<00:09, 91.51it/s]
|
1530 |
51%|█████ | 928/1826 [00:10<00:09, 91.40it/s]
|
1531 |
51%|█████▏ | 938/1826 [00:10<00:10, 88.19it/s]
|
1532 |
52%|█████▏ | 947/1826 [00:10<00:10, 84.32it/s]
|
1533 |
52%|█████▏ | 956/1826 [00:11<00:10, 85.23it/s]
|
1534 |
53%|█████▎ | 966/1826 [00:11<00:09, 86.86it/s]
|
1535 |
53%|█████▎ | 975/1826 [00:11<00:09, 85.98it/s]
|
1536 |
54%|█████▍ | 985/1826 [00:11<00:09, 87.66it/s]
|
1537 |
54%|█████▍ | 994/1826 [00:11<00:09, 85.02it/s]
|
1538 |
55%|█████▍ | 1003/1826 [00:11<00:09, 83.12it/s]
|
1539 |
55%|█████▌ | 1012/1826 [00:11<00:09, 84.61it/s]
|
1540 |
56%|█████▌ | 1022/1826 [00:11<00:09, 88.01it/s]
|
1541 |
57%|█████▋ | 1032/1826 [00:11<00:08, 90.28it/s]
|
1542 |
57%|█████▋ | 1042/1826 [00:12<00:09, 86.92it/s]
|
1543 |
58%|█████▊ | 1051/1826 [00:12<00:09, 85.51it/s]
|
1544 |
58%|█████▊ | 1060/1826 [00:12<00:08, 86.69it/s]
|
1545 |
59%|█████▊ | 1070/1826 [00:12<00:08, 89.80it/s]
|
1546 |
59%|█████▉ | 1080/1826 [00:12<00:08, 91.79it/s]
|
1547 |
60%|█████▉ | 1090/1826 [00:12<00:07, 92.06it/s]
|
1548 |
60%|██████ | 1100/1826 [00:12<00:07, 92.64it/s]
|
1549 |
61%|██████ | 1110/1826 [00:12<00:08, 89.48it/s]
|
1550 |
61%|██████▏ | 1119/1826 [00:12<00:08, 87.58it/s]
|
1551 |
62%|██████▏ | 1129/1826 [00:13<00:07, 89.36it/s]
|
1552 |
62%|██████▏ | 1138/1826 [00:13<00:07, 88.55it/s]
|
1553 |
63%|██████▎ | 1147/1826 [00:13<00:07, 87.81it/s]
|
1554 |
63%|██████▎ | 1156/1826 [00:13<00:07, 87.34it/s]
|
1555 |
64%|██████▍ | 1165/1826 [00:13<00:07, 83.98it/s]
|
1556 |
64%|██████▍ | 1174/1826 [00:13<00:07, 84.35it/s]
|
1557 |
65%|██████▍ | 1183/1826 [00:13<00:07, 82.32it/s]
|
1558 |
65%|██████▌ | 1193/1826 [00:13<00:07, 84.81it/s]
|
1559 |
66%|██████▌ | 1202/1826 [00:13<00:07, 85.14it/s]
|
1560 |
66%|██████▋ | 1211/1826 [00:13<00:07, 83.20it/s]
|
1561 |
67%|██████▋ | 1220/1826 [00:14<00:07, 81.78it/s]
|
1562 |
67%|██████▋ | 1230/1826 [00:14<00:06, 85.50it/s]
|
1563 |
68%|██████▊ | 1240/1826 [00:14<00:06, 86.53it/s]
|
1564 |
68%|██████▊ | 1250/1826 [00:14<00:06, 85.10it/s]
|
1565 |
69%|██████▉ | 1260/1826 [00:14<00:06, 86.63it/s]
|
1566 |
69%|██████▉ | 1269/1826 [00:14<00:06, 87.13it/s]
|
1567 |
70%|██████▉ | 1278/1826 [00:14<00:06, 87.73it/s]
|
1568 |
70%|███████ | 1287/1826 [00:14<00:06, 88.07it/s]
|
1569 |
71%|███████ | 1297/1826 [00:14<00:05, 90.99it/s]
|
1570 |
72%|███████▏ | 1307/1826 [00:15<00:05, 91.31it/s]
|
1571 |
72%|███████▏ | 1317/1826 [00:15<00:05, 93.37it/s]
|
1572 |
73%|███████▎ | 1327/1826 [00:15<00:05, 92.71it/s]
|
1573 |
73%|███████▎ | 1337/1826 [00:15<00:05, 88.69it/s]
|
1574 |
74%|███████▎ | 1346/1826 [00:15<00:05, 85.16it/s]
|
1575 |
74%|███████▍ | 1355/1826 [00:15<00:05, 86.29it/s]
|
1576 |
75%|███████▍ | 1364/1826 [00:15<00:05, 87.05it/s]
|
1577 |
75%|███████▌ | 1373/1826 [00:15<00:05, 85.50it/s]
|
1578 |
76%|███████▌ | 1383/1826 [00:15<00:05, 88.26it/s]
|
1579 |
76%|███████▋ | 1393/1826 [00:16<00:04, 89.45it/s]
|
1580 |
77%|███████▋ | 1403/1826 [00:16<00:04, 91.42it/s]
|
1581 |
77%|███████▋ | 1413/1826 [00:16<00:04, 92.32it/s]
|
1582 |
78%|███████▊ | 1423/1826 [00:16<00:04, 89.99it/s]
|
1583 |
78%|███████▊ | 1433/1826 [00:16<00:04, 90.52it/s]
|
1584 |
79%|███████▉ | 1443/1826 [00:16<00:04, 85.95it/s]
|
1585 |
80%|███████▉ | 1453/1826 [00:16<00:04, 88.09it/s]
|
1586 |
80%|████████ | 1462/1826 [00:16<00:04, 88.33it/s]
|
1587 |
81%|████████ | 1471/1826 [00:16<00:04, 84.47it/s]
|
1588 |
81%|████████ | 1480/1826 [00:17<00:04, 84.58it/s]
|
1589 |
82%|████████▏ | 1489/1826 [00:17<00:04, 80.66it/s]
|
1590 |
82%|████████▏ | 1498/1826 [00:17<00:04, 81.96it/s]
|
1591 |
83%|████████▎ | 1507/1826 [00:17<00:03, 82.12it/s]
|
1592 |
83%|████████▎ | 1517/1826 [00:17<00:03, 84.73it/s]
|
1593 |
84%|████████▎ | 1526/1826 [00:17<00:03, 82.53it/s]
|
1594 |
84%|████████▍ | 1535/1826 [00:17<00:03, 81.76it/s]
|
1595 |
85%|████████▍ | 1544/1826 [00:17<00:03, 82.99it/s]
|
1596 |
85%|████████▌ | 1553/1826 [00:17<00:03, 83.81it/s]
|
1597 |
86%|████████▌ | 1562/1826 [00:18<00:03, 82.96it/s]
|
1598 |
86%|████████▌ | 1571/1826 [00:18<00:03, 81.80it/s]
|
1599 |
87%|████████▋ | 1580/1826 [00:18<00:02, 83.72it/s]
|
1600 |
87%|████████▋ | 1589/1826 [00:18<00:02, 82.90it/s]
|
1601 |
88%|████████▊ | 1598/1826 [00:18<00:02, 84.03it/s]
|
1602 |
88%|████████▊ | 1608/1826 [00:18<00:02, 86.25it/s]
|
1603 |
89%|████████▊ | 1617/1826 [00:18<00:02, 85.18it/s]
|
1604 |
89%|████████▉ | 1626/1826 [00:18<00:02, 77.48it/s]
|
1605 |
90%|████████▉ | 1636/1826 [00:18<00:02, 80.59it/s]
|
1606 |
90%|█████████ | 1645/1826 [00:19<00:02, 82.51it/s]
|
1607 |
91%|█████████ | 1655/1826 [00:19<00:02, 85.30it/s]
|
1608 |
91%|█████████ | 1664/1826 [00:19<00:01, 84.48it/s]
|
1609 |
92%|█████████▏| 1673/1826 [00:19<00:01, 85.56it/s]
|
1610 |
92%|█████████▏| 1683/1826 [00:19<00:01, 88.32it/s]
|
1611 |
93%|█████████▎| 1692/1826 [00:19<00:01, 86.33it/s]
|
1612 |
93%|█████████▎| 1701/1826 [00:19<00:01, 85.33it/s]
|
1613 |
94%|█████████▎| 1711/1826 [00:19<00:01, 87.29it/s]
|
1614 |
94%|█████████▍| 1720/1826 [00:19<00:01, 84.39it/s]
|
1615 |
95%|█████████▍| 1730/1826 [00:20<00:01, 87.06it/s]
|
1616 |
95%|█████████▌| 1740/1826 [00:20<00:00, 87.99it/s]
|
1617 |
96%|█████████▌| 1750/1826 [00:20<00:00, 88.28it/s]
|
1618 |
96%|█████████▋| 1759/1826 [00:20<00:00, 87.89it/s]
|
1619 |
97%|█████████▋| 1768/1826 [00:20<00:00, 87.33it/s]
|
1620 |
97%|█████████▋| 1777/1826 [00:20<00:00, 88.09it/s]
|
1621 |
98%|█████████▊| 1787/1826 [00:20<00:00, 88.65it/s]
|
1622 |
98%|█████████▊| 1796/1826 [00:20<00:00, 83.22it/s]
|
1623 |
99%|█████████▉| 1806/1826 [00:20<00:00, 86.07it/s]
|
1624 |
99%|█████████▉| 1815/1826 [00:20<00:00, 86.85it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1336 |
{'eval_loss': 0.006724909413605928, 'eval_precision': 0.9328214971209213, 'eval_recall': 0.9409486931268151, 'eval_f1': 0.936867469879518, 'eval_accuracy': 0.9988184887042326, 'eval_runtime': 14.3451, 'eval_samples_per_second': 473.891, 'eval_steps_per_second': 59.254, 'epoch': 10.0}
|
1337 |
{'train_runtime': 1261.5031, 'train_samples_per_second': 215.6, 'train_steps_per_second': 3.369, 'train_loss': 0.0022696754537961062, 'epoch': 10.0}
|
1338 |
|
1339 |
+
***** train metrics *****
|
1340 |
+
epoch = 10.0
|
1341 |
+
total_flos = 13283169GF
|
1342 |
+
train_loss = 0.0023
|
1343 |
+
train_runtime = 0:21:01.50
|
1344 |
+
train_samples = 27198
|
1345 |
+
train_samples_per_second = 215.6
|
1346 |
+
train_steps_per_second = 3.369
|
1347 |
+
08/30/2024 22:16:00 - INFO - __main__ - *** Evaluate ***
|
1348 |
+
[INFO|trainer.py:805] 2024-08-30 22:16:00,920 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1349 |
+
[INFO|trainer.py:3788] 2024-08-30 22:16:00,922 >>
|
1350 |
+
***** Running Evaluation *****
|
1351 |
+
[INFO|trainer.py:3790] 2024-08-30 22:16:00,923 >> Num examples = 6798
|
1352 |
+
[INFO|trainer.py:3793] 2024-08-30 22:16:00,923 >> Batch size = 8
|
1353 |
+
|
1354 |
0%| | 0/850 [00:00<?, ?it/s]
|
1355 |
1%| | 9/850 [00:00<00:09, 88.19it/s]
|
1356 |
2%|▏ | 18/850 [00:00<00:10, 81.56it/s]
|
1357 |
3%|▎ | 27/850 [00:00<00:10, 80.93it/s]
|
1358 |
4%|▍ | 36/850 [00:00<00:09, 84.26it/s]
|
1359 |
5%|▌ | 45/850 [00:00<00:09, 85.17it/s]
|
1360 |
6%|▋ | 54/850 [00:00<00:10, 77.34it/s]
|
1361 |
7%|▋ | 63/850 [00:00<00:10, 78.32it/s]
|
1362 |
8%|▊ | 71/850 [00:00<00:10, 74.58it/s]
|
1363 |
9%|▉ | 79/850 [00:01<00:10, 75.01it/s]
|
1364 |
10%|█ | 88/850 [00:01<00:09, 76.98it/s]
|
1365 |
11%|█▏ | 97/850 [00:01<00:09, 79.71it/s]
|
1366 |
13%|█▎ | 107/850 [00:01<00:08, 83.23it/s]
|
1367 |
14%|█▎ | 116/850 [00:01<00:08, 83.47it/s]
|
1368 |
15%|█▍ | 125/850 [00:01<00:08, 83.94it/s]
|
1369 |
16%|█▌ | 134/850 [00:01<00:08, 81.91it/s]
|
1370 |
17%|█▋ | 144/850 [00:01<00:08, 84.36it/s]
|
1371 |
18%|█▊ | 153/850 [00:01<00:08, 79.26it/s]
|
1372 |
19%|█▉ | 162/850 [00:02<00:08, 81.90it/s]
|
1373 |
20%|██ | 171/850 [00:02<00:08, 83.12it/s]
|
1374 |
21%|██ | 180/850 [00:02<00:07, 84.98it/s]
|
1375 |
22%|██▏ | 189/850 [00:02<00:08, 82.59it/s]
|
1376 |
23%|██▎ | 198/850 [00:02<00:07, 83.32it/s]
|
1377 |
24%|██▍ | 207/850 [00:02<00:07, 82.06it/s]
|
1378 |
25%|██▌ | 216/850 [00:02<00:07, 80.40it/s]
|
1379 |
27%|██▋ | 226/850 [00:02<00:07, 83.55it/s]
|
1380 |
28%|██▊ | 236/850 [00:02<00:07, 85.79it/s]
|
1381 |
29%|██▉ | 245/850 [00:02<00:07, 84.04it/s]
|
1382 |
30%|██▉ | 254/850 [00:03<00:06, 85.38it/s]
|
1383 |
31%|███ | 263/850 [00:03<00:06, 85.08it/s]
|
1384 |
32%|███▏ | 272/850 [00:03<00:06, 86.46it/s]
|
1385 |
33%|███▎ | 281/850 [00:03<00:06, 87.20it/s]
|
1386 |
34%|███▍ | 290/850 [00:03<00:06, 87.65it/s]
|
1387 |
35%|███▌ | 299/850 [00:03<00:06, 85.40it/s]
|
1388 |
36%|███▌ | 308/850 [00:03<00:06, 85.40it/s]
|
1389 |
37%|███▋ | 318/850 [00:03<00:06, 87.55it/s]
|
1390 |
38%|███▊ | 327/850 [00:03<00:06, 85.57it/s]
|
1391 |
40%|███▉ | 336/850 [00:04<00:05, 86.18it/s]
|
1392 |
41%|████ | 345/850 [00:04<00:05, 85.61it/s]
|
1393 |
42%|████▏ | 355/850 [00:04<00:05, 87.78it/s]
|
1394 |
43%|████▎ | 364/850 [00:04<00:05, 84.15it/s]
|
1395 |
44%|████▍ | 373/850 [00:04<00:05, 84.84it/s]
|
1396 |
45%|████▍ | 382/850 [00:04<00:05, 80.74it/s]
|
1397 |
46%|████▌ | 391/850 [00:04<00:05, 80.97it/s]
|
1398 |
47%|████▋ | 400/850 [00:04<00:05, 78.77it/s]
|
1399 |
48%|████▊ | 410/850 [00:04<00:05, 83.43it/s]
|
1400 |
49%|████▉ | 420/850 [00:05<00:05, 85.76it/s]
|
1401 |
51%|█████ | 430/850 [00:05<00:04, 86.88it/s]
|
1402 |
52%|█████▏ | 439/850 [00:05<00:04, 85.98it/s]
|
1403 |
53%|█████▎ | 448/850 [00:05<00:04, 81.98it/s]
|
1404 |
54%|█████▍ | 457/850 [00:05<00:04, 83.43it/s]
|
1405 |
55%|█████▍ | 467/850 [00:05<00:04, 86.56it/s]
|
1406 |
56%|█████▌ | 477/850 [00:05<00:04, 88.16it/s]
|
1407 |
57%|█████▋ | 486/850 [00:05<00:04, 85.36it/s]
|
1408 |
58%|█████▊ | 496/850 [00:05<00:04, 87.52it/s]
|
1409 |
60%|█████▉ | 506/850 [00:06<00:03, 90.01it/s]
|
1410 |
61%|██████ | 516/850 [00:06<00:03, 89.02it/s]
|
1411 |
62%|██████▏ | 525/850 [00:06<00:03, 88.20it/s]
|
1412 |
63%|██████▎ | 534/850 [00:06<00:03, 85.08it/s]
|
1413 |
64%|██████▍ | 543/850 [00:06<00:03, 83.29it/s]
|
1414 |
65%|██████▍ | 552/850 [00:06<00:03, 81.94it/s]
|
1415 |
66%|██████▌ | 561/850 [00:06<00:03, 80.66it/s]
|
1416 |
67%|██████▋ | 570/850 [00:06<00:03, 79.12it/s]
|
1417 |
68%|██████▊ | 578/850 [00:06<00:03, 78.66it/s]
|
1418 |
69%|██████▉ | 587/850 [00:07<00:03, 80.62it/s]
|
1419 |
70%|███████ | 596/850 [00:07<00:03, 81.35it/s]
|
1420 |
71%|███████ | 605/850 [00:07<00:03, 81.20it/s]
|
1421 |
72%|███████▏ | 615/850 [00:07<00:02, 82.94it/s]
|
1422 |
74%|███████▎ | 625/850 [00:07<00:02, 84.80it/s]
|
1423 |
75%|███████▍ | 634/850 [00:07<00:02, 84.22it/s]
|
1424 |
76%|███████▌ | 643/850 [00:07<00:02, 84.53it/s]
|
1425 |
77%|███████▋ | 653/850 [00:07<00:02, 87.76it/s]
|
1426 |
78%|███████▊ | 662/850 [00:07<00:02, 87.53it/s]
|
1427 |
79%|███████▉ | 671/850 [00:08<00:02, 87.19it/s]
|
1428 |
80%|████████ | 680/850 [00:08<00:01, 86.07it/s]
|
1429 |
81%|████████ | 689/850 [00:08<00:01, 84.91it/s]
|
1430 |
82%|████████▏ | 698/850 [00:08<00:01, 84.44it/s]
|
1431 |
83%|████████▎ | 707/850 [00:08<00:01, 81.34it/s]
|
1432 |
84%|████████▍ | 716/850 [00:08<00:01, 81.52it/s]
|
1433 |
85%|████████▌ | 725/850 [00:08<00:01, 78.53it/s]
|
1434 |
86%|████████▋ | 734/850 [00:08<00:01, 81.34it/s]
|
1435 |
87%|████████▋ | 743/850 [00:08<00:01, 79.59it/s]
|
1436 |
88%|████████▊ | 752/850 [00:09<00:01, 78.93it/s]
|
1437 |
90%|████████▉ | 761/850 [00:09<00:01, 81.25it/s]
|
1438 |
91%|█████████ | 770/850 [00:09<00:00, 81.25it/s]
|
1439 |
92%|█████████▏| 779/850 [00:09<00:00, 80.11it/s]
|
1440 |
93%|█████████▎| 788/850 [00:09<00:00, 78.07it/s]
|
1441 |
94%|█████████▍| 798/850 [00:09<00:00, 82.72it/s]
|
1442 |
95%|█████████▍| 807/850 [00:09<00:00, 81.30it/s]
|
1443 |
96%|█████████▌| 816/850 [00:09<00:00, 80.69it/s]
|
1444 |
97%|█████████▋| 826/850 [00:09<00:00, 82.05it/s]
|
1445 |
98%|█████████▊| 835/850 [00:10<00:00, 82.91it/s]
|
1446 |
99%|█████████▉| 844/850 [00:10<00:00, 83.44it/s]
|
1447 |
+
***** eval metrics *****
|
1448 |
+
epoch = 10.0
|
1449 |
+
eval_accuracy = 0.9988
|
1450 |
+
eval_f1 = 0.9369
|
1451 |
+
eval_loss = 0.0067
|
1452 |
+
eval_precision = 0.9328
|
1453 |
+
eval_recall = 0.9409
|
1454 |
+
eval_runtime = 0:00:14.18
|
1455 |
+
eval_samples = 6798
|
1456 |
+
eval_samples_per_second = 479.317
|
1457 |
+
eval_steps_per_second = 59.932
|
1458 |
+
08/30/2024 22:16:15 - INFO - __main__ - *** Predict ***
|
1459 |
+
[INFO|trainer.py:805] 2024-08-30 22:16:15,108 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
1460 |
+
[INFO|trainer.py:3788] 2024-08-30 22:16:15,110 >>
|
1461 |
+
***** Running Prediction *****
|
1462 |
+
[INFO|trainer.py:3790] 2024-08-30 22:16:15,110 >> Num examples = 14605
|
1463 |
+
[INFO|trainer.py:3793] 2024-08-30 22:16:15,110 >> Batch size = 8
|
1464 |
+
|
1465 |
0%| | 0/1826 [00:00<?, ?it/s]
|
1466 |
1%| | 10/1826 [00:00<00:18, 99.46it/s]
|
1467 |
1%| | 20/1826 [00:00<00:22, 80.10it/s]
|
1468 |
2%|▏ | 29/1826 [00:00<00:21, 82.82it/s]
|
1469 |
2%|▏ | 38/1826 [00:00<00:21, 82.91it/s]
|
1470 |
3%|▎ | 47/1826 [00:00<00:20, 85.19it/s]
|
1471 |
3%|▎ | 56/1826 [00:00<00:20, 85.50it/s]
|
1472 |
4%|▎ | 65/1826 [00:00<00:21, 82.13it/s]
|
1473 |
4%|▍ | 75/1826 [00:00<00:20, 84.79it/s]
|
1474 |
5%|▍ | 84/1826 [00:00<00:20, 86.10it/s]
|
1475 |
5%|▌ | 93/1826 [00:01<00:19, 87.04it/s]
|
1476 |
6%|▌ | 103/1826 [00:01<00:19, 89.92it/s]
|
1477 |
6%|▌ | 113/1826 [00:01<00:19, 89.70it/s]
|
1478 |
7%|▋ | 122/1826 [00:01<00:19, 86.11it/s]
|
1479 |
7%|▋ | 131/1826 [00:01<00:19, 84.81it/s]
|
1480 |
8%|▊ | 140/1826 [00:01<00:19, 85.70it/s]
|
1481 |
8%|▊ | 150/1826 [00:01<00:19, 87.70it/s]
|
1482 |
9%|▊ | 159/1826 [00:01<00:20, 82.63it/s]
|
1483 |
9%|▉ | 168/1826 [00:01<00:20, 82.89it/s]
|
1484 |
10%|▉ | 178/1826 [00:02<00:19, 86.63it/s]
|
1485 |
10%|█ | 187/1826 [00:02<00:19, 86.22it/s]
|
1486 |
11%|█ | 197/1826 [00:02<00:18, 86.83it/s]
|
1487 |
11%|█▏ | 206/1826 [00:02<00:18, 85.81it/s]
|
1488 |
12%|█▏ | 215/1826 [00:02<00:19, 84.24it/s]
|
1489 |
12%|█▏ | 224/1826 [00:02<00:18, 85.72it/s]
|
1490 |
13%|█▎ | 233/1826 [00:02<00:19, 82.32it/s]
|
1491 |
13%|█▎ | 242/1826 [00:02<00:19, 81.78it/s]
|
1492 |
14%|█▎ | 251/1826 [00:02<00:19, 81.73it/s]
|
1493 |
14%|█▍ | 260/1826 [00:03<00:19, 81.82it/s]
|
1494 |
15%|█▍ | 270/1826 [00:03<00:18, 84.77it/s]
|
1495 |
15%|█▌ | 280/1826 [00:03<00:17, 88.25it/s]
|
1496 |
16%|█▌ | 289/1826 [00:03<00:17, 86.31it/s]
|
1497 |
16%|█▋ | 298/1826 [00:03<00:17, 86.81it/s]
|
1498 |
17%|█▋ | 307/1826 [00:03<00:17, 86.16it/s]
|
1499 |
17%|█▋ | 316/1826 [00:03<00:17, 85.47it/s]
|
1500 |
18%|█▊ | 326/1826 [00:03<00:17, 87.31it/s]
|
1501 |
18%|█▊ | 335/1826 [00:03<00:17, 86.45it/s]
|
1502 |
19%|█▉ | 345/1826 [00:04<00:16, 88.96it/s]
|
1503 |
19%|█▉ | 354/1826 [00:04<00:17, 84.12it/s]
|
1504 |
20%|█▉ | 364/1826 [00:04<00:16, 86.21it/s]
|
1505 |
20%|██ | 374/1826 [00:04<00:16, 87.29it/s]
|
1506 |
21%|██ | 383/1826 [00:04<00:16, 87.07it/s]
|
1507 |
21%|██▏ | 392/1826 [00:04<00:17, 82.64it/s]
|
1508 |
22%|██▏ | 401/1826 [00:04<00:16, 84.60it/s]
|
1509 |
22%|██▏ | 410/1826 [00:04<00:16, 85.41it/s]
|
1510 |
23%|██▎ | 419/1826 [00:04<00:16, 84.65it/s]
|
1511 |
23%|██▎ | 428/1826 [00:05<00:16, 83.56it/s]
|
1512 |
24%|██▍ | 437/1826 [00:05<00:16, 84.51it/s]
|
1513 |
24%|██▍ | 446/1826 [00:05<00:16, 84.92it/s]
|
1514 |
25%|██▍ | 455/1826 [00:05<00:15, 85.69it/s]
|
1515 |
25%|██▌ | 465/1826 [00:05<00:15, 88.89it/s]
|
1516 |
26%|██▌ | 474/1826 [00:05<00:16, 84.46it/s]
|
1517 |
26%|██▋ | 483/1826 [00:05<00:15, 84.49it/s]
|
1518 |
27%|██▋ | 492/1826 [00:05<00:16, 82.42it/s]
|
1519 |
27%|██▋ | 502/1826 [00:05<00:15, 84.22it/s]
|
1520 |
28%|██▊ | 512/1826 [00:05<00:15, 86.43it/s]
|
1521 |
29%|██▊ | 522/1826 [00:06<00:14, 88.67it/s]
|
1522 |
29%|██▉ | 531/1826 [00:06<00:14, 86.95it/s]
|
1523 |
30%|██▉ | 540/1826 [00:06<00:14, 87.11it/s]
|
1524 |
30%|███ | 549/1826 [00:06<00:14, 85.25it/s]
|
1525 |
31%|███ | 558/1826 [00:06<00:15, 83.01it/s]
|
1526 |
31%|███ | 568/1826 [00:06<00:14, 85.14it/s]
|
1527 |
32%|███▏ | 577/1826 [00:06<00:14, 84.98it/s]
|
1528 |
32%|███▏ | 586/1826 [00:06<00:15, 78.67it/s]
|
1529 |
33%|███▎ | 595/1826 [00:06<00:15, 80.71it/s]
|
1530 |
33%|███▎ | 604/1826 [00:07<00:14, 82.23it/s]
|
1531 |
34%|███▎ | 613/1826 [00:07<00:14, 81.40it/s]
|
1532 |
34%|███▍ | 622/1826 [00:07<00:14, 82.83it/s]
|
1533 |
35%|███▍ | 632/1826 [00:07<00:13, 86.62it/s]
|
1534 |
35%|███▌ | 642/1826 [00:07<00:13, 87.79it/s]
|
1535 |
36%|███▌ | 651/1826 [00:07<00:13, 84.88it/s]
|
1536 |
36%|███▌ | 660/1826 [00:07<00:13, 86.28it/s]
|
1537 |
37%|███▋ | 670/1826 [00:07<00:13, 87.60it/s]
|
1538 |
37%|███▋ | 679/1826 [00:07<00:13, 87.43it/s]
|
1539 |
38%|███▊ | 689/1826 [00:08<00:12, 88.58it/s]
|
1540 |
38%|███▊ | 699/1826 [00:08<00:12, 90.30it/s]
|
1541 |
39%|███▉ | 709/1826 [00:08<00:12, 91.17it/s]
|
1542 |
39%|███▉ | 719/1826 [00:08<00:11, 92.57it/s]
|
1543 |
40%|███▉ | 729/1826 [00:08<00:11, 93.00it/s]
|
1544 |
40%|████ | 739/1826 [00:08<00:12, 90.46it/s]
|
1545 |
41%|████ | 749/1826 [00:08<00:11, 91.75it/s]
|
1546 |
42%|████▏ | 759/1826 [00:08<00:11, 91.80it/s]
|
1547 |
42%|████▏ | 769/1826 [00:08<00:11, 93.48it/s]
|
1548 |
43%|████▎ | 779/1826 [00:09<00:11, 90.59it/s]
|
1549 |
43%|████▎ | 789/1826 [00:09<00:11, 89.82it/s]
|
1550 |
44%|████▍ | 799/1826 [00:09<00:11, 88.45it/s]
|
1551 |
44%|████▍ | 809/1826 [00:09<00:11, 88.97it/s]
|
1552 |
45%|████▍ | 819/1826 [00:09<00:11, 89.58it/s]
|
1553 |
45%|████▌ | 829/1826 [00:09<00:10, 90.95it/s]
|
1554 |
46%|████▌ | 839/1826 [00:09<00:11, 87.82it/s]
|
1555 |
46%|████▋ | 849/1826 [00:09<00:10, 90.77it/s]
|
1556 |
47%|████▋ | 859/1826 [00:09<00:10, 93.15it/s]
|
1557 |
48%|████▊ | 869/1826 [00:10<00:10, 90.43it/s]
|
1558 |
48%|████▊ | 879/1826 [00:10<00:10, 89.07it/s]
|
1559 |
49%|████▊ | 888/1826 [00:10<00:10, 88.03it/s]
|
1560 |
49%|████▉ | 898/1826 [00:10<00:10, 89.92it/s]
|
1561 |
50%|████▉ | 908/1826 [00:10<00:10, 91.01it/s]
|
1562 |
50%|█████ | 918/1826 [00:10<00:09, 91.51it/s]
|
1563 |
51%|█████ | 928/1826 [00:10<00:09, 91.40it/s]
|
1564 |
51%|█████▏ | 938/1826 [00:10<00:10, 88.19it/s]
|
1565 |
52%|█████▏ | 947/1826 [00:10<00:10, 84.32it/s]
|
1566 |
52%|█████▏ | 956/1826 [00:11<00:10, 85.23it/s]
|
1567 |
53%|█████▎ | 966/1826 [00:11<00:09, 86.86it/s]
|
1568 |
53%|█████▎ | 975/1826 [00:11<00:09, 85.98it/s]
|
1569 |
54%|█████▍ | 985/1826 [00:11<00:09, 87.66it/s]
|
1570 |
54%|█████▍ | 994/1826 [00:11<00:09, 85.02it/s]
|
1571 |
55%|█████▍ | 1003/1826 [00:11<00:09, 83.12it/s]
|
1572 |
55%|█████▌ | 1012/1826 [00:11<00:09, 84.61it/s]
|
1573 |
56%|█████▌ | 1022/1826 [00:11<00:09, 88.01it/s]
|
1574 |
57%|█████▋ | 1032/1826 [00:11<00:08, 90.28it/s]
|
1575 |
57%|█████▋ | 1042/1826 [00:12<00:09, 86.92it/s]
|
1576 |
58%|█████▊ | 1051/1826 [00:12<00:09, 85.51it/s]
|
1577 |
58%|█████▊ | 1060/1826 [00:12<00:08, 86.69it/s]
|
1578 |
59%|█████▊ | 1070/1826 [00:12<00:08, 89.80it/s]
|
1579 |
59%|█████▉ | 1080/1826 [00:12<00:08, 91.79it/s]
|
1580 |
60%|█████▉ | 1090/1826 [00:12<00:07, 92.06it/s]
|
1581 |
60%|██████ | 1100/1826 [00:12<00:07, 92.64it/s]
|
1582 |
61%|██████ | 1110/1826 [00:12<00:08, 89.48it/s]
|
1583 |
61%|██████▏ | 1119/1826 [00:12<00:08, 87.58it/s]
|
1584 |
62%|██████▏ | 1129/1826 [00:13<00:07, 89.36it/s]
|
1585 |
62%|██████▏ | 1138/1826 [00:13<00:07, 88.55it/s]
|
1586 |
63%|██████▎ | 1147/1826 [00:13<00:07, 87.81it/s]
|
1587 |
63%|██████▎ | 1156/1826 [00:13<00:07, 87.34it/s]
|
1588 |
64%|██████▍ | 1165/1826 [00:13<00:07, 83.98it/s]
|
1589 |
64%|██████▍ | 1174/1826 [00:13<00:07, 84.35it/s]
|
1590 |
65%|██████▍ | 1183/1826 [00:13<00:07, 82.32it/s]
|
1591 |
65%|██████▌ | 1193/1826 [00:13<00:07, 84.81it/s]
|
1592 |
66%|██████▌ | 1202/1826 [00:13<00:07, 85.14it/s]
|
1593 |
66%|██████▋ | 1211/1826 [00:13<00:07, 83.20it/s]
|
1594 |
67%|██████▋ | 1220/1826 [00:14<00:07, 81.78it/s]
|
1595 |
67%|██████▋ | 1230/1826 [00:14<00:06, 85.50it/s]
|
1596 |
68%|██████▊ | 1240/1826 [00:14<00:06, 86.53it/s]
|
1597 |
68%|██████▊ | 1250/1826 [00:14<00:06, 85.10it/s]
|
1598 |
69%|██████▉ | 1260/1826 [00:14<00:06, 86.63it/s]
|
1599 |
69%|██████▉ | 1269/1826 [00:14<00:06, 87.13it/s]
|
1600 |
70%|██████▉ | 1278/1826 [00:14<00:06, 87.73it/s]
|
1601 |
70%|███████ | 1287/1826 [00:14<00:06, 88.07it/s]
|
1602 |
71%|███████ | 1297/1826 [00:14<00:05, 90.99it/s]
|
1603 |
72%|███████▏ | 1307/1826 [00:15<00:05, 91.31it/s]
|
1604 |
72%|███████▏ | 1317/1826 [00:15<00:05, 93.37it/s]
|
1605 |
73%|███████▎ | 1327/1826 [00:15<00:05, 92.71it/s]
|
1606 |
73%|███████▎ | 1337/1826 [00:15<00:05, 88.69it/s]
|
1607 |
74%|███████▎ | 1346/1826 [00:15<00:05, 85.16it/s]
|
1608 |
74%|███████▍ | 1355/1826 [00:15<00:05, 86.29it/s]
|
1609 |
75%|███████▍ | 1364/1826 [00:15<00:05, 87.05it/s]
|
1610 |
75%|███████▌ | 1373/1826 [00:15<00:05, 85.50it/s]
|
1611 |
76%|███████▌ | 1383/1826 [00:15<00:05, 88.26it/s]
|
1612 |
76%|███████▋ | 1393/1826 [00:16<00:04, 89.45it/s]
|
1613 |
77%|███████▋ | 1403/1826 [00:16<00:04, 91.42it/s]
|
1614 |
77%|███████▋ | 1413/1826 [00:16<00:04, 92.32it/s]
|
1615 |
78%|███████▊ | 1423/1826 [00:16<00:04, 89.99it/s]
|
1616 |
78%|███████▊ | 1433/1826 [00:16<00:04, 90.52it/s]
|
1617 |
79%|███████▉ | 1443/1826 [00:16<00:04, 85.95it/s]
|
1618 |
80%|███████▉ | 1453/1826 [00:16<00:04, 88.09it/s]
|
1619 |
80%|████████ | 1462/1826 [00:16<00:04, 88.33it/s]
|
1620 |
81%|████████ | 1471/1826 [00:16<00:04, 84.47it/s]
|
1621 |
81%|████████ | 1480/1826 [00:17<00:04, 84.58it/s]
|
1622 |
82%|████████▏ | 1489/1826 [00:17<00:04, 80.66it/s]
|
1623 |
82%|████████▏ | 1498/1826 [00:17<00:04, 81.96it/s]
|
1624 |
83%|████████▎ | 1507/1826 [00:17<00:03, 82.12it/s]
|
1625 |
83%|████████▎ | 1517/1826 [00:17<00:03, 84.73it/s]
|
1626 |
84%|████████▎ | 1526/1826 [00:17<00:03, 82.53it/s]
|
1627 |
84%|████████▍ | 1535/1826 [00:17<00:03, 81.76it/s]
|
1628 |
85%|████████▍ | 1544/1826 [00:17<00:03, 82.99it/s]
|
1629 |
85%|████████▌ | 1553/1826 [00:17<00:03, 83.81it/s]
|
1630 |
86%|████████▌ | 1562/1826 [00:18<00:03, 82.96it/s]
|
1631 |
86%|████████▌ | 1571/1826 [00:18<00:03, 81.80it/s]
|
1632 |
87%|████████▋ | 1580/1826 [00:18<00:02, 83.72it/s]
|
1633 |
87%|████████▋ | 1589/1826 [00:18<00:02, 82.90it/s]
|
1634 |
88%|████████▊ | 1598/1826 [00:18<00:02, 84.03it/s]
|
1635 |
88%|████████▊ | 1608/1826 [00:18<00:02, 86.25it/s]
|
1636 |
89%|████████▊ | 1617/1826 [00:18<00:02, 85.18it/s]
|
1637 |
89%|████████▉ | 1626/1826 [00:18<00:02, 77.48it/s]
|
1638 |
90%|████████▉ | 1636/1826 [00:18<00:02, 80.59it/s]
|
1639 |
90%|█████████ | 1645/1826 [00:19<00:02, 82.51it/s]
|
1640 |
91%|█████████ | 1655/1826 [00:19<00:02, 85.30it/s]
|
1641 |
91%|█████████ | 1664/1826 [00:19<00:01, 84.48it/s]
|
1642 |
92%|█████████▏| 1673/1826 [00:19<00:01, 85.56it/s]
|
1643 |
92%|█████████▏| 1683/1826 [00:19<00:01, 88.32it/s]
|
1644 |
93%|█████████▎| 1692/1826 [00:19<00:01, 86.33it/s]
|
1645 |
93%|█████████▎| 1701/1826 [00:19<00:01, 85.33it/s]
|
1646 |
94%|█████████▎| 1711/1826 [00:19<00:01, 87.29it/s]
|
1647 |
94%|█████████▍| 1720/1826 [00:19<00:01, 84.39it/s]
|
1648 |
95%|█████████▍| 1730/1826 [00:20<00:01, 87.06it/s]
|
1649 |
95%|█████████▌| 1740/1826 [00:20<00:00, 87.99it/s]
|
1650 |
96%|█████████▌| 1750/1826 [00:20<00:00, 88.28it/s]
|
1651 |
96%|█████████▋| 1759/1826 [00:20<00:00, 87.89it/s]
|
1652 |
97%|█████████▋| 1768/1826 [00:20<00:00, 87.33it/s]
|
1653 |
97%|█████████▋| 1777/1826 [00:20<00:00, 88.09it/s]
|
1654 |
98%|█████████▊| 1787/1826 [00:20<00:00, 88.65it/s]
|
1655 |
98%|█████████▊| 1796/1826 [00:20<00:00, 83.22it/s]
|
1656 |
99%|█████████▉| 1806/1826 [00:20<00:00, 86.07it/s]
|
1657 |
99%|█████████▉| 1815/1826 [00:20<00:00, 86.85it/s]
|
1658 |
+
[INFO|trainer.py:3478] 2024-08-30 22:16:43,468 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1659 |
+
[INFO|configuration_utils.py:472] 2024-08-30 22:16:43,469 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1660 |
+
[INFO|modeling_utils.py:2690] 2024-08-30 22:16:44,625 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1661 |
+
[INFO|tokenization_utils_base.py:2574] 2024-08-30 22:16:44,626 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1662 |
+
[INFO|tokenization_utils_base.py:2583] 2024-08-30 22:16:44,627 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1663 |
+
***** predict metrics *****
|
1664 |
+
predict_accuracy = 0.9981
|
1665 |
+
predict_f1 = 0.8966
|
1666 |
+
predict_loss = 0.0107
|
1667 |
+
predict_precision = 0.8769
|
1668 |
+
predict_recall = 0.9171
|
1669 |
+
predict_runtime = 0:00:27.70
|
1670 |
+
predict_samples_per_second = 527.095
|
1671 |
+
predict_steps_per_second = 65.9
|
1672 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"total_flos": 1.
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second":
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"total_flos": 1.4262694978690116e+16,
|
4 |
+
"train_loss": 0.0022696754537961062,
|
5 |
+
"train_runtime": 1261.5031,
|
6 |
+
"train_samples": 27198,
|
7 |
+
"train_samples_per_second": 215.6,
|
8 |
+
"train_steps_per_second": 3.369
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,201 +1,201 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime":
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.
|
25 |
-
"grad_norm": 0.
|
26 |
-
"learning_rate": 4.
|
27 |
-
"loss": 0.
|
28 |
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime":
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second":
|
40 |
-
"step":
|
41 |
-
},
|
42 |
-
{
|
43 |
-
"epoch": 2.
|
44 |
-
"grad_norm": 0.
|
45 |
-
"learning_rate": 3.
|
46 |
-
"loss": 0.
|
47 |
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
-
"eval_accuracy": 0.
|
52 |
-
"eval_f1": 0.
|
53 |
-
"eval_loss": 0.
|
54 |
-
"eval_precision": 0.
|
55 |
-
"eval_recall": 0.
|
56 |
-
"eval_runtime":
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
-
"step":
|
60 |
-
},
|
61 |
-
{
|
62 |
-
"epoch": 3.
|
63 |
-
"grad_norm": 0.
|
64 |
-
"learning_rate": 3.
|
65 |
-
"loss": 0.
|
66 |
"step": 1500
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
-
"eval_accuracy": 0.
|
71 |
-
"eval_f1": 0.
|
72 |
-
"eval_loss": 0.
|
73 |
-
"eval_precision": 0.
|
74 |
-
"eval_recall": 0.
|
75 |
-
"eval_runtime":
|
76 |
-
"eval_samples_per_second":
|
77 |
-
"eval_steps_per_second":
|
78 |
-
"step":
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"epoch": 4.
|
82 |
-
"grad_norm": 0.
|
83 |
-
"learning_rate": 2.
|
84 |
-
"loss": 0.
|
85 |
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
-
"eval_accuracy": 0.
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 0.
|
92 |
-
"eval_precision": 0.
|
93 |
-
"eval_recall": 0.
|
94 |
-
"eval_runtime":
|
95 |
-
"eval_samples_per_second":
|
96 |
-
"eval_steps_per_second":
|
97 |
-
"step":
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"epoch": 5.
|
101 |
-
"grad_norm": 0.
|
102 |
-
"learning_rate": 2.
|
103 |
-
"loss": 0.
|
104 |
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
-
"eval_accuracy": 0.
|
109 |
-
"eval_f1": 0.
|
110 |
-
"eval_loss": 0.
|
111 |
-
"eval_precision": 0.
|
112 |
-
"eval_recall": 0.
|
113 |
-
"eval_runtime":
|
114 |
-
"eval_samples_per_second":
|
115 |
-
"eval_steps_per_second":
|
116 |
-
"step":
|
117 |
-
},
|
118 |
-
{
|
119 |
-
"epoch": 6.912442396313364,
|
120 |
-
"grad_norm": 0.009607589803636074,
|
121 |
-
"learning_rate": 1.543778801843318e-05,
|
122 |
-
"loss": 0.0009,
|
123 |
-
"step": 3000
|
124 |
},
|
125 |
{
|
126 |
"epoch": 7.0,
|
127 |
-
"eval_accuracy": 0.
|
128 |
-
"eval_f1": 0.
|
129 |
-
"eval_loss": 0.
|
130 |
-
"eval_precision": 0.
|
131 |
-
"eval_recall": 0.
|
132 |
-
"eval_runtime":
|
133 |
-
"eval_samples_per_second":
|
134 |
-
"eval_steps_per_second":
|
135 |
-
"step":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
},
|
137 |
{
|
138 |
"epoch": 8.0,
|
139 |
-
"eval_accuracy": 0.
|
140 |
-
"eval_f1": 0.
|
141 |
-
"eval_loss": 0.
|
142 |
-
"eval_precision": 0.
|
143 |
-
"eval_recall": 0.
|
144 |
-
"eval_runtime":
|
145 |
-
"eval_samples_per_second":
|
146 |
-
"eval_steps_per_second":
|
147 |
-
"step":
|
148 |
-
},
|
149 |
-
{
|
150 |
-
"epoch": 8.
|
151 |
-
"grad_norm": 0.
|
152 |
-
"learning_rate":
|
153 |
-
"loss": 0.
|
154 |
"step": 3500
|
155 |
},
|
156 |
{
|
157 |
"epoch": 9.0,
|
158 |
-
"eval_accuracy": 0.
|
159 |
-
"eval_f1": 0.
|
160 |
-
"eval_loss": 0.
|
161 |
-
"eval_precision": 0.
|
162 |
-
"eval_recall": 0.
|
163 |
-
"eval_runtime":
|
164 |
-
"eval_samples_per_second":
|
165 |
-
"eval_steps_per_second":
|
166 |
-
"step":
|
167 |
-
},
|
168 |
-
{
|
169 |
-
"epoch": 9.
|
170 |
-
"grad_norm": 0.
|
171 |
-
"learning_rate":
|
172 |
-
"loss": 0.
|
173 |
"step": 4000
|
174 |
},
|
175 |
{
|
176 |
"epoch": 10.0,
|
177 |
-
"eval_accuracy": 0.
|
178 |
-
"eval_f1": 0.
|
179 |
-
"eval_loss": 0.
|
180 |
-
"eval_precision": 0.
|
181 |
-
"eval_recall": 0.
|
182 |
-
"eval_runtime":
|
183 |
-
"eval_samples_per_second":
|
184 |
-
"eval_steps_per_second":
|
185 |
-
"step":
|
186 |
},
|
187 |
{
|
188 |
"epoch": 10.0,
|
189 |
-
"step":
|
190 |
-
"total_flos": 1.
|
191 |
-
"train_loss": 0.
|
192 |
-
"train_runtime":
|
193 |
-
"train_samples_per_second":
|
194 |
-
"train_steps_per_second":
|
195 |
}
|
196 |
],
|
197 |
"logging_steps": 500,
|
198 |
-
"max_steps":
|
199 |
"num_input_tokens_seen": 0,
|
200 |
"num_train_epochs": 10,
|
201 |
"save_steps": 500,
|
@@ -211,7 +211,7 @@
|
|
211 |
"attributes": {}
|
212 |
}
|
213 |
},
|
214 |
-
"total_flos": 1.
|
215 |
"train_batch_size": 32,
|
216 |
"trial_name": null,
|
217 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.936867469879518,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4250",
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 4250,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9981303557517528,
|
14 |
+
"eval_f1": 0.8939962476547841,
|
15 |
+
"eval_loss": 0.00556989898905158,
|
16 |
+
"eval_precision": 0.8671519563239308,
|
17 |
+
"eval_recall": 0.9225556631171346,
|
18 |
+
"eval_runtime": 14.25,
|
19 |
+
"eval_samples_per_second": 477.051,
|
20 |
+
"eval_steps_per_second": 59.649,
|
21 |
+
"step": 425
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.1764705882352942,
|
25 |
+
"grad_norm": 0.1311497986316681,
|
26 |
+
"learning_rate": 4.411764705882353e-05,
|
27 |
+
"loss": 0.0104,
|
28 |
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
+
"eval_accuracy": 0.9985782913528953,
|
33 |
+
"eval_f1": 0.9216722729456991,
|
34 |
+
"eval_loss": 0.0041933078318834305,
|
35 |
+
"eval_precision": 0.9150763358778626,
|
36 |
+
"eval_recall": 0.9283639883833494,
|
37 |
+
"eval_runtime": 14.1751,
|
38 |
+
"eval_samples_per_second": 479.575,
|
39 |
+
"eval_steps_per_second": 59.965,
|
40 |
+
"step": 850
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.3529411764705883,
|
44 |
+
"grad_norm": 0.002636878052726388,
|
45 |
+
"learning_rate": 3.8235294117647055e-05,
|
46 |
+
"loss": 0.0034,
|
47 |
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
"epoch": 3.0,
|
51 |
+
"eval_accuracy": 0.9985133731498312,
|
52 |
+
"eval_f1": 0.9155339805825242,
|
53 |
+
"eval_loss": 0.004266700241714716,
|
54 |
+
"eval_precision": 0.9182083739045764,
|
55 |
+
"eval_recall": 0.9128751210067764,
|
56 |
+
"eval_runtime": 14.285,
|
57 |
+
"eval_samples_per_second": 475.883,
|
58 |
+
"eval_steps_per_second": 59.503,
|
59 |
+
"step": 1275
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 3.5294117647058822,
|
63 |
+
"grad_norm": 0.08669757843017578,
|
64 |
+
"learning_rate": 3.235294117647059e-05,
|
65 |
+
"loss": 0.0022,
|
66 |
"step": 1500
|
67 |
},
|
68 |
{
|
69 |
"epoch": 4.0,
|
70 |
+
"eval_accuracy": 0.9985847831732018,
|
71 |
+
"eval_f1": 0.9250367466927977,
|
72 |
+
"eval_loss": 0.0043651387095451355,
|
73 |
+
"eval_precision": 0.9365079365079365,
|
74 |
+
"eval_recall": 0.9138431752178122,
|
75 |
+
"eval_runtime": 14.5173,
|
76 |
+
"eval_samples_per_second": 468.27,
|
77 |
+
"eval_steps_per_second": 58.551,
|
78 |
+
"step": 1700
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.705882352941177,
|
82 |
+
"grad_norm": 0.27693310379981995,
|
83 |
+
"learning_rate": 2.647058823529412e-05,
|
84 |
+
"loss": 0.0012,
|
85 |
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
"epoch": 5.0,
|
89 |
+
"eval_accuracy": 0.9985393404310569,
|
90 |
+
"eval_f1": 0.919463087248322,
|
91 |
+
"eval_loss": 0.006118799094110727,
|
92 |
+
"eval_precision": 0.9107312440645774,
|
93 |
+
"eval_recall": 0.9283639883833494,
|
94 |
+
"eval_runtime": 14.2824,
|
95 |
+
"eval_samples_per_second": 475.97,
|
96 |
+
"eval_steps_per_second": 59.514,
|
97 |
+
"step": 2125
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"epoch": 5.882352941176471,
|
101 |
+
"grad_norm": 0.008007431402802467,
|
102 |
+
"learning_rate": 2.058823529411765e-05,
|
103 |
+
"loss": 0.0009,
|
104 |
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
"epoch": 6.0,
|
108 |
+
"eval_accuracy": 0.9986626850168787,
|
109 |
+
"eval_f1": 0.9221213569039655,
|
110 |
+
"eval_loss": 0.005954863503575325,
|
111 |
+
"eval_precision": 0.910377358490566,
|
112 |
+
"eval_recall": 0.9341723136495643,
|
113 |
+
"eval_runtime": 14.2494,
|
114 |
+
"eval_samples_per_second": 477.072,
|
115 |
+
"eval_steps_per_second": 59.652,
|
116 |
+
"step": 2550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
},
|
118 |
{
|
119 |
"epoch": 7.0,
|
120 |
+
"eval_accuracy": 0.9986691768371851,
|
121 |
+
"eval_f1": 0.9314148681055155,
|
122 |
+
"eval_loss": 0.006543714087456465,
|
123 |
+
"eval_precision": 0.9230038022813688,
|
124 |
+
"eval_recall": 0.9399806389157793,
|
125 |
+
"eval_runtime": 14.386,
|
126 |
+
"eval_samples_per_second": 472.542,
|
127 |
+
"eval_steps_per_second": 59.085,
|
128 |
+
"step": 2975
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 7.0588235294117645,
|
132 |
+
"grad_norm": 0.0017305670771747828,
|
133 |
+
"learning_rate": 1.4705882352941177e-05,
|
134 |
+
"loss": 0.0005,
|
135 |
+
"step": 3000
|
136 |
},
|
137 |
{
|
138 |
"epoch": 8.0,
|
139 |
+
"eval_accuracy": 0.9986886522981044,
|
140 |
+
"eval_f1": 0.9280540801545147,
|
141 |
+
"eval_loss": 0.005883762612938881,
|
142 |
+
"eval_precision": 0.9258188824662813,
|
143 |
+
"eval_recall": 0.9303000968054211,
|
144 |
+
"eval_runtime": 14.3169,
|
145 |
+
"eval_samples_per_second": 474.822,
|
146 |
+
"eval_steps_per_second": 59.37,
|
147 |
+
"step": 3400
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 8.235294117647058,
|
151 |
+
"grad_norm": 0.00020609228522516787,
|
152 |
+
"learning_rate": 8.823529411764707e-06,
|
153 |
+
"loss": 0.0004,
|
154 |
"step": 3500
|
155 |
},
|
156 |
{
|
157 |
"epoch": 9.0,
|
158 |
+
"eval_accuracy": 0.9987276032199429,
|
159 |
+
"eval_f1": 0.9317307692307693,
|
160 |
+
"eval_loss": 0.00656876852735877,
|
161 |
+
"eval_precision": 0.9255014326647565,
|
162 |
+
"eval_recall": 0.9380445304937076,
|
163 |
+
"eval_runtime": 14.5715,
|
164 |
+
"eval_samples_per_second": 466.526,
|
165 |
+
"eval_steps_per_second": 58.333,
|
166 |
+
"step": 3825
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"epoch": 9.411764705882353,
|
170 |
+
"grad_norm": 0.00026785818045027554,
|
171 |
+
"learning_rate": 2.9411764705882355e-06,
|
172 |
+
"loss": 0.0001,
|
173 |
"step": 4000
|
174 |
},
|
175 |
{
|
176 |
"epoch": 10.0,
|
177 |
+
"eval_accuracy": 0.9988184887042326,
|
178 |
+
"eval_f1": 0.936867469879518,
|
179 |
+
"eval_loss": 0.006724909413605928,
|
180 |
+
"eval_precision": 0.9328214971209213,
|
181 |
+
"eval_recall": 0.9409486931268151,
|
182 |
+
"eval_runtime": 14.3451,
|
183 |
+
"eval_samples_per_second": 473.891,
|
184 |
+
"eval_steps_per_second": 59.254,
|
185 |
+
"step": 4250
|
186 |
},
|
187 |
{
|
188 |
"epoch": 10.0,
|
189 |
+
"step": 4250,
|
190 |
+
"total_flos": 1.4262694978690116e+16,
|
191 |
+
"train_loss": 0.0022696754537961062,
|
192 |
+
"train_runtime": 1261.5031,
|
193 |
+
"train_samples_per_second": 215.6,
|
194 |
+
"train_steps_per_second": 3.369
|
195 |
}
|
196 |
],
|
197 |
"logging_steps": 500,
|
198 |
+
"max_steps": 4250,
|
199 |
"num_input_tokens_seen": 0,
|
200 |
"num_train_epochs": 10,
|
201 |
"save_steps": 500,
|
|
|
211 |
"attributes": {}
|
212 |
}
|
213 |
},
|
214 |
+
"total_flos": 1.4262694978690116e+16,
|
215 |
"train_batch_size": 32,
|
216 |
"trial_name": null,
|
217 |
"trial_params": null
|