mgkamalesh7
committed on
Commit
•
3b8379b
1
Parent(s):
7fc5233
End of training
Browse files
README.md
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
base_model: facebook/bart-base
|
4 |
+
tags:
|
5 |
+
- generated_from_trainer
|
6 |
+
metrics:
|
7 |
+
- rouge
|
8 |
+
model-index:
|
9 |
+
- name: bart-base-wsd-finetuned-cve-reason
|
10 |
+
results: []
|
11 |
+
---
|
12 |
+
|
13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
+
should probably proofread and complete it, then remove this comment. -->
|
15 |
+
|
16 |
+
# bart-base-wsd-finetuned-cve-reason
|
17 |
+
|
18 |
+
This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on an unspecified dataset (the Trainer did not record a dataset name).
|
19 |
+
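It achieves the following results on the evaluation set (these values match the epoch 13 row of the training results table, which has the lowest validation loss):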
|
20 |
+
- Loss: 0.3236
|
21 |
+
- Rouge1: 90.5086
|
22 |
+
- Rouge2: 86.7313
|
23 |
+
- Rougel: 90.5004
|
24 |
+
- Rougelsum: 90.4025
|
25 |
+
- Gen Len: 8.5902
|
26 |
+
|
27 |
+
## Model description
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Intended uses & limitations
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Training and evaluation data
|
36 |
+
|
37 |
+
More information needed
|
38 |
+
|
39 |
+
## Training procedure
|
40 |
+
|
41 |
+
### Training hyperparameters
|
42 |
+
|
43 |
+
The following hyperparameters were used during training:
|
44 |
+
- learning_rate: 2e-05
|
45 |
+
- train_batch_size: 16
|
46 |
+
- eval_batch_size: 16
|
47 |
+
- seed: 42
|
48 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
49 |
+
- lr_scheduler_type: linear
|
50 |
+
- num_epochs: 200 (note: the training results table below is logged only through epoch 23)
|
51 |
+
- mixed_precision_training: Native AMP
|
52 |
+
|
53 |
+
### Training results
|
54 |
+
|
55 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
|
56 |
+
|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
|
57 |
+
| No log | 1.0 | 56 | 0.5785 | 70.2552 | 61.7586 | 70.3201 | 70.3702 | 8.0328 |
|
58 |
+
| No log | 2.0 | 112 | 0.4143 | 85.2974 | 79.9312 | 85.3423 | 85.3688 | 8.4295 |
|
59 |
+
| No log | 3.0 | 168 | 0.3903 | 85.4657 | 78.0399 | 85.0825 | 85.0315 | 8.518 |
|
60 |
+
| No log | 4.0 | 224 | 0.3799 | 82.3413 | 78.0306 | 82.3002 | 82.1323 | 8.3213 |
|
61 |
+
| No log | 5.0 | 280 | 0.3536 | 86.8229 | 81.6826 | 86.6938 | 86.7128 | 8.5246 |
|
62 |
+
| No log | 6.0 | 336 | 0.3583 | 88.3834 | 83.6765 | 88.3687 | 88.3368 | 8.4164 |
|
63 |
+
| No log | 7.0 | 392 | 0.3474 | 87.6783 | 84.0721 | 87.6311 | 87.5552 | 8.4885 |
|
64 |
+
| No log | 8.0 | 448 | 0.3674 | 88.1823 | 83.7787 | 88.1658 | 88.0453 | 8.6656 |
|
65 |
+
| 0.3758 | 9.0 | 504 | 0.3357 | 89.3687 | 85.4151 | 89.2735 | 89.1779 | 8.5377 |
|
66 |
+
| 0.3758 | 10.0 | 560 | 0.3666 | 89.2611 | 85.8911 | 89.3461 | 89.2438 | 8.7902 |
|
67 |
+
| 0.3758 | 11.0 | 616 | 0.3650 | 88.4002 | 84.0876 | 88.4319 | 88.3324 | 8.7639 |
|
68 |
+
| 0.3758 | 12.0 | 672 | 0.3381 | 89.8928 | 86.2751 | 89.9706 | 89.891 | 8.741 |
|
69 |
+
| 0.3758 | 13.0 | 728 | 0.3236 | 90.5086 | 86.7313 | 90.5004 | 90.4025 | 8.5902 |
|
70 |
+
| 0.3758 | 14.0 | 784 | 0.3577 | 89.6929 | 85.2464 | 89.4044 | 89.2693 | 8.5115 |
|
71 |
+
| 0.3758 | 15.0 | 840 | 0.3414 | 87.0953 | 83.2736 | 86.9541 | 87.0706 | 8.5902 |
|
72 |
+
| 0.3758 | 16.0 | 896 | 0.3636 | 89.0054 | 85.0881 | 89.0154 | 88.8735 | 8.6885 |
|
73 |
+
| 0.3758 | 17.0 | 952 | 0.3596 | 89.6327 | 86.0865 | 89.6939 | 89.624 | 8.7049 |
|
74 |
+
| 0.1003 | 18.0 | 1008 | 0.3286 | 89.5349 | 85.7598 | 89.5881 | 89.5125 | 8.5934 |
|
75 |
+
| 0.1003 | 19.0 | 1064 | 0.3573 | 89.3753 | 85.6797 | 89.3238 | 89.1992 | 8.6361 |
|
76 |
+
| 0.1003 | 20.0 | 1120 | 0.3589 | 90.3086 | 86.7555 | 90.2283 | 90.1314 | 8.6492 |
|
77 |
+
| 0.1003 | 21.0 | 1176 | 0.3500 | 89.9113 | 84.7301 | 89.8777 | 89.8271 | 8.5246 |
|
78 |
+
| 0.1003 | 22.0 | 1232 | 0.3738 | 90.6328 | 86.8572 | 90.653 | 90.5831 | 8.6492 |
|
79 |
+
| 0.1003 | 23.0 | 1288 | 0.3446 | 90.8409 | 86.7153 | 90.8496 | 90.8431 | 8.5279 |
|
80 |
+
|
81 |
+
|
82 |
+
### Framework versions
|
83 |
+
|
84 |
+
- Transformers 4.42.3
|
85 |
+
- Pytorch 2.3.0+cu121
|
86 |
+
- Datasets 2.20.0
|
87 |
+
- Tokenizers 0.19.1
|
generation_config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 0,
|
3 |
+
"decoder_start_token_id": 2,
|
4 |
+
"early_stopping": true,
|
5 |
+
"eos_token_id": 2,
|
6 |
+
"forced_bos_token_id": 0,
|
7 |
+
"forced_eos_token_id": 2,
|
8 |
+
"no_repeat_ngram_size": 3,
|
9 |
+
"num_beams": 4,
|
10 |
+
"pad_token_id": 1,
|
11 |
+
"transformers_version": "4.42.3"
|
12 |
+
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557912620
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f217f1c8a4f85947182a6ef5d028148cd8a2642c1eded1aae02f17cf6601d6a3
|
3 |
size 557912620
|
runs/Jul09_20-04-38_4dc9357a7cea/events.out.tfevents.1720557051.4dc9357a7cea.4240.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0899606576c8fb3c141584e82d17ec8eef8cca828902d46dff98847a40f0d18
|
3 |
+
size 613
|