José Ángel González committed
Commit 4509e78
1 Parent(s): 4d3149d
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 license: apache-2.0
 tags:
-- summarization
+- generated_from_trainer
 metrics:
 - rouge
 model-index:
@@ -13,7 +13,7 @@ model-index:
     metrics:
     - name: Rouge1
       type: rouge
-      value: 24.5193
+      value: 26.1391
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -23,12 +23,12 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.7900
-- Rouge1: 24.5193
-- Rouge2: 6.267
-- Rougel: 17.4389
-- Rougelsum: 20.5821
-- Gen Len: 56.8235
+- Loss: 3.0970
+- Rouge1: 26.1391
+- Rouge2: 7.3101
+- Rougel: 19.1217
+- Rougelsum: 21.9706
+- Gen Len: 46.2245
 
 ## Model description
 
@@ -48,29 +48,22 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.15
-- num_epochs: 10.0
+- num_epochs: 3.0
 - mixed_precision_training: Native AMP
 
 ### Training results
 
-| Training Loss | Epoch | Step  | Validation Loss | Rouge1  | Rouge2 | Rougel  | Rougelsum | Gen Len |
-|:-------------:|:-----:|:-----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
-| 3.2899        | 1.0   | 2875  | 3.0328          | 16.185  | 4.0368 | 12.9047 | 14.0748   | 19.3457 |
-| 3.0916        | 2.0   | 5750  | 3.0548          | 16.2962 | 3.9567 | 13.0426 | 14.2023   | 19.3427 |
-| 2.8345        | 3.0   | 8625  | 3.0645          | 16.4597 | 4.2017 | 13.3787 | 14.5527   | 19.9707 |
-| 2.5522        | 4.0   | 11500 | 3.0988          | 16.8388 | 4.3742 | 13.5688 | 14.7003   | 19.9324 |
-| 2.2307        | 5.0   | 14375 | 3.2058          | 16.4764 | 4.2906 | 13.3875 | 14.5223   | 19.8502 |
-| 1.8381        | 6.0   | 17250 | 3.3179          | 16.6764 | 4.4834 | 13.5489 | 14.6173   | 19.9681 |
-| 1.6203        | 7.0   | 20125 | 3.4763          | 17.0434 | 4.5045 | 13.8329 | 14.9286   | 19.9105 |
-| 1.4982        | 8.0   | 23000 | 3.6031          | 17.0044 | 4.7727 | 13.8743 | 14.9683   | 19.9539 |
-| 1.3385        | 9.0   | 25875 | 3.7051          | 17.0903 | 4.5413 | 13.8897 | 15.0091   | 19.8291 |
-| 1.2211        | 10.0  | 28750 | 3.7900          | 16.7843 | 4.4907 | 13.6418 | 14.7366   | 19.9066 |
+| Training Loss | Epoch | Step   | Validation Loss | Rouge1  | Rouge2 | Rougel  | Rougelsum | Gen Len |
+|:-------------:|:-----:|:------:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
+| 3.3259        | 1.0   | 33875  | 3.2535          | 17.942  | 4.5143 | 14.2766 | 15.582    | 19.3901 |
+| 2.9764        | 2.0   | 67750  | 3.1278          | 18.6558 | 5.1844 | 15.0939 | 16.3367   | 19.9174 |
+| 2.5889        | 3.0   | 101625 | 3.0970          | 19.1763 | 5.4517 | 15.5342 | 16.7186   | 19.8855 |
 
 
 ### Framework versions
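For reference, the hyperparameters listed in the updated card map onto a `Seq2SeqTrainingArguments` configuration roughly like the sketch below. This is a minimal illustration only: the output directory is a placeholder, and anything not stated in the card (dataset handling, logging, saving strategy) is an assumption rather than part of this commit.

```python
# Minimal sketch of the hyperparameters from the updated model card.
# output_dir is a placeholder; anything not listed in the card is an assumption.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="bart-large-finetuned",  # placeholder path
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    seed=42,
    lr_scheduler_type="linear",
    warmup_ratio=0.15,
    num_train_epochs=3.0,
    fp16=True,                          # "Native AMP" mixed precision
    predict_with_generate=True,         # needed to report ROUGE and Gen Len
)
```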
all_results.json CHANGED
@@ -1,18 +1,18 @@
 {
-    "epoch": 10.0,
-    "eval_gen_len": 56.8235,
-    "eval_loss": 3.790048360824585,
-    "eval_rouge1": 24.5193,
-    "eval_rouge2": 6.267,
-    "eval_rougeL": 17.4389,
-    "eval_rougeLsum": 20.5821,
-    "eval_runtime": 686.4492,
-    "eval_samples": 2323,
-    "eval_samples_per_second": 3.384,
-    "eval_steps_per_second": 0.424,
-    "train_loss": 2.145213280321204,
-    "train_runtime": 23488.7209,
-    "train_samples": 23000,
-    "train_samples_per_second": 9.792,
-    "train_steps_per_second": 1.224
+    "epoch": 3.0,
+    "eval_gen_len": 46.2245,
+    "eval_loss": 3.09702730178833,
+    "eval_rouge1": 26.1391,
+    "eval_rouge2": 7.3101,
+    "eval_rougeL": 19.1217,
+    "eval_rougeLsum": 21.9706,
+    "eval_runtime": 1870.6168,
+    "eval_samples": 7527,
+    "eval_samples_per_second": 4.024,
+    "eval_steps_per_second": 1.006,
+    "train_loss": 3.0545432459982558,
+    "train_runtime": 34500.7445,
+    "train_samples": 135500,
+    "train_samples_per_second": 11.782,
+    "train_steps_per_second": 2.946
 }
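The `eval_rouge*` fields are standard ROUGE F-scores as reported by the Trainer, scaled by 100. A minimal sketch of computing comparable numbers with the `evaluate` library; the predictions and references below are placeholders, not data from this repository.

```python
# Sketch: computing ROUGE scores comparable to the eval_rouge* fields above.
# predictions/references are placeholders, not data from this repository.
import evaluate

rouge = evaluate.load("rouge")
predictions = ["the cat sat on the mat"]
references = ["a cat was sitting on the mat"]

scores = rouge.compute(predictions=predictions, references=references, use_stemmer=True)
# evaluate returns values in [0, 1]; the results files report them multiplied by 100.
print({k: round(v * 100, 4) for k, v in scores.items()})
```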
config.json CHANGED
@@ -48,10 +48,9 @@
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
+      "length_penalty": 1.0,
       "max_length": 128,
-      "min_length": 15,
+      "min_length": 12,
       "num_beams": 4
     },
     "summarization_cnn": {
eval_results.json CHANGED
@@ -1,13 +1,13 @@
 {
-    "epoch": 10.0,
-    "eval_gen_len": 56.8235,
-    "eval_loss": 3.790048360824585,
-    "eval_rouge1": 24.5193,
-    "eval_rouge2": 6.267,
-    "eval_rougeL": 17.4389,
-    "eval_rougeLsum": 20.5821,
-    "eval_runtime": 686.4492,
-    "eval_samples": 2323,
-    "eval_samples_per_second": 3.384,
-    "eval_steps_per_second": 0.424
+    "epoch": 3.0,
+    "eval_gen_len": 46.2245,
+    "eval_loss": 3.09702730178833,
+    "eval_rouge1": 26.1391,
+    "eval_rouge2": 7.3101,
+    "eval_rougeL": 19.1217,
+    "eval_rougeLsum": 21.9706,
+    "eval_runtime": 1870.6168,
+    "eval_samples": 7527,
+    "eval_samples_per_second": 4.024,
+    "eval_steps_per_second": 1.006
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a56a4efb7f95a9c07a59b2460261edeea7485fe431c55e8fe740b2c1f9028e41
+oid sha256:e9a37fbb3be47b097d897478a0dce567e2b555ddb72c818086cb9db66ee5693c
 size 1625569391
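Since `pytorch_model.bin` is tracked with Git LFS, the diff only changes the pointer file: the SHA-256 of the new weights, with the size unchanged at 1,625,569,391 bytes. A minimal sketch of verifying a locally downloaded copy against the new pointer; the local path is an assumption.

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS pointer's SHA-256.
# The local path is an assumption; the digest is the new oid from this commit.
import hashlib

expected = "e9a37fbb3be47b097d897478a0dce567e2b555ddb72c818086cb9db66ee5693c"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "checksum mismatch"
```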
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 10.0,
-    "train_loss": 2.145213280321204,
-    "train_runtime": 23488.7209,
-    "train_samples": 23000,
-    "train_samples_per_second": 9.792,
-    "train_steps_per_second": 1.224
+    "epoch": 3.0,
+    "train_loss": 3.0545432459982558,
+    "train_runtime": 34500.7445,
+    "train_samples": 135500,
+    "train_samples_per_second": 11.782,
+    "train_steps_per_second": 2.946
 }
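The new numbers are internally consistent with the README's training table: 135,500 training samples at batch size 4 give 33,875 optimizer steps per epoch, matching the Step column (33875, 67750, 101625). A quick check:

```python
# Consistency check between train_results.json and the training-results table.
train_samples = 135500
train_batch_size = 4

steps_per_epoch = train_samples // train_batch_size
print(steps_per_epoch)                                   # 33875
print([steps_per_epoch * epoch for epoch in (1, 2, 3)])  # [33875, 67750, 101625]
```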
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b18db3123e11bca3b4df4077ecd882047ff189fe4bd30dc91442fdc0308aa5b0
+oid sha256:cdd3d6861a6321189c3d8d75a6d4b2116697c61f2cd4cd1090421fe4d9ec4a0a
 size 2799
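`training_args.bin` is the pickled `TrainingArguments` object the Trainer serializes alongside a run; only its LFS pointer changes here. A sketch of inspecting a local copy, assuming a `transformers` version compatible with the one that wrote it.

```python
# Sketch: inspect the serialized training arguments (assumes a local copy and a
# compatible transformers version; recent PyTorch needs weights_only=False for pickles).
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.warmup_ratio)
```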