add model

Browse files

Files changed (9) hide show

README.md +16 -23
all_results.json +16 -16
config.json +2 -3
eval_results.json +11 -11
pytorch_model.bin +1 -1
tokenizer.json +0 -0
train_results.json +6 -6
trainer_state.json +0 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 license: apache-2.0
 tags:
-- summarization
 metrics:
 - rouge
 model-index:
@@ -13,7 +13,7 @@ model-index:
     metrics:
     - name: Rouge1
       type: rouge
-      value: 24.5193
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -23,12 +23,12 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.7900
-- Rouge1: 24.5193
-- Rouge2: 6.267
-- Rougel: 17.4389
-- Rougelsum: 20.5821
-- Gen Len: 56.8235
 ## Model description
@@ -48,29 +48,22 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.15
-- num_epochs: 10.0
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Rouge1  | Rouge2 | Rougel  | Rougelsum | Gen Len |
-|:-------------:|:-----:|:-----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
-| 3.2899        | 1.0   | 2875  | 3.0328          | 16.185  | 4.0368 | 12.9047 | 14.0748   | 19.3457 |
-| 3.0916        | 2.0   | 5750  | 3.0548          | 16.2962 | 3.9567 | 13.0426 | 14.2023   | 19.3427 |
-| 2.8345        | 3.0   | 8625  | 3.0645          | 16.4597 | 4.2017 | 13.3787 | 14.5527   | 19.9707 |
-| 2.5522        | 4.0   | 11500 | 3.0988          | 16.8388 | 4.3742 | 13.5688 | 14.7003   | 19.9324 |
-| 2.2307        | 5.0   | 14375 | 3.2058          | 16.4764 | 4.2906 | 13.3875 | 14.5223   | 19.8502 |
-| 1.8381        | 6.0   | 17250 | 3.3179          | 16.6764 | 4.4834 | 13.5489 | 14.6173   | 19.9681 |
-| 1.6203        | 7.0   | 20125 | 3.4763          | 17.0434 | 4.5045 | 13.8329 | 14.9286   | 19.9105 |
-| 1.4982        | 8.0   | 23000 | 3.6031          | 17.0044 | 4.7727 | 13.8743 | 14.9683   | 19.9539 |
-| 1.3385        | 9.0   | 25875 | 3.7051          | 17.0903 | 4.5413 | 13.8897 | 15.0091   | 19.8291 |
-| 1.2211        | 10.0  | 28750 | 3.7900          | 16.7843 | 4.4907 | 13.6418 | 14.7366   | 19.9066 |
 ### Framework versions

 ---
 license: apache-2.0
 tags:
+- generated_from_trainer
 metrics:
 - rouge
 model-index:
     metrics:
     - name: Rouge1
       type: rouge
+      value: 26.1391
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.0970
+- Rouge1: 26.1391
+- Rouge2: 7.3101
+- Rougel: 19.1217
+- Rougelsum: 21.9706
+- Gen Len: 46.2245
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.15
+- num_epochs: 3.0
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step   | Validation Loss | Rouge1  | Rouge2 | Rougel  | Rougelsum | Gen Len |
+|:-------------:|:-----:|:------:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
+| 3.3259        | 1.0   | 33875  | 3.2535          | 17.942  | 4.5143 | 14.2766 | 15.582    | 19.3901 |
+| 2.9764        | 2.0   | 67750  | 3.1278          | 18.6558 | 5.1844 | 15.0939 | 16.3367   | 19.9174 |
+| 2.5889        | 3.0   | 101625 | 3.0970          | 19.1763 | 5.4517 | 15.5342 | 16.7186   | 19.8855 |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
-    "epoch": 10.0,
-    "eval_gen_len": 56.8235,
-    "eval_loss": 3.790048360824585,
-    "eval_rouge1": 24.5193,
-    "eval_rouge2": 6.267,
-    "eval_rougeL": 17.4389,
-    "eval_rougeLsum": 20.5821,
-    "eval_runtime": 686.4492,
-    "eval_samples": 2323,
-    "eval_samples_per_second": 3.384,
-    "eval_steps_per_second": 0.424,
-    "train_loss": 2.145213280321204,
-    "train_runtime": 23488.7209,
-    "train_samples": 23000,
-    "train_samples_per_second": 9.792,
-    "train_steps_per_second": 1.224
 }

 {
+    "epoch": 3.0,
+    "eval_gen_len": 46.2245,
+    "eval_loss": 3.09702730178833,
+    "eval_rouge1": 26.1391,
+    "eval_rouge2": 7.3101,
+    "eval_rougeL": 19.1217,
+    "eval_rougeLsum": 21.9706,
+    "eval_runtime": 1870.6168,
+    "eval_samples": 7527,
+    "eval_samples_per_second": 4.024,
+    "eval_steps_per_second": 1.006,
+    "train_loss": 3.0545432459982558,
+    "train_runtime": 34500.7445,
+    "train_samples": 135500,
+    "train_samples_per_second": 11.782,
+    "train_steps_per_second": 2.946
 }

config.json CHANGED Viewed

@@ -48,10 +48,9 @@
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
       "max_length": 128,
-      "min_length": 15,
       "num_beams": 4
     },
     "summarization_cnn": {

   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
+      "length_penalty": 1.0,
       "max_length": 128,
+      "min_length": 12,
       "num_beams": 4
     },
     "summarization_cnn": {

eval_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 10.0,
-    "eval_gen_len": 56.8235,
-    "eval_loss": 3.790048360824585,
-    "eval_rouge1": 24.5193,
-    "eval_rouge2": 6.267,
-    "eval_rougeL": 17.4389,
-    "eval_rougeLsum": 20.5821,
-    "eval_runtime": 686.4492,
-    "eval_samples": 2323,
-    "eval_samples_per_second": 3.384,
-    "eval_steps_per_second": 0.424
 }

 {
+    "epoch": 3.0,
+    "eval_gen_len": 46.2245,
+    "eval_loss": 3.09702730178833,
+    "eval_rouge1": 26.1391,
+    "eval_rouge2": 7.3101,
+    "eval_rougeL": 19.1217,
+    "eval_rougeLsum": 21.9706,
+    "eval_runtime": 1870.6168,
+    "eval_samples": 7527,
+    "eval_samples_per_second": 4.024,
+    "eval_steps_per_second": 1.006
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a56a4efb7f95a9c07a59b2460261edeea7485fe431c55e8fe740b2c1f9028e41
 size 1625569391

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9a37fbb3be47b097d897478a0dce567e2b555ddb72c818086cb9db66ee5693c
 size 1625569391

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 10.0,
-    "train_loss": 2.145213280321204,
-    "train_runtime": 23488.7209,
-    "train_samples": 23000,
-    "train_samples_per_second": 9.792,
-    "train_steps_per_second": 1.224
 }

 {
+    "epoch": 3.0,
+    "train_loss": 3.0545432459982558,
+    "train_runtime": 34500.7445,
+    "train_samples": 135500,
+    "train_samples_per_second": 11.782,
+    "train_steps_per_second": 2.946
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b18db3123e11bca3b4df4077ecd882047ff189fe4bd30dc91442fdc0308aa5b0
 size 2799

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdd3d6861a6321189c3d8d75a6d4b2116697c61f2cd4cd1090421fe4d9ec4a0a
 size 2799