LA1512/led-pubmed-20K

Files changed (6) hide show

README.md CHANGED Viewed

@@ -1,28 +1,10 @@
 ---
-license: bsd-3-clause
-base_model: LA1512/led-1000-epoch-1
 tags:
 - generated_from_trainer
-datasets:
-- pubmed-summarization
-metrics:
-- rouge
 model-index:
 - name: results
-  results:
-  - task:
-      name: Sequence-to-sequence Language Modeling
-      type: text2text-generation
-    dataset:
-      name: pubmed-summarization
-      type: pubmed-summarization
-      config: section
-      split: validation
-      args: section
-    metrics:
-    - name: Rouge1
-      type: rouge
-      value: 43.1934
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,14 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 # results
-This model is a fine-tuned version of [LA1512/led-1000-epoch-1](https://huggingface.co/LA1512/led-1000-epoch-1) on the pubmed-summarization dataset.
-It achieves the following results on the evaluation set:
-- Loss: 3.1831
-- Rouge1: 43.1934
-- Rouge2: 16.7702
-- Rougel: 24.2151
-- Rougelsum: 38.4858
-- Gen Len: 267.815
 ## Model description
@@ -60,16 +35,14 @@ The following hyperparameters were used during training:
 - train_batch_size: 2
 - eval_batch_size: 2
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 500
 - num_epochs: 1
 - label_smoothing_factor: 0.1
-### Training results
 ### Framework versions
 - Transformers 4.39.3

 ---
+base_model: LA1512/led-pubmed-20K
 tags:
 - generated_from_trainer
 model-index:
 - name: results
+  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You should probably proofread and complete it, then remove this comment. -->
 # results
+This model is a fine-tuned version of [LA1512/led-pubmed-20K](https://huggingface.co/LA1512/led-pubmed-20K) on an unknown dataset.
 ## Model description
 - train_batch_size: 2
 - eval_batch_size: 2
 - seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 100
 - num_epochs: 1
 - label_smoothing_factor: 0.1
 ### Framework versions
 - Transformers 4.39.3

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "LA1512/led-1000-epoch-1",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "architectures": [
@@ -42,19 +42,19 @@
     "LABEL_1": 1,
     "LABEL_2": 2
   },
-  "length_penalty": 0.8,
   "max_decoder_position_embeddings": 1024,
   "max_encoder_position_embeddings": 16384,
-  "max_length": 1024,
-  "min_length": 8,
   "model_type": "led",
   "no_repeat_ngram_size": 3,
-  "num_beams": 4,
   "num_hidden_layers": 6,
   "pad_token_id": 1,
   "repetition_penalty": 3.5,
   "torch_dtype": "float32",
   "transformers_version": "4.39.3",
-  "use_cache": true,
   "vocab_size": 50265
 }

 {
+  "_name_or_path": "LA1512/led-pubmed-20K",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "architectures": [
     "LABEL_1": 1,
     "LABEL_2": 2
   },
+  "length_penalty": 2.0,
   "max_decoder_position_embeddings": 1024,
   "max_encoder_position_embeddings": 16384,
+  "max_length": 512,
+  "min_length": 100,
   "model_type": "led",
   "no_repeat_ngram_size": 3,
+  "num_beams": 2,
   "num_hidden_layers": 6,
   "pad_token_id": 1,
   "repetition_penalty": 3.5,
   "torch_dtype": "float32",
   "transformers_version": "4.39.3",
+  "use_cache": false,
   "vocab_size": 50265
 }

generation_config.json CHANGED Viewed

@@ -1,14 +1,16 @@
 {
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
   "early_stopping": true,
   "eos_token_id": 2,
-  "length_penalty": 0.8,
-  "max_length": 1024,
-  "min_length": 8,
   "no_repeat_ngram_size": 3,
-  "num_beams": 4,
   "pad_token_id": 1,
   "repetition_penalty": 3.5,
-  "transformers_version": "4.39.3"
 }

 {
+  "_from_model_config": true,
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
   "early_stopping": true,
   "eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 512,
+  "min_length": 100,
   "no_repeat_ngram_size": 3,
+  "num_beams": 2,
   "pad_token_id": 1,
   "repetition_penalty": 3.5,
+  "transformers_version": "4.39.3",
+  "use_cache": false
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0970c2d78b268b71a159958e88310d8c800540b7825cf414876ad355e312965f
 size 647614116

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7468383a1bc6072848fabca2e294021ca83773c8c4e81da0e603d285068f9e2
 size 647614116

tokenizer.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 1024
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
+      "Fixed": 512
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:301b9e6e7201f138110067138a881ad800c0a1bf46b87ae55423081b9037e7bf
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:2869564b03b597f02d45e978f01dfc55be05c44076c2cdf0a908fbf6998a23e8
 size 5048