LA1512 committed on
Commit
978fc50
1 Parent(s): 237cc89

LA1512/led-pubmed-20K

Browse files
README.md CHANGED
@@ -1,28 +1,10 @@
1
  ---
2
- license: bsd-3-clause
3
- base_model: LA1512/led-1000-epoch-1
4
  tags:
5
  - generated_from_trainer
6
- datasets:
7
- - pubmed-summarization
8
- metrics:
9
- - rouge
10
  model-index:
11
  - name: results
12
- results:
13
- - task:
14
- name: Sequence-to-sequence Language Modeling
15
- type: text2text-generation
16
- dataset:
17
- name: pubmed-summarization
18
- type: pubmed-summarization
19
- config: section
20
- split: validation
21
- args: section
22
- metrics:
23
- - name: Rouge1
24
- type: rouge
25
- value: 43.1934
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,14 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # results
32
 
33
- This model is a fine-tuned version of [LA1512/led-1000-epoch-1](https://huggingface.co/LA1512/led-1000-epoch-1) on the pubmed-summarization dataset.
34
- It achieves the following results on the evaluation set:
35
- - Loss: 3.1831
36
- - Rouge1: 43.1934
37
- - Rouge2: 16.7702
38
- - Rougel: 24.2151
39
- - Rougelsum: 38.4858
40
- - Gen Len: 267.815
41
 
42
  ## Model description
43
 
@@ -60,16 +35,14 @@ The following hyperparameters were used during training:
60
  - train_batch_size: 2
61
  - eval_batch_size: 2
62
  - seed: 42
 
 
63
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
64
  - lr_scheduler_type: linear
65
- - lr_scheduler_warmup_steps: 500
66
  - num_epochs: 1
67
  - label_smoothing_factor: 0.1
68
 
69
- ### Training results
70
-
71
-
72
-
73
  ### Framework versions
74
 
75
  - Transformers 4.39.3
 
1
  ---
2
+ base_model: LA1512/led-pubmed-20K
 
3
  tags:
4
  - generated_from_trainer
 
 
 
 
5
  model-index:
6
  - name: results
7
+ results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
12
 
13
  # results
14
 
15
+ This model is a fine-tuned version of [LA1512/led-pubmed-20K](https://huggingface.co/LA1512/led-pubmed-20K) on an unknown dataset.
 
 
 
 
 
 
 
16
 
17
  ## Model description
18
 
 
35
  - train_batch_size: 2
36
  - eval_batch_size: 2
37
  - seed: 42
38
+ - gradient_accumulation_steps: 8
39
+ - total_train_batch_size: 16
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: linear
42
+ - lr_scheduler_warmup_steps: 100
43
  - num_epochs: 1
44
  - label_smoothing_factor: 0.1
45
 
 
 
 
 
46
  ### Framework versions
47
 
48
  - Transformers 4.39.3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "LA1512/led-1000-epoch-1",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
@@ -42,19 +42,19 @@
42
  "LABEL_1": 1,
43
  "LABEL_2": 2
44
  },
45
- "length_penalty": 0.8,
46
  "max_decoder_position_embeddings": 1024,
47
  "max_encoder_position_embeddings": 16384,
48
- "max_length": 1024,
49
- "min_length": 8,
50
  "model_type": "led",
51
  "no_repeat_ngram_size": 3,
52
- "num_beams": 4,
53
  "num_hidden_layers": 6,
54
  "pad_token_id": 1,
55
  "repetition_penalty": 3.5,
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.39.3",
58
- "use_cache": true,
59
  "vocab_size": 50265
60
  }
 
1
  {
2
+ "_name_or_path": "LA1512/led-pubmed-20K",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
 
42
  "LABEL_1": 1,
43
  "LABEL_2": 2
44
  },
45
+ "length_penalty": 2.0,
46
  "max_decoder_position_embeddings": 1024,
47
  "max_encoder_position_embeddings": 16384,
48
+ "max_length": 512,
49
+ "min_length": 100,
50
  "model_type": "led",
51
  "no_repeat_ngram_size": 3,
52
+ "num_beams": 2,
53
  "num_hidden_layers": 6,
54
  "pad_token_id": 1,
55
  "repetition_penalty": 3.5,
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.39.3",
58
+ "use_cache": false,
59
  "vocab_size": 50265
60
  }
generation_config.json CHANGED
@@ -1,14 +1,16 @@
1
  {
 
2
  "bos_token_id": 0,
3
  "decoder_start_token_id": 2,
4
  "early_stopping": true,
5
  "eos_token_id": 2,
6
- "length_penalty": 0.8,
7
- "max_length": 1024,
8
- "min_length": 8,
9
  "no_repeat_ngram_size": 3,
10
- "num_beams": 4,
11
  "pad_token_id": 1,
12
  "repetition_penalty": 3.5,
13
- "transformers_version": "4.39.3"
 
14
  }
 
1
  {
2
+ "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
5
  "early_stopping": true,
6
  "eos_token_id": 2,
7
+ "length_penalty": 2.0,
8
+ "max_length": 512,
9
+ "min_length": 100,
10
  "no_repeat_ngram_size": 3,
11
+ "num_beams": 2,
12
  "pad_token_id": 1,
13
  "repetition_penalty": 3.5,
14
+ "transformers_version": "4.39.3",
15
+ "use_cache": false
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0970c2d78b268b71a159958e88310d8c800540b7825cf414876ad355e312965f
3
  size 647614116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7468383a1bc6072848fabca2e294021ca83773c8c4e81da0e603d285068f9e2
3
  size 647614116
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 1024,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 1024
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 512
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:301b9e6e7201f138110067138a881ad800c0a1bf46b87ae55423081b9037e7bf
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2869564b03b597f02d45e978f01dfc55be05c44076c2cdf0a908fbf6998a23e8
3
  size 5048