Natet committed on
Commit
e871519
•
1 Parent(s): 7250988
README.md CHANGED
@@ -2,26 +2,29 @@
2
  license: apache-2.0
3
  base_model: IlyaGusev/rut5_base_sum_gazeta
4
  tags:
5
- - summarization_4
6
  - generated_from_trainer
7
- metrics:
8
- - rouge
9
  model-index:
10
  - name: rut5_base_sum_gazeta-finetuned_week_gpt
11
  results: []
12
  ---
13
 
14
- # rut5_base_sum_gazeta-finetuned_week_gpt
 
15
 
16
- This model is a fine-tuned version of [IlyaGusev/rut5_base_sum_gazeta](https://huggingface.co/IlyaGusev/rut5_base_sum_gazeta) on the Natet/gpt_week_yandex dataset.
17
- This model is suitable for summarizing Habr articles.
18
 
 
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.2643
21
- - Rouge1: 38.9266
22
- - Rouge2: 18.0587
23
- - Rougel: 38.1447
24
- - Rougelsum: 38.1337
 
 
 
 
25
 
26
  ## Model description
27
 
@@ -41,30 +44,16 @@ More information needed
41
 
42
  The following hyperparameters were used during training:
43
  - learning_rate: 5.6e-05
44
- - train_batch_size: 8
45
- - eval_batch_size: 8
46
  - seed: 42
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: linear
49
  - num_epochs: 8
50
 
51
- ### Training results
52
-
53
- | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
54
- |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|
55
- | 1.7691 | 1.0 | 1110 | 1.4005 | 37.7689 | 17.7394 | 36.8468 | 36.8842 |
56
- | 1.4892 | 2.0 | 2220 | 1.3477 | 35.9349 | 16.8403 | 35.1786 | 35.2055 |
57
- | 1.3579 | 3.0 | 3330 | 1.3079 | 37.7579 | 17.6421 | 36.8439 | 36.8182 |
58
- | 1.2708 | 4.0 | 4440 | 1.2675 | 37.867 | 17.3909 | 36.9706 | 36.987 |
59
- | 1.2006 | 5.0 | 5550 | 1.2703 | 38.8218 | 17.9772 | 38.001 | 37.9811 |
60
- | 1.1519 | 6.0 | 6660 | 1.2703 | 38.0351 | 17.5386 | 37.209 | 37.1815 |
61
- | 1.1132 | 7.0 | 7770 | 1.2593 | 38.4673 | 17.8343 | 37.529 | 37.5268 |
62
- | 1.0932 | 8.0 | 8880 | 1.2643 | 38.9266 | 18.0587 | 38.1447 | 38.1337 |
63
-
64
-
65
  ### Framework versions
66
 
67
- - Transformers 4.33.0
68
  - Pytorch 2.0.0
69
  - Datasets 2.1.0
70
- - Tokenizers 0.13.3
 
2
  license: apache-2.0
3
  base_model: IlyaGusev/rut5_base_sum_gazeta
4
  tags:
5
+ - summarization
6
  - generated_from_trainer
 
 
7
  model-index:
8
  - name: rut5_base_sum_gazeta-finetuned_week_gpt
9
  results: []
10
  ---
11
 
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
 
15
+ # rut5_base_sum_gazeta-finetuned_week_gpt
 
16
 
17
+ This model is a fine-tuned version of [IlyaGusev/rut5_base_sum_gazeta](https://huggingface.co/IlyaGusev/rut5_base_sum_gazeta) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
+ - eval_loss: 2.3331
20
+ - eval_rouge1: 0.0
21
+ - eval_rouge2: 0.0
22
+ - eval_rougeL: 0.0
23
+ - eval_rougeLsum: 0.0
24
+ - eval_runtime: 7.8395
25
+ - eval_samples_per_second: 0.383
26
+ - eval_steps_per_second: 0.128
27
+ - step: 0
28
 
29
  ## Model description
30
 
 
44
 
45
  The following hyperparameters were used during training:
46
  - learning_rate: 5.6e-05
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 16
49
  - seed: 42
50
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
  - lr_scheduler_type: linear
52
  - num_epochs: 8
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ### Framework versions
55
 
56
+ - Transformers 4.36.2
57
  - Pytorch 2.0.0
58
  - Datasets 2.1.0
59
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -30,7 +30,7 @@
30
  "tie_word_embeddings": false,
31
  "tokenizer_class": "T5Tokenizer",
32
  "torch_dtype": "float32",
33
- "transformers_version": "4.33.0",
34
  "use_cache": true,
35
  "vocab_size": 30000
36
  }
 
30
  "tie_word_embeddings": false,
31
  "tokenizer_class": "T5Tokenizer",
32
  "torch_dtype": "float32",
33
+ "transformers_version": "4.36.2",
34
  "use_cache": true,
35
  "vocab_size": 30000
36
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "max_length": 200,
6
  "num_beams": 5,
7
  "pad_token_id": 0,
8
- "transformers_version": "4.33.0"
9
  }
 
5
  "max_length": 200,
6
  "num_beams": 5,
7
  "pad_token_id": 0,
8
+ "transformers_version": "4.36.2"
9
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ea4a6196701370c6d96dd9e35b5e2e625db48c4ab487302dd89d2e8bc8a633
3
+ size 977270632
runs/Jan23_06-13-55_1fca61cf9468/events.out.tfevents.1705990445.1fca61cf9468.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72496ec47665df49742d009dda91596f82034e7a7bb0d4bfe3a5e86d0d4d520a
3
+ size 488
special_tokens_map.json CHANGED
@@ -1,5 +1,23 @@
1
  {
2
- "eos_token": "</s>",
3
- "pad_token": "<pad>",
4
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }
 
1
  {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
tokenizer.json CHANGED
@@ -55,7 +55,8 @@
55
  "pre_tokenizer": {
56
  "type": "Metaspace",
57
  "replacement": "▁",
58
- "add_prefix_space": true
 
59
  },
60
  "post_processor": {
61
  "type": "TemplateProcessing",
@@ -114,7 +115,8 @@
114
  "decoder": {
115
  "type": "Metaspace",
116
  "replacement": "▁",
117
- "add_prefix_space": true
 
118
  },
119
  "model": {
120
  "type": "Unigram",
@@ -120120,6 +120122,7 @@
120120
  "▁<extra_id_0>",
120121
  0.0
120122
  ]
120123
- ]
 
120124
  }
120125
  }
 
55
  "pre_tokenizer": {
56
  "type": "Metaspace",
57
  "replacement": "▁",
58
+ "add_prefix_space": true,
59
+ "prepend_scheme": "always"
60
  },
61
  "post_processor": {
62
  "type": "TemplateProcessing",
 
115
  "decoder": {
116
  "type": "Metaspace",
117
  "replacement": "▁",
118
+ "add_prefix_space": true,
119
+ "prepend_scheme": "always"
120
  },
121
  "model": {
122
  "type": "Unigram",
 
120122
  "▁<extra_id_0>",
120123
  0.0
120124
  ]
120125
+ ],
120126
+ "byte_fallback": false
120127
  }
120128
  }
tokenizer_config.json CHANGED
@@ -1,5 +1,31 @@
1
  {
2
- "additional_special_tokens": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "</s>",
5
  "extra_ids": 0,
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [],
29
  "clean_up_tokenization_spaces": true,
30
  "eos_token": "</s>",
31
  "extra_ids": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0589f8cdf02c4eaa97d8242bd3f50d9a5cdd49cdfbaf8652e423a63645bb55e
3
- size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b33970fa0a97389aeebcbdbfe0733430564e97ffcb91f910e4e1e81abd2df2
3
+ size 4475