Strange18 committed on
Commit
d34d48c
·
verified ·
1 Parent(s): 1dd7ece

trained on new dataset for 2 epochs

Browse files
README.md CHANGED
@@ -1,12 +1,9 @@
1
  ---
2
  library_name: transformers
3
- base_model: Strange18/results
 
4
  tags:
5
  - generated_from_trainer
6
- metrics:
7
- - sacrebleu
8
- - bleu
9
- - rouge
10
  model-index:
11
  - name: results
12
  results: []
@@ -17,16 +14,21 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # results
19
 
20
- This model is a fine-tuned version of [Strange18/results](https://huggingface.co/Strange18/results) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.1108
23
- - Sacrebleu: 28.4098
24
- - Bleu: 0.2841
25
- - Rouge1: 0.6157
26
- - Rouge2: 0.3844
27
- - Rougel: 0.5828
28
- - Rougelsum: 0.5826
29
- - Ter: 53.4048
 
 
 
 
 
30
 
31
  ## Model description
32
 
@@ -46,26 +48,17 @@ More information needed
46
 
47
  The following hyperparameters were used during training:
48
  - learning_rate: 1e-05
49
- - train_batch_size: 2
50
- - eval_batch_size: 2
51
  - seed: 42
52
  - gradient_accumulation_steps: 4
53
- - total_train_batch_size: 8
54
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_steps: 700
57
- - num_epochs: 3
58
  - mixed_precision_training: Native AMP
59
 
60
- ### Training results
61
-
62
- | Training Loss | Epoch | Step | Validation Loss | Sacrebleu | Bleu | Rouge1 | Rouge2 | Rougel | Rougelsum | Ter |
63
- |:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:------:|:------:|:---------:|:-------:|
64
- | 0.2067 | 0.9978 | 335 | 0.1662 | 16.3668 | 0.1637 | 0.4966 | 0.2466 | 0.4518 | 0.4506 | 68.4718 |
65
- | 0.1603 | 1.9993 | 671 | 0.1570 | 19.9178 | 0.1992 | 0.5186 | 0.2854 | 0.4882 | 0.4874 | 63.0027 |
66
- | 0.1169 | 2.9948 | 1005 | 0.1108 | 28.4098 | 0.2841 | 0.6157 | 0.3844 | 0.5828 | 0.5826 | 53.4048 |
67
-
68
-
69
  ### Framework versions
70
 
71
  - Transformers 4.46.3
 
1
  ---
2
  library_name: transformers
3
+ license: mit
4
+ base_model: facebook/mbart-large-50
5
  tags:
6
  - generated_from_trainer
 
 
 
 
7
  model-index:
8
  - name: results
9
  results: []
 
14
 
15
  # results
16
 
17
+ This model is a fine-tuned version of [facebook/mbart-large-50](https://huggingface.co/facebook/mbart-large-50) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - eval_loss: 0.2581
20
+ - eval_sacrebleu: 32.6503
21
+ - eval_bleu: 0.3265
22
+ - eval_rouge1: 0.6372
23
+ - eval_rouge2: 0.4402
24
+ - eval_rougeL: 0.5635
25
+ - eval_rougeLsum: 0.5632
26
+ - eval_ter: 60.4240
27
+ - eval_runtime: 826.7326
28
+ - eval_samples_per_second: 1.512
29
+ - eval_steps_per_second: 0.379
30
+ - epoch: 2.0
31
+ - step: 2969
32
 
33
  ## Model description
34
 
 
48
 
49
  The following hyperparameters were used during training:
50
  - learning_rate: 1e-05
51
+ - train_batch_size: 4
52
+ - eval_batch_size: 4
53
  - seed: 42
54
  - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 16
56
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
57
  - lr_scheduler_type: linear
58
  - lr_scheduler_warmup_steps: 700
59
+ - num_epochs: 5
60
  - mixed_precision_training: Native AMP
61
 
 
 
 
 
 
 
 
 
 
62
  ### Framework versions
63
 
64
  - Transformers 4.46.3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Strange18/results",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
1
  {
2
+ "_name_or_path": "facebook/mbart-large-50",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
generation_config.json CHANGED
@@ -1,8 +1,11 @@
1
  {
2
  "bos_token_id": 0,
3
  "decoder_start_token_id": 2,
 
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
 
 
6
  "pad_token_id": 1,
7
  "transformers_version": "4.46.3"
8
  }
 
1
  {
2
  "bos_token_id": 0,
3
  "decoder_start_token_id": 2,
4
+ "early_stopping": true,
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
7
+ "max_length": 200,
8
+ "num_beams": 5,
9
  "pad_token_id": 1,
10
  "transformers_version": "4.46.3"
11
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2e8ef8afc2c8e8c14a003a30b03d2e9f35aef109b2cafb7c4ad8cb2ac055ba3
3
  size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95ab08cebabd0e7ed31f9eb7fbfd791e229df4e8a32de44e1f4764825147139f
3
  size 2444578688
special_tokens_map.json CHANGED
@@ -53,27 +53,9 @@
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
- "bos_token": {
57
- "content": "<s>",
58
- "lstrip": false,
59
- "normalized": false,
60
- "rstrip": false,
61
- "single_word": false
62
- },
63
- "cls_token": {
64
- "content": "<s>",
65
- "lstrip": false,
66
- "normalized": false,
67
- "rstrip": false,
68
- "single_word": false
69
- },
70
- "eos_token": {
71
- "content": "</s>",
72
- "lstrip": false,
73
- "normalized": false,
74
- "rstrip": false,
75
- "single_word": false
76
- },
77
  "mask_token": {
78
  "content": "<mask>",
79
  "lstrip": true,
@@ -81,25 +63,7 @@
81
  "rstrip": false,
82
  "single_word": false
83
  },
84
- "pad_token": {
85
- "content": "<pad>",
86
- "lstrip": false,
87
- "normalized": false,
88
- "rstrip": false,
89
- "single_word": false
90
- },
91
- "sep_token": {
92
- "content": "</s>",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false
97
- },
98
- "unk_token": {
99
- "content": "<unk>",
100
- "lstrip": false,
101
- "normalized": false,
102
- "rstrip": false,
103
- "single_word": false
104
- }
105
  }
 
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
+ "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "mask_token": {
60
  "content": "<mask>",
61
  "lstrip": true,
 
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
+ "pad_token": "<pad>",
67
+ "sep_token": "</s>",
68
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ac4bfeac2fcd7cbc788d5a8d708aea33f37f05b4898b0c23651da928afcfa72
3
- size 17094570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dc505bc21a5065c9dc2bf957fdcbf1626c7ec1e706cd25535dfbd2846956518
3
+ size 17109921
tokenizer_config.json CHANGED
@@ -516,19 +516,12 @@
516
  "cls_token": "<s>",
517
  "eos_token": "</s>",
518
  "mask_token": "<mask>",
519
- "max_length": 200,
520
  "model_max_length": 1024,
521
- "pad_to_multiple_of": null,
522
  "pad_token": "<pad>",
523
- "pad_token_type_id": 0,
524
- "padding_side": "right",
525
  "sep_token": "</s>",
526
  "sp_model_kwargs": {},
527
  "src_lang": "ne_NP",
528
- "stride": 0,
529
  "tgt_lang": "en_XX",
530
  "tokenizer_class": "MBart50Tokenizer",
531
- "truncation_side": "right",
532
- "truncation_strategy": "longest_first",
533
  "unk_token": "<unk>"
534
  }
 
516
  "cls_token": "<s>",
517
  "eos_token": "</s>",
518
  "mask_token": "<mask>",
 
519
  "model_max_length": 1024,
 
520
  "pad_token": "<pad>",
 
 
521
  "sep_token": "</s>",
522
  "sp_model_kwargs": {},
523
  "src_lang": "ne_NP",
 
524
  "tgt_lang": "en_XX",
525
  "tokenizer_class": "MBart50Tokenizer",
 
 
526
  "unk_token": "<unk>"
527
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50a434fc6367b5dfee6cea33ddd0ee0000ddf4739bf62a83674e602713332960
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa7a50051e6f6db3fa6a91c95035ec9ada3e5ad7cbe89fd2b3495d6de9e3e6a
3
  size 5432