Strange18 committed on
Commit
3b2fb33
·
verified ·
1 Parent(s): 1d2189b

trained on new dataset for 2 epochs

Browse files
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: Strange18/results
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: results
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # results
15
+
16
+ This model is a fine-tuned version of [Strange18/results](https://huggingface.co/Strange18/results) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - epoch: 2.0
19
+ - eval_bleu: 0.2356
20
+ - eval_loss: 0.1289
21
+ - eval_rouge1: 0.5418
22
+ - eval_rouge2: 0.3046
23
+ - eval_rougeL: 0.4929
24
+ - eval_rougeLsum: 0.4932
25
+ - eval_runtime: 31.1564
26
+ - eval_sacrebleu: 23.5626
27
+ - eval_samples_per_second: 4.558
28
+ - eval_steps_per_second: 1.155
29
+ - eval_ter: 63.2172
30
+ - step: 672
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 1e-05
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - gradient_accumulation_steps: 2
54
+ - total_train_batch_size: 8
55
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
56
+ - lr_scheduler_type: linear
57
+ - lr_scheduler_warmup_steps: 50
58
+ - num_epochs: 4
59
+ - mixed_precision_training: Native AMP
60
+
61
+ ### Framework versions
62
+
63
+ - Transformers 4.46.3
64
+ - PyTorch 2.1.2
65
+ - Datasets 3.1.0
66
+ - Tokenizers 0.20.3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/mbart-large-50",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
@@ -52,7 +52,7 @@
52
  "static_position_embeddings": false,
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "torch_dtype": "float32",
55
- "transformers_version": "4.46.2",
56
  "use_cache": true,
57
  "vocab_size": 250054
58
  }
 
1
  {
2
+ "_name_or_path": "Strange18/results",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
52
  "static_position_embeddings": false,
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "torch_dtype": "float32",
55
+ "transformers_version": "4.46.3",
56
  "use_cache": true,
57
  "vocab_size": 250054
58
  }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.46.3"
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea539e4d6ed87fc13a2b4d26eed5e65bb4a46f238c574245f13009fda1bd9641
3
  size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559d431f1d0dc6f1b26685fa090cd704cf740851239cfa84fb47406679b8c2c0
3
  size 2444578688
special_tokens_map.json CHANGED
@@ -53,9 +53,27 @@
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
- "bos_token": "<s>",
57
- "cls_token": "<s>",
58
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "mask_token": {
60
  "content": "<mask>",
61
  "lstrip": true,
@@ -63,7 +81,25 @@
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
- "pad_token": "<pad>",
67
- "sep_token": "</s>",
68
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
 
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
+ "bos_token": {
57
+ "content": "<s>",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false
62
+ },
63
+ "cls_token": {
64
+ "content": "<s>",
65
+ "lstrip": false,
66
+ "normalized": false,
67
+ "rstrip": false,
68
+ "single_word": false
69
+ },
70
+ "eos_token": {
71
+ "content": "</s>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false
76
+ },
77
  "mask_token": {
78
  "content": "<mask>",
79
  "lstrip": true,
 
81
  "rstrip": false,
82
  "single_word": false
83
  },
84
+ "pad_token": {
85
+ "content": "<pad>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false
90
+ },
91
+ "sep_token": {
92
+ "content": "</s>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false
97
+ },
98
+ "unk_token": {
99
+ "content": "<unk>",
100
+ "lstrip": false,
101
+ "normalized": false,
102
+ "rstrip": false,
103
+ "single_word": false
104
+ }
105
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fa01d6979e2a73175ce3e1e52801925be5fbcdf29d4f373aad434f7ff6b4bd9
3
- size 17110186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac4bfeac2fcd7cbc788d5a8d708aea33f37f05b4898b0c23651da928afcfa72
3
+ size 17094570
tokenizer_config.json CHANGED
@@ -516,12 +516,19 @@
516
  "cls_token": "<s>",
517
  "eos_token": "</s>",
518
  "mask_token": "<mask>",
 
519
  "model_max_length": 1024,
 
520
  "pad_token": "<pad>",
 
 
521
  "sep_token": "</s>",
522
  "sp_model_kwargs": {},
523
  "src_lang": "ne_NP",
 
524
  "tgt_lang": "en_XX",
525
  "tokenizer_class": "MBart50Tokenizer",
 
 
526
  "unk_token": "<unk>"
527
  }
 
516
  "cls_token": "<s>",
517
  "eos_token": "</s>",
518
  "mask_token": "<mask>",
519
+ "max_length": 200,
520
  "model_max_length": 1024,
521
+ "pad_to_multiple_of": null,
522
  "pad_token": "<pad>",
523
+ "pad_token_type_id": 0,
524
+ "padding_side": "right",
525
  "sep_token": "</s>",
526
  "sp_model_kwargs": {},
527
  "src_lang": "ne_NP",
528
+ "stride": 0,
529
  "tgt_lang": "en_XX",
530
  "tokenizer_class": "MBart50Tokenizer",
531
+ "truncation_side": "right",
532
+ "truncation_strategy": "longest_first",
533
  "unk_token": "<unk>"
534
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e74b08053c1ada3a7db96ee8d19ee535a0e7cdd5260e01552cdd9095aa3091a
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb852f8eeb1665d13ac00df8d7045de1c8834dd98f21f9748f2e4f7eccd4cc95
3
  size 5432