GlycerinLOL committed on
Commit
c65750e
1 Parent(s): 3de6ccc

Training in progress, step 500

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_gen_len": 28.834,
3
+ "predict_loss": 1.8422898054122925,
4
+ "predict_rouge1": 0.4278,
5
+ "predict_rouge2": 0.1789,
6
+ "predict_rougeL": 0.3407,
7
+ "predict_rougeLsum": 0.3407,
8
+ "predict_runtime": 2061.934,
9
+ "predict_samples": 11334,
10
+ "predict_samples_per_second": 5.497,
11
+ "predict_steps_per_second": 0.172
12
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "google/pegasus-xsum",
3
  "activation_dropout": 0.1,
4
  "activation_function": "relu",
5
  "add_bias_logits": false,
@@ -17,14 +17,13 @@
17
  "decoder_layerdrop": 0.0,
18
  "decoder_layers": 16,
19
  "decoder_start_token_id": 0,
20
- "do_blenderbot_90_layernorm": false,
21
  "dropout": 0.1,
22
  "encoder_attention_heads": 16,
23
  "encoder_ffn_dim": 4096,
24
  "encoder_layerdrop": 0.0,
25
  "encoder_layers": 16,
26
  "eos_token_id": 1,
27
- "extra_pos_embeddings": 0,
28
  "force_bos_token_to_be_generated": false,
29
  "forced_eos_token_id": 1,
30
  "gradient_checkpointing": false,
@@ -40,9 +39,9 @@
40
  "LABEL_1": 1,
41
  "LABEL_2": 2
42
  },
43
- "length_penalty": 0.6,
44
- "max_length": 64,
45
- "max_position_embeddings": 512,
46
  "model_type": "pegasus",
47
  "normalize_before": true,
48
  "normalize_embedding": false,
@@ -51,6 +50,73 @@
51
  "pad_token_id": 0,
52
  "scale_embedding": true,
53
  "static_position_embeddings": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  "torch_dtype": "float32",
55
  "transformers_version": "4.36.0",
56
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "Models/LLM_Teached_Pegasus_Large",
3
  "activation_dropout": 0.1,
4
  "activation_function": "relu",
5
  "add_bias_logits": false,
 
17
  "decoder_layerdrop": 0.0,
18
  "decoder_layers": 16,
19
  "decoder_start_token_id": 0,
 
20
  "dropout": 0.1,
21
  "encoder_attention_heads": 16,
22
  "encoder_ffn_dim": 4096,
23
  "encoder_layerdrop": 0.0,
24
  "encoder_layers": 16,
25
  "eos_token_id": 1,
26
+ "extra_pos_embeddings": 1,
27
  "force_bos_token_to_be_generated": false,
28
  "forced_eos_token_id": 1,
29
  "gradient_checkpointing": false,
 
39
  "LABEL_1": 1,
40
  "LABEL_2": 2
41
  },
42
+ "length_penalty": 0.8,
43
+ "max_length": 256,
44
+ "max_position_embeddings": 1024,
45
  "model_type": "pegasus",
46
  "normalize_before": true,
47
  "normalize_embedding": false,
 
50
  "pad_token_id": 0,
51
  "scale_embedding": true,
52
  "static_position_embeddings": true,
53
+ "task_specific_params": {
54
+ "summarization_aeslc": {
55
+ "length_penalty": 0.6,
56
+ "max_length": 32,
57
+ "max_position_embeddings": 512
58
+ },
59
+ "summarization_arxiv": {
60
+ "length_penalty": 0.8,
61
+ "max_length": 256,
62
+ "max_position_embeddings": 1024
63
+ },
64
+ "summarization_big_patent": {
65
+ "length_penalty": 0.7,
66
+ "max_length": 256,
67
+ "max_position_embeddings": 1024
68
+ },
69
+ "summarization_billsum": {
70
+ "length_penalty": 0.6,
71
+ "max_length": 256,
72
+ "max_position_embeddings": 1024
73
+ },
74
+ "summarization_cnn_dailymail": {
75
+ "length_penalty": 0.8,
76
+ "max_length": 128,
77
+ "max_position_embeddings": 1024
78
+ },
79
+ "summarization_gigaword": {
80
+ "length_penalty": 0.6,
81
+ "max_length": 32,
82
+ "max_position_embeddings": 128
83
+ },
84
+ "summarization_large": {
85
+ "length_penalty": 0.8,
86
+ "max_length": 256,
87
+ "max_position_embeddings": 1024
88
+ },
89
+ "summarization_multi_news": {
90
+ "length_penalty": 0.8,
91
+ "max_length": 256,
92
+ "max_position_embeddings": 1024
93
+ },
94
+ "summarization_newsroom": {
95
+ "length_penalty": 0.8,
96
+ "max_length": 128,
97
+ "max_position_embeddings": 512
98
+ },
99
+ "summarization_pubmed": {
100
+ "length_penalty": 0.8,
101
+ "max_length": 256,
102
+ "max_position_embeddings": 1024
103
+ },
104
+ "summarization_reddit_tifu": {
105
+ "length_penalty": 0.6,
106
+ "max_length": 128,
107
+ "max_position_embeddings": 512
108
+ },
109
+ "summarization_wikihow": {
110
+ "length_penalty": 0.6,
111
+ "max_length": 256,
112
+ "max_position_embeddings": 512
113
+ },
114
+ "summarization_xsum": {
115
+ "length_penalty": 0.8,
116
+ "max_length": 64,
117
+ "max_position_embeddings": 512
118
+ }
119
+ },
120
  "torch_dtype": "float32",
121
  "transformers_version": "4.36.0",
122
  "use_cache": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8574faf8d71bd5d564a560ba9ec16dbfc00f903f1878c0430ba026e8dda5ac66
3
- size 2279458540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca87ca1f5863488ab89052eb60e15357702f3e3db7e69ce696fe024c0be46d0
3
+ size 2283652852
predict_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_gen_len": 28.834,
3
+ "predict_loss": 1.8422898054122925,
4
+ "predict_rouge1": 0.4278,
5
+ "predict_rouge2": 0.1789,
6
+ "predict_rougeL": 0.3407,
7
+ "predict_rougeLsum": 0.3407,
8
+ "predict_runtime": 2061.934,
9
+ "predict_samples": 11334,
10
+ "predict_samples_per_second": 5.497,
11
+ "predict_steps_per_second": 0.172
12
+ }
runs/Dec28_11-08-41_n4bcoectr1703727001286-fmclw/events.out.tfevents.1703732925.n4bcoectr1703727001286-fmclw.60838.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e111294da9f9189e392a21e633d28815a197c269bbc5b96303736e7ffa395b
3
+ size 6997
runs/Dec28_11-10-37_n4bcoectr1703727001286-fmclw/events.out.tfevents.1703733041.n4bcoectr1703727001286-fmclw.62870.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3556a1c1f2861c49fda044a8dca453e99833faa6b64398611a27929f18fcb17c
3
+ size 6997
runs/Dec28_11-12-24_n4bcoectr1703727001286-fmclw/events.out.tfevents.1703733148.n4bcoectr1703727001286-fmclw.64933.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0969e479a913a45250e3ee313ccde06aa1a4cb2a45e7c5276249e03d7a603cc5
3
+ size 7154
special_tokens_map.json CHANGED
@@ -103,8 +103,32 @@
103
  "<unk_101>",
104
  "<unk_102>"
105
  ],
106
- "eos_token": "</s>",
107
- "mask_token": "<mask_2>",
108
- "pad_token": "<pad>",
109
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
 
103
  "<unk_101>",
104
  "<unk_102>"
105
  ],
106
+ "eos_token": {
107
+ "content": "</s>",
108
+ "lstrip": false,
109
+ "normalized": false,
110
+ "rstrip": false,
111
+ "single_word": false
112
+ },
113
+ "mask_token": {
114
+ "content": "<mask_2>",
115
+ "lstrip": false,
116
+ "normalized": false,
117
+ "rstrip": false,
118
+ "single_word": false
119
+ },
120
+ "pad_token": {
121
+ "content": "<pad>",
122
+ "lstrip": false,
123
+ "normalized": false,
124
+ "rstrip": false,
125
+ "single_word": false
126
+ },
127
+ "unk_token": {
128
+ "content": "<unk>",
129
+ "lstrip": false,
130
+ "normalized": false,
131
+ "rstrip": false,
132
+ "single_word": false
133
+ }
134
  }
tokenizer.json CHANGED
@@ -980,10 +980,18 @@
980
  ]
981
  },
982
  "pre_tokenizer": {
983
- "type": "Metaspace",
984
- "replacement": "▁",
985
- "add_prefix_space": true,
986
- "prepend_scheme": "always"
 
 
 
 
 
 
 
 
987
  },
988
  "post_processor": {
989
  "type": "TemplateProcessing",
 
980
  ]
981
  },
982
  "pre_tokenizer": {
983
+ "type": "Sequence",
984
+ "pretokenizers": [
985
+ {
986
+ "type": "WhitespaceSplit"
987
+ },
988
+ {
989
+ "type": "Metaspace",
990
+ "replacement": "▁",
991
+ "add_prefix_space": true,
992
+ "prepend_scheme": "always"
993
+ }
994
+ ]
995
  },
996
  "post_processor": {
997
  "type": "TemplateProcessing",
tokenizer_config.json CHANGED
@@ -958,9 +958,14 @@
958
  "full_tokenizer_file": null,
959
  "mask_token": "<mask_2>",
960
  "mask_token_sent": "<mask_1>",
961
- "model_max_length": 512,
 
962
  "offset": 103,
963
  "pad_token": "<pad>",
 
 
964
  "tokenizer_class": "PegasusTokenizer",
 
 
965
  "unk_token": "<unk>"
966
  }
 
958
  "full_tokenizer_file": null,
959
  "mask_token": "<mask_2>",
960
  "mask_token_sent": "<mask_1>",
961
+ "max_length": 64,
962
+ "model_max_length": 1024,
963
  "offset": 103,
964
  "pad_token": "<pad>",
965
+ "sp_model_kwargs": {},
966
+ "stride": 0,
967
  "tokenizer_class": "PegasusTokenizer",
968
+ "truncation_side": "right",
969
+ "truncation_strategy": "longest_first",
970
  "unk_token": "<unk>"
971
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6c7e1d3041fbab4a90c6690172fedbda8b05ba6fc3056a81807670ba8818e8c
3
- size 4411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c42f961a10319a18532a4b16bc9ea097d53e9ca0abae8d756ba4eb4b19798f
3
+ size 4475