danield12 commited on
Commit
edab8ca
·
verified ·
1 Parent(s): 7d3c646

Upload folder using huggingface_hub

Browse files
best/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
best/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-xsmall",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 384,
10
+ "id2label": {
11
+ "0": "human",
12
+ "1": "machine"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "human": 0,
18
+ "machine": 1
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 6,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 384,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.47.0",
42
+ "type_vocab_size": 0,
43
+ "vocab_size": 128100
44
+ }
best/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c1b6f5ea2d039b9c9110ade1870be063c10a70afd28849aeaae6c02305d2e0
3
+ size 283347432
best/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
best/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
best/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
best/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
best/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789de4a2b6c30e6874a2920b354aec1ad5e4e55abf36678e20075927918410a2
3
+ size 5368
checkpoint-11976/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-11976/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-xsmall",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 384,
10
+ "id2label": {
11
+ "0": "human",
12
+ "1": "machine"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "human": 0,
18
+ "machine": 1
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 6,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 384,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.47.0",
42
+ "type_vocab_size": 0,
43
+ "vocab_size": 128100
44
+ }
checkpoint-11976/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b8f1dc9666c87c99b057b626ec546333ece50dc2c2726116a5c4f02f381bac
3
+ size 283347432
checkpoint-11976/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f0f2088cce71a33a60bf9a49c06ae7a59c134d78e12991040e4cc5e61f79a2a
3
+ size 566814714
checkpoint-11976/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed57eb25c2204d38611cd294c889cb94217b91fb554d21998d996221e29501e8
3
+ size 14244
checkpoint-11976/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28dd990fb8bcb8c54e47457d67281f1cfa14ff9e49d69616761dac732dd545f1
3
+ size 1064
checkpoint-11976/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-11976/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-11976/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-11976/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
checkpoint-11976/trainer_state.json ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2785227596759796,
3
+ "best_model_checkpoint": "Machine-Generated-Text-Detection/deberta/checkpoint-5988",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 11976,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08350033400133601,
13
+ "grad_norm": 1.8728934526443481,
14
+ "learning_rate": 1.9164996659986642e-05,
15
+ "loss": 0.2671,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.16700066800267202,
20
+ "grad_norm": 0.5576282143592834,
21
+ "learning_rate": 1.832999331997328e-05,
22
+ "loss": 0.1527,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.250501002004008,
27
+ "grad_norm": 7.88850736618042,
28
+ "learning_rate": 1.7494989979959922e-05,
29
+ "loss": 0.135,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.33400133600534404,
34
+ "grad_norm": 0.6910138130187988,
35
+ "learning_rate": 1.6659986639946563e-05,
36
+ "loss": 0.1107,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.41750167000668004,
41
+ "grad_norm": 8.903568267822266,
42
+ "learning_rate": 1.58249832999332e-05,
43
+ "loss": 0.1152,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.501002004008016,
48
+ "grad_norm": 0.08548898249864578,
49
+ "learning_rate": 1.498997995991984e-05,
50
+ "loss": 0.1024,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.584502338009352,
55
+ "grad_norm": 8.673635482788086,
56
+ "learning_rate": 1.415497661990648e-05,
57
+ "loss": 0.0967,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.6680026720106881,
62
+ "grad_norm": 7.020376682281494,
63
+ "learning_rate": 1.331997327989312e-05,
64
+ "loss": 0.1024,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.751503006012024,
69
+ "grad_norm": 11.58995532989502,
70
+ "learning_rate": 1.248496993987976e-05,
71
+ "loss": 0.0809,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.8350033400133601,
76
+ "grad_norm": 8.78143310546875,
77
+ "learning_rate": 1.16499665998664e-05,
78
+ "loss": 0.0823,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.918503674014696,
83
+ "grad_norm": 9.562505722045898,
84
+ "learning_rate": 1.081496325985304e-05,
85
+ "loss": 0.0769,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 1.0,
90
+ "eval_f1": 0.9371242484969939,
91
+ "eval_loss": 0.2785227596759796,
92
+ "eval_runtime": 269.2073,
93
+ "eval_samples_per_second": 88.972,
94
+ "eval_steps_per_second": 5.561,
95
+ "step": 5988
96
+ },
97
+ {
98
+ "epoch": 1.002004008016032,
99
+ "grad_norm": 9.97668170928955,
100
+ "learning_rate": 9.97995991983968e-06,
101
+ "loss": 0.0756,
102
+ "step": 6000
103
+ },
104
+ {
105
+ "epoch": 1.085504342017368,
106
+ "grad_norm": 0.9769249558448792,
107
+ "learning_rate": 9.14495657982632e-06,
108
+ "loss": 0.0652,
109
+ "step": 6500
110
+ },
111
+ {
112
+ "epoch": 1.169004676018704,
113
+ "grad_norm": 0.14497636258602142,
114
+ "learning_rate": 8.30995323981296e-06,
115
+ "loss": 0.0587,
116
+ "step": 7000
117
+ },
118
+ {
119
+ "epoch": 1.25250501002004,
120
+ "grad_norm": 0.04314618557691574,
121
+ "learning_rate": 7.474949899799599e-06,
122
+ "loss": 0.052,
123
+ "step": 7500
124
+ },
125
+ {
126
+ "epoch": 1.3360053440213762,
127
+ "grad_norm": 0.008493722416460514,
128
+ "learning_rate": 6.63994655978624e-06,
129
+ "loss": 0.0481,
130
+ "step": 8000
131
+ },
132
+ {
133
+ "epoch": 1.4195056780227122,
134
+ "grad_norm": 0.08650221675634384,
135
+ "learning_rate": 5.80494321977288e-06,
136
+ "loss": 0.0505,
137
+ "step": 8500
138
+ },
139
+ {
140
+ "epoch": 1.503006012024048,
141
+ "grad_norm": 0.061995044350624084,
142
+ "learning_rate": 4.969939879759519e-06,
143
+ "loss": 0.0591,
144
+ "step": 9000
145
+ },
146
+ {
147
+ "epoch": 1.586506346025384,
148
+ "grad_norm": 0.5668061375617981,
149
+ "learning_rate": 4.1349365397461595e-06,
150
+ "loss": 0.0449,
151
+ "step": 9500
152
+ },
153
+ {
154
+ "epoch": 1.6700066800267201,
155
+ "grad_norm": 24.995769500732422,
156
+ "learning_rate": 3.2999331997327993e-06,
157
+ "loss": 0.0468,
158
+ "step": 10000
159
+ },
160
+ {
161
+ "epoch": 1.753507014028056,
162
+ "grad_norm": 0.08540898561477661,
163
+ "learning_rate": 2.464929859719439e-06,
164
+ "loss": 0.0508,
165
+ "step": 10500
166
+ },
167
+ {
168
+ "epoch": 1.8370073480293923,
169
+ "grad_norm": 0.008522373624145985,
170
+ "learning_rate": 1.629926519706079e-06,
171
+ "loss": 0.0508,
172
+ "step": 11000
173
+ },
174
+ {
175
+ "epoch": 1.920507682030728,
176
+ "grad_norm": 0.0260649211704731,
177
+ "learning_rate": 7.949231796927188e-07,
178
+ "loss": 0.0369,
179
+ "step": 11500
180
+ },
181
+ {
182
+ "epoch": 2.0,
183
+ "eval_f1": 0.939003006012024,
184
+ "eval_loss": 0.3057553768157959,
185
+ "eval_runtime": 270.9202,
186
+ "eval_samples_per_second": 88.41,
187
+ "eval_steps_per_second": 5.526,
188
+ "step": 11976
189
+ }
190
+ ],
191
+ "logging_steps": 500,
192
+ "max_steps": 11976,
193
+ "num_input_tokens_seen": 0,
194
+ "num_train_epochs": 2,
195
+ "save_steps": 500,
196
+ "stateful_callbacks": {
197
+ "TrainerControl": {
198
+ "args": {
199
+ "should_epoch_stop": false,
200
+ "should_evaluate": false,
201
+ "should_log": false,
202
+ "should_save": true,
203
+ "should_training_stop": true
204
+ },
205
+ "attributes": {}
206
+ }
207
+ },
208
+ "total_flos": 1.261573837252608e+16,
209
+ "train_batch_size": 16,
210
+ "trial_name": null,
211
+ "trial_params": null
212
+ }
checkpoint-11976/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789de4a2b6c30e6874a2920b354aec1ad5e4e55abf36678e20075927918410a2
3
+ size 5368
checkpoint-5988/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-5988/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-xsmall",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 384,
10
+ "id2label": {
11
+ "0": "human",
12
+ "1": "machine"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "human": 0,
18
+ "machine": 1
19
+ },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
26
+ "num_attention_heads": 6,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 384,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.47.0",
42
+ "type_vocab_size": 0,
43
+ "vocab_size": 128100
44
+ }
checkpoint-5988/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c1b6f5ea2d039b9c9110ade1870be063c10a70afd28849aeaae6c02305d2e0
3
+ size 283347432
checkpoint-5988/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2448759def6302a46db7df39300234434637b0185b22395cf24d62f04fc2abbc
3
+ size 566814714
checkpoint-5988/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43417117d08f6f1db417ffa5433d4d3c3fa7901dd081de7a8875670a9ec506ad
3
+ size 14244
checkpoint-5988/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec6adcfc715a01b4971f7bd4468cc3d670b55611d5a481b47e778747a988a43
3
+ size 1064
checkpoint-5988/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-5988/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-5988/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-5988/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
checkpoint-5988/trainer_state.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2785227596759796,
3
+ "best_model_checkpoint": "Machine-Generated-Text-Detection/deberta/checkpoint-5988",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5988,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08350033400133601,
13
+ "grad_norm": 1.8728934526443481,
14
+ "learning_rate": 1.9164996659986642e-05,
15
+ "loss": 0.2671,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.16700066800267202,
20
+ "grad_norm": 0.5576282143592834,
21
+ "learning_rate": 1.832999331997328e-05,
22
+ "loss": 0.1527,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.250501002004008,
27
+ "grad_norm": 7.88850736618042,
28
+ "learning_rate": 1.7494989979959922e-05,
29
+ "loss": 0.135,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.33400133600534404,
34
+ "grad_norm": 0.6910138130187988,
35
+ "learning_rate": 1.6659986639946563e-05,
36
+ "loss": 0.1107,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.41750167000668004,
41
+ "grad_norm": 8.903568267822266,
42
+ "learning_rate": 1.58249832999332e-05,
43
+ "loss": 0.1152,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.501002004008016,
48
+ "grad_norm": 0.08548898249864578,
49
+ "learning_rate": 1.498997995991984e-05,
50
+ "loss": 0.1024,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.584502338009352,
55
+ "grad_norm": 8.673635482788086,
56
+ "learning_rate": 1.415497661990648e-05,
57
+ "loss": 0.0967,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.6680026720106881,
62
+ "grad_norm": 7.020376682281494,
63
+ "learning_rate": 1.331997327989312e-05,
64
+ "loss": 0.1024,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.751503006012024,
69
+ "grad_norm": 11.58995532989502,
70
+ "learning_rate": 1.248496993987976e-05,
71
+ "loss": 0.0809,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.8350033400133601,
76
+ "grad_norm": 8.78143310546875,
77
+ "learning_rate": 1.16499665998664e-05,
78
+ "loss": 0.0823,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.918503674014696,
83
+ "grad_norm": 9.562505722045898,
84
+ "learning_rate": 1.081496325985304e-05,
85
+ "loss": 0.0769,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 1.0,
90
+ "eval_f1": 0.9371242484969939,
91
+ "eval_loss": 0.2785227596759796,
92
+ "eval_runtime": 269.2073,
93
+ "eval_samples_per_second": 88.972,
94
+ "eval_steps_per_second": 5.561,
95
+ "step": 5988
96
+ }
97
+ ],
98
+ "logging_steps": 500,
99
+ "max_steps": 11976,
100
+ "num_input_tokens_seen": 0,
101
+ "num_train_epochs": 2,
102
+ "save_steps": 500,
103
+ "stateful_callbacks": {
104
+ "TrainerControl": {
105
+ "args": {
106
+ "should_epoch_stop": false,
107
+ "should_evaluate": false,
108
+ "should_log": false,
109
+ "should_save": true,
110
+ "should_training_stop": false
111
+ },
112
+ "attributes": {}
113
+ }
114
+ },
115
+ "total_flos": 6307877420649216.0,
116
+ "train_batch_size": 16,
117
+ "trial_name": null,
118
+ "trial_params": null
119
+ }
checkpoint-5988/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789de4a2b6c30e6874a2920b354aec1ad5e4e55abf36678e20075927918410a2
3
+ size 5368