MutazYoune committed on
Commit
dced883
1 Parent(s): 3c81ae9

Upload 13 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
config.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "_name_or_path": "/content/drive/MyDrive/Data_For_Bert/BERT_racism/checkpoint-90000",
3
- "_num_labels": 2,
4
  "architectures": [
5
- "BertForMaskedLM"
6
  ],
7
  "attention_probs_dropout_prob": 0.1,
8
  "classifier_dropout": null,
@@ -11,17 +10,23 @@
11
  "hidden_size": 768,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-12,
15
  "max_position_embeddings": 512,
 
16
  "model_type": "bert",
 
17
  "num_attention_heads": 12,
18
  "num_hidden_layers": 12,
19
- "output_past": true,
20
  "pad_token_id": 0,
 
 
 
21
  "position_embedding_type": "absolute",
 
 
22
  "torch_dtype": "float32",
23
  "transformers_version": "4.26.0.dev0",
24
- "type_vocab_size": 2,
25
  "use_cache": true,
26
- "vocab_size": 30522
27
  }
 
1
  {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
 
3
  "architectures": [
4
+ "DebertaV2ForMaskedLM"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
 
10
  "hidden_size": 768,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-07,
14
  "max_position_embeddings": 512,
15
+ "max_relative_positions": -1,
16
  "model_type": "bert",
17
+ "norm_rel_ebd": "layer_norm",
18
  "num_attention_heads": 12,
19
  "num_hidden_layers": 12,
 
20
  "pad_token_id": 0,
21
+ "pos_att_type": "p2c|c2p",
22
+ "position_biased_input": false,
23
+ "position_buckets": 256,
24
  "position_embedding_type": "absolute",
25
+ "relative_attention": true,
26
+ "share_att_key": true,
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.26.0.dev0",
29
+ "type_vocab_size": 0,
30
  "use_cache": true,
31
+ "vocab_size": 128100
32
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3efb2bc80788e3daa67aa17d67fb6c0f17604873199b60e81d0835c577cfe075
3
- size 876232773
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d619d7691af28e8f6f02dba974d8fe0ea229b3eeeca668cbb30f57cef2b65d3
3
+ size 1476533253
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a45d7c0ee35f90c4e0c607ad65c8e38dca2bfb86308ccdf560065981936d31
3
- size 438130617
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48c21b3747aaad55f1e56af06c5061b80292b51ca53655302d0db9631dfeffc
3
+ size 738281789
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17e9de62200b08187e47517ccd3a0bf530efc0142f9de17bc288408d02f6df55
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4d754cd5b0ab83c33e00889facce2acc25529484546e402e82fae8129d3cb5
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:803576f45f580b24287f72d7e63d4eb932e6ee47fe5e5102c8360a65c6d26b02
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6357cd353dfa90555ff09c45b944d9f4105d732ce3b83ffe5da3e0092a8a5b0
3
  size 627
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer_config.json CHANGED
@@ -1,17 +1,18 @@
1
  {
 
2
  "cls_token": "[CLS]",
3
- "do_basic_tokenize": true,
4
- "do_lower_case": true,
5
  "mask_token": "[MASK]",
6
- "max_len": 512,
7
- "model_max_length": 512,
8
- "name_or_path": "GroNLP/hateBERT",
9
- "never_split": null,
10
  "pad_token": "[PAD]",
11
  "sep_token": "[SEP]",
12
- "special_tokens_map_file": "/root/.cache/huggingface/hub/models--GroNLP--hateBERT/snapshots/f56d507e4b6a64413aff29e541e1b2178ee79d67/special_tokens_map.json",
13
- "strip_accents": null,
14
- "tokenize_chinese_chars": true,
15
- "tokenizer_class": "BertTokenizer",
16
- "unk_token": "[UNK]"
 
17
  }
 
1
  {
2
+ "bos_token": "[CLS]",
3
  "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "eos_token": "[SEP]",
6
  "mask_token": "[MASK]",
7
+ "max_len": 768,
8
+ "model_max_length": 768,
9
+ "name_or_path": "microsoft/deberta-v3-base",
 
10
  "pad_token": "[PAD]",
11
  "sep_token": "[SEP]",
12
+ "sp_model_kwargs": {},
13
+ "special_tokens_map_file": null,
14
+ "split_by_punct": false,
15
+ "tokenizer_class": "DebertaV2Tokenizer",
16
+ "unk_token": "[UNK]",
17
+ "vocab_type": "spm"
18
  }
trainer_state.json CHANGED
@@ -1,256 +1,136 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3267493342482315,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 3.7438734499828454e-05,
13
- "loss": 1.691,
14
- "step": 500
15
- },
16
  {
17
  "epoch": 0.02,
18
- "learning_rate": 3.737746899965691e-05,
19
- "loss": 1.7454,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.02,
24
- "learning_rate": 3.731620349948536e-05,
25
- "loss": 1.6933,
26
- "step": 1500
27
  },
28
  {
29
  "epoch": 0.03,
30
- "learning_rate": 3.725493799931382e-05,
31
- "loss": 1.7047,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.04,
36
- "learning_rate": 3.719367249914228e-05,
37
- "loss": 1.6615,
38
- "step": 2500
39
  },
40
  {
41
  "epoch": 0.05,
42
- "learning_rate": 3.7132406998970736e-05,
43
- "loss": 1.6795,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.06,
48
- "learning_rate": 3.7071141498799194e-05,
49
- "loss": 1.6472,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.07,
54
- "learning_rate": 3.700987599862765e-05,
55
- "loss": 1.6646,
56
- "step": 4000
57
  },
58
  {
59
  "epoch": 0.07,
60
- "learning_rate": 3.694861049845611e-05,
61
- "loss": 1.6494,
62
- "step": 4500
63
  },
64
  {
65
  "epoch": 0.08,
66
- "learning_rate": 3.688734499828457e-05,
67
- "loss": 1.6385,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.09,
72
- "learning_rate": 3.682607949811302e-05,
73
- "loss": 1.6801,
74
- "step": 5500
75
  },
76
  {
77
  "epoch": 0.1,
78
- "learning_rate": 3.6764813997941475e-05,
79
- "loss": 1.6442,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 0.11,
84
- "learning_rate": 3.670354849776993e-05,
85
- "loss": 1.6434,
86
- "step": 6500
87
  },
88
  {
89
  "epoch": 0.11,
90
- "learning_rate": 3.664228299759839e-05,
91
- "loss": 1.6597,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 0.12,
96
- "learning_rate": 3.658101749742685e-05,
97
- "loss": 1.6386,
98
- "step": 7500
99
  },
100
  {
101
  "epoch": 0.13,
102
- "learning_rate": 3.65197519972553e-05,
103
- "loss": 1.6254,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 0.14,
108
- "learning_rate": 3.645848649708376e-05,
109
- "loss": 1.6496,
110
- "step": 8500
111
  },
112
  {
113
  "epoch": 0.15,
114
- "learning_rate": 3.6397220996912214e-05,
115
- "loss": 1.6526,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 0.16,
120
- "learning_rate": 3.633595549674067e-05,
121
- "loss": 1.6393,
122
- "step": 9500
123
  },
124
  {
125
  "epoch": 0.16,
126
- "learning_rate": 3.627468999656913e-05,
127
- "loss": 1.6451,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 0.17,
132
- "learning_rate": 3.621342449639758e-05,
133
- "loss": 1.6414,
134
- "step": 10500
135
  },
136
  {
137
  "epoch": 0.18,
138
- "learning_rate": 3.6152158996226045e-05,
139
- "loss": 1.6656,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 0.19,
144
- "learning_rate": 3.60908934960545e-05,
145
- "loss": 1.625,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 0.2,
150
- "learning_rate": 3.6029627995882954e-05,
151
- "loss": 1.6365,
152
- "step": 12000
153
  },
154
  {
155
  "epoch": 0.2,
156
- "learning_rate": 3.596836249571141e-05,
157
- "loss": 1.6708,
158
- "step": 12500
159
  },
160
  {
161
  "epoch": 0.21,
162
- "learning_rate": 3.590709699553987e-05,
163
- "loss": 1.6482,
164
- "step": 13000
165
- },
166
- {
167
- "epoch": 0.22,
168
- "learning_rate": 3.584583149536833e-05,
169
- "loss": 1.5945,
170
- "step": 13500
171
  },
172
  {
173
  "epoch": 0.23,
174
- "learning_rate": 3.5784565995196784e-05,
175
- "loss": 1.6005,
176
- "step": 14000
177
- },
178
- {
179
- "epoch": 0.24,
180
- "learning_rate": 3.5723300495025235e-05,
181
- "loss": 1.6449,
182
- "step": 14500
183
- },
184
- {
185
- "epoch": 0.25,
186
- "learning_rate": 3.566203499485369e-05,
187
- "loss": 1.5985,
188
- "step": 15000
189
  },
190
  {
191
  "epoch": 0.25,
192
- "learning_rate": 3.560076949468215e-05,
193
- "loss": 1.617,
194
- "step": 15500
195
  },
196
  {
197
  "epoch": 0.26,
198
- "learning_rate": 3.553950399451061e-05,
199
- "loss": 1.6127,
200
- "step": 16000
201
- },
202
- {
203
- "epoch": 0.27,
204
- "learning_rate": 3.5478238494339066e-05,
205
- "loss": 1.6327,
206
- "step": 16500
207
  },
208
  {
209
  "epoch": 0.28,
210
- "learning_rate": 3.541697299416752e-05,
211
- "loss": 1.6404,
212
- "step": 17000
213
- },
214
- {
215
- "epoch": 0.29,
216
- "learning_rate": 3.5355707493995974e-05,
217
- "loss": 1.595,
218
- "step": 17500
219
  },
220
  {
221
  "epoch": 0.29,
222
- "learning_rate": 3.529444199382444e-05,
223
- "loss": 1.5921,
224
- "step": 18000
225
- },
226
- {
227
- "epoch": 0.3,
228
- "learning_rate": 3.523317649365289e-05,
229
- "loss": 1.6169,
230
- "step": 18500
231
  },
232
  {
233
  "epoch": 0.31,
234
- "learning_rate": 3.517191099348135e-05,
235
- "loss": 1.6253,
236
- "step": 19000
237
- },
238
- {
239
- "epoch": 0.32,
240
- "learning_rate": 3.5110645493309805e-05,
241
- "loss": 1.6448,
242
- "step": 19500
243
  },
244
  {
245
  "epoch": 0.33,
246
- "learning_rate": 3.504937999313826e-05,
247
- "loss": 1.6232,
248
- "step": 20000
249
  }
250
  ],
251
- "max_steps": 306045,
252
- "num_train_epochs": 5,
253
- "total_flos": 4486605689289600.0,
254
  "trial_name": null,
255
  "trial_params": null
256
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.32674399607907206,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 0.02,
12
+ "learning_rate": 3.729578500245058e-05,
13
+ "loss": 6.3916,
14
+ "step": 500
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.03,
18
+ "learning_rate": 3.7091570004901157e-05,
19
+ "loss": 5.0294,
20
+ "step": 1000
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 0.05,
24
+ "learning_rate": 3.6887355007351737e-05,
25
+ "loss": 4.6716,
26
+ "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 0.07,
30
+ "learning_rate": 3.6683140009802316e-05,
31
+ "loss": 4.4325,
32
+ "step": 2000
33
  },
34
  {
35
  "epoch": 0.08,
36
+ "learning_rate": 3.6478925012252896e-05,
37
+ "loss": 4.2343,
38
+ "step": 2500
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 0.1,
42
+ "learning_rate": 3.6274710014703476e-05,
43
+ "loss": 4.1306,
44
+ "step": 3000
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 0.11,
48
+ "learning_rate": 3.6070495017154056e-05,
49
+ "loss": 4.056,
50
+ "step": 3500
 
 
 
 
 
 
51
  },
52
  {
53
  "epoch": 0.13,
54
+ "learning_rate": 3.5866280019604636e-05,
55
+ "loss": 3.9515,
56
+ "step": 4000
 
 
 
 
 
 
57
  },
58
  {
59
  "epoch": 0.15,
60
+ "learning_rate": 3.5662065022055216e-05,
61
+ "loss": 3.8561,
62
+ "step": 4500
 
 
 
 
 
 
63
  },
64
  {
65
  "epoch": 0.16,
66
+ "learning_rate": 3.5457850024505796e-05,
67
+ "loss": 3.8194,
68
+ "step": 5000
 
 
 
 
 
 
69
  },
70
  {
71
  "epoch": 0.18,
72
+ "learning_rate": 3.5253635026956376e-05,
73
+ "loss": 3.7632,
74
+ "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
75
  },
76
  {
77
  "epoch": 0.2,
78
+ "learning_rate": 3.5049420029406956e-05,
79
+ "loss": 3.7016,
80
+ "step": 6000
81
  },
82
  {
83
  "epoch": 0.21,
84
+ "learning_rate": 3.4845205031857536e-05,
85
+ "loss": 3.6589,
86
+ "step": 6500
 
 
 
 
 
 
87
  },
88
  {
89
  "epoch": 0.23,
90
+ "learning_rate": 3.4640990034308116e-05,
91
+ "loss": 3.645,
92
+ "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
93
  },
94
  {
95
  "epoch": 0.25,
96
+ "learning_rate": 3.4436775036758696e-05,
97
+ "loss": 3.5688,
98
+ "step": 7500
99
  },
100
  {
101
  "epoch": 0.26,
102
+ "learning_rate": 3.4232560039209276e-05,
103
+ "loss": 3.558,
104
+ "step": 8000
 
 
 
 
 
 
105
  },
106
  {
107
  "epoch": 0.28,
108
+ "learning_rate": 3.4028345041659856e-05,
109
+ "loss": 3.5789,
110
+ "step": 8500
 
 
 
 
 
 
111
  },
112
  {
113
  "epoch": 0.29,
114
+ "learning_rate": 3.3824130044110436e-05,
115
+ "loss": 3.5275,
116
+ "step": 9000
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 0.31,
120
+ "learning_rate": 3.3619915046561016e-05,
121
+ "loss": 3.4434,
122
+ "step": 9500
 
 
 
 
 
 
123
  },
124
  {
125
  "epoch": 0.33,
126
+ "learning_rate": 3.3415700049011596e-05,
127
+ "loss": 3.4455,
128
+ "step": 10000
129
  }
130
  ],
131
+ "max_steps": 91815,
132
+ "num_train_epochs": 3,
133
+ "total_flos": 4657287913271424.0,
134
  "trial_name": null,
135
  "trial_params": null
136
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9440ca29a771ae95de805c0f8b289de7b342428a78875730b6fc863782dd8d92
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772c12184f183531438a23ce20c0ac2de0537c7af53700692ae8f1564bbdf0ef
3
+ size 3515
vocab.json CHANGED
The diff for this file is too large to render. See raw diff