arnocandel committed on
Commit
2476be7
1 Parent(s): 407613b

Upload 15 files

Browse files
README.md CHANGED
@@ -1,3 +1,37 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ pipeline_tag: token-classification
6
+ ---
7
+
8
+ A fine-tuned model that recognizes and classifies Personally Identifiable Information (PII) in unstructured text. It identifies a wide range of PII categories, such as account names, credit card numbers, emails, phone numbers, and addresses. The model is specifically trained to detect various PII types, including but not limited to:
9
+
10
+ ```
11
+ | Category | Data |
12
+ |------------------------|----------------------------------------------------------------------------------------|
13
+ | Account-related information | Account name, account number, and transaction amounts |
14
+ | Banking details | BIC, IBAN, and Bitcoin or Ethereum addresses |
15
+ | Personal information | Full name, first name, middle name, last name, gender, and date of birth |
16
+ | Contact information | Email, phone number, and street address (including building number, city, county, state, and zip code) |
17
+ | Job-related data | Job title, job area, job descriptor, and job type |
18
+ | Financial data | Credit card number, issuer, CVV, and currency information (code, name, and symbol) |
19
+ | Digital identifiers | IP addresses (IPv4 and IPv6), MAC addresses, and user agents |
20
+ | Online presence | URL, usernames, and passwords |
21
+ | Other sensitive data | SSN, vehicle VIN and VRM, phone IMEI, and nearby GPS coordinates |
22
+ ```
23
+
24
+
25
+ The PII Identifier Model ensures data privacy and compliance by effectively detecting and categorizing sensitive information within documents, emails, user-generated content, and more. Make your data processing safer and more secure with our state-of-the-art PII detection technology.
26
+
27
+ How to run inference:
28
+
29
+ ```
30
+ from transformers import pipeline
31
+ gen = pipeline("token-classification", "lakshyakh93/deberta_finetuned_pii", device=-1)
32
+
33
+ text = "My name is John and I live in California."
34
+ output = gen(text, aggregation_strategy="first")
35
+ ```
36
+
37
+ For more details, reach out to lakshaya.khandelwal@gmail.com.
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 1,
3
+ "[MASK]": 50264,
4
+ "[PAD]": 0,
5
+ "[SEP]": 2,
6
+ "[UNK]": 3
7
+ }
config.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-base",
3
+ "architectures": [
4
+ "DebertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "B-PREFIX",
12
+ "1": "I-PREFIX",
13
+ "2": "B-FIRSTNAME",
14
+ "3": "I-FIRSTNAME",
15
+ "4": "B-MIDDLENAME",
16
+ "5": "B-LASTNAME",
17
+ "6": "I-LASTNAME",
18
+ "7": "O",
19
+ "8": "B-JOBDESCRIPTOR",
20
+ "9": "B-JOBTITLE",
21
+ "10": "I-JOBTITLE",
22
+ "11": "B-COMPANY_NAME",
23
+ "12": "I-COMPANY_NAME",
24
+ "13": "B-JOBAREA",
25
+ "14": "B-EMAIL",
26
+ "15": "I-EMAIL",
27
+ "16": "B-TIME",
28
+ "17": "I-TIME",
29
+ "18": "B-DATE",
30
+ "19": "I-DATE",
31
+ "20": "B-URL",
32
+ "21": "I-URL",
33
+ "22": "B-BITCOINADDRESS",
34
+ "23": "I-BITCOINADDRESS",
35
+ "24": "B-ETHEREUMADDRESS",
36
+ "25": "I-ETHEREUMADDRESS",
37
+ "26": "B-ACCOUNTNAME",
38
+ "27": "I-ACCOUNTNAME",
39
+ "28": "B-IBAN",
40
+ "29": "I-IBAN",
41
+ "30": "B-ACCOUNTNUMBER",
42
+ "31": "I-ACCOUNTNUMBER",
43
+ "32": "B-BIC",
44
+ "33": "I-BIC",
45
+ "34": "B-IPV4",
46
+ "35": "I-IPV4",
47
+ "36": "B-STREETADDRESS",
48
+ "37": "I-STREETADDRESS",
49
+ "38": "B-CITY",
50
+ "39": "I-CITY",
51
+ "40": "B-ZIPCODE",
52
+ "41": "I-ZIPCODE",
53
+ "42": "B-USERNAME",
54
+ "43": "I-USERNAME",
55
+ "44": "B-IPV6",
56
+ "45": "I-IPV6",
57
+ "46": "B-CREDITCARDNUMBER",
58
+ "47": "I-CREDITCARDNUMBER",
59
+ "48": "B-VEHICLEVIN",
60
+ "49": "I-VEHICLEVIN",
61
+ "50": "B-SUFFIX",
62
+ "51": "I-SUFFIX",
63
+ "52": "B-AMOUNT",
64
+ "53": "I-AMOUNT",
65
+ "54": "B-CURRENCY",
66
+ "55": "I-CURRENCY",
67
+ "56": "B-PASSWORD",
68
+ "57": "I-PASSWORD",
69
+ "58": "B-JOBTYPE",
70
+ "59": "B-STATE",
71
+ "60": "B-BUILDINGNUMBER",
72
+ "61": "I-BUILDINGNUMBER",
73
+ "62": "B-VEHICLEVRM",
74
+ "63": "I-VEHICLEVRM",
75
+ "64": "B-PHONEIMEI",
76
+ "65": "I-PHONEIMEI",
77
+ "66": "I-JOBAREA",
78
+ "67": "I-STATE",
79
+ "68": "B-COUNTY",
80
+ "69": "B-CURRENCYNAME",
81
+ "70": "I-CURRENCYNAME",
82
+ "71": "B-CURRENCYSYMBOL",
83
+ "72": "B-MASKEDNUMBER",
84
+ "73": "I-MASKEDNUMBER",
85
+ "74": "B-PHONE_NUMBER",
86
+ "75": "I-PHONE_NUMBER",
87
+ "76": "B-SECONDARYADDRESS",
88
+ "77": "I-SECONDARYADDRESS",
89
+ "78": "B-SSN",
90
+ "79": "I-SSN",
91
+ "80": "B-CURRENCYCODE",
92
+ "81": "B-LITECOINADDRESS",
93
+ "82": "I-LITECOINADDRESS",
94
+ "83": "B-MAC",
95
+ "84": "I-MAC",
96
+ "85": "B-CREDITCARDISSUER",
97
+ "86": "I-CREDITCARDISSUER",
98
+ "87": "B-CREDITCARDCVV",
99
+ "88": "I-CREDITCARDCVV",
100
+ "89": "B-USERAGENT",
101
+ "90": "I-USERAGENT",
102
+ "91": "B-IP",
103
+ "92": "I-IP",
104
+ "93": "B-SEX",
105
+ "94": "B-STREET",
106
+ "95": "I-STREET",
107
+ "96": "B-PIN",
108
+ "97": "I-PIN",
109
+ "98": "I-JOBTYPE",
110
+ "99": "I-MIDDLENAME",
111
+ "100": "I-CURRENCYCODE",
112
+ "101": "I-CURRENCYSYMBOL",
113
+ "102": "B-FULLNAME",
114
+ "103": "I-FULLNAME",
115
+ "104": "B-NAME",
116
+ "105": "I-NAME",
117
+ "106": "B-GENDER",
118
+ "107": "B-NUMBER",
119
+ "108": "I-NUMBER",
120
+ "109": "I-GENDER",
121
+ "110": "B-NEARBYGPSCOORDINATE",
122
+ "111": "I-NEARBYGPSCOORDINATE",
123
+ "112": "B-DISPLAYNAME",
124
+ "113": "I-DISPLAYNAME",
125
+ "114": "B-SEXTYPE",
126
+ "115": "B-ORDINALDIRECTION"
127
+ },
128
+ "initializer_range": 0.02,
129
+ "intermediate_size": 3072,
130
+ "label2id": {
131
+ "B-ACCOUNTNAME": 26,
132
+ "B-ACCOUNTNUMBER": 30,
133
+ "B-AMOUNT": 52,
134
+ "B-BIC": 32,
135
+ "B-BITCOINADDRESS": 22,
136
+ "B-BUILDINGNUMBER": 60,
137
+ "B-CITY": 38,
138
+ "B-COMPANY_NAME": 11,
139
+ "B-COUNTY": 68,
140
+ "B-CREDITCARDCVV": 87,
141
+ "B-CREDITCARDISSUER": 85,
142
+ "B-CREDITCARDNUMBER": 46,
143
+ "B-CURRENCY": 54,
144
+ "B-CURRENCYCODE": 80,
145
+ "B-CURRENCYNAME": 69,
146
+ "B-CURRENCYSYMBOL": 71,
147
+ "B-DATE": 18,
148
+ "B-DISPLAYNAME": 112,
149
+ "B-EMAIL": 14,
150
+ "B-ETHEREUMADDRESS": 24,
151
+ "B-FIRSTNAME": 2,
152
+ "B-FULLNAME": 102,
153
+ "B-GENDER": 106,
154
+ "B-IBAN": 28,
155
+ "B-IP": 91,
156
+ "B-IPV4": 34,
157
+ "B-IPV6": 44,
158
+ "B-JOBAREA": 13,
159
+ "B-JOBDESCRIPTOR": 8,
160
+ "B-JOBTITLE": 9,
161
+ "B-JOBTYPE": 58,
162
+ "B-LASTNAME": 5,
163
+ "B-LITECOINADDRESS": 81,
164
+ "B-MAC": 83,
165
+ "B-MASKEDNUMBER": 72,
166
+ "B-MIDDLENAME": 4,
167
+ "B-NAME": 104,
168
+ "B-NEARBYGPSCOORDINATE": 110,
169
+ "B-NUMBER": 107,
170
+ "B-ORDINALDIRECTION": 115,
171
+ "B-PASSWORD": 56,
172
+ "B-PHONEIMEI": 64,
173
+ "B-PHONE_NUMBER": 74,
174
+ "B-PIN": 96,
175
+ "B-PREFIX": 0,
176
+ "B-SECONDARYADDRESS": 76,
177
+ "B-SEX": 93,
178
+ "B-SEXTYPE": 114,
179
+ "B-SSN": 78,
180
+ "B-STATE": 59,
181
+ "B-STREET": 94,
182
+ "B-STREETADDRESS": 36,
183
+ "B-SUFFIX": 50,
184
+ "B-TIME": 16,
185
+ "B-URL": 20,
186
+ "B-USERAGENT": 89,
187
+ "B-USERNAME": 42,
188
+ "B-VEHICLEVIN": 48,
189
+ "B-VEHICLEVRM": 62,
190
+ "B-ZIPCODE": 40,
191
+ "I-ACCOUNTNAME": 27,
192
+ "I-ACCOUNTNUMBER": 31,
193
+ "I-AMOUNT": 53,
194
+ "I-BIC": 33,
195
+ "I-BITCOINADDRESS": 23,
196
+ "I-BUILDINGNUMBER": 61,
197
+ "I-CITY": 39,
198
+ "I-COMPANY_NAME": 12,
199
+ "I-CREDITCARDCVV": 88,
200
+ "I-CREDITCARDISSUER": 86,
201
+ "I-CREDITCARDNUMBER": 47,
202
+ "I-CURRENCY": 55,
203
+ "I-CURRENCYCODE": 100,
204
+ "I-CURRENCYNAME": 70,
205
+ "I-CURRENCYSYMBOL": 101,
206
+ "I-DATE": 19,
207
+ "I-DISPLAYNAME": 113,
208
+ "I-EMAIL": 15,
209
+ "I-ETHEREUMADDRESS": 25,
210
+ "I-FIRSTNAME": 3,
211
+ "I-FULLNAME": 103,
212
+ "I-GENDER": 109,
213
+ "I-IBAN": 29,
214
+ "I-IP": 92,
215
+ "I-IPV4": 35,
216
+ "I-IPV6": 45,
217
+ "I-JOBAREA": 66,
218
+ "I-JOBTITLE": 10,
219
+ "I-JOBTYPE": 98,
220
+ "I-LASTNAME": 6,
221
+ "I-LITECOINADDRESS": 82,
222
+ "I-MAC": 84,
223
+ "I-MASKEDNUMBER": 73,
224
+ "I-MIDDLENAME": 99,
225
+ "I-NAME": 105,
226
+ "I-NEARBYGPSCOORDINATE": 111,
227
+ "I-NUMBER": 108,
228
+ "I-PASSWORD": 57,
229
+ "I-PHONEIMEI": 65,
230
+ "I-PHONE_NUMBER": 75,
231
+ "I-PIN": 97,
232
+ "I-PREFIX": 1,
233
+ "I-SECONDARYADDRESS": 77,
234
+ "I-SSN": 79,
235
+ "I-STATE": 67,
236
+ "I-STREET": 95,
237
+ "I-STREETADDRESS": 37,
238
+ "I-SUFFIX": 51,
239
+ "I-TIME": 17,
240
+ "I-URL": 21,
241
+ "I-USERAGENT": 90,
242
+ "I-USERNAME": 43,
243
+ "I-VEHICLEVIN": 49,
244
+ "I-VEHICLEVRM": 63,
245
+ "I-ZIPCODE": 41,
246
+ "O": 7
247
+ },
248
+ "layer_norm_eps": 1e-07,
249
+ "max_position_embeddings": 512,
250
+ "max_relative_positions": -1,
251
+ "model_type": "deberta",
252
+ "num_attention_heads": 12,
253
+ "num_hidden_layers": 12,
254
+ "pad_token_id": 0,
255
+ "pooler_dropout": 0,
256
+ "pooler_hidden_act": "gelu",
257
+ "pooler_hidden_size": 768,
258
+ "pos_att_type": [
259
+ "c2p",
260
+ "p2c"
261
+ ],
262
+ "position_biased_input": false,
263
+ "relative_attention": true,
264
+ "torch_dtype": "float32",
265
+ "transformers_version": "4.34.0",
266
+ "type_vocab_size": 0,
267
+ "vocab_size": 50265
268
+ }
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:549cfaf98d70c5d1b9228fde16d7f5049e4b6a75915460ced13aab0217da656e
3
+ size 1109692613
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6539218f24a2ebcaf67665049f657d0ee61afb44b856588a0efe3ca8c33a8cf2
3
+ size 554831781
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463ca5ebc7cb3d40f145ee439ba1e616e46262e1cf05052699c423b2150a17cd
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257d4e5b87ed6c145ab2d02ea57f1cb8736e2bb139099f5b69120642f82b0a20
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": true,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "[PAD]",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "[CLS]",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "[SEP]",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "[UNK]",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "50264": {
38
+ "content": "[MASK]",
39
+ "lstrip": true,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ }
45
+ },
46
+ "additional_special_tokens": [],
47
+ "bos_token": "[CLS]",
48
+ "clean_up_tokenization_spaces": true,
49
+ "cls_token": "[CLS]",
50
+ "do_lower_case": false,
51
+ "eos_token": "[SEP]",
52
+ "errors": "replace",
53
+ "mask_token": "[MASK]",
54
+ "model_max_length": 512,
55
+ "pad_token": "[PAD]",
56
+ "sep_token": "[SEP]",
57
+ "tokenizer_class": "DebertaTokenizer",
58
+ "unk_token": "[UNK]",
59
+ "vocab_type": "gpt2"
60
+ }
trainer_state.json ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.008737307973206043,
3
+ "best_model_checkpoint": "drive/MyDrive/colabFiles/safellm/aiPII/model/deberta_finetuned/checkpoint-9359",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 9359,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.37,
13
+ "learning_rate": 1.3354700854700855e-05,
14
+ "loss": 1.5328,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.75,
19
+ "learning_rate": 2.670940170940171e-05,
20
+ "loss": 0.3157,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_ACCOUNTNAME_f1": 0.8498583569405098,
26
+ "eval_ACCOUNTNUMBER_f1": 0.7777777777777778,
27
+ "eval_AMOUNT_f1": 0.7830423940149626,
28
+ "eval_BIC_f1": 0.10999999999999999,
29
+ "eval_BITCOINADDRESS_f1": 0.0489795918367347,
30
+ "eval_BUILDINGNUMBER_f1": 0.2996941896024465,
31
+ "eval_CITY_f1": 0.9399141630901287,
32
+ "eval_COMPANY_NAME_f1": 0.3222222222222222,
33
+ "eval_COUNTY_f1": 0.9363207547169812,
34
+ "eval_CREDITCARDCVV_f1": 0.0,
35
+ "eval_CREDITCARDISSUER_f1": 0.0916030534351145,
36
+ "eval_CREDITCARDNUMBER_f1": 0.6247379454926625,
37
+ "eval_CURRENCYCODE_f1": 0.0,
38
+ "eval_CURRENCYNAME_f1": 0.008658008658008658,
39
+ "eval_CURRENCYSYMBOL_f1": 0.0,
40
+ "eval_CURRENCY_f1": 0.4955156950672646,
41
+ "eval_DATE_f1": 0.8909952606635071,
42
+ "eval_DISPLAYNAME_f1": 0.0,
43
+ "eval_EMAIL_f1": 0.9967564060979566,
44
+ "eval_ETHEREUMADDRESS_f1": 0.7978142076502732,
45
+ "eval_FIRSTNAME_f1": 0.7487401007919366,
46
+ "eval_FULLNAME_f1": 0.9725754176015956,
47
+ "eval_GENDER_f1": 0.5737704918032787,
48
+ "eval_IBAN_f1": 0.5320197044334977,
49
+ "eval_IPV4_f1": 0.04864864864864866,
50
+ "eval_IPV6_f1": 0.6883720930232559,
51
+ "eval_IP_f1": 0.004514672686230249,
52
+ "eval_JOBAREA_f1": 0.8751714677640604,
53
+ "eval_JOBDESCRIPTOR_f1": 0.08,
54
+ "eval_JOBTITLE_f1": 0.7678571428571428,
55
+ "eval_JOBTYPE_f1": 0.18320610687022904,
56
+ "eval_LASTNAME_f1": 0.3830787309048179,
57
+ "eval_LITECOINADDRESS_f1": 0.03690036900369004,
58
+ "eval_MAC_f1": 0.588235294117647,
59
+ "eval_MASKEDNUMBER_f1": 0.0,
60
+ "eval_MIDDLENAME_f1": 0.0,
61
+ "eval_NAME_f1": 0.9682713347921226,
62
+ "eval_NEARBYGPSCOORDINATE_f1": 0.0,
63
+ "eval_NUMBER_f1": 0.0,
64
+ "eval_PASSWORD_f1": 0.6082289803220037,
65
+ "eval_PHONEIMEI_f1": 0.911242603550296,
66
+ "eval_PHONE_NUMBER_f1": 0.42622950819672123,
67
+ "eval_PIN_f1": 0.014184397163120567,
68
+ "eval_PREFIX_f1": 0.8070175438596491,
69
+ "eval_SECONDARYADDRESS_f1": 0.7699530516431925,
70
+ "eval_SEXTYPE_f1": 0.0,
71
+ "eval_SEX_f1": 0.8055555555555556,
72
+ "eval_SSN_f1": 0.013605442176870748,
73
+ "eval_STATE_f1": 0.9252669039145908,
74
+ "eval_STREETADDRESS_f1": 0.7314578005115089,
75
+ "eval_STREET_f1": 0.38073394495412843,
76
+ "eval_SUFFIX_f1": 0.0,
77
+ "eval_TIME_f1": 0.9922480620155039,
78
+ "eval_URL_f1": 0.9927431059506532,
79
+ "eval_USERAGENT_f1": 0.9321266968325792,
80
+ "eval_USERNAME_f1": 0.7885597548518897,
81
+ "eval_VEHICLEVIN_f1": 0.0,
82
+ "eval_VEHICLEVRM_f1": 0.2533333333333333,
83
+ "eval_ZIPCODE_f1": 0.7258064516129034,
84
+ "eval_loss": 0.14694246649742126,
85
+ "eval_overall_accuracy": 0.9484817248102932,
86
+ "eval_overall_f1": 0.8294381247845569,
87
+ "eval_overall_precision": 0.8040499899752723,
88
+ "eval_overall_recall": 0.8564818110628604,
89
+ "eval_runtime": 90.4464,
90
+ "eval_samples_per_second": 118.225,
91
+ "eval_steps_per_second": 14.782,
92
+ "step": 1337
93
+ },
94
+ {
95
+ "epoch": 1.12,
96
+ "learning_rate": 4.006410256410257e-05,
97
+ "loss": 0.2134,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 1.5,
102
+ "learning_rate": 4.914518498731134e-05,
103
+ "loss": 0.1542,
104
+ "step": 2000
105
+ },
106
+ {
107
+ "epoch": 1.87,
108
+ "learning_rate": 4.580606384399626e-05,
109
+ "loss": 0.1231,
110
+ "step": 2500
111
+ },
112
+ {
113
+ "epoch": 2.0,
114
+ "eval_ACCOUNTNAME_f1": 0.9712460063897763,
115
+ "eval_ACCOUNTNUMBER_f1": 0.9777777777777777,
116
+ "eval_AMOUNT_f1": 0.9312169312169312,
117
+ "eval_BIC_f1": 0.8363636363636363,
118
+ "eval_BITCOINADDRESS_f1": 0.7272727272727273,
119
+ "eval_BUILDINGNUMBER_f1": 0.6622950819672131,
120
+ "eval_CITY_f1": 0.968736239542052,
121
+ "eval_COMPANY_NAME_f1": 0.874074074074074,
122
+ "eval_COUNTY_f1": 0.994413407821229,
123
+ "eval_CREDITCARDCVV_f1": 0.8474576271186439,
124
+ "eval_CREDITCARDISSUER_f1": 0.7811158798283261,
125
+ "eval_CREDITCARDNUMBER_f1": 0.09195402298850575,
126
+ "eval_CURRENCYCODE_f1": 0.564102564102564,
127
+ "eval_CURRENCYNAME_f1": 0.24761904761904763,
128
+ "eval_CURRENCYSYMBOL_f1": 0.5306122448979592,
129
+ "eval_CURRENCY_f1": 0.6920152091254752,
130
+ "eval_DATE_f1": 0.9928057553956835,
131
+ "eval_DISPLAYNAME_f1": 0.0,
132
+ "eval_EMAIL_f1": 0.9993498049414824,
133
+ "eval_ETHEREUMADDRESS_f1": 0.9528795811518325,
134
+ "eval_FIRSTNAME_f1": 0.7688584106494555,
135
+ "eval_FULLNAME_f1": 0.9868637110016422,
136
+ "eval_GENDER_f1": 0.9075630252100839,
137
+ "eval_IBAN_f1": 0.9468599033816426,
138
+ "eval_IPV4_f1": 0.84375,
139
+ "eval_IPV6_f1": 0.7846153846153846,
140
+ "eval_IP_f1": 0.0,
141
+ "eval_JOBAREA_f1": 0.9410050983248361,
142
+ "eval_JOBDESCRIPTOR_f1": 0.5128205128205128,
143
+ "eval_JOBTITLE_f1": 0.9441233140655106,
144
+ "eval_JOBTYPE_f1": 0.7578125000000001,
145
+ "eval_LASTNAME_f1": 0.5283422459893049,
146
+ "eval_LITECOINADDRESS_f1": 0.7647058823529412,
147
+ "eval_MAC_f1": 0.9702970297029703,
148
+ "eval_MASKEDNUMBER_f1": 0.3492063492063492,
149
+ "eval_MIDDLENAME_f1": 0.3691460055096419,
150
+ "eval_NAME_f1": 0.988691888873377,
151
+ "eval_NUMBER_f1": 0.47393364928909953,
152
+ "eval_PASSWORD_f1": 0.9886363636363638,
153
+ "eval_PHONEIMEI_f1": 0.9629629629629629,
154
+ "eval_PHONE_NUMBER_f1": 0.8366533864541832,
155
+ "eval_PIN_f1": 0.7967479674796748,
156
+ "eval_PREFIX_f1": 0.8198757763975155,
157
+ "eval_SECONDARYADDRESS_f1": 0.9265536723163842,
158
+ "eval_SEXTYPE_f1": 0.0,
159
+ "eval_SEX_f1": 0.8941176470588235,
160
+ "eval_SSN_f1": 0.7486033519553071,
161
+ "eval_STATE_f1": 0.9692487349163099,
162
+ "eval_STREETADDRESS_f1": 0.8785310734463277,
163
+ "eval_STREET_f1": 0.5085714285714286,
164
+ "eval_SUFFIX_f1": 0.5306122448979592,
165
+ "eval_TIME_f1": 0.942528735632184,
166
+ "eval_URL_f1": 0.9738372093023255,
167
+ "eval_USERAGENT_f1": 0.912442396313364,
168
+ "eval_USERNAME_f1": 0.9172625127681308,
169
+ "eval_VEHICLEVIN_f1": 0.8368794326241135,
170
+ "eval_VEHICLEVRM_f1": 0.8051948051948051,
171
+ "eval_ZIPCODE_f1": 0.8036117381489842,
172
+ "eval_loss": 0.08253061771392822,
173
+ "eval_overall_accuracy": 0.9728585799624934,
174
+ "eval_overall_f1": 0.9135649808334365,
175
+ "eval_overall_precision": 0.9068209714185517,
176
+ "eval_overall_recall": 0.9204100519683919,
177
+ "eval_runtime": 83.6232,
178
+ "eval_samples_per_second": 127.871,
179
+ "eval_steps_per_second": 15.988,
180
+ "step": 2674
181
+ },
182
+ {
183
+ "epoch": 2.24,
184
+ "learning_rate": 4.2466942700681185e-05,
185
+ "loss": 0.099,
186
+ "step": 3000
187
+ },
188
+ {
189
+ "epoch": 2.62,
190
+ "learning_rate": 3.91278215573661e-05,
191
+ "loss": 0.0815,
192
+ "step": 3500
193
+ },
194
+ {
195
+ "epoch": 2.99,
196
+ "learning_rate": 3.578870041405102e-05,
197
+ "loss": 0.0745,
198
+ "step": 4000
199
+ },
200
+ {
201
+ "epoch": 3.0,
202
+ "eval_ACCOUNTNAME_f1": 0.9967426710097721,
203
+ "eval_ACCOUNTNUMBER_f1": 0.9821428571428572,
204
+ "eval_AMOUNT_f1": 0.957983193277311,
205
+ "eval_BIC_f1": 0.9054054054054054,
206
+ "eval_BITCOINADDRESS_f1": 0.8984375,
207
+ "eval_BUILDINGNUMBER_f1": 0.7807807807807808,
208
+ "eval_CITY_f1": 0.9926682959342368,
209
+ "eval_COMPANY_NAME_f1": 0.9468354430379746,
210
+ "eval_COUNTY_f1": 0.9977728285077951,
211
+ "eval_CREDITCARDCVV_f1": 0.912,
212
+ "eval_CREDITCARDISSUER_f1": 0.9665071770334929,
213
+ "eval_CREDITCARDNUMBER_f1": 0.9044585987261147,
214
+ "eval_CURRENCYCODE_f1": 0.9079754601226994,
215
+ "eval_CURRENCYNAME_f1": 0.25837320574162675,
216
+ "eval_CURRENCYSYMBOL_f1": 0.762589928057554,
217
+ "eval_CURRENCY_f1": 0.8146341463414634,
218
+ "eval_DATE_f1": 0.9976019184652278,
219
+ "eval_DISPLAYNAME_f1": 0.5544554455445545,
220
+ "eval_EMAIL_f1": 1.0,
221
+ "eval_ETHEREUMADDRESS_f1": 0.9837837837837838,
222
+ "eval_FIRSTNAME_f1": 0.8647106764466178,
223
+ "eval_FULLNAME_f1": 0.9867741529159844,
224
+ "eval_GENDER_f1": 0.9914529914529915,
225
+ "eval_IBAN_f1": 0.9751243781094527,
226
+ "eval_IPV4_f1": 0.852017937219731,
227
+ "eval_IPV6_f1": 0.44791666666666663,
228
+ "eval_IP_f1": 0.0,
229
+ "eval_JOBAREA_f1": 0.9867963863794301,
230
+ "eval_JOBDESCRIPTOR_f1": 0.8383838383838385,
231
+ "eval_JOBTITLE_f1": 0.9901380670611439,
232
+ "eval_JOBTYPE_f1": 0.7729468599033816,
233
+ "eval_LASTNAME_f1": 0.6699147381242387,
234
+ "eval_LITECOINADDRESS_f1": 0.84472049689441,
235
+ "eval_MAC_f1": 0.9565217391304348,
236
+ "eval_MASKEDNUMBER_f1": 0.8143712574850299,
237
+ "eval_MIDDLENAME_f1": 0.702355460385439,
238
+ "eval_NAME_f1": 0.9959355290819901,
239
+ "eval_NUMBER_f1": 0.7282051282051282,
240
+ "eval_PASSWORD_f1": 0.9725274725274725,
241
+ "eval_PHONEIMEI_f1": 0.963855421686747,
242
+ "eval_PHONE_NUMBER_f1": 0.8472222222222221,
243
+ "eval_PIN_f1": 0.8648648648648648,
244
+ "eval_PREFIX_f1": 0.5957446808510638,
245
+ "eval_SECONDARYADDRESS_f1": 0.9883720930232558,
246
+ "eval_SEXTYPE_f1": 0.8571428571428571,
247
+ "eval_SEX_f1": 0.945945945945946,
248
+ "eval_SSN_f1": 0.8701298701298701,
249
+ "eval_STATE_f1": 0.9960222752585521,
250
+ "eval_STREETADDRESS_f1": 0.904899135446686,
251
+ "eval_STREET_f1": 0.7334963325183375,
252
+ "eval_SUFFIX_f1": 0.7956989247311829,
253
+ "eval_TIME_f1": 0.9883268482490272,
254
+ "eval_URL_f1": 0.9941860465116279,
255
+ "eval_USERAGENT_f1": 0.9724770642201835,
256
+ "eval_USERNAME_f1": 0.9451476793248945,
257
+ "eval_VEHICLEVIN_f1": 0.9635036496350364,
258
+ "eval_VEHICLEVRM_f1": 0.9261744966442953,
259
+ "eval_ZIPCODE_f1": 0.9254498714652957,
260
+ "eval_loss": 0.04885776713490486,
261
+ "eval_overall_accuracy": 0.9795129099963984,
262
+ "eval_overall_f1": 0.9467927822013217,
263
+ "eval_overall_precision": 0.9476198965947584,
264
+ "eval_overall_recall": 0.9459671104150352,
265
+ "eval_runtime": 85.058,
266
+ "eval_samples_per_second": 125.714,
267
+ "eval_steps_per_second": 15.719,
268
+ "step": 4011
269
+ },
270
+ {
271
+ "epoch": 3.37,
272
+ "learning_rate": 3.244957927073594e-05,
273
+ "loss": 0.0544,
274
+ "step": 4500
275
+ },
276
+ {
277
+ "epoch": 3.74,
278
+ "learning_rate": 2.9110458127420864e-05,
279
+ "loss": 0.0511,
280
+ "step": 5000
281
+ },
282
+ {
283
+ "epoch": 4.0,
284
+ "eval_ACCOUNTNAME_f1": 0.9901639344262295,
285
+ "eval_ACCOUNTNUMBER_f1": 1.0,
286
+ "eval_AMOUNT_f1": 0.24609374999999997,
287
+ "eval_BIC_f1": 0.9594594594594594,
288
+ "eval_BITCOINADDRESS_f1": 0.8837209302325583,
289
+ "eval_BUILDINGNUMBER_f1": 0.8328445747800587,
290
+ "eval_CITY_f1": 0.9973214285714286,
291
+ "eval_COMPANY_NAME_f1": 0.9815303430079155,
292
+ "eval_COUNTY_f1": 1.0,
293
+ "eval_CREDITCARDCVV_f1": 0.991869918699187,
294
+ "eval_CREDITCARDISSUER_f1": 0.9439252336448598,
295
+ "eval_CREDITCARDNUMBER_f1": 0.977198697068404,
296
+ "eval_CURRENCYCODE_f1": 0.9333333333333333,
297
+ "eval_CURRENCYNAME_f1": 0.7027027027027026,
298
+ "eval_CURRENCYSYMBOL_f1": 0.5191489361702128,
299
+ "eval_CURRENCY_f1": 0.8871181938911021,
300
+ "eval_DATE_f1": 0.9976019184652278,
301
+ "eval_DISPLAYNAME_f1": 0.6153846153846154,
302
+ "eval_EMAIL_f1": 1.0,
303
+ "eval_ETHEREUMADDRESS_f1": 0.9726775956284153,
304
+ "eval_FIRSTNAME_f1": 0.8918918918918919,
305
+ "eval_FULLNAME_f1": 0.9890151515151515,
306
+ "eval_GENDER_f1": 0.9827586206896551,
307
+ "eval_IBAN_f1": 0.9807692307692307,
308
+ "eval_IPV4_f1": 0.8565022421524664,
309
+ "eval_IPV6_f1": 0.851063829787234,
310
+ "eval_IP_f1": 0.031746031746031744,
311
+ "eval_JOBAREA_f1": 0.9901823281907434,
312
+ "eval_JOBDESCRIPTOR_f1": 0.9781659388646288,
313
+ "eval_JOBTITLE_f1": 0.9980119284294234,
314
+ "eval_JOBTYPE_f1": 0.9831932773109243,
315
+ "eval_LASTNAME_f1": 0.8038507821901323,
316
+ "eval_LITECOINADDRESS_f1": 0.782122905027933,
317
+ "eval_MAC_f1": 0.9753694581280787,
318
+ "eval_MASKEDNUMBER_f1": 0.9554140127388535,
319
+ "eval_MIDDLENAME_f1": 0.8237232289950577,
320
+ "eval_NAME_f1": 0.9990197451337348,
321
+ "eval_NUMBER_f1": 0.9807692307692307,
322
+ "eval_PASSWORD_f1": 1.0,
323
+ "eval_PHONEIMEI_f1": 1.0,
324
+ "eval_PHONE_NUMBER_f1": 0.9880478087649401,
325
+ "eval_PIN_f1": 0.8679245283018869,
326
+ "eval_PREFIX_f1": 0.920152091254753,
327
+ "eval_SECONDARYADDRESS_f1": 0.9826589595375722,
328
+ "eval_SEXTYPE_f1": 0.8571428571428571,
329
+ "eval_SEX_f1": 0.9673202614379085,
330
+ "eval_SSN_f1": 0.987012987012987,
331
+ "eval_STATE_f1": 0.9948186528497409,
332
+ "eval_STREETADDRESS_f1": 0.911208151382824,
333
+ "eval_STREET_f1": 0.7952941176470588,
334
+ "eval_SUFFIX_f1": 0.9655172413793104,
335
+ "eval_TIME_f1": 0.9921875,
336
+ "eval_URL_f1": 1.0,
337
+ "eval_USERAGENT_f1": 1.0,
338
+ "eval_USERNAME_f1": 0.9560557341907825,
339
+ "eval_VEHICLEVIN_f1": 0.962962962962963,
340
+ "eval_VEHICLEVRM_f1": 0.9784172661870504,
341
+ "eval_ZIPCODE_f1": 0.9973045822102425,
342
+ "eval_loss": 0.033664677292108536,
343
+ "eval_overall_accuracy": 0.9870067934276382,
344
+ "eval_overall_f1": 0.9611536824180503,
345
+ "eval_overall_precision": 0.9579591259458313,
346
+ "eval_overall_recall": 0.9643696162881754,
347
+ "eval_runtime": 88.9305,
348
+ "eval_samples_per_second": 120.24,
349
+ "eval_steps_per_second": 15.034,
350
+ "step": 5348
351
+ },
352
+ {
353
+ "epoch": 4.11,
354
+ "learning_rate": 2.577133698410579e-05,
355
+ "loss": 0.0491,
356
+ "step": 5500
357
+ },
358
+ {
359
+ "epoch": 4.49,
360
+ "learning_rate": 2.2432215840790704e-05,
361
+ "loss": 0.0349,
362
+ "step": 6000
363
+ },
364
+ {
365
+ "epoch": 4.86,
366
+ "learning_rate": 1.9093094697475625e-05,
367
+ "loss": 0.0324,
368
+ "step": 6500
369
+ },
370
+ {
371
+ "epoch": 5.0,
372
+ "eval_ACCOUNTNAME_f1": 0.9967426710097721,
373
+ "eval_ACCOUNTNUMBER_f1": 1.0,
374
+ "eval_AMOUNT_f1": 1.0,
375
+ "eval_BIC_f1": 1.0,
376
+ "eval_BITCOINADDRESS_f1": 0.982905982905983,
377
+ "eval_BUILDINGNUMBER_f1": 0.959349593495935,
378
+ "eval_CITY_f1": 0.9951197870452528,
379
+ "eval_COMPANY_NAME_f1": 0.9948186528497409,
380
+ "eval_COUNTY_f1": 1.0,
381
+ "eval_CREDITCARDCVV_f1": 1.0,
382
+ "eval_CREDITCARDISSUER_f1": 1.0,
383
+ "eval_CREDITCARDNUMBER_f1": 0.9803921568627451,
384
+ "eval_CURRENCYCODE_f1": 0.9473684210526315,
385
+ "eval_CURRENCYNAME_f1": 0.8990825688073395,
386
+ "eval_CURRENCYSYMBOL_f1": 0.9051094890510949,
387
+ "eval_CURRENCY_f1": 0.9527896995708155,
388
+ "eval_DATE_f1": 1.0,
389
+ "eval_DISPLAYNAME_f1": 0.7719298245614035,
390
+ "eval_EMAIL_f1": 0.9990247074122237,
391
+ "eval_ETHEREUMADDRESS_f1": 1.0,
392
+ "eval_FIRSTNAME_f1": 0.9494071146245059,
393
+ "eval_FULLNAME_f1": 0.9937848807711821,
394
+ "eval_GENDER_f1": 1.0,
395
+ "eval_IBAN_f1": 1.0,
396
+ "eval_IPV4_f1": 0.8397291196388262,
397
+ "eval_IPV6_f1": 0.8426666666666667,
398
+ "eval_IP_f1": 0.05194805194805195,
399
+ "eval_JOBAREA_f1": 0.9965059399021663,
400
+ "eval_JOBDESCRIPTOR_f1": 0.9736842105263158,
401
+ "eval_JOBTITLE_f1": 0.9960159362549801,
402
+ "eval_JOBTYPE_f1": 0.9957805907172996,
403
+ "eval_LASTNAME_f1": 0.8947368421052632,
404
+ "eval_LITECOINADDRESS_f1": 0.918918918918919,
405
+ "eval_MAC_f1": 0.9900990099009901,
406
+ "eval_MASKEDNUMBER_f1": 0.9620253164556961,
407
+ "eval_MIDDLENAME_f1": 0.9489603024574669,
408
+ "eval_NAME_f1": 0.9994396189408798,
409
+ "eval_NUMBER_f1": 0.9951690821256038,
410
+ "eval_PASSWORD_f1": 0.9752066115702479,
411
+ "eval_PHONEIMEI_f1": 0.9937888198757764,
412
+ "eval_PHONE_NUMBER_f1": 0.9960159362549801,
413
+ "eval_PIN_f1": 0.9906542056074767,
414
+ "eval_PREFIX_f1": 0.956989247311828,
415
+ "eval_SECONDARYADDRESS_f1": 0.9883720930232558,
416
+ "eval_SEXTYPE_f1": 0.8888888888888888,
417
+ "eval_SEX_f1": 0.9871794871794872,
418
+ "eval_SSN_f1": 1.0,
419
+ "eval_STATE_f1": 0.9988023952095808,
420
+ "eval_STREETADDRESS_f1": 0.9765258215962442,
421
+ "eval_STREET_f1": 0.9037656903765691,
422
+ "eval_SUFFIX_f1": 0.970873786407767,
423
+ "eval_TIME_f1": 0.9961089494163424,
424
+ "eval_URL_f1": 1.0,
425
+ "eval_USERAGENT_f1": 0.9908256880733946,
426
+ "eval_USERNAME_f1": 0.9837133550488599,
427
+ "eval_VEHICLEVIN_f1": 1.0,
428
+ "eval_VEHICLEVRM_f1": 1.0,
429
+ "eval_ZIPCODE_f1": 0.9973045822102425,
430
+ "eval_loss": 0.022747300565242767,
431
+ "eval_overall_accuracy": 0.9911275599547933,
432
+ "eval_overall_f1": 0.9828670955817012,
433
+ "eval_overall_precision": 0.9815064603152066,
434
+ "eval_overall_recall": 0.9842315085071546,
435
+ "eval_runtime": 84.4935,
436
+ "eval_samples_per_second": 126.554,
437
+ "eval_steps_per_second": 15.824,
438
+ "step": 6685
439
+ },
440
+ {
441
+ "epoch": 5.24,
442
+ "learning_rate": 1.5753973554160546e-05,
443
+ "loss": 0.0297,
444
+ "step": 7000
445
+ },
446
+ {
447
+ "epoch": 5.61,
448
+ "learning_rate": 1.2414852410845466e-05,
449
+ "loss": 0.0283,
450
+ "step": 7500
451
+ },
452
+ {
453
+ "epoch": 5.98,
454
+ "learning_rate": 9.075731267530386e-06,
455
+ "loss": 0.0211,
456
+ "step": 8000
457
+ },
458
+ {
459
+ "epoch": 6.0,
460
+ "eval_ACCOUNTNAME_f1": 1.0,
461
+ "eval_ACCOUNTNUMBER_f1": 1.0,
462
+ "eval_AMOUNT_f1": 1.0,
463
+ "eval_BIC_f1": 1.0,
464
+ "eval_BITCOINADDRESS_f1": 1.0,
465
+ "eval_BUILDINGNUMBER_f1": 0.9973045822102425,
466
+ "eval_CITY_f1": 0.9988856697125027,
467
+ "eval_COMPANY_NAME_f1": 1.0,
468
+ "eval_COUNTY_f1": 1.0,
469
+ "eval_CREDITCARDCVV_f1": 1.0,
470
+ "eval_CREDITCARDISSUER_f1": 1.0,
471
+ "eval_CREDITCARDNUMBER_f1": 0.9803921568627451,
472
+ "eval_CURRENCYCODE_f1": 0.9878048780487805,
473
+ "eval_CURRENCYNAME_f1": 0.9207317073170731,
474
+ "eval_CURRENCYSYMBOL_f1": 0.9855072463768116,
475
+ "eval_CURRENCY_f1": 0.972818311874106,
476
+ "eval_DATE_f1": 1.0,
477
+ "eval_DISPLAYNAME_f1": 0.9291338582677166,
478
+ "eval_EMAIL_f1": 1.0,
479
+ "eval_ETHEREUMADDRESS_f1": 1.0,
480
+ "eval_FIRSTNAME_f1": 0.9910261412407336,
481
+ "eval_FULLNAME_f1": 0.9982799261005288,
482
+ "eval_GENDER_f1": 1.0,
483
+ "eval_IBAN_f1": 0.9901960784313726,
484
+ "eval_IPV4_f1": 0.8558352402745996,
485
+ "eval_IPV6_f1": 0.8465608465608466,
486
+ "eval_IP_f1": 0.14117647058823532,
487
+ "eval_JOBAREA_f1": 0.9985994397759104,
488
+ "eval_JOBDESCRIPTOR_f1": 1.0,
489
+ "eval_JOBTITLE_f1": 1.0,
490
+ "eval_JOBTYPE_f1": 0.9957805907172996,
491
+ "eval_LASTNAME_f1": 0.9753483386923902,
492
+ "eval_LITECOINADDRESS_f1": 0.9943502824858756,
493
+ "eval_MAC_f1": 1.0,
494
+ "eval_MASKEDNUMBER_f1": 0.9625,
495
+ "eval_MIDDLENAME_f1": 0.9834254143646408,
496
+ "eval_NAME_f1": 0.9994396189408798,
497
+ "eval_NUMBER_f1": 1.0,
498
+ "eval_PASSWORD_f1": 0.9971830985915493,
499
+ "eval_PHONEIMEI_f1": 1.0,
500
+ "eval_PHONE_NUMBER_f1": 1.0,
501
+ "eval_PIN_f1": 1.0,
502
+ "eval_PREFIX_f1": 0.9757009345794393,
503
+ "eval_SECONDARYADDRESS_f1": 1.0,
504
+ "eval_SEXTYPE_f1": 0.9473684210526316,
505
+ "eval_SEX_f1": 0.9871794871794872,
506
+ "eval_SSN_f1": 1.0,
507
+ "eval_STATE_f1": 0.9988023952095808,
508
+ "eval_STREETADDRESS_f1": 0.9984152139461173,
509
+ "eval_STREET_f1": 0.9878048780487805,
510
+ "eval_SUFFIX_f1": 0.9855072463768114,
511
+ "eval_TIME_f1": 1.0,
512
+ "eval_URL_f1": 1.0,
513
+ "eval_USERAGENT_f1": 1.0,
514
+ "eval_USERNAME_f1": 0.9944873208379272,
515
+ "eval_VEHICLEVIN_f1": 1.0,
516
+ "eval_VEHICLEVRM_f1": 1.0,
517
+ "eval_ZIPCODE_f1": 1.0,
518
+ "eval_loss": 0.01404440775513649,
519
+ "eval_overall_accuracy": 0.992913473838473,
520
+ "eval_overall_f1": 0.9916438502293495,
521
+ "eval_overall_precision": 0.9906223358908781,
522
+ "eval_overall_recall": 0.9926674734818822,
523
+ "eval_runtime": 88.6108,
524
+ "eval_samples_per_second": 120.674,
525
+ "eval_steps_per_second": 15.088,
526
+ "step": 8022
527
+ },
528
+ {
529
+ "epoch": 6.36,
530
+ "learning_rate": 5.736610124215307e-06,
531
+ "loss": 0.0147,
532
+ "step": 8500
533
+ },
534
+ {
535
+ "epoch": 6.73,
536
+ "learning_rate": 2.3974889809002272e-06,
537
+ "loss": 0.0141,
538
+ "step": 9000
539
+ },
540
+ {
541
+ "epoch": 7.0,
542
+ "eval_ACCOUNTNAME_f1": 1.0,
543
+ "eval_ACCOUNTNUMBER_f1": 1.0,
544
+ "eval_AMOUNT_f1": 1.0,
545
+ "eval_BIC_f1": 1.0,
546
+ "eval_BITCOINADDRESS_f1": 1.0,
547
+ "eval_BUILDINGNUMBER_f1": 1.0,
548
+ "eval_CITY_f1": 0.9997771339425006,
549
+ "eval_COMPANY_NAME_f1": 1.0,
550
+ "eval_COUNTY_f1": 1.0,
551
+ "eval_CREDITCARDCVV_f1": 1.0,
552
+ "eval_CREDITCARDISSUER_f1": 1.0,
553
+ "eval_CREDITCARDNUMBER_f1": 0.977198697068404,
554
+ "eval_CURRENCYCODE_f1": 0.9879518072289156,
555
+ "eval_CURRENCYNAME_f1": 0.9970674486803519,
556
+ "eval_CURRENCYSYMBOL_f1": 0.9855072463768116,
557
+ "eval_CURRENCY_f1": 1.0,
558
+ "eval_DATE_f1": 1.0,
559
+ "eval_DISPLAYNAME_f1": 0.9763779527559054,
560
+ "eval_EMAIL_f1": 1.0,
561
+ "eval_ETHEREUMADDRESS_f1": 1.0,
562
+ "eval_FIRSTNAME_f1": 0.9965102753005042,
563
+ "eval_FULLNAME_f1": 0.9994899260392757,
564
+ "eval_GENDER_f1": 1.0,
565
+ "eval_IBAN_f1": 0.9901960784313726,
566
+ "eval_IPV4_f1": 0.9113924050632911,
567
+ "eval_IPV6_f1": 0.8241758241758242,
568
+ "eval_IP_f1": 0.44285714285714284,
569
+ "eval_JOBAREA_f1": 0.9992992291520673,
570
+ "eval_JOBDESCRIPTOR_f1": 1.0,
571
+ "eval_JOBTITLE_f1": 1.0,
572
+ "eval_JOBTYPE_f1": 1.0,
573
+ "eval_LASTNAME_f1": 0.9870410367170626,
574
+ "eval_LITECOINADDRESS_f1": 0.9943502824858756,
575
+ "eval_MAC_f1": 0.7984189723320159,
576
+ "eval_MASKEDNUMBER_f1": 0.9629629629629629,
577
+ "eval_MIDDLENAME_f1": 0.9944954128440368,
578
+ "eval_NAME_f1": 1.0,
579
+ "eval_NUMBER_f1": 1.0,
580
+ "eval_PASSWORD_f1": 1.0,
581
+ "eval_PHONEIMEI_f1": 0.9875,
582
+ "eval_PHONE_NUMBER_f1": 1.0,
583
+ "eval_PIN_f1": 1.0,
584
+ "eval_PREFIX_f1": 0.9962686567164178,
585
+ "eval_SECONDARYADDRESS_f1": 1.0,
586
+ "eval_SEXTYPE_f1": 0.9473684210526316,
587
+ "eval_SEX_f1": 0.9935483870967742,
588
+ "eval_SSN_f1": 0.9935483870967742,
589
+ "eval_STATE_f1": 0.9996004794246904,
590
+ "eval_STREETADDRESS_f1": 1.0,
591
+ "eval_STREET_f1": 0.9939393939393938,
592
+ "eval_SUFFIX_f1": 0.9855072463768114,
593
+ "eval_TIME_f1": 1.0,
594
+ "eval_URL_f1": 1.0,
595
+ "eval_USERAGENT_f1": 1.0,
596
+ "eval_USERNAME_f1": 1.0,
597
+ "eval_VEHICLEVIN_f1": 1.0,
598
+ "eval_VEHICLEVRM_f1": 1.0,
599
+ "eval_ZIPCODE_f1": 1.0,
600
+ "eval_loss": 0.008737307973206043,
601
+ "eval_overall_accuracy": 0.9963114296004669,
602
+ "eval_overall_f1": 0.9932909729864046,
603
+ "eval_overall_precision": 0.9905834041348061,
604
+ "eval_overall_recall": 0.996013383640635,
605
+ "eval_runtime": 85.9696,
606
+ "eval_samples_per_second": 124.381,
607
+ "eval_steps_per_second": 15.552,
608
+ "step": 9359
609
+ }
610
+ ],
611
+ "logging_steps": 500,
612
+ "max_steps": 9359,
613
+ "num_train_epochs": 7,
614
+ "save_steps": 500,
615
+ "total_flos": 3293070338065680.0,
616
+ "trial_name": null,
617
+ "trial_params": null
618
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8e8c274d68a9ab2c8cd8253471b18e6035136643f0483c06a7e2df27f9fd3b
3
+ size 4155
vocab.json ADDED
The diff for this file is too large to render. See raw diff