PuxAI commited on
Commit
c8cb4c3
·
verified ·
1 Parent(s): 375b4e4

Upload folder using huggingface_hub

Browse files
nemotron-pii-ready/span-deberta-v3-base/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "O",
12
+ "1": "ACCOUNT_NUMBER",
13
+ "2": "AGE",
14
+ "3": "API_KEY",
15
+ "4": "BANK_ROUTING_NUMBER",
16
+ "5": "BIOMETRIC_IDENTIFIER",
17
+ "6": "BLOOD_TYPE",
18
+ "7": "CERTIFICATE_LICENSE_NUMBER",
19
+ "8": "CITY",
20
+ "9": "COMPANY_NAME",
21
+ "10": "COORDINATE",
22
+ "11": "COUNTRY",
23
+ "12": "COUNTY",
24
+ "13": "CREDIT_DEBIT_CARD",
25
+ "14": "CUSTOMER_ID",
26
+ "15": "CVV",
27
+ "16": "DATE",
28
+ "17": "DATE_OF_BIRTH",
29
+ "18": "DATE_TIME",
30
+ "19": "DEVICE_IDENTIFIER",
31
+ "20": "EDUCATION_LEVEL",
32
+ "21": "EMAIL",
33
+ "22": "EMPLOYEE_ID",
34
+ "23": "EMPLOYMENT_STATUS",
35
+ "24": "FAX_NUMBER",
36
+ "25": "FIRST_NAME",
37
+ "26": "GENDER",
38
+ "27": "HEALTH_PLAN_BENEFICIARY_NUMBER",
39
+ "28": "HTTP_COOKIE",
40
+ "29": "IPV4",
41
+ "30": "IPV6",
42
+ "31": "LANGUAGE",
43
+ "32": "LAST_NAME",
44
+ "33": "LICENSE_PLATE",
45
+ "34": "MAC_ADDRESS",
46
+ "35": "MEDICAL_RECORD_NUMBER",
47
+ "36": "NATIONAL_ID",
48
+ "37": "OCCUPATION",
49
+ "38": "PASSWORD",
50
+ "39": "PHONE_NUMBER",
51
+ "40": "PIN",
52
+ "41": "POLITICAL_VIEW",
53
+ "42": "POSTCODE",
54
+ "43": "RACE_ETHNICITY",
55
+ "44": "RELIGIOUS_BELIEF",
56
+ "45": "SEXUALITY",
57
+ "46": "SSN",
58
+ "47": "STATE",
59
+ "48": "STREET_ADDRESS",
60
+ "49": "SWIFT_BIC",
61
+ "50": "TAX_ID",
62
+ "51": "TIME",
63
+ "52": "UNIQUE_ID",
64
+ "53": "URL",
65
+ "54": "USER_NAME",
66
+ "55": "VEHICLE_IDENTIFIER"
67
+ },
68
+ "initializer_range": 0.02,
69
+ "intermediate_size": 3072,
70
+ "label2id": {
71
+ "ACCOUNT_NUMBER": 1,
72
+ "AGE": 2,
73
+ "API_KEY": 3,
74
+ "BANK_ROUTING_NUMBER": 4,
75
+ "BIOMETRIC_IDENTIFIER": 5,
76
+ "BLOOD_TYPE": 6,
77
+ "CERTIFICATE_LICENSE_NUMBER": 7,
78
+ "CITY": 8,
79
+ "COMPANY_NAME": 9,
80
+ "COORDINATE": 10,
81
+ "COUNTRY": 11,
82
+ "COUNTY": 12,
83
+ "CREDIT_DEBIT_CARD": 13,
84
+ "CUSTOMER_ID": 14,
85
+ "CVV": 15,
86
+ "DATE": 16,
87
+ "DATE_OF_BIRTH": 17,
88
+ "DATE_TIME": 18,
89
+ "DEVICE_IDENTIFIER": 19,
90
+ "EDUCATION_LEVEL": 20,
91
+ "EMAIL": 21,
92
+ "EMPLOYEE_ID": 22,
93
+ "EMPLOYMENT_STATUS": 23,
94
+ "FAX_NUMBER": 24,
95
+ "FIRST_NAME": 25,
96
+ "GENDER": 26,
97
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 27,
98
+ "HTTP_COOKIE": 28,
99
+ "IPV4": 29,
100
+ "IPV6": 30,
101
+ "LANGUAGE": 31,
102
+ "LAST_NAME": 32,
103
+ "LICENSE_PLATE": 33,
104
+ "MAC_ADDRESS": 34,
105
+ "MEDICAL_RECORD_NUMBER": 35,
106
+ "NATIONAL_ID": 36,
107
+ "O": 0,
108
+ "OCCUPATION": 37,
109
+ "PASSWORD": 38,
110
+ "PHONE_NUMBER": 39,
111
+ "PIN": 40,
112
+ "POLITICAL_VIEW": 41,
113
+ "POSTCODE": 42,
114
+ "RACE_ETHNICITY": 43,
115
+ "RELIGIOUS_BELIEF": 44,
116
+ "SEXUALITY": 45,
117
+ "SSN": 46,
118
+ "STATE": 47,
119
+ "STREET_ADDRESS": 48,
120
+ "SWIFT_BIC": 49,
121
+ "TAX_ID": 50,
122
+ "TIME": 51,
123
+ "UNIQUE_ID": 52,
124
+ "URL": 53,
125
+ "USER_NAME": 54,
126
+ "VEHICLE_IDENTIFIER": 55
127
+ },
128
+ "layer_norm_eps": 1e-07,
129
+ "legacy": true,
130
+ "max_position_embeddings": 512,
131
+ "max_relative_positions": -1,
132
+ "model_type": "deberta-v2",
133
+ "norm_rel_ebd": "layer_norm",
134
+ "num_attention_heads": 12,
135
+ "num_hidden_layers": 12,
136
+ "pad_token_id": 0,
137
+ "pooler_dropout": 0,
138
+ "pooler_hidden_act": "gelu",
139
+ "pooler_hidden_size": 768,
140
+ "pos_att_type": [
141
+ "p2c",
142
+ "c2p"
143
+ ],
144
+ "position_biased_input": false,
145
+ "position_buckets": 256,
146
+ "relative_attention": true,
147
+ "share_att_key": true,
148
+ "transformers_version": "4.57.6",
149
+ "type_vocab_size": 0,
150
+ "vocab_size": 128100
151
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a410300c58d20b77025c9933b43168fe29195f5de81fe538ace06117a066b78
3
+ size 741470520
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:235a2a075bcd484c5be7442e6ac03de06d6f04ef7df32682852638f6eaa489ad
3
+ size 1483064715
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ca38ddcbfba5eb58a9cbf1d6bbbdba5b81c8193ef50bcb65965e2471b47252
3
+ size 14645
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929230358a97cf9fafd3707e6205d5ec335b806f371024e4928ff8d1bc73acfe
3
+ size 1465
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[SEP]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[UNK]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "[CLS]",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "[CLS]",
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "[MASK]",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "sp_model_kwargs": {},
56
+ "split_by_punct": false,
57
+ "tokenizer_class": "DebertaV2Tokenizer",
58
+ "unk_token": "[UNK]",
59
+ "vocab_type": "spm"
60
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/trainer_state.json ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 50000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.01,
14
+ "grad_norm": 0.35511472821235657,
15
+ "learning_rate": 4.99e-07,
16
+ "loss": 0.0734,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.02,
21
+ "grad_norm": 0.04454495757818222,
22
+ "learning_rate": 9.99e-07,
23
+ "loss": 0.0185,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.03,
28
+ "grad_norm": 0.03567986190319061,
29
+ "learning_rate": 1.4990000000000002e-06,
30
+ "loss": 0.0102,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.04,
35
+ "grad_norm": 0.020181164145469666,
36
+ "learning_rate": 1.9990000000000003e-06,
37
+ "loss": 0.0076,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.05,
42
+ "grad_norm": 0.011542358435690403,
43
+ "learning_rate": 2.499e-06,
44
+ "loss": 0.0061,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.06,
49
+ "grad_norm": 0.017048083245754242,
50
+ "learning_rate": 2.9990000000000004e-06,
51
+ "loss": 0.0051,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.07,
56
+ "grad_norm": 0.023319030180573463,
57
+ "learning_rate": 3.4990000000000003e-06,
58
+ "loss": 0.0041,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.08,
63
+ "grad_norm": 0.010842502117156982,
64
+ "learning_rate": 3.999e-06,
65
+ "loss": 0.0032,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.09,
70
+ "grad_norm": 0.024452155455946922,
71
+ "learning_rate": 4.4990000000000005e-06,
72
+ "loss": 0.0025,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.1,
77
+ "grad_norm": 0.008086216636002064,
78
+ "learning_rate": 4.999000000000001e-06,
79
+ "loss": 0.0019,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.11,
84
+ "grad_norm": 0.010070053860545158,
85
+ "learning_rate": 4.944555555555556e-06,
86
+ "loss": 0.0016,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.12,
91
+ "grad_norm": 0.026417866349220276,
92
+ "learning_rate": 4.889e-06,
93
+ "loss": 0.0013,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.13,
98
+ "grad_norm": 0.00695574888959527,
99
+ "learning_rate": 4.833444444444445e-06,
100
+ "loss": 0.0009,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.14,
105
+ "grad_norm": 0.007032735738903284,
106
+ "learning_rate": 4.777888888888889e-06,
107
+ "loss": 0.0008,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.15,
112
+ "grad_norm": 0.011454381980001926,
113
+ "learning_rate": 4.722333333333334e-06,
114
+ "loss": 0.0007,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.16,
119
+ "grad_norm": 0.003068750025704503,
120
+ "learning_rate": 4.666777777777779e-06,
121
+ "loss": 0.0006,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.17,
126
+ "grad_norm": 0.007033022586256266,
127
+ "learning_rate": 4.611222222222223e-06,
128
+ "loss": 0.0005,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.18,
133
+ "grad_norm": 0.009220571257174015,
134
+ "learning_rate": 4.555666666666667e-06,
135
+ "loss": 0.0005,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.19,
140
+ "grad_norm": 0.002409809036180377,
141
+ "learning_rate": 4.500111111111111e-06,
142
+ "loss": 0.0004,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.2,
147
+ "grad_norm": 0.0033221933990716934,
148
+ "learning_rate": 4.444555555555556e-06,
149
+ "loss": 0.0004,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.21,
154
+ "grad_norm": 0.014717096462845802,
155
+ "learning_rate": 4.389000000000001e-06,
156
+ "loss": 0.0003,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.22,
161
+ "grad_norm": 0.0026381644420325756,
162
+ "learning_rate": 4.333444444444445e-06,
163
+ "loss": 0.0004,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.23,
168
+ "grad_norm": 0.004202376119792461,
169
+ "learning_rate": 4.27788888888889e-06,
170
+ "loss": 0.0003,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.24,
175
+ "grad_norm": 0.003984359558671713,
176
+ "learning_rate": 4.222333333333334e-06,
177
+ "loss": 0.0003,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.25,
182
+ "grad_norm": 0.0124363349750638,
183
+ "learning_rate": 4.166777777777778e-06,
184
+ "loss": 0.0003,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.26,
189
+ "grad_norm": 0.012383795343339443,
190
+ "learning_rate": 4.1112222222222225e-06,
191
+ "loss": 0.0003,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.27,
196
+ "grad_norm": 0.012548881582915783,
197
+ "learning_rate": 4.055666666666667e-06,
198
+ "loss": 0.0003,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.28,
203
+ "grad_norm": 0.017820972949266434,
204
+ "learning_rate": 4.0001111111111115e-06,
205
+ "loss": 0.0003,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.29,
210
+ "grad_norm": 0.020476065576076508,
211
+ "learning_rate": 3.944555555555556e-06,
212
+ "loss": 0.0003,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 0.3,
217
+ "grad_norm": 0.001494369120337069,
218
+ "learning_rate": 3.889e-06,
219
+ "loss": 0.0003,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 0.31,
224
+ "grad_norm": 0.0007974837208166718,
225
+ "learning_rate": 3.833444444444444e-06,
226
+ "loss": 0.0002,
227
+ "step": 15500
228
+ },
229
+ {
230
+ "epoch": 0.32,
231
+ "grad_norm": 0.01685766875743866,
232
+ "learning_rate": 3.7778888888888893e-06,
233
+ "loss": 0.0002,
234
+ "step": 16000
235
+ },
236
+ {
237
+ "epoch": 0.33,
238
+ "grad_norm": 0.001746984664350748,
239
+ "learning_rate": 3.7223333333333337e-06,
240
+ "loss": 0.0002,
241
+ "step": 16500
242
+ },
243
+ {
244
+ "epoch": 0.34,
245
+ "grad_norm": 0.0019295086385682225,
246
+ "learning_rate": 3.6667777777777778e-06,
247
+ "loss": 0.0002,
248
+ "step": 17000
249
+ },
250
+ {
251
+ "epoch": 0.35,
252
+ "grad_norm": 0.0022591373417526484,
253
+ "learning_rate": 3.6112222222222227e-06,
254
+ "loss": 0.0002,
255
+ "step": 17500
256
+ },
257
+ {
258
+ "epoch": 0.36,
259
+ "grad_norm": 0.0020980711560696363,
260
+ "learning_rate": 3.5556666666666667e-06,
261
+ "loss": 0.0002,
262
+ "step": 18000
263
+ },
264
+ {
265
+ "epoch": 0.37,
266
+ "grad_norm": 0.010617815889418125,
267
+ "learning_rate": 3.500111111111111e-06,
268
+ "loss": 0.0002,
269
+ "step": 18500
270
+ },
271
+ {
272
+ "epoch": 0.38,
273
+ "grad_norm": 0.0028420474845916033,
274
+ "learning_rate": 3.444555555555556e-06,
275
+ "loss": 0.0002,
276
+ "step": 19000
277
+ },
278
+ {
279
+ "epoch": 0.39,
280
+ "grad_norm": 0.017619455233216286,
281
+ "learning_rate": 3.389e-06,
282
+ "loss": 0.0002,
283
+ "step": 19500
284
+ },
285
+ {
286
+ "epoch": 0.4,
287
+ "grad_norm": 0.01581072062253952,
288
+ "learning_rate": 3.3334444444444445e-06,
289
+ "loss": 0.0002,
290
+ "step": 20000
291
+ },
292
+ {
293
+ "epoch": 0.41,
294
+ "grad_norm": 0.0025768582709133625,
295
+ "learning_rate": 3.2778888888888894e-06,
296
+ "loss": 0.0002,
297
+ "step": 20500
298
+ },
299
+ {
300
+ "epoch": 0.42,
301
+ "grad_norm": 0.009266170673072338,
302
+ "learning_rate": 3.2223333333333335e-06,
303
+ "loss": 0.0002,
304
+ "step": 21000
305
+ },
306
+ {
307
+ "epoch": 0.43,
308
+ "grad_norm": 0.005836064927279949,
309
+ "learning_rate": 3.166777777777778e-06,
310
+ "loss": 0.0002,
311
+ "step": 21500
312
+ },
313
+ {
314
+ "epoch": 0.44,
315
+ "grad_norm": 0.003142708446830511,
316
+ "learning_rate": 3.111222222222223e-06,
317
+ "loss": 0.0002,
318
+ "step": 22000
319
+ },
320
+ {
321
+ "epoch": 0.45,
322
+ "grad_norm": 0.0011461629765108228,
323
+ "learning_rate": 3.055666666666667e-06,
324
+ "loss": 0.0002,
325
+ "step": 22500
326
+ },
327
+ {
328
+ "epoch": 0.46,
329
+ "grad_norm": 0.0012801799457520247,
330
+ "learning_rate": 3.0001111111111113e-06,
331
+ "loss": 0.0002,
332
+ "step": 23000
333
+ },
334
+ {
335
+ "epoch": 0.47,
336
+ "grad_norm": 0.00472261942923069,
337
+ "learning_rate": 2.944555555555556e-06,
338
+ "loss": 0.0002,
339
+ "step": 23500
340
+ },
341
+ {
342
+ "epoch": 0.48,
343
+ "grad_norm": 0.00753053417429328,
344
+ "learning_rate": 2.889e-06,
345
+ "loss": 0.0002,
346
+ "step": 24000
347
+ },
348
+ {
349
+ "epoch": 0.49,
350
+ "grad_norm": 0.009959095157682896,
351
+ "learning_rate": 2.8334444444444447e-06,
352
+ "loss": 0.0002,
353
+ "step": 24500
354
+ },
355
+ {
356
+ "epoch": 0.5,
357
+ "grad_norm": 0.005424452014267445,
358
+ "learning_rate": 2.7778888888888887e-06,
359
+ "loss": 0.0002,
360
+ "step": 25000
361
+ },
362
+ {
363
+ "epoch": 0.51,
364
+ "grad_norm": 0.0014462786493822932,
365
+ "learning_rate": 2.7223333333333336e-06,
366
+ "loss": 0.0002,
367
+ "step": 25500
368
+ },
369
+ {
370
+ "epoch": 0.52,
371
+ "grad_norm": 0.0010618987726047635,
372
+ "learning_rate": 2.666777777777778e-06,
373
+ "loss": 0.0002,
374
+ "step": 26000
375
+ },
376
+ {
377
+ "epoch": 0.53,
378
+ "grad_norm": 0.0015679823700338602,
379
+ "learning_rate": 2.611222222222222e-06,
380
+ "loss": 0.0002,
381
+ "step": 26500
382
+ },
383
+ {
384
+ "epoch": 0.54,
385
+ "grad_norm": 0.0037062859628349543,
386
+ "learning_rate": 2.555666666666667e-06,
387
+ "loss": 0.0002,
388
+ "step": 27000
389
+ },
390
+ {
391
+ "epoch": 0.55,
392
+ "grad_norm": 0.0019893173594027758,
393
+ "learning_rate": 2.5001111111111114e-06,
394
+ "loss": 0.0002,
395
+ "step": 27500
396
+ },
397
+ {
398
+ "epoch": 0.56,
399
+ "grad_norm": 0.0012017602566629648,
400
+ "learning_rate": 2.444555555555556e-06,
401
+ "loss": 0.0002,
402
+ "step": 28000
403
+ },
404
+ {
405
+ "epoch": 0.57,
406
+ "grad_norm": 0.0011259852908551693,
407
+ "learning_rate": 2.3890000000000003e-06,
408
+ "loss": 0.0002,
409
+ "step": 28500
410
+ },
411
+ {
412
+ "epoch": 0.58,
413
+ "grad_norm": 0.007102410774677992,
414
+ "learning_rate": 2.3334444444444444e-06,
415
+ "loss": 0.0002,
416
+ "step": 29000
417
+ },
418
+ {
419
+ "epoch": 0.59,
420
+ "grad_norm": 0.0013766338815912604,
421
+ "learning_rate": 2.2778888888888893e-06,
422
+ "loss": 0.0001,
423
+ "step": 29500
424
+ },
425
+ {
426
+ "epoch": 0.6,
427
+ "grad_norm": 0.002701902762055397,
428
+ "learning_rate": 2.2223333333333337e-06,
429
+ "loss": 0.0002,
430
+ "step": 30000
431
+ },
432
+ {
433
+ "epoch": 0.61,
434
+ "grad_norm": 0.003973294515162706,
435
+ "learning_rate": 2.1667777777777777e-06,
436
+ "loss": 0.0002,
437
+ "step": 30500
438
+ },
439
+ {
440
+ "epoch": 0.62,
441
+ "grad_norm": 0.001020593335852027,
442
+ "learning_rate": 2.111222222222222e-06,
443
+ "loss": 0.0002,
444
+ "step": 31000
445
+ },
446
+ {
447
+ "epoch": 0.63,
448
+ "grad_norm": 0.0016460069455206394,
449
+ "learning_rate": 2.055666666666667e-06,
450
+ "loss": 0.0002,
451
+ "step": 31500
452
+ },
453
+ {
454
+ "epoch": 0.64,
455
+ "grad_norm": 0.0035029498394578695,
456
+ "learning_rate": 2.000111111111111e-06,
457
+ "loss": 0.0002,
458
+ "step": 32000
459
+ },
460
+ {
461
+ "epoch": 0.65,
462
+ "grad_norm": 0.002191362204030156,
463
+ "learning_rate": 1.9445555555555556e-06,
464
+ "loss": 0.0002,
465
+ "step": 32500
466
+ },
467
+ {
468
+ "epoch": 0.66,
469
+ "grad_norm": 0.002301236381754279,
470
+ "learning_rate": 1.8890000000000003e-06,
471
+ "loss": 0.0001,
472
+ "step": 33000
473
+ },
474
+ {
475
+ "epoch": 0.67,
476
+ "grad_norm": 0.000640725833363831,
477
+ "learning_rate": 1.8334444444444447e-06,
478
+ "loss": 0.0001,
479
+ "step": 33500
480
+ },
481
+ {
482
+ "epoch": 0.68,
483
+ "grad_norm": 0.002908015623688698,
484
+ "learning_rate": 1.777888888888889e-06,
485
+ "loss": 0.0002,
486
+ "step": 34000
487
+ },
488
+ {
489
+ "epoch": 0.69,
490
+ "grad_norm": 0.008972103707492352,
491
+ "learning_rate": 1.7223333333333334e-06,
492
+ "loss": 0.0002,
493
+ "step": 34500
494
+ },
495
+ {
496
+ "epoch": 0.7,
497
+ "grad_norm": 0.001777523080818355,
498
+ "learning_rate": 1.666777777777778e-06,
499
+ "loss": 0.0002,
500
+ "step": 35000
501
+ },
502
+ {
503
+ "epoch": 0.71,
504
+ "grad_norm": 0.0011188536882400513,
505
+ "learning_rate": 1.6112222222222223e-06,
506
+ "loss": 0.0002,
507
+ "step": 35500
508
+ },
509
+ {
510
+ "epoch": 0.72,
511
+ "grad_norm": 0.019039396196603775,
512
+ "learning_rate": 1.5556666666666668e-06,
513
+ "loss": 0.0001,
514
+ "step": 36000
515
+ },
516
+ {
517
+ "epoch": 0.73,
518
+ "grad_norm": 0.0026075986679643393,
519
+ "learning_rate": 1.5001111111111113e-06,
520
+ "loss": 0.0002,
521
+ "step": 36500
522
+ },
523
+ {
524
+ "epoch": 0.74,
525
+ "grad_norm": 0.001765447435900569,
526
+ "learning_rate": 1.4445555555555557e-06,
527
+ "loss": 0.0002,
528
+ "step": 37000
529
+ },
530
+ {
531
+ "epoch": 0.75,
532
+ "grad_norm": 0.002983854152262211,
533
+ "learning_rate": 1.3890000000000002e-06,
534
+ "loss": 0.0001,
535
+ "step": 37500
536
+ },
537
+ {
538
+ "epoch": 0.76,
539
+ "grad_norm": 0.001525428961031139,
540
+ "learning_rate": 1.3334444444444444e-06,
541
+ "loss": 0.0001,
542
+ "step": 38000
543
+ },
544
+ {
545
+ "epoch": 0.77,
546
+ "grad_norm": 0.025928320363163948,
547
+ "learning_rate": 1.277888888888889e-06,
548
+ "loss": 0.0001,
549
+ "step": 38500
550
+ },
551
+ {
552
+ "epoch": 0.78,
553
+ "grad_norm": 0.006851640529930592,
554
+ "learning_rate": 1.2223333333333333e-06,
555
+ "loss": 0.0002,
556
+ "step": 39000
557
+ },
558
+ {
559
+ "epoch": 0.79,
560
+ "grad_norm": 0.01913316920399666,
561
+ "learning_rate": 1.166777777777778e-06,
562
+ "loss": 0.0002,
563
+ "step": 39500
564
+ },
565
+ {
566
+ "epoch": 0.8,
567
+ "grad_norm": 0.001256144605576992,
568
+ "learning_rate": 1.1112222222222223e-06,
569
+ "loss": 0.0002,
570
+ "step": 40000
571
+ },
572
+ {
573
+ "epoch": 0.81,
574
+ "grad_norm": 0.0007173811318352818,
575
+ "learning_rate": 1.0556666666666667e-06,
576
+ "loss": 0.0002,
577
+ "step": 40500
578
+ },
579
+ {
580
+ "epoch": 0.82,
581
+ "grad_norm": 0.0007406665827147663,
582
+ "learning_rate": 1.0001111111111112e-06,
583
+ "loss": 0.0001,
584
+ "step": 41000
585
+ },
586
+ {
587
+ "epoch": 0.83,
588
+ "grad_norm": 0.0008800509967841208,
589
+ "learning_rate": 9.445555555555556e-07,
590
+ "loss": 0.0002,
591
+ "step": 41500
592
+ },
593
+ {
594
+ "epoch": 0.84,
595
+ "grad_norm": 0.0021663799416273832,
596
+ "learning_rate": 8.890000000000002e-07,
597
+ "loss": 0.0001,
598
+ "step": 42000
599
+ },
600
+ {
601
+ "epoch": 0.85,
602
+ "grad_norm": 0.0016956336330622435,
603
+ "learning_rate": 8.334444444444445e-07,
604
+ "loss": 0.0001,
605
+ "step": 42500
606
+ },
607
+ {
608
+ "epoch": 0.86,
609
+ "grad_norm": 0.0018851294880732894,
610
+ "learning_rate": 7.77888888888889e-07,
611
+ "loss": 0.0002,
612
+ "step": 43000
613
+ },
614
+ {
615
+ "epoch": 0.87,
616
+ "grad_norm": 0.0005814626347273588,
617
+ "learning_rate": 7.223333333333334e-07,
618
+ "loss": 0.0001,
619
+ "step": 43500
620
+ },
621
+ {
622
+ "epoch": 0.88,
623
+ "grad_norm": 0.0006882678717374802,
624
+ "learning_rate": 6.667777777777778e-07,
625
+ "loss": 0.0002,
626
+ "step": 44000
627
+ },
628
+ {
629
+ "epoch": 0.89,
630
+ "grad_norm": 0.000447490019723773,
631
+ "learning_rate": 6.112222222222223e-07,
632
+ "loss": 0.0001,
633
+ "step": 44500
634
+ },
635
+ {
636
+ "epoch": 0.9,
637
+ "grad_norm": 0.0006894184043630958,
638
+ "learning_rate": 5.556666666666667e-07,
639
+ "loss": 0.0002,
640
+ "step": 45000
641
+ },
642
+ {
643
+ "epoch": 0.91,
644
+ "grad_norm": 0.009517834521830082,
645
+ "learning_rate": 5.001111111111112e-07,
646
+ "loss": 0.0002,
647
+ "step": 45500
648
+ },
649
+ {
650
+ "epoch": 0.92,
651
+ "grad_norm": 0.002439250238239765,
652
+ "learning_rate": 4.445555555555556e-07,
653
+ "loss": 0.0001,
654
+ "step": 46000
655
+ },
656
+ {
657
+ "epoch": 0.93,
658
+ "grad_norm": 0.002538469387218356,
659
+ "learning_rate": 3.89e-07,
660
+ "loss": 0.0001,
661
+ "step": 46500
662
+ },
663
+ {
664
+ "epoch": 0.94,
665
+ "grad_norm": 0.0015232452424243093,
666
+ "learning_rate": 3.334444444444445e-07,
667
+ "loss": 0.0001,
668
+ "step": 47000
669
+ },
670
+ {
671
+ "epoch": 0.95,
672
+ "grad_norm": 0.009680049493908882,
673
+ "learning_rate": 2.778888888888889e-07,
674
+ "loss": 0.0002,
675
+ "step": 47500
676
+ },
677
+ {
678
+ "epoch": 0.96,
679
+ "grad_norm": 0.001066950149834156,
680
+ "learning_rate": 2.2233333333333335e-07,
681
+ "loss": 0.0001,
682
+ "step": 48000
683
+ },
684
+ {
685
+ "epoch": 0.97,
686
+ "grad_norm": 0.0006020030123181641,
687
+ "learning_rate": 1.6677777777777778e-07,
688
+ "loss": 0.0001,
689
+ "step": 48500
690
+ },
691
+ {
692
+ "epoch": 0.98,
693
+ "grad_norm": 0.007198994047939777,
694
+ "learning_rate": 1.1122222222222223e-07,
695
+ "loss": 0.0001,
696
+ "step": 49000
697
+ },
698
+ {
699
+ "epoch": 0.99,
700
+ "grad_norm": 0.001687090378254652,
701
+ "learning_rate": 5.5666666666666675e-08,
702
+ "loss": 0.0001,
703
+ "step": 49500
704
+ },
705
+ {
706
+ "epoch": 1.0,
707
+ "grad_norm": 0.01796930469572544,
708
+ "learning_rate": 1.1111111111111112e-10,
709
+ "loss": 0.0002,
710
+ "step": 50000
711
+ }
712
+ ],
713
+ "logging_steps": 500,
714
+ "max_steps": 50000,
715
+ "num_input_tokens_seen": 0,
716
+ "num_train_epochs": 1,
717
+ "save_steps": 500,
718
+ "stateful_callbacks": {
719
+ "TrainerControl": {
720
+ "args": {
721
+ "should_epoch_stop": false,
722
+ "should_evaluate": false,
723
+ "should_log": false,
724
+ "should_save": true,
725
+ "should_training_stop": true
726
+ },
727
+ "attributes": {}
728
+ }
729
+ },
730
+ "total_flos": 1.5883492638205056e+16,
731
+ "train_batch_size": 2,
732
+ "trial_name": null,
733
+ "trial_params": null
734
+ }
nemotron-pii-ready/span-deberta-v3-base/checkpoint-50000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad51fc5d014170cae6b32a37b1c26813511873d6c15735d9ed3764a3708ac61
3
+ size 5841
nemotron-pii-ready/span-deberta-v3-base/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaAdvancedSpanClassifier"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "dtype": "float32",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "O",
12
+ "1": "ACCOUNT_NUMBER",
13
+ "2": "AGE",
14
+ "3": "API_KEY",
15
+ "4": "BANK_ROUTING_NUMBER",
16
+ "5": "BIOMETRIC_IDENTIFIER",
17
+ "6": "BLOOD_TYPE",
18
+ "7": "CERTIFICATE_LICENSE_NUMBER",
19
+ "8": "CITY",
20
+ "9": "COMPANY_NAME",
21
+ "10": "COORDINATE",
22
+ "11": "COUNTRY",
23
+ "12": "COUNTY",
24
+ "13": "CREDIT_DEBIT_CARD",
25
+ "14": "CUSTOMER_ID",
26
+ "15": "CVV",
27
+ "16": "DATE",
28
+ "17": "DATE_OF_BIRTH",
29
+ "18": "DATE_TIME",
30
+ "19": "DEVICE_IDENTIFIER",
31
+ "20": "EDUCATION_LEVEL",
32
+ "21": "EMAIL",
33
+ "22": "EMPLOYEE_ID",
34
+ "23": "EMPLOYMENT_STATUS",
35
+ "24": "FAX_NUMBER",
36
+ "25": "FIRST_NAME",
37
+ "26": "GENDER",
38
+ "27": "HEALTH_PLAN_BENEFICIARY_NUMBER",
39
+ "28": "HTTP_COOKIE",
40
+ "29": "IPV4",
41
+ "30": "IPV6",
42
+ "31": "LANGUAGE",
43
+ "32": "LAST_NAME",
44
+ "33": "LICENSE_PLATE",
45
+ "34": "MAC_ADDRESS",
46
+ "35": "MEDICAL_RECORD_NUMBER",
47
+ "36": "NATIONAL_ID",
48
+ "37": "OCCUPATION",
49
+ "38": "PASSWORD",
50
+ "39": "PHONE_NUMBER",
51
+ "40": "PIN",
52
+ "41": "POLITICAL_VIEW",
53
+ "42": "POSTCODE",
54
+ "43": "RACE_ETHNICITY",
55
+ "44": "RELIGIOUS_BELIEF",
56
+ "45": "SEXUALITY",
57
+ "46": "SSN",
58
+ "47": "STATE",
59
+ "48": "STREET_ADDRESS",
60
+ "49": "SWIFT_BIC",
61
+ "50": "TAX_ID",
62
+ "51": "TIME",
63
+ "52": "UNIQUE_ID",
64
+ "53": "URL",
65
+ "54": "USER_NAME",
66
+ "55": "VEHICLE_IDENTIFIER"
67
+ },
68
+ "initializer_range": 0.02,
69
+ "intermediate_size": 3072,
70
+ "label2id": {
71
+ "ACCOUNT_NUMBER": 1,
72
+ "AGE": 2,
73
+ "API_KEY": 3,
74
+ "BANK_ROUTING_NUMBER": 4,
75
+ "BIOMETRIC_IDENTIFIER": 5,
76
+ "BLOOD_TYPE": 6,
77
+ "CERTIFICATE_LICENSE_NUMBER": 7,
78
+ "CITY": 8,
79
+ "COMPANY_NAME": 9,
80
+ "COORDINATE": 10,
81
+ "COUNTRY": 11,
82
+ "COUNTY": 12,
83
+ "CREDIT_DEBIT_CARD": 13,
84
+ "CUSTOMER_ID": 14,
85
+ "CVV": 15,
86
+ "DATE": 16,
87
+ "DATE_OF_BIRTH": 17,
88
+ "DATE_TIME": 18,
89
+ "DEVICE_IDENTIFIER": 19,
90
+ "EDUCATION_LEVEL": 20,
91
+ "EMAIL": 21,
92
+ "EMPLOYEE_ID": 22,
93
+ "EMPLOYMENT_STATUS": 23,
94
+ "FAX_NUMBER": 24,
95
+ "FIRST_NAME": 25,
96
+ "GENDER": 26,
97
+ "HEALTH_PLAN_BENEFICIARY_NUMBER": 27,
98
+ "HTTP_COOKIE": 28,
99
+ "IPV4": 29,
100
+ "IPV6": 30,
101
+ "LANGUAGE": 31,
102
+ "LAST_NAME": 32,
103
+ "LICENSE_PLATE": 33,
104
+ "MAC_ADDRESS": 34,
105
+ "MEDICAL_RECORD_NUMBER": 35,
106
+ "NATIONAL_ID": 36,
107
+ "O": 0,
108
+ "OCCUPATION": 37,
109
+ "PASSWORD": 38,
110
+ "PHONE_NUMBER": 39,
111
+ "PIN": 40,
112
+ "POLITICAL_VIEW": 41,
113
+ "POSTCODE": 42,
114
+ "RACE_ETHNICITY": 43,
115
+ "RELIGIOUS_BELIEF": 44,
116
+ "SEXUALITY": 45,
117
+ "SSN": 46,
118
+ "STATE": 47,
119
+ "STREET_ADDRESS": 48,
120
+ "SWIFT_BIC": 49,
121
+ "TAX_ID": 50,
122
+ "TIME": 51,
123
+ "UNIQUE_ID": 52,
124
+ "URL": 53,
125
+ "USER_NAME": 54,
126
+ "VEHICLE_IDENTIFIER": 55
127
+ },
128
+ "layer_norm_eps": 1e-07,
129
+ "legacy": true,
130
+ "max_position_embeddings": 512,
131
+ "max_relative_positions": -1,
132
+ "model_type": "deberta-v2",
133
+ "norm_rel_ebd": "layer_norm",
134
+ "num_attention_heads": 12,
135
+ "num_hidden_layers": 12,
136
+ "pad_token_id": 0,
137
+ "pooler_dropout": 0,
138
+ "pooler_hidden_act": "gelu",
139
+ "pooler_hidden_size": 768,
140
+ "pos_att_type": [
141
+ "p2c",
142
+ "c2p"
143
+ ],
144
+ "position_biased_input": false,
145
+ "position_buckets": 256,
146
+ "relative_attention": true,
147
+ "share_att_key": true,
148
+ "transformers_version": "4.57.6",
149
+ "type_vocab_size": 0,
150
+ "vocab_size": 128100
151
+ }
nemotron-pii-ready/span-deberta-v3-base/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17eede9a2c12668914340a0413b001d91412d279035b1947834ed6390476f2e2
3
+ size 741532431
nemotron-pii-ready/span-deberta-v3-base/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
nemotron-pii-ready/span-deberta-v3-base/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
nemotron-pii-ready/span-deberta-v3-base/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nemotron-pii-ready/span-deberta-v3-base/tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "[PAD]",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "[SEP]",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "[UNK]",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "[MASK]",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "[CLS]",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "[CLS]",
48
+ "do_lower_case": false,
49
+ "eos_token": "[SEP]",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "[MASK]",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "sp_model_kwargs": {},
56
+ "split_by_punct": false,
57
+ "tokenizer_class": "DebertaV2Tokenizer",
58
+ "unk_token": "[UNK]",
59
+ "vocab_type": "spm"
60
+ }
nemotron-pii-ready/span-deberta-v3-base/training_meta.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "span_based",
3
+ "base_model": "microsoft/deberta-v3-base",
4
+ "dataset": "nemotron-pii-ready",
5
+ "label_list": [
6
+ "O",
7
+ "ACCOUNT_NUMBER",
8
+ "AGE",
9
+ "API_KEY",
10
+ "BANK_ROUTING_NUMBER",
11
+ "BIOMETRIC_IDENTIFIER",
12
+ "BLOOD_TYPE",
13
+ "CERTIFICATE_LICENSE_NUMBER",
14
+ "CITY",
15
+ "COMPANY_NAME",
16
+ "COORDINATE",
17
+ "COUNTRY",
18
+ "COUNTY",
19
+ "CREDIT_DEBIT_CARD",
20
+ "CUSTOMER_ID",
21
+ "CVV",
22
+ "DATE",
23
+ "DATE_OF_BIRTH",
24
+ "DATE_TIME",
25
+ "DEVICE_IDENTIFIER",
26
+ "EDUCATION_LEVEL",
27
+ "EMAIL",
28
+ "EMPLOYEE_ID",
29
+ "EMPLOYMENT_STATUS",
30
+ "FAX_NUMBER",
31
+ "FIRST_NAME",
32
+ "GENDER",
33
+ "HEALTH_PLAN_BENEFICIARY_NUMBER",
34
+ "HTTP_COOKIE",
35
+ "IPV4",
36
+ "IPV6",
37
+ "LANGUAGE",
38
+ "LAST_NAME",
39
+ "LICENSE_PLATE",
40
+ "MAC_ADDRESS",
41
+ "MEDICAL_RECORD_NUMBER",
42
+ "NATIONAL_ID",
43
+ "OCCUPATION",
44
+ "PASSWORD",
45
+ "PHONE_NUMBER",
46
+ "PIN",
47
+ "POLITICAL_VIEW",
48
+ "POSTCODE",
49
+ "RACE_ETHNICITY",
50
+ "RELIGIOUS_BELIEF",
51
+ "SEXUALITY",
52
+ "SSN",
53
+ "STATE",
54
+ "STREET_ADDRESS",
55
+ "SWIFT_BIC",
56
+ "TAX_ID",
57
+ "TIME",
58
+ "UNIQUE_ID",
59
+ "URL",
60
+ "USER_NAME",
61
+ "VEHICLE_IDENTIFIER"
62
+ ],
63
+ "max_seq_length": 512,
64
+ "max_span_width": 30,
65
+ "timestamp": "2026-04-05T04:31:58.254240"
66
+ }