PuxAI commited on
Commit
03caa22
·
verified ·
1 Parent(s): 3f6ade4

Upload folder using huggingface_hub

Browse files
Files changed (32) hide show
  1. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/config.json +120 -0
  2. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/model.safetensors +3 -0
  3. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/optimizer.pt +3 -0
  4. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/rng_state.pth +3 -0
  5. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/scheduler.pt +3 -0
  6. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/tokenizer.json +0 -0
  7. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/tokenizer_config.json +15 -0
  8. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/trainer_state.json +224 -0
  9. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/training_args.bin +3 -0
  10. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/config.json +120 -0
  11. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/model.safetensors +3 -0
  12. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/optimizer.pt +3 -0
  13. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/rng_state.pth +3 -0
  14. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/scheduler.pt +3 -0
  15. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/tokenizer.json +0 -0
  16. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/tokenizer_config.json +15 -0
  17. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/trainer_state.json +414 -0
  18. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/training_args.bin +3 -0
  19. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/config.json +120 -0
  20. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/model.safetensors +3 -0
  21. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/optimizer.pt +3 -0
  22. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/rng_state.pth +3 -0
  23. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/scheduler.pt +3 -0
  24. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/tokenizer.json +0 -0
  25. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/tokenizer_config.json +15 -0
  26. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/trainer_state.json +604 -0
  27. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/training_args.bin +3 -0
  28. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/config.json +120 -0
  29. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/model.safetensors +3 -0
  30. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/tokenizer.json +0 -0
  31. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/tokenizer_config.json +15 -0
  32. open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/training_args.bin +3 -0
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/config.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-AGE",
18
+ "2": "I-AGE",
19
+ "3": "B-BUILDINGNUM",
20
+ "4": "I-BUILDINGNUM",
21
+ "5": "B-CITY",
22
+ "6": "I-CITY",
23
+ "7": "B-CREDITCARDNUMBER",
24
+ "8": "I-CREDITCARDNUMBER",
25
+ "9": "B-DATE",
26
+ "10": "I-DATE",
27
+ "11": "B-DRIVERLICENSENUM",
28
+ "12": "I-DRIVERLICENSENUM",
29
+ "13": "B-EMAIL",
30
+ "14": "I-EMAIL",
31
+ "15": "B-GENDER",
32
+ "16": "I-GENDER",
33
+ "17": "B-GIVENNAME",
34
+ "18": "I-GIVENNAME",
35
+ "19": "B-IDCARDNUM",
36
+ "20": "I-IDCARDNUM",
37
+ "21": "B-PASSPORTNUM",
38
+ "22": "I-PASSPORTNUM",
39
+ "23": "B-SEX",
40
+ "24": "I-SEX",
41
+ "25": "B-SOCIALNUM",
42
+ "26": "I-SOCIALNUM",
43
+ "27": "B-STREET",
44
+ "28": "I-STREET",
45
+ "29": "B-SURNAME",
46
+ "30": "I-SURNAME",
47
+ "31": "B-TAXNUM",
48
+ "32": "I-TAXNUM",
49
+ "33": "B-TELEPHONENUM",
50
+ "34": "I-TELEPHONENUM",
51
+ "35": "B-TIME",
52
+ "36": "I-TIME",
53
+ "37": "B-TITLE",
54
+ "38": "I-TITLE",
55
+ "39": "B-ZIPCODE",
56
+ "40": "I-ZIPCODE"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "is_decoder": false,
61
+ "label2id": {
62
+ "B-AGE": 1,
63
+ "B-BUILDINGNUM": 3,
64
+ "B-CITY": 5,
65
+ "B-CREDITCARDNUMBER": 7,
66
+ "B-DATE": 9,
67
+ "B-DRIVERLICENSENUM": 11,
68
+ "B-EMAIL": 13,
69
+ "B-GENDER": 15,
70
+ "B-GIVENNAME": 17,
71
+ "B-IDCARDNUM": 19,
72
+ "B-PASSPORTNUM": 21,
73
+ "B-SEX": 23,
74
+ "B-SOCIALNUM": 25,
75
+ "B-STREET": 27,
76
+ "B-SURNAME": 29,
77
+ "B-TAXNUM": 31,
78
+ "B-TELEPHONENUM": 33,
79
+ "B-TIME": 35,
80
+ "B-TITLE": 37,
81
+ "B-ZIPCODE": 39,
82
+ "I-AGE": 2,
83
+ "I-BUILDINGNUM": 4,
84
+ "I-CITY": 6,
85
+ "I-CREDITCARDNUMBER": 8,
86
+ "I-DATE": 10,
87
+ "I-DRIVERLICENSENUM": 12,
88
+ "I-EMAIL": 14,
89
+ "I-GENDER": 16,
90
+ "I-GIVENNAME": 18,
91
+ "I-IDCARDNUM": 20,
92
+ "I-PASSPORTNUM": 22,
93
+ "I-SEX": 24,
94
+ "I-SOCIALNUM": 26,
95
+ "I-STREET": 28,
96
+ "I-SURNAME": 30,
97
+ "I-TAXNUM": 32,
98
+ "I-TELEPHONENUM": 34,
99
+ "I-TIME": 36,
100
+ "I-TITLE": 38,
101
+ "I-ZIPCODE": 40,
102
+ "O": 0
103
+ },
104
+ "layer_norm_eps": 1e-12,
105
+ "max_position_embeddings": 512,
106
+ "model_type": "bert",
107
+ "num_attention_heads": 12,
108
+ "num_hidden_layers": 12,
109
+ "pad_token_id": 0,
110
+ "pooler_fc_size": 768,
111
+ "pooler_num_attention_heads": 12,
112
+ "pooler_num_fc_layers": 3,
113
+ "pooler_size_per_head": 128,
114
+ "pooler_type": "first_token_transform",
115
+ "tie_word_embeddings": true,
116
+ "transformers_version": "5.3.0",
117
+ "type_vocab_size": 2,
118
+ "use_cache": false,
119
+ "vocab_size": 119547
120
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070dc7f8f87faf4afed4d08ee1dac60e632dcb3831fecffc5d389ebcead5823f
3
+ size 709200844
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a6efc48892807b3b834c0d567d41a90a4be6768e2d200ab1e3422ae12b5b98
3
+ size 1418524683
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aed068987c0c8fe848117e0c4142a1966dd60e23bc706308e1223c1ca4530dd
3
+ size 14645
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef1d1b63c0b9a5ac0f9595b04fad948d0f08c7084fd61fbaa7bac526f8c0e8e
3
+ size 1465
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/trainer_state.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 13055,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.038299502106472615,
14
+ "grad_norm": 0.8714600801467896,
15
+ "learning_rate": 1.9745180645984938e-05,
16
+ "loss": 0.1691785430908203,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.07659900421294523,
21
+ "grad_norm": 0.588369607925415,
22
+ "learning_rate": 1.9489850631941786e-05,
23
+ "loss": 0.04638611221313477,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.11489850631941785,
28
+ "grad_norm": 1.5041358470916748,
29
+ "learning_rate": 1.9234520617898634e-05,
30
+ "loss": 0.03861086273193359,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.15319800842589046,
35
+ "grad_norm": 0.4620295763015747,
36
+ "learning_rate": 1.8979190603855486e-05,
37
+ "loss": 0.03408417129516601,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.19149751053236308,
42
+ "grad_norm": 1.0186210870742798,
43
+ "learning_rate": 1.8723860589812334e-05,
44
+ "loss": 0.030472009658813477,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.2297970126388357,
49
+ "grad_norm": 0.19274073839187622,
50
+ "learning_rate": 1.8468530575769182e-05,
51
+ "loss": 0.02794113540649414,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.2680965147453083,
56
+ "grad_norm": 0.3304294943809509,
57
+ "learning_rate": 1.8213200561726034e-05,
58
+ "loss": 0.026913631439208986,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.3063960168517809,
63
+ "grad_norm": 0.32137593626976013,
64
+ "learning_rate": 1.7957870547682882e-05,
65
+ "loss": 0.025851396560668944,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.34469551895825357,
70
+ "grad_norm": 0.5200027823448181,
71
+ "learning_rate": 1.770254053363973e-05,
72
+ "loss": 0.02558848571777344,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.38299502106472616,
77
+ "grad_norm": 0.6408317685127258,
78
+ "learning_rate": 1.744721051959658e-05,
79
+ "loss": 0.023505245208740234,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.42129452317119875,
84
+ "grad_norm": 0.46780553460121155,
85
+ "learning_rate": 1.719188050555343e-05,
86
+ "loss": 0.023054824829101563,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.4595940252776714,
91
+ "grad_norm": 0.6626068353652954,
92
+ "learning_rate": 1.6936550491510278e-05,
93
+ "loss": 0.02208795166015625,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.497893527384144,
98
+ "grad_norm": 0.25836509466171265,
99
+ "learning_rate": 1.668122047746713e-05,
100
+ "loss": 0.022046304702758788,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.5361930294906166,
105
+ "grad_norm": 0.8243473172187805,
106
+ "learning_rate": 1.6425890463423978e-05,
107
+ "loss": 0.021231672286987305,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.5744925315970892,
112
+ "grad_norm": 0.26098620891571045,
113
+ "learning_rate": 1.6170560449380826e-05,
114
+ "loss": 0.020609188079833984,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.6127920337035618,
119
+ "grad_norm": 0.1398458480834961,
120
+ "learning_rate": 1.5915230435337677e-05,
121
+ "loss": 0.020333499908447267,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.6510915358100344,
126
+ "grad_norm": 0.44954946637153625,
127
+ "learning_rate": 1.5659900421294526e-05,
128
+ "loss": 0.01971204948425293,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.6893910379165071,
133
+ "grad_norm": 0.2548958957195282,
134
+ "learning_rate": 1.5404570407251374e-05,
135
+ "loss": 0.01934459114074707,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.7276905400229797,
140
+ "grad_norm": 0.31454744935035706,
141
+ "learning_rate": 1.5149240393208222e-05,
142
+ "loss": 0.018815528869628905,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.7659900421294523,
147
+ "grad_norm": 0.15480241179466248,
148
+ "learning_rate": 1.4893910379165073e-05,
149
+ "loss": 0.018857412338256836,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.8042895442359249,
154
+ "grad_norm": 0.23583486676216125,
155
+ "learning_rate": 1.4638580365121922e-05,
156
+ "loss": 0.018428108215332032,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.8425890463423975,
161
+ "grad_norm": 0.289045125246048,
162
+ "learning_rate": 1.438325035107877e-05,
163
+ "loss": 0.01816094207763672,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.8808885484488702,
168
+ "grad_norm": 0.13967347145080566,
169
+ "learning_rate": 1.4127920337035618e-05,
170
+ "loss": 0.01740534973144531,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.9191880505553428,
175
+ "grad_norm": 0.39465662837028503,
176
+ "learning_rate": 1.387259032299247e-05,
177
+ "loss": 0.017327314376831056,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.9574875526618154,
182
+ "grad_norm": 0.5613229274749756,
183
+ "learning_rate": 1.3617260308949318e-05,
184
+ "loss": 0.01724307060241699,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.995787054768288,
189
+ "grad_norm": 0.550220787525177,
190
+ "learning_rate": 1.3361930294906168e-05,
191
+ "loss": 0.017301082611083984,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 1.0,
196
+ "eval_loss": 0.019178859889507294,
197
+ "eval_runtime": 195.4351,
198
+ "eval_samples_per_second": 474.991,
199
+ "eval_steps_per_second": 59.375,
200
+ "step": 13055
201
+ }
202
+ ],
203
+ "logging_steps": 500,
204
+ "max_steps": 39165,
205
+ "num_input_tokens_seen": 0,
206
+ "num_train_epochs": 3,
207
+ "save_steps": 500,
208
+ "stateful_callbacks": {
209
+ "TrainerControl": {
210
+ "args": {
211
+ "should_epoch_stop": false,
212
+ "should_evaluate": false,
213
+ "should_log": false,
214
+ "should_save": true,
215
+ "should_training_stop": false
216
+ },
217
+ "attributes": {}
218
+ }
219
+ },
220
+ "total_flos": 2.1838257516635136e+17,
221
+ "train_batch_size": 64,
222
+ "trial_name": null,
223
+ "trial_params": null
224
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-13055/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb80cf54f2fe927cc63c861aa4b3e87525629b3d1868eeda1dedae9009697f1c
3
+ size 5265
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/config.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-AGE",
18
+ "2": "I-AGE",
19
+ "3": "B-BUILDINGNUM",
20
+ "4": "I-BUILDINGNUM",
21
+ "5": "B-CITY",
22
+ "6": "I-CITY",
23
+ "7": "B-CREDITCARDNUMBER",
24
+ "8": "I-CREDITCARDNUMBER",
25
+ "9": "B-DATE",
26
+ "10": "I-DATE",
27
+ "11": "B-DRIVERLICENSENUM",
28
+ "12": "I-DRIVERLICENSENUM",
29
+ "13": "B-EMAIL",
30
+ "14": "I-EMAIL",
31
+ "15": "B-GENDER",
32
+ "16": "I-GENDER",
33
+ "17": "B-GIVENNAME",
34
+ "18": "I-GIVENNAME",
35
+ "19": "B-IDCARDNUM",
36
+ "20": "I-IDCARDNUM",
37
+ "21": "B-PASSPORTNUM",
38
+ "22": "I-PASSPORTNUM",
39
+ "23": "B-SEX",
40
+ "24": "I-SEX",
41
+ "25": "B-SOCIALNUM",
42
+ "26": "I-SOCIALNUM",
43
+ "27": "B-STREET",
44
+ "28": "I-STREET",
45
+ "29": "B-SURNAME",
46
+ "30": "I-SURNAME",
47
+ "31": "B-TAXNUM",
48
+ "32": "I-TAXNUM",
49
+ "33": "B-TELEPHONENUM",
50
+ "34": "I-TELEPHONENUM",
51
+ "35": "B-TIME",
52
+ "36": "I-TIME",
53
+ "37": "B-TITLE",
54
+ "38": "I-TITLE",
55
+ "39": "B-ZIPCODE",
56
+ "40": "I-ZIPCODE"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "is_decoder": false,
61
+ "label2id": {
62
+ "B-AGE": 1,
63
+ "B-BUILDINGNUM": 3,
64
+ "B-CITY": 5,
65
+ "B-CREDITCARDNUMBER": 7,
66
+ "B-DATE": 9,
67
+ "B-DRIVERLICENSENUM": 11,
68
+ "B-EMAIL": 13,
69
+ "B-GENDER": 15,
70
+ "B-GIVENNAME": 17,
71
+ "B-IDCARDNUM": 19,
72
+ "B-PASSPORTNUM": 21,
73
+ "B-SEX": 23,
74
+ "B-SOCIALNUM": 25,
75
+ "B-STREET": 27,
76
+ "B-SURNAME": 29,
77
+ "B-TAXNUM": 31,
78
+ "B-TELEPHONENUM": 33,
79
+ "B-TIME": 35,
80
+ "B-TITLE": 37,
81
+ "B-ZIPCODE": 39,
82
+ "I-AGE": 2,
83
+ "I-BUILDINGNUM": 4,
84
+ "I-CITY": 6,
85
+ "I-CREDITCARDNUMBER": 8,
86
+ "I-DATE": 10,
87
+ "I-DRIVERLICENSENUM": 12,
88
+ "I-EMAIL": 14,
89
+ "I-GENDER": 16,
90
+ "I-GIVENNAME": 18,
91
+ "I-IDCARDNUM": 20,
92
+ "I-PASSPORTNUM": 22,
93
+ "I-SEX": 24,
94
+ "I-SOCIALNUM": 26,
95
+ "I-STREET": 28,
96
+ "I-SURNAME": 30,
97
+ "I-TAXNUM": 32,
98
+ "I-TELEPHONENUM": 34,
99
+ "I-TIME": 36,
100
+ "I-TITLE": 38,
101
+ "I-ZIPCODE": 40,
102
+ "O": 0
103
+ },
104
+ "layer_norm_eps": 1e-12,
105
+ "max_position_embeddings": 512,
106
+ "model_type": "bert",
107
+ "num_attention_heads": 12,
108
+ "num_hidden_layers": 12,
109
+ "pad_token_id": 0,
110
+ "pooler_fc_size": 768,
111
+ "pooler_num_attention_heads": 12,
112
+ "pooler_num_fc_layers": 3,
113
+ "pooler_size_per_head": 128,
114
+ "pooler_type": "first_token_transform",
115
+ "tie_word_embeddings": true,
116
+ "transformers_version": "5.3.0",
117
+ "type_vocab_size": 2,
118
+ "use_cache": false,
119
+ "vocab_size": 119547
120
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75a17ac18d5f79d4c2334d5e003a3751d9b5f1b608b582e8838d82f8d0e229e
3
+ size 709200844
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df52a7c057cfeb6c895c1655f300eaeac2acae331d517677c613a4c69c783bc0
3
+ size 1418524683
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a277e53c55289ada91f19b5b0a978dd0e165d40f7ab8277912ecae9f7c5c15b
3
+ size 14645
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:306e1c91f4c04a1a930e746d49bfde9e05f196c12b1678870acc754dc4220834
3
+ size 1465
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/trainer_state.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 26110,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.038299502106472615,
14
+ "grad_norm": 0.8714600801467896,
15
+ "learning_rate": 1.9745180645984938e-05,
16
+ "loss": 0.1691785430908203,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.07659900421294523,
21
+ "grad_norm": 0.588369607925415,
22
+ "learning_rate": 1.9489850631941786e-05,
23
+ "loss": 0.04638611221313477,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.11489850631941785,
28
+ "grad_norm": 1.5041358470916748,
29
+ "learning_rate": 1.9234520617898634e-05,
30
+ "loss": 0.03861086273193359,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.15319800842589046,
35
+ "grad_norm": 0.4620295763015747,
36
+ "learning_rate": 1.8979190603855486e-05,
37
+ "loss": 0.03408417129516601,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.19149751053236308,
42
+ "grad_norm": 1.0186210870742798,
43
+ "learning_rate": 1.8723860589812334e-05,
44
+ "loss": 0.030472009658813477,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.2297970126388357,
49
+ "grad_norm": 0.19274073839187622,
50
+ "learning_rate": 1.8468530575769182e-05,
51
+ "loss": 0.02794113540649414,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.2680965147453083,
56
+ "grad_norm": 0.3304294943809509,
57
+ "learning_rate": 1.8213200561726034e-05,
58
+ "loss": 0.026913631439208986,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.3063960168517809,
63
+ "grad_norm": 0.32137593626976013,
64
+ "learning_rate": 1.7957870547682882e-05,
65
+ "loss": 0.025851396560668944,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.34469551895825357,
70
+ "grad_norm": 0.5200027823448181,
71
+ "learning_rate": 1.770254053363973e-05,
72
+ "loss": 0.02558848571777344,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.38299502106472616,
77
+ "grad_norm": 0.6408317685127258,
78
+ "learning_rate": 1.744721051959658e-05,
79
+ "loss": 0.023505245208740234,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.42129452317119875,
84
+ "grad_norm": 0.46780553460121155,
85
+ "learning_rate": 1.719188050555343e-05,
86
+ "loss": 0.023054824829101563,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.4595940252776714,
91
+ "grad_norm": 0.6626068353652954,
92
+ "learning_rate": 1.6936550491510278e-05,
93
+ "loss": 0.02208795166015625,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.497893527384144,
98
+ "grad_norm": 0.25836509466171265,
99
+ "learning_rate": 1.668122047746713e-05,
100
+ "loss": 0.022046304702758788,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.5361930294906166,
105
+ "grad_norm": 0.8243473172187805,
106
+ "learning_rate": 1.6425890463423978e-05,
107
+ "loss": 0.021231672286987305,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.5744925315970892,
112
+ "grad_norm": 0.26098620891571045,
113
+ "learning_rate": 1.6170560449380826e-05,
114
+ "loss": 0.020609188079833984,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.6127920337035618,
119
+ "grad_norm": 0.1398458480834961,
120
+ "learning_rate": 1.5915230435337677e-05,
121
+ "loss": 0.020333499908447267,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.6510915358100344,
126
+ "grad_norm": 0.44954946637153625,
127
+ "learning_rate": 1.5659900421294526e-05,
128
+ "loss": 0.01971204948425293,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.6893910379165071,
133
+ "grad_norm": 0.2548958957195282,
134
+ "learning_rate": 1.5404570407251374e-05,
135
+ "loss": 0.01934459114074707,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.7276905400229797,
140
+ "grad_norm": 0.31454744935035706,
141
+ "learning_rate": 1.5149240393208222e-05,
142
+ "loss": 0.018815528869628905,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.7659900421294523,
147
+ "grad_norm": 0.15480241179466248,
148
+ "learning_rate": 1.4893910379165073e-05,
149
+ "loss": 0.018857412338256836,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.8042895442359249,
154
+ "grad_norm": 0.23583486676216125,
155
+ "learning_rate": 1.4638580365121922e-05,
156
+ "loss": 0.018428108215332032,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.8425890463423975,
161
+ "grad_norm": 0.289045125246048,
162
+ "learning_rate": 1.438325035107877e-05,
163
+ "loss": 0.01816094207763672,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.8808885484488702,
168
+ "grad_norm": 0.13967347145080566,
169
+ "learning_rate": 1.4127920337035618e-05,
170
+ "loss": 0.01740534973144531,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.9191880505553428,
175
+ "grad_norm": 0.39465662837028503,
176
+ "learning_rate": 1.387259032299247e-05,
177
+ "loss": 0.017327314376831056,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.9574875526618154,
182
+ "grad_norm": 0.5613229274749756,
183
+ "learning_rate": 1.3617260308949318e-05,
184
+ "loss": 0.01724307060241699,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.995787054768288,
189
+ "grad_norm": 0.550220787525177,
190
+ "learning_rate": 1.3361930294906168e-05,
191
+ "loss": 0.017301082611083984,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 1.0,
196
+ "eval_loss": 0.019178859889507294,
197
+ "eval_runtime": 195.4351,
198
+ "eval_samples_per_second": 474.991,
199
+ "eval_steps_per_second": 59.375,
200
+ "step": 13055
201
+ },
202
+ {
203
+ "epoch": 1.0340865568747606,
204
+ "grad_norm": 0.5676391124725342,
205
+ "learning_rate": 1.3106600280863016e-05,
206
+ "loss": 0.014577848434448242,
207
+ "step": 13500
208
+ },
209
+ {
210
+ "epoch": 1.0723860589812333,
211
+ "grad_norm": 0.2776853144168854,
212
+ "learning_rate": 1.2851270266819866e-05,
213
+ "loss": 0.014119720458984375,
214
+ "step": 14000
215
+ },
216
+ {
217
+ "epoch": 1.1106855610877058,
218
+ "grad_norm": 0.38407689332962036,
219
+ "learning_rate": 1.2595940252776716e-05,
220
+ "loss": 0.014150198936462402,
221
+ "step": 14500
222
+ },
223
+ {
224
+ "epoch": 1.1489850631941785,
225
+ "grad_norm": 0.24024367332458496,
226
+ "learning_rate": 1.2340610238733564e-05,
227
+ "loss": 0.014232131958007812,
228
+ "step": 15000
229
+ },
230
+ {
231
+ "epoch": 1.1872845653006512,
232
+ "grad_norm": 0.2031095325946808,
233
+ "learning_rate": 1.2085280224690414e-05,
234
+ "loss": 0.013924983024597168,
235
+ "step": 15500
236
+ },
237
+ {
238
+ "epoch": 1.2255840674071237,
239
+ "grad_norm": 0.4606612026691437,
240
+ "learning_rate": 1.1829950210647263e-05,
241
+ "loss": 0.013915029525756836,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 1.2638835695135964,
246
+ "grad_norm": 0.3309486508369446,
247
+ "learning_rate": 1.1574620196604112e-05,
248
+ "loss": 0.01338797664642334,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 1.3021830716200689,
253
+ "grad_norm": 0.34972071647644043,
254
+ "learning_rate": 1.131929018256096e-05,
255
+ "loss": 0.013172533988952637,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 1.3404825737265416,
260
+ "grad_norm": 0.40405741333961487,
261
+ "learning_rate": 1.1063960168517811e-05,
262
+ "loss": 0.013684582710266114,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 1.3787820758330143,
267
+ "grad_norm": 0.4469183385372162,
268
+ "learning_rate": 1.080863015447466e-05,
269
+ "loss": 0.013521049499511719,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 1.4170815779394867,
274
+ "grad_norm": 0.44263386726379395,
275
+ "learning_rate": 1.0553300140431508e-05,
276
+ "loss": 0.013442005157470703,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 1.4553810800459595,
281
+ "grad_norm": 0.39971089363098145,
282
+ "learning_rate": 1.0297970126388358e-05,
283
+ "loss": 0.013027252197265625,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 1.493680582152432,
288
+ "grad_norm": 0.4588576555252075,
289
+ "learning_rate": 1.0042640112345207e-05,
290
+ "loss": 0.012910510063171387,
291
+ "step": 19500
292
+ },
293
+ {
294
+ "epoch": 1.5319800842589046,
295
+ "grad_norm": 0.24441905319690704,
296
+ "learning_rate": 9.787310098302056e-06,
297
+ "loss": 0.012541162490844726,
298
+ "step": 20000
299
+ },
300
+ {
301
+ "epoch": 1.5702795863653773,
302
+ "grad_norm": 0.12215672433376312,
303
+ "learning_rate": 9.531980084258905e-06,
304
+ "loss": 0.012792759895324708,
305
+ "step": 20500
306
+ },
307
+ {
308
+ "epoch": 1.6085790884718498,
309
+ "grad_norm": 0.3312968909740448,
310
+ "learning_rate": 9.276650070215755e-06,
311
+ "loss": 0.012631108283996582,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 1.6468785905783225,
316
+ "grad_norm": 0.3255740702152252,
317
+ "learning_rate": 9.021320056172604e-06,
318
+ "loss": 0.01261143684387207,
319
+ "step": 21500
320
+ },
321
+ {
322
+ "epoch": 1.685178092684795,
323
+ "grad_norm": 0.2416062355041504,
324
+ "learning_rate": 8.765990042129453e-06,
325
+ "loss": 0.012039584159851074,
326
+ "step": 22000
327
+ },
328
+ {
329
+ "epoch": 1.7234775947912677,
330
+ "grad_norm": 0.34590524435043335,
331
+ "learning_rate": 8.510660028086303e-06,
332
+ "loss": 0.01236446189880371,
333
+ "step": 22500
334
+ },
335
+ {
336
+ "epoch": 1.7617770968977404,
337
+ "grad_norm": 0.5301225185394287,
338
+ "learning_rate": 8.255330014043151e-06,
339
+ "loss": 0.012290955543518067,
340
+ "step": 23000
341
+ },
342
+ {
343
+ "epoch": 1.800076599004213,
344
+ "grad_norm": 0.23181863129138947,
345
+ "learning_rate": 8.000000000000001e-06,
346
+ "loss": 0.012518532752990722,
347
+ "step": 23500
348
+ },
349
+ {
350
+ "epoch": 1.8383761011106856,
351
+ "grad_norm": 0.4152184724807739,
352
+ "learning_rate": 7.74466998595685e-06,
353
+ "loss": 0.01161912727355957,
354
+ "step": 24000
355
+ },
356
+ {
357
+ "epoch": 1.876675603217158,
358
+ "grad_norm": 0.12319803982973099,
359
+ "learning_rate": 7.489339971913699e-06,
360
+ "loss": 0.011899042129516601,
361
+ "step": 24500
362
+ },
363
+ {
364
+ "epoch": 1.9149751053236308,
365
+ "grad_norm": 0.6045161485671997,
366
+ "learning_rate": 7.234009957870548e-06,
367
+ "loss": 0.012299044609069824,
368
+ "step": 25000
369
+ },
370
+ {
371
+ "epoch": 1.9532746074301035,
372
+ "grad_norm": 0.1614077091217041,
373
+ "learning_rate": 6.9786799438273974e-06,
374
+ "loss": 0.011898996353149414,
375
+ "step": 25500
376
+ },
377
+ {
378
+ "epoch": 1.991574109536576,
379
+ "grad_norm": 0.48715102672576904,
380
+ "learning_rate": 6.7233499297842464e-06,
381
+ "loss": 0.01182526969909668,
382
+ "step": 26000
383
+ },
384
+ {
385
+ "epoch": 2.0,
386
+ "eval_loss": 0.015901656821370125,
387
+ "eval_runtime": 195.5296,
388
+ "eval_samples_per_second": 474.762,
389
+ "eval_steps_per_second": 59.347,
390
+ "step": 26110
391
+ }
392
+ ],
393
+ "logging_steps": 500,
394
+ "max_steps": 39165,
395
+ "num_input_tokens_seen": 0,
396
+ "num_train_epochs": 3,
397
+ "save_steps": 500,
398
+ "stateful_callbacks": {
399
+ "TrainerControl": {
400
+ "args": {
401
+ "should_epoch_stop": false,
402
+ "should_evaluate": false,
403
+ "should_log": false,
404
+ "should_save": true,
405
+ "should_training_stop": false
406
+ },
407
+ "attributes": {}
408
+ }
409
+ },
410
+ "total_flos": 4.367651503327027e+17,
411
+ "train_batch_size": 64,
412
+ "trial_name": null,
413
+ "trial_params": null
414
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-26110/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb80cf54f2fe927cc63c861aa4b3e87525629b3d1868eeda1dedae9009697f1c
3
+ size 5265
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/config.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-AGE",
18
+ "2": "I-AGE",
19
+ "3": "B-BUILDINGNUM",
20
+ "4": "I-BUILDINGNUM",
21
+ "5": "B-CITY",
22
+ "6": "I-CITY",
23
+ "7": "B-CREDITCARDNUMBER",
24
+ "8": "I-CREDITCARDNUMBER",
25
+ "9": "B-DATE",
26
+ "10": "I-DATE",
27
+ "11": "B-DRIVERLICENSENUM",
28
+ "12": "I-DRIVERLICENSENUM",
29
+ "13": "B-EMAIL",
30
+ "14": "I-EMAIL",
31
+ "15": "B-GENDER",
32
+ "16": "I-GENDER",
33
+ "17": "B-GIVENNAME",
34
+ "18": "I-GIVENNAME",
35
+ "19": "B-IDCARDNUM",
36
+ "20": "I-IDCARDNUM",
37
+ "21": "B-PASSPORTNUM",
38
+ "22": "I-PASSPORTNUM",
39
+ "23": "B-SEX",
40
+ "24": "I-SEX",
41
+ "25": "B-SOCIALNUM",
42
+ "26": "I-SOCIALNUM",
43
+ "27": "B-STREET",
44
+ "28": "I-STREET",
45
+ "29": "B-SURNAME",
46
+ "30": "I-SURNAME",
47
+ "31": "B-TAXNUM",
48
+ "32": "I-TAXNUM",
49
+ "33": "B-TELEPHONENUM",
50
+ "34": "I-TELEPHONENUM",
51
+ "35": "B-TIME",
52
+ "36": "I-TIME",
53
+ "37": "B-TITLE",
54
+ "38": "I-TITLE",
55
+ "39": "B-ZIPCODE",
56
+ "40": "I-ZIPCODE"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "is_decoder": false,
61
+ "label2id": {
62
+ "B-AGE": 1,
63
+ "B-BUILDINGNUM": 3,
64
+ "B-CITY": 5,
65
+ "B-CREDITCARDNUMBER": 7,
66
+ "B-DATE": 9,
67
+ "B-DRIVERLICENSENUM": 11,
68
+ "B-EMAIL": 13,
69
+ "B-GENDER": 15,
70
+ "B-GIVENNAME": 17,
71
+ "B-IDCARDNUM": 19,
72
+ "B-PASSPORTNUM": 21,
73
+ "B-SEX": 23,
74
+ "B-SOCIALNUM": 25,
75
+ "B-STREET": 27,
76
+ "B-SURNAME": 29,
77
+ "B-TAXNUM": 31,
78
+ "B-TELEPHONENUM": 33,
79
+ "B-TIME": 35,
80
+ "B-TITLE": 37,
81
+ "B-ZIPCODE": 39,
82
+ "I-AGE": 2,
83
+ "I-BUILDINGNUM": 4,
84
+ "I-CITY": 6,
85
+ "I-CREDITCARDNUMBER": 8,
86
+ "I-DATE": 10,
87
+ "I-DRIVERLICENSENUM": 12,
88
+ "I-EMAIL": 14,
89
+ "I-GENDER": 16,
90
+ "I-GIVENNAME": 18,
91
+ "I-IDCARDNUM": 20,
92
+ "I-PASSPORTNUM": 22,
93
+ "I-SEX": 24,
94
+ "I-SOCIALNUM": 26,
95
+ "I-STREET": 28,
96
+ "I-SURNAME": 30,
97
+ "I-TAXNUM": 32,
98
+ "I-TELEPHONENUM": 34,
99
+ "I-TIME": 36,
100
+ "I-TITLE": 38,
101
+ "I-ZIPCODE": 40,
102
+ "O": 0
103
+ },
104
+ "layer_norm_eps": 1e-12,
105
+ "max_position_embeddings": 512,
106
+ "model_type": "bert",
107
+ "num_attention_heads": 12,
108
+ "num_hidden_layers": 12,
109
+ "pad_token_id": 0,
110
+ "pooler_fc_size": 768,
111
+ "pooler_num_attention_heads": 12,
112
+ "pooler_num_fc_layers": 3,
113
+ "pooler_size_per_head": 128,
114
+ "pooler_type": "first_token_transform",
115
+ "tie_word_embeddings": true,
116
+ "transformers_version": "5.3.0",
117
+ "type_vocab_size": 2,
118
+ "use_cache": false,
119
+ "vocab_size": 119547
120
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb1bc9434fc8bb90f1ee23d6681062fc6e79f5084569691a6c27b06e052485f
3
+ size 709200844
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d826db9b613c1a06721bd27a59a79e572a97efd2428065e606b69a891c995b75
3
+ size 1418524683
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaadb28835e3c6aa08bb70cd5e8cbcafc183c39195d5e8e45c7751f80b6361fe
3
+ size 14645
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72ccd1a7d41794f33f843073beb481aebe99715fe6df649ba67dd547b0b1ad1
3
+ size 1465
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/trainer_state.json ADDED
@@ -0,0 +1,604 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 39165,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.038299502106472615,
14
+ "grad_norm": 0.8714600801467896,
15
+ "learning_rate": 1.9745180645984938e-05,
16
+ "loss": 0.1691785430908203,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.07659900421294523,
21
+ "grad_norm": 0.588369607925415,
22
+ "learning_rate": 1.9489850631941786e-05,
23
+ "loss": 0.04638611221313477,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.11489850631941785,
28
+ "grad_norm": 1.5041358470916748,
29
+ "learning_rate": 1.9234520617898634e-05,
30
+ "loss": 0.03861086273193359,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.15319800842589046,
35
+ "grad_norm": 0.4620295763015747,
36
+ "learning_rate": 1.8979190603855486e-05,
37
+ "loss": 0.03408417129516601,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.19149751053236308,
42
+ "grad_norm": 1.0186210870742798,
43
+ "learning_rate": 1.8723860589812334e-05,
44
+ "loss": 0.030472009658813477,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.2297970126388357,
49
+ "grad_norm": 0.19274073839187622,
50
+ "learning_rate": 1.8468530575769182e-05,
51
+ "loss": 0.02794113540649414,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.2680965147453083,
56
+ "grad_norm": 0.3304294943809509,
57
+ "learning_rate": 1.8213200561726034e-05,
58
+ "loss": 0.026913631439208986,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.3063960168517809,
63
+ "grad_norm": 0.32137593626976013,
64
+ "learning_rate": 1.7957870547682882e-05,
65
+ "loss": 0.025851396560668944,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.34469551895825357,
70
+ "grad_norm": 0.5200027823448181,
71
+ "learning_rate": 1.770254053363973e-05,
72
+ "loss": 0.02558848571777344,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.38299502106472616,
77
+ "grad_norm": 0.6408317685127258,
78
+ "learning_rate": 1.744721051959658e-05,
79
+ "loss": 0.023505245208740234,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.42129452317119875,
84
+ "grad_norm": 0.46780553460121155,
85
+ "learning_rate": 1.719188050555343e-05,
86
+ "loss": 0.023054824829101563,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.4595940252776714,
91
+ "grad_norm": 0.6626068353652954,
92
+ "learning_rate": 1.6936550491510278e-05,
93
+ "loss": 0.02208795166015625,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.497893527384144,
98
+ "grad_norm": 0.25836509466171265,
99
+ "learning_rate": 1.668122047746713e-05,
100
+ "loss": 0.022046304702758788,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.5361930294906166,
105
+ "grad_norm": 0.8243473172187805,
106
+ "learning_rate": 1.6425890463423978e-05,
107
+ "loss": 0.021231672286987305,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.5744925315970892,
112
+ "grad_norm": 0.26098620891571045,
113
+ "learning_rate": 1.6170560449380826e-05,
114
+ "loss": 0.020609188079833984,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.6127920337035618,
119
+ "grad_norm": 0.1398458480834961,
120
+ "learning_rate": 1.5915230435337677e-05,
121
+ "loss": 0.020333499908447267,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.6510915358100344,
126
+ "grad_norm": 0.44954946637153625,
127
+ "learning_rate": 1.5659900421294526e-05,
128
+ "loss": 0.01971204948425293,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.6893910379165071,
133
+ "grad_norm": 0.2548958957195282,
134
+ "learning_rate": 1.5404570407251374e-05,
135
+ "loss": 0.01934459114074707,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.7276905400229797,
140
+ "grad_norm": 0.31454744935035706,
141
+ "learning_rate": 1.5149240393208222e-05,
142
+ "loss": 0.018815528869628905,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.7659900421294523,
147
+ "grad_norm": 0.15480241179466248,
148
+ "learning_rate": 1.4893910379165073e-05,
149
+ "loss": 0.018857412338256836,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.8042895442359249,
154
+ "grad_norm": 0.23583486676216125,
155
+ "learning_rate": 1.4638580365121922e-05,
156
+ "loss": 0.018428108215332032,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.8425890463423975,
161
+ "grad_norm": 0.289045125246048,
162
+ "learning_rate": 1.438325035107877e-05,
163
+ "loss": 0.01816094207763672,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.8808885484488702,
168
+ "grad_norm": 0.13967347145080566,
169
+ "learning_rate": 1.4127920337035618e-05,
170
+ "loss": 0.01740534973144531,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.9191880505553428,
175
+ "grad_norm": 0.39465662837028503,
176
+ "learning_rate": 1.387259032299247e-05,
177
+ "loss": 0.017327314376831056,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.9574875526618154,
182
+ "grad_norm": 0.5613229274749756,
183
+ "learning_rate": 1.3617260308949318e-05,
184
+ "loss": 0.01724307060241699,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.995787054768288,
189
+ "grad_norm": 0.550220787525177,
190
+ "learning_rate": 1.3361930294906168e-05,
191
+ "loss": 0.017301082611083984,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 1.0,
196
+ "eval_loss": 0.019178859889507294,
197
+ "eval_runtime": 195.4351,
198
+ "eval_samples_per_second": 474.991,
199
+ "eval_steps_per_second": 59.375,
200
+ "step": 13055
201
+ },
202
+ {
203
+ "epoch": 1.0340865568747606,
204
+ "grad_norm": 0.5676391124725342,
205
+ "learning_rate": 1.3106600280863016e-05,
206
+ "loss": 0.014577848434448242,
207
+ "step": 13500
208
+ },
209
+ {
210
+ "epoch": 1.0723860589812333,
211
+ "grad_norm": 0.2776853144168854,
212
+ "learning_rate": 1.2851270266819866e-05,
213
+ "loss": 0.014119720458984375,
214
+ "step": 14000
215
+ },
216
+ {
217
+ "epoch": 1.1106855610877058,
218
+ "grad_norm": 0.38407689332962036,
219
+ "learning_rate": 1.2595940252776716e-05,
220
+ "loss": 0.014150198936462402,
221
+ "step": 14500
222
+ },
223
+ {
224
+ "epoch": 1.1489850631941785,
225
+ "grad_norm": 0.24024367332458496,
226
+ "learning_rate": 1.2340610238733564e-05,
227
+ "loss": 0.014232131958007812,
228
+ "step": 15000
229
+ },
230
+ {
231
+ "epoch": 1.1872845653006512,
232
+ "grad_norm": 0.2031095325946808,
233
+ "learning_rate": 1.2085280224690414e-05,
234
+ "loss": 0.013924983024597168,
235
+ "step": 15500
236
+ },
237
+ {
238
+ "epoch": 1.2255840674071237,
239
+ "grad_norm": 0.4606612026691437,
240
+ "learning_rate": 1.1829950210647263e-05,
241
+ "loss": 0.013915029525756836,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 1.2638835695135964,
246
+ "grad_norm": 0.3309486508369446,
247
+ "learning_rate": 1.1574620196604112e-05,
248
+ "loss": 0.01338797664642334,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 1.3021830716200689,
253
+ "grad_norm": 0.34972071647644043,
254
+ "learning_rate": 1.131929018256096e-05,
255
+ "loss": 0.013172533988952637,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 1.3404825737265416,
260
+ "grad_norm": 0.40405741333961487,
261
+ "learning_rate": 1.1063960168517811e-05,
262
+ "loss": 0.013684582710266114,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 1.3787820758330143,
267
+ "grad_norm": 0.4469183385372162,
268
+ "learning_rate": 1.080863015447466e-05,
269
+ "loss": 0.013521049499511719,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 1.4170815779394867,
274
+ "grad_norm": 0.44263386726379395,
275
+ "learning_rate": 1.0553300140431508e-05,
276
+ "loss": 0.013442005157470703,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 1.4553810800459595,
281
+ "grad_norm": 0.39971089363098145,
282
+ "learning_rate": 1.0297970126388358e-05,
283
+ "loss": 0.013027252197265625,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 1.493680582152432,
288
+ "grad_norm": 0.4588576555252075,
289
+ "learning_rate": 1.0042640112345207e-05,
290
+ "loss": 0.012910510063171387,
291
+ "step": 19500
292
+ },
293
+ {
294
+ "epoch": 1.5319800842589046,
295
+ "grad_norm": 0.24441905319690704,
296
+ "learning_rate": 9.787310098302056e-06,
297
+ "loss": 0.012541162490844726,
298
+ "step": 20000
299
+ },
300
+ {
301
+ "epoch": 1.5702795863653773,
302
+ "grad_norm": 0.12215672433376312,
303
+ "learning_rate": 9.531980084258905e-06,
304
+ "loss": 0.012792759895324708,
305
+ "step": 20500
306
+ },
307
+ {
308
+ "epoch": 1.6085790884718498,
309
+ "grad_norm": 0.3312968909740448,
310
+ "learning_rate": 9.276650070215755e-06,
311
+ "loss": 0.012631108283996582,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 1.6468785905783225,
316
+ "grad_norm": 0.3255740702152252,
317
+ "learning_rate": 9.021320056172604e-06,
318
+ "loss": 0.01261143684387207,
319
+ "step": 21500
320
+ },
321
+ {
322
+ "epoch": 1.685178092684795,
323
+ "grad_norm": 0.2416062355041504,
324
+ "learning_rate": 8.765990042129453e-06,
325
+ "loss": 0.012039584159851074,
326
+ "step": 22000
327
+ },
328
+ {
329
+ "epoch": 1.7234775947912677,
330
+ "grad_norm": 0.34590524435043335,
331
+ "learning_rate": 8.510660028086303e-06,
332
+ "loss": 0.01236446189880371,
333
+ "step": 22500
334
+ },
335
+ {
336
+ "epoch": 1.7617770968977404,
337
+ "grad_norm": 0.5301225185394287,
338
+ "learning_rate": 8.255330014043151e-06,
339
+ "loss": 0.012290955543518067,
340
+ "step": 23000
341
+ },
342
+ {
343
+ "epoch": 1.800076599004213,
344
+ "grad_norm": 0.23181863129138947,
345
+ "learning_rate": 8.000000000000001e-06,
346
+ "loss": 0.012518532752990722,
347
+ "step": 23500
348
+ },
349
+ {
350
+ "epoch": 1.8383761011106856,
351
+ "grad_norm": 0.4152184724807739,
352
+ "learning_rate": 7.74466998595685e-06,
353
+ "loss": 0.01161912727355957,
354
+ "step": 24000
355
+ },
356
+ {
357
+ "epoch": 1.876675603217158,
358
+ "grad_norm": 0.12319803982973099,
359
+ "learning_rate": 7.489339971913699e-06,
360
+ "loss": 0.011899042129516601,
361
+ "step": 24500
362
+ },
363
+ {
364
+ "epoch": 1.9149751053236308,
365
+ "grad_norm": 0.6045161485671997,
366
+ "learning_rate": 7.234009957870548e-06,
367
+ "loss": 0.012299044609069824,
368
+ "step": 25000
369
+ },
370
+ {
371
+ "epoch": 1.9532746074301035,
372
+ "grad_norm": 0.1614077091217041,
373
+ "learning_rate": 6.9786799438273974e-06,
374
+ "loss": 0.011898996353149414,
375
+ "step": 25500
376
+ },
377
+ {
378
+ "epoch": 1.991574109536576,
379
+ "grad_norm": 0.48715102672576904,
380
+ "learning_rate": 6.7233499297842464e-06,
381
+ "loss": 0.01182526969909668,
382
+ "step": 26000
383
+ },
384
+ {
385
+ "epoch": 2.0,
386
+ "eval_loss": 0.015901656821370125,
387
+ "eval_runtime": 195.5296,
388
+ "eval_samples_per_second": 474.762,
389
+ "eval_steps_per_second": 59.347,
390
+ "step": 26110
391
+ },
392
+ {
393
+ "epoch": 2.0298736116430485,
394
+ "grad_norm": 0.21605870127677917,
395
+ "learning_rate": 6.468019915741096e-06,
396
+ "loss": 0.009737834930419922,
397
+ "step": 26500
398
+ },
399
+ {
400
+ "epoch": 2.068173113749521,
401
+ "grad_norm": 0.13223645091056824,
402
+ "learning_rate": 6.2126899016979445e-06,
403
+ "loss": 0.0094491548538208,
404
+ "step": 27000
405
+ },
406
+ {
407
+ "epoch": 2.106472615855994,
408
+ "grad_norm": 0.2158566415309906,
409
+ "learning_rate": 5.957359887654794e-06,
410
+ "loss": 0.009373339653015136,
411
+ "step": 27500
412
+ },
413
+ {
414
+ "epoch": 2.1447721179624666,
415
+ "grad_norm": 0.2324061244726181,
416
+ "learning_rate": 5.702029873611643e-06,
417
+ "loss": 0.00945040225982666,
418
+ "step": 28000
419
+ },
420
+ {
421
+ "epoch": 2.1830716200689393,
422
+ "grad_norm": 0.14569459855556488,
423
+ "learning_rate": 5.446699859568492e-06,
424
+ "loss": 0.009708081245422363,
425
+ "step": 28500
426
+ },
427
+ {
428
+ "epoch": 2.2213711221754115,
429
+ "grad_norm": 0.3522016406059265,
430
+ "learning_rate": 5.1913698455253414e-06,
431
+ "loss": 0.009668509483337402,
432
+ "step": 29000
433
+ },
434
+ {
435
+ "epoch": 2.2596706242818843,
436
+ "grad_norm": 0.39561134576797485,
437
+ "learning_rate": 4.936039831482191e-06,
438
+ "loss": 0.009597929000854492,
439
+ "step": 29500
440
+ },
441
+ {
442
+ "epoch": 2.297970126388357,
443
+ "grad_norm": 0.12239322811365128,
444
+ "learning_rate": 4.68070981743904e-06,
445
+ "loss": 0.009111111640930175,
446
+ "step": 30000
447
+ },
448
+ {
449
+ "epoch": 2.3362696284948297,
450
+ "grad_norm": 0.25219589471817017,
451
+ "learning_rate": 4.425379803395889e-06,
452
+ "loss": 0.009667729377746582,
453
+ "step": 30500
454
+ },
455
+ {
456
+ "epoch": 2.3745691306013024,
457
+ "grad_norm": 0.22091256082057953,
458
+ "learning_rate": 4.170049789352738e-06,
459
+ "loss": 0.009386078834533691,
460
+ "step": 31000
461
+ },
462
+ {
463
+ "epoch": 2.4128686327077746,
464
+ "grad_norm": 0.2760886549949646,
465
+ "learning_rate": 3.914719775309587e-06,
466
+ "loss": 0.009332194328308105,
467
+ "step": 31500
468
+ },
469
+ {
470
+ "epoch": 2.4511681348142473,
471
+ "grad_norm": 0.27975377440452576,
472
+ "learning_rate": 3.659389761266437e-06,
473
+ "loss": 0.008895779609680176,
474
+ "step": 32000
475
+ },
476
+ {
477
+ "epoch": 2.48946763692072,
478
+ "grad_norm": 0.1745811402797699,
479
+ "learning_rate": 3.4040597472232863e-06,
480
+ "loss": 0.00910122299194336,
481
+ "step": 32500
482
+ },
483
+ {
484
+ "epoch": 2.5277671390271927,
485
+ "grad_norm": 0.09813889116048813,
486
+ "learning_rate": 3.1487297331801353e-06,
487
+ "loss": 0.009047582626342773,
488
+ "step": 33000
489
+ },
490
+ {
491
+ "epoch": 2.5660666411336654,
492
+ "grad_norm": 0.37277743220329285,
493
+ "learning_rate": 2.893399719136985e-06,
494
+ "loss": 0.008934443473815917,
495
+ "step": 33500
496
+ },
497
+ {
498
+ "epoch": 2.6043661432401377,
499
+ "grad_norm": 0.43550804257392883,
500
+ "learning_rate": 2.638069705093834e-06,
501
+ "loss": 0.009341882705688477,
502
+ "step": 34000
503
+ },
504
+ {
505
+ "epoch": 2.6426656453466104,
506
+ "grad_norm": 0.24221749603748322,
507
+ "learning_rate": 2.3827396910506832e-06,
508
+ "loss": 0.008809703826904297,
509
+ "step": 34500
510
+ },
511
+ {
512
+ "epoch": 2.680965147453083,
513
+ "grad_norm": 0.15879695117473602,
514
+ "learning_rate": 2.1274096770075327e-06,
515
+ "loss": 0.009361488342285156,
516
+ "step": 35000
517
+ },
518
+ {
519
+ "epoch": 2.719264649559556,
520
+ "grad_norm": 0.16028359532356262,
521
+ "learning_rate": 1.8720796629643817e-06,
522
+ "loss": 0.008846318244934082,
523
+ "step": 35500
524
+ },
525
+ {
526
+ "epoch": 2.7575641516660285,
527
+ "grad_norm": 0.25445544719696045,
528
+ "learning_rate": 1.616749648921231e-06,
529
+ "loss": 0.008740591049194337,
530
+ "step": 36000
531
+ },
532
+ {
533
+ "epoch": 2.795863653772501,
534
+ "grad_norm": 0.2028861790895462,
535
+ "learning_rate": 1.36141963487808e-06,
536
+ "loss": 0.00920598030090332,
537
+ "step": 36500
538
+ },
539
+ {
540
+ "epoch": 2.8341631558789735,
541
+ "grad_norm": 0.27509695291519165,
542
+ "learning_rate": 1.1060896208349292e-06,
543
+ "loss": 0.008649415016174316,
544
+ "step": 37000
545
+ },
546
+ {
547
+ "epoch": 2.872462657985446,
548
+ "grad_norm": 0.2710762023925781,
549
+ "learning_rate": 8.507596067917784e-07,
550
+ "loss": 0.008570868492126465,
551
+ "step": 37500
552
+ },
553
+ {
554
+ "epoch": 2.910762160091919,
555
+ "grad_norm": 0.6078771352767944,
556
+ "learning_rate": 5.954295927486277e-07,
557
+ "loss": 0.008855979919433593,
558
+ "step": 38000
559
+ },
560
+ {
561
+ "epoch": 2.9490616621983916,
562
+ "grad_norm": 0.28367650508880615,
563
+ "learning_rate": 3.4009957870547684e-07,
564
+ "loss": 0.008818706512451173,
565
+ "step": 38500
566
+ },
567
+ {
568
+ "epoch": 2.987361164304864,
569
+ "grad_norm": 0.11465097963809967,
570
+ "learning_rate": 8.476956466232606e-08,
571
+ "loss": 0.00848841953277588,
572
+ "step": 39000
573
+ },
574
+ {
575
+ "epoch": 3.0,
576
+ "eval_loss": 0.014914697967469692,
577
+ "eval_runtime": 177.3666,
578
+ "eval_samples_per_second": 523.379,
579
+ "eval_steps_per_second": 65.424,
580
+ "step": 39165
581
+ }
582
+ ],
583
+ "logging_steps": 500,
584
+ "max_steps": 39165,
585
+ "num_input_tokens_seen": 0,
586
+ "num_train_epochs": 3,
587
+ "save_steps": 500,
588
+ "stateful_callbacks": {
589
+ "TrainerControl": {
590
+ "args": {
591
+ "should_epoch_stop": false,
592
+ "should_evaluate": false,
593
+ "should_log": false,
594
+ "should_save": true,
595
+ "should_training_stop": true
596
+ },
597
+ "attributes": {}
598
+ }
599
+ },
600
+ "total_flos": 6.551477254990541e+17,
601
+ "train_batch_size": 64,
602
+ "trial_name": null,
603
+ "trial_params": null
604
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/checkpoint-39165/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb80cf54f2fe927cc63c861aa4b3e87525629b3d1868eeda1dedae9009697f1c
3
+ size 5265
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/config.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-AGE",
18
+ "2": "I-AGE",
19
+ "3": "B-BUILDINGNUM",
20
+ "4": "I-BUILDINGNUM",
21
+ "5": "B-CITY",
22
+ "6": "I-CITY",
23
+ "7": "B-CREDITCARDNUMBER",
24
+ "8": "I-CREDITCARDNUMBER",
25
+ "9": "B-DATE",
26
+ "10": "I-DATE",
27
+ "11": "B-DRIVERLICENSENUM",
28
+ "12": "I-DRIVERLICENSENUM",
29
+ "13": "B-EMAIL",
30
+ "14": "I-EMAIL",
31
+ "15": "B-GENDER",
32
+ "16": "I-GENDER",
33
+ "17": "B-GIVENNAME",
34
+ "18": "I-GIVENNAME",
35
+ "19": "B-IDCARDNUM",
36
+ "20": "I-IDCARDNUM",
37
+ "21": "B-PASSPORTNUM",
38
+ "22": "I-PASSPORTNUM",
39
+ "23": "B-SEX",
40
+ "24": "I-SEX",
41
+ "25": "B-SOCIALNUM",
42
+ "26": "I-SOCIALNUM",
43
+ "27": "B-STREET",
44
+ "28": "I-STREET",
45
+ "29": "B-SURNAME",
46
+ "30": "I-SURNAME",
47
+ "31": "B-TAXNUM",
48
+ "32": "I-TAXNUM",
49
+ "33": "B-TELEPHONENUM",
50
+ "34": "I-TELEPHONENUM",
51
+ "35": "B-TIME",
52
+ "36": "I-TIME",
53
+ "37": "B-TITLE",
54
+ "38": "I-TITLE",
55
+ "39": "B-ZIPCODE",
56
+ "40": "I-ZIPCODE"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "is_decoder": false,
61
+ "label2id": {
62
+ "B-AGE": 1,
63
+ "B-BUILDINGNUM": 3,
64
+ "B-CITY": 5,
65
+ "B-CREDITCARDNUMBER": 7,
66
+ "B-DATE": 9,
67
+ "B-DRIVERLICENSENUM": 11,
68
+ "B-EMAIL": 13,
69
+ "B-GENDER": 15,
70
+ "B-GIVENNAME": 17,
71
+ "B-IDCARDNUM": 19,
72
+ "B-PASSPORTNUM": 21,
73
+ "B-SEX": 23,
74
+ "B-SOCIALNUM": 25,
75
+ "B-STREET": 27,
76
+ "B-SURNAME": 29,
77
+ "B-TAXNUM": 31,
78
+ "B-TELEPHONENUM": 33,
79
+ "B-TIME": 35,
80
+ "B-TITLE": 37,
81
+ "B-ZIPCODE": 39,
82
+ "I-AGE": 2,
83
+ "I-BUILDINGNUM": 4,
84
+ "I-CITY": 6,
85
+ "I-CREDITCARDNUMBER": 8,
86
+ "I-DATE": 10,
87
+ "I-DRIVERLICENSENUM": 12,
88
+ "I-EMAIL": 14,
89
+ "I-GENDER": 16,
90
+ "I-GIVENNAME": 18,
91
+ "I-IDCARDNUM": 20,
92
+ "I-PASSPORTNUM": 22,
93
+ "I-SEX": 24,
94
+ "I-SOCIALNUM": 26,
95
+ "I-STREET": 28,
96
+ "I-SURNAME": 30,
97
+ "I-TAXNUM": 32,
98
+ "I-TELEPHONENUM": 34,
99
+ "I-TIME": 36,
100
+ "I-TITLE": 38,
101
+ "I-ZIPCODE": 40,
102
+ "O": 0
103
+ },
104
+ "layer_norm_eps": 1e-12,
105
+ "max_position_embeddings": 512,
106
+ "model_type": "bert",
107
+ "num_attention_heads": 12,
108
+ "num_hidden_layers": 12,
109
+ "pad_token_id": 0,
110
+ "pooler_fc_size": 768,
111
+ "pooler_num_attention_heads": 12,
112
+ "pooler_num_fc_layers": 3,
113
+ "pooler_size_per_head": 128,
114
+ "pooler_type": "first_token_transform",
115
+ "tie_word_embeddings": true,
116
+ "transformers_version": "5.3.0",
117
+ "type_vocab_size": 2,
118
+ "use_cache": false,
119
+ "vocab_size": 119547
120
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb1bc9434fc8bb90f1ee23d6681062fc6e79f5084569691a6c27b06e052485f
3
+ size 709200844
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
open-pii-masking-500k-ai4privacy-augmented/TokenBased-BERT/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb80cf54f2fe927cc63c861aa4b3e87525629b3d1868eeda1dedae9009697f1c
3
+ size 5265