dsorokin commited on
Commit
75c83d9
·
1 Parent(s): 75d4a1b

add models

Browse files
checkpoint-3000/config.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "cs.AI",
13
+ "1": "cs.CL",
14
+ "2": "stat.ML",
15
+ "3": "cs.CV",
16
+ "4": "cs.LG",
17
+ "5": "cs.NE",
18
+ "6": "cs.RO",
19
+ "7": "cs.IR",
20
+ "8": "cs.IT",
21
+ "9": "astro-ph.IM",
22
+ "10": "cs.DB",
23
+ "11": "cs.SE",
24
+ "12": "q-bio.NC",
25
+ "13": "eess.IV",
26
+ "14": "cs.SI",
27
+ "15": "cs.CR",
28
+ "16": "cs.SD",
29
+ "17": "cs.NI",
30
+ "18": "cond-mat.dis-nn",
31
+ "19": "math.OC",
32
+ "20": "math.NA",
33
+ "21": "eess.SP",
34
+ "22": "cs.CC",
35
+ "23": "cs.MM",
36
+ "24": "cs.DC",
37
+ "25": "physics.med-ph",
38
+ "26": "cs.HC",
39
+ "27": "cs.CY",
40
+ "28": "cs.FL",
41
+ "29": "q-bio.QM",
42
+ "30": "eess.AS",
43
+ "31": "quant-ph",
44
+ "32": "q-bio.BM",
45
+ "33": "q-fin.CP",
46
+ "34": "cs.SY",
47
+ "35": "stat.AP",
48
+ "36": "cs.NA",
49
+ "37": "cs.GT",
50
+ "38": "physics.optics",
51
+ "39": "gr-qc",
52
+ "40": "stat.CO",
53
+ "41": "physics.comp-ph",
54
+ "42": "math.ST",
55
+ "43": "nlin.AO",
56
+ "44": "cs.DS",
57
+ "45": "cs.DM",
58
+ "46": "cs.MA",
59
+ "47": "astro-ph",
60
+ "48": "cs.CE",
61
+ "49": "cs.AR",
62
+ "50": "cs.GR",
63
+ "51": "astro-ph.CO",
64
+ "52": "cs.ET",
65
+ "53": "q-fin.ST",
66
+ "54": "physics.data-an",
67
+ "55": "stat.ME",
68
+ "56": "math.PR",
69
+ "57": "cs.DL",
70
+ "58": "physics.soc-ph",
71
+ "59": "nlin.CG",
72
+ "60": "q-fin.PM",
73
+ "61": "cmp-lg",
74
+ "62": "cond-mat.stat-mech",
75
+ "63": "cs.OH",
76
+ "64": "math.DS",
77
+ "65": "econ.EM",
78
+ "66": "cs.PF",
79
+ "67": "math.FA",
80
+ "68": "math.AP",
81
+ "69": "astro-ph.GA",
82
+ "70": "math.GM",
83
+ "71": "q-bio.GN",
84
+ "72": "cs.LO",
85
+ "73": "cs.PL",
86
+ "74": "math.CT",
87
+ "75": "cs.MS",
88
+ "76": "q-bio.PE",
89
+ "77": "math.CO",
90
+ "78": "math.GT",
91
+ "79": "cond-mat.mtrl-sci",
92
+ "80": "physics.chem-ph",
93
+ "81": "cs.CG",
94
+ "82": "math.DG",
95
+ "83": "q-bio.MN",
96
+ "84": "q-fin.TR",
97
+ "85": "hep-ex",
98
+ "86": "astro-ph.EP",
99
+ "87": "math.HO",
100
+ "88": "hep-th",
101
+ "89": "math.AT",
102
+ "90": "physics.bio-ph",
103
+ "91": "nucl-th",
104
+ "92": "hep-ph",
105
+ "93": "math.MG",
106
+ "94": "hep-lat",
107
+ "95": "math.LO",
108
+ "96": "physics.class-ph",
109
+ "97": "q-bio.CB",
110
+ "98": "cond-mat.supr-con",
111
+ "99": "physics.ao-ph",
112
+ "100": "nlin.CD",
113
+ "101": "physics.gen-ph",
114
+ "102": "adap-org",
115
+ "103": "cond-mat",
116
+ "104": "cond-mat.other",
117
+ "105": "math.GR",
118
+ "106": "physics.ins-det",
119
+ "107": "nlin.PS",
120
+ "108": "cs.SC",
121
+ "109": "q-fin.RM",
122
+ "110": "stat.OT",
123
+ "111": "cs.GL",
124
+ "112": "physics.hist-ph",
125
+ "113": "math.NT",
126
+ "114": "q-fin.GN",
127
+ "115": "q-fin.EC",
128
+ "116": "cs.OS",
129
+ "117": "cond-mat.soft",
130
+ "118": "math.AG",
131
+ "119": "math.CA",
132
+ "120": "math.RT",
133
+ "121": "math.RA",
134
+ "122": "physics.geo-ph",
135
+ "123": "astro-ph.SR",
136
+ "124": "q-bio.TO",
137
+ "125": "math.GN"
138
+ },
139
+ "initializer_range": 0.02,
140
+ "label2id": {
141
+ "adap-org": 102,
142
+ "astro-ph": 47,
143
+ "astro-ph.CO": 51,
144
+ "astro-ph.EP": 86,
145
+ "astro-ph.GA": 69,
146
+ "astro-ph.IM": 9,
147
+ "astro-ph.SR": 123,
148
+ "cmp-lg": 61,
149
+ "cond-mat": 103,
150
+ "cond-mat.dis-nn": 18,
151
+ "cond-mat.mtrl-sci": 79,
152
+ "cond-mat.other": 104,
153
+ "cond-mat.soft": 117,
154
+ "cond-mat.stat-mech": 62,
155
+ "cond-mat.supr-con": 98,
156
+ "cs.AI": 0,
157
+ "cs.AR": 49,
158
+ "cs.CC": 22,
159
+ "cs.CE": 48,
160
+ "cs.CG": 81,
161
+ "cs.CL": 1,
162
+ "cs.CR": 15,
163
+ "cs.CV": 3,
164
+ "cs.CY": 27,
165
+ "cs.DB": 10,
166
+ "cs.DC": 24,
167
+ "cs.DL": 57,
168
+ "cs.DM": 45,
169
+ "cs.DS": 44,
170
+ "cs.ET": 52,
171
+ "cs.FL": 28,
172
+ "cs.GL": 111,
173
+ "cs.GR": 50,
174
+ "cs.GT": 37,
175
+ "cs.HC": 26,
176
+ "cs.IR": 7,
177
+ "cs.IT": 8,
178
+ "cs.LG": 4,
179
+ "cs.LO": 72,
180
+ "cs.MA": 46,
181
+ "cs.MM": 23,
182
+ "cs.MS": 75,
183
+ "cs.NA": 36,
184
+ "cs.NE": 5,
185
+ "cs.NI": 17,
186
+ "cs.OH": 63,
187
+ "cs.OS": 116,
188
+ "cs.PF": 66,
189
+ "cs.PL": 73,
190
+ "cs.RO": 6,
191
+ "cs.SC": 108,
192
+ "cs.SD": 16,
193
+ "cs.SE": 11,
194
+ "cs.SI": 14,
195
+ "cs.SY": 34,
196
+ "econ.EM": 65,
197
+ "eess.AS": 30,
198
+ "eess.IV": 13,
199
+ "eess.SP": 21,
200
+ "gr-qc": 39,
201
+ "hep-ex": 85,
202
+ "hep-lat": 94,
203
+ "hep-ph": 92,
204
+ "hep-th": 88,
205
+ "math.AG": 118,
206
+ "math.AP": 68,
207
+ "math.AT": 89,
208
+ "math.CA": 119,
209
+ "math.CO": 77,
210
+ "math.CT": 74,
211
+ "math.DG": 82,
212
+ "math.DS": 64,
213
+ "math.FA": 67,
214
+ "math.GM": 70,
215
+ "math.GN": 125,
216
+ "math.GR": 105,
217
+ "math.GT": 78,
218
+ "math.HO": 87,
219
+ "math.LO": 95,
220
+ "math.MG": 93,
221
+ "math.NA": 20,
222
+ "math.NT": 113,
223
+ "math.OC": 19,
224
+ "math.PR": 56,
225
+ "math.RA": 121,
226
+ "math.RT": 120,
227
+ "math.ST": 42,
228
+ "nlin.AO": 43,
229
+ "nlin.CD": 100,
230
+ "nlin.CG": 59,
231
+ "nlin.PS": 107,
232
+ "nucl-th": 91,
233
+ "physics.ao-ph": 99,
234
+ "physics.bio-ph": 90,
235
+ "physics.chem-ph": 80,
236
+ "physics.class-ph": 96,
237
+ "physics.comp-ph": 41,
238
+ "physics.data-an": 54,
239
+ "physics.gen-ph": 101,
240
+ "physics.geo-ph": 122,
241
+ "physics.hist-ph": 112,
242
+ "physics.ins-det": 106,
243
+ "physics.med-ph": 25,
244
+ "physics.optics": 38,
245
+ "physics.soc-ph": 58,
246
+ "q-bio.BM": 32,
247
+ "q-bio.CB": 97,
248
+ "q-bio.GN": 71,
249
+ "q-bio.MN": 83,
250
+ "q-bio.NC": 12,
251
+ "q-bio.PE": 76,
252
+ "q-bio.QM": 29,
253
+ "q-bio.TO": 124,
254
+ "q-fin.CP": 33,
255
+ "q-fin.EC": 115,
256
+ "q-fin.GN": 114,
257
+ "q-fin.PM": 60,
258
+ "q-fin.RM": 109,
259
+ "q-fin.ST": 53,
260
+ "q-fin.TR": 84,
261
+ "quant-ph": 31,
262
+ "stat.AP": 35,
263
+ "stat.CO": 40,
264
+ "stat.ME": 55,
265
+ "stat.ML": 2,
266
+ "stat.OT": 110
267
+ },
268
+ "max_position_embeddings": 512,
269
+ "model_type": "distilbert",
270
+ "n_heads": 12,
271
+ "n_layers": 6,
272
+ "pad_token_id": 0,
273
+ "problem_type": "single_label_classification",
274
+ "qa_dropout": 0.1,
275
+ "seq_classif_dropout": 0.2,
276
+ "sinusoidal_pos_embds": false,
277
+ "tie_weights_": true,
278
+ "torch_dtype": "float32",
279
+ "transformers_version": "4.23.1",
280
+ "vocab_size": 30522
281
+ }
checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934c31a7b760170b73f429c3928e4be27dab07dd40108a84529e2b61a71f9ba7
3
+ size 268234289
checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-3000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "distilbert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "DistilBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.1779661016949152,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.53,
12
+ "learning_rate": 1.978813559322034e-05,
13
+ "loss": 2.0034,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 1.06,
18
+ "learning_rate": 1.9576271186440678e-05,
19
+ "loss": 1.3659,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 1.59,
24
+ "learning_rate": 1.9364406779661017e-05,
25
+ "loss": 1.2089,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 2.12,
30
+ "learning_rate": 1.9152542372881357e-05,
31
+ "loss": 1.1463,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 2.65,
36
+ "learning_rate": 1.8940677966101697e-05,
37
+ "loss": 1.0185,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 3.18,
42
+ "learning_rate": 1.8728813559322033e-05,
43
+ "loss": 0.9664,
44
+ "step": 3000
45
+ }
46
+ ],
47
+ "max_steps": 47200,
48
+ "num_train_epochs": 50,
49
+ "total_flos": 9490525857741888.0,
50
+ "trial_name": null,
51
+ "trial_params": null
52
+ }
checkpoint-3000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff