jordyvl's picture
Saving best model of baseline_BERT_50K_steps to hub
fccefe5 verified
{
"_name_or_path": "bert-base-uncased",
"architectures": [
"BertForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "adap-org",
"1": "alg-geom",
"2": "astro-ph",
"3": "astro-ph.CO",
"4": "astro-ph.EP",
"5": "astro-ph.GA",
"6": "astro-ph.HE",
"7": "astro-ph.IM",
"8": "astro-ph.SR",
"9": "atom-ph",
"10": "chao-dyn",
"11": "chem-ph",
"12": "cmp-lg",
"13": "comp-gas",
"14": "cond-mat",
"15": "cond-mat.dis-nn",
"16": "cond-mat.mes-hall",
"17": "cond-mat.mtrl-sci",
"18": "cond-mat.other",
"19": "cond-mat.quant-gas",
"20": "cond-mat.soft",
"21": "cond-mat.stat-mech",
"22": "cond-mat.str-el",
"23": "cond-mat.supr-con",
"24": "cs.AI",
"25": "cs.AR",
"26": "cs.CC",
"27": "cs.CE",
"28": "cs.CG",
"29": "cs.CL",
"30": "cs.CR",
"31": "cs.CV",
"32": "cs.CY",
"33": "cs.DB",
"34": "cs.DC",
"35": "cs.DL",
"36": "cs.DM",
"37": "cs.DS",
"38": "cs.ET",
"39": "cs.FL",
"40": "cs.GL",
"41": "cs.GR",
"42": "cs.GT",
"43": "cs.HC",
"44": "cs.IR",
"45": "cs.LG",
"46": "cs.LO",
"47": "cs.MA",
"48": "cs.MM",
"49": "cs.MS",
"50": "cs.NA",
"51": "cs.NE",
"52": "cs.NI",
"53": "cs.OH",
"54": "cs.OS",
"55": "cs.PF",
"56": "cs.PL",
"57": "cs.RO",
"58": "cs.SC",
"59": "cs.SD",
"60": "cs.SE",
"61": "cs.SI",
"62": "cs.SY",
"63": "dg-ga",
"64": "econ.EM",
"65": "econ.GN",
"66": "econ.TH",
"67": "eess.AS",
"68": "eess.IV",
"69": "eess.SP",
"70": "eess.SY",
"71": "funct-an",
"72": "gr-qc",
"73": "hep-ex",
"74": "hep-lat",
"75": "hep-ph",
"76": "hep-th",
"77": "math.AC",
"78": "math.AG",
"79": "math.AP",
"80": "math.AT",
"81": "math.CA",
"82": "math.CO",
"83": "math.CT",
"84": "math.CV",
"85": "math.DG",
"86": "math.DS",
"87": "math.FA",
"88": "math.GM",
"89": "math.GN",
"90": "math.GR",
"91": "math.GT",
"92": "math.HO",
"93": "math.KT",
"94": "math.LO",
"95": "math.MG",
"96": "math.NA",
"97": "math.NT",
"98": "math.OA",
"99": "math.OC",
"100": "math.PR",
"101": "math.QA",
"102": "math.RA",
"103": "math.RT",
"104": "math.SG",
"105": "math.SP",
"106": "mtrl-th",
"107": "nlin.AO",
"108": "nlin.CD",
"109": "nlin.CG",
"110": "nlin.PS",
"111": "nlin.SI",
"112": "nucl-ex",
"113": "nucl-th",
"114": "patt-sol",
"115": "physics.acc-ph",
"116": "physics.ao-ph",
"117": "physics.app-ph",
"118": "physics.atm-clus",
"119": "physics.atom-ph",
"120": "physics.bio-ph",
"121": "physics.chem-ph",
"122": "physics.class-ph",
"123": "physics.comp-ph",
"124": "physics.data-an",
"125": "physics.ed-ph",
"126": "physics.flu-dyn",
"127": "physics.gen-ph",
"128": "physics.geo-ph",
"129": "physics.hist-ph",
"130": "physics.ins-det",
"131": "physics.med-ph",
"132": "physics.optics",
"133": "physics.plasm-ph",
"134": "physics.pop-ph",
"135": "physics.soc-ph",
"136": "physics.space-ph",
"137": "q-alg",
"138": "q-bio.BM",
"139": "q-bio.CB",
"140": "q-bio.GN",
"141": "q-bio.MN",
"142": "q-bio.NC",
"143": "q-bio.OT",
"144": "q-bio.PE",
"145": "q-bio.QM",
"146": "q-bio.SC",
"147": "q-bio.TO",
"148": "q-fin.CP",
"149": "q-fin.EC",
"150": "q-fin.GN",
"151": "q-fin.MF",
"152": "q-fin.PM",
"153": "q-fin.PR",
"154": "q-fin.RM",
"155": "q-fin.ST",
"156": "q-fin.TR",
"157": "quant-ph",
"158": "solv-int",
"159": "stat.AP",
"160": "stat.CO",
"161": "stat.ME",
"162": "stat.ML",
"163": "stat.OT",
"164": "supr-con"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"adap-org": 0,
"alg-geom": 1,
"astro-ph": 2,
"astro-ph.CO": 3,
"astro-ph.EP": 4,
"astro-ph.GA": 5,
"astro-ph.HE": 6,
"astro-ph.IM": 7,
"astro-ph.SR": 8,
"atom-ph": 9,
"chao-dyn": 10,
"chem-ph": 11,
"cmp-lg": 12,
"comp-gas": 13,
"cond-mat": 14,
"cond-mat.dis-nn": 15,
"cond-mat.mes-hall": 16,
"cond-mat.mtrl-sci": 17,
"cond-mat.other": 18,
"cond-mat.quant-gas": 19,
"cond-mat.soft": 20,
"cond-mat.stat-mech": 21,
"cond-mat.str-el": 22,
"cond-mat.supr-con": 23,
"cs.AI": 24,
"cs.AR": 25,
"cs.CC": 26,
"cs.CE": 27,
"cs.CG": 28,
"cs.CL": 29,
"cs.CR": 30,
"cs.CV": 31,
"cs.CY": 32,
"cs.DB": 33,
"cs.DC": 34,
"cs.DL": 35,
"cs.DM": 36,
"cs.DS": 37,
"cs.ET": 38,
"cs.FL": 39,
"cs.GL": 40,
"cs.GR": 41,
"cs.GT": 42,
"cs.HC": 43,
"cs.IR": 44,
"cs.LG": 45,
"cs.LO": 46,
"cs.MA": 47,
"cs.MM": 48,
"cs.MS": 49,
"cs.NA": 50,
"cs.NE": 51,
"cs.NI": 52,
"cs.OH": 53,
"cs.OS": 54,
"cs.PF": 55,
"cs.PL": 56,
"cs.RO": 57,
"cs.SC": 58,
"cs.SD": 59,
"cs.SE": 60,
"cs.SI": 61,
"cs.SY": 62,
"dg-ga": 63,
"econ.EM": 64,
"econ.GN": 65,
"econ.TH": 66,
"eess.AS": 67,
"eess.IV": 68,
"eess.SP": 69,
"eess.SY": 70,
"funct-an": 71,
"gr-qc": 72,
"hep-ex": 73,
"hep-lat": 74,
"hep-ph": 75,
"hep-th": 76,
"math.AC": 77,
"math.AG": 78,
"math.AP": 79,
"math.AT": 80,
"math.CA": 81,
"math.CO": 82,
"math.CT": 83,
"math.CV": 84,
"math.DG": 85,
"math.DS": 86,
"math.FA": 87,
"math.GM": 88,
"math.GN": 89,
"math.GR": 90,
"math.GT": 91,
"math.HO": 92,
"math.KT": 93,
"math.LO": 94,
"math.MG": 95,
"math.NA": 96,
"math.NT": 97,
"math.OA": 98,
"math.OC": 99,
"math.PR": 100,
"math.QA": 101,
"math.RA": 102,
"math.RT": 103,
"math.SG": 104,
"math.SP": 105,
"mtrl-th": 106,
"nlin.AO": 107,
"nlin.CD": 108,
"nlin.CG": 109,
"nlin.PS": 110,
"nlin.SI": 111,
"nucl-ex": 112,
"nucl-th": 113,
"patt-sol": 114,
"physics.acc-ph": 115,
"physics.ao-ph": 116,
"physics.app-ph": 117,
"physics.atm-clus": 118,
"physics.atom-ph": 119,
"physics.bio-ph": 120,
"physics.chem-ph": 121,
"physics.class-ph": 122,
"physics.comp-ph": 123,
"physics.data-an": 124,
"physics.ed-ph": 125,
"physics.flu-dyn": 126,
"physics.gen-ph": 127,
"physics.geo-ph": 128,
"physics.hist-ph": 129,
"physics.ins-det": 130,
"physics.med-ph": 131,
"physics.optics": 132,
"physics.plasm-ph": 133,
"physics.pop-ph": 134,
"physics.soc-ph": 135,
"physics.space-ph": 136,
"q-alg": 137,
"q-bio.BM": 138,
"q-bio.CB": 139,
"q-bio.GN": 140,
"q-bio.MN": 141,
"q-bio.NC": 142,
"q-bio.OT": 143,
"q-bio.PE": 144,
"q-bio.QM": 145,
"q-bio.SC": 146,
"q-bio.TO": 147,
"q-fin.CP": 148,
"q-fin.EC": 149,
"q-fin.GN": 150,
"q-fin.MF": 151,
"q-fin.PM": 152,
"q-fin.PR": 153,
"q-fin.RM": 154,
"q-fin.ST": 155,
"q-fin.TR": 156,
"quant-ph": 157,
"solv-int": 158,
"stat.AP": 159,
"stat.CO": 160,
"stat.ME": 161,
"stat.ML": 162,
"stat.OT": 163,
"supr-con": 164
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"problem_type": "multi_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.37.2",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
}