{
"architectures": [
"RobertaForTokenClassification"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"classifier_dropout": null,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 1024,
"id2label": {
"0": "ADJ",
"1": "ADP",
"2": "ADP+DET",
"3": "ADP+PRON",
"4": "ADV",
"5": "ADV+AUX",
"6": "ADV+PART",
"7": "AUX",
"8": "AUX+PART",
"9": "B-ADJ",
"10": "B-ADJ+ADJ",
"11": "B-ADJ+NOUN",
"12": "B-ADJ+NOUN+NOUN",
"13": "B-ADJ+PART",
"14": "B-ADJ+PROPN",
"15": "B-ADJ+PUNCT",
"16": "B-ADP",
"17": "B-ADP+ADJ",
"18": "B-ADP+NOUN",
"19": "B-ADP+PRON",
"20": "B-ADV",
"21": "B-ADV+AUX",
"22": "B-ADV+PUNCT",
"23": "B-AUX",
"24": "B-AUX+ADV",
"25": "B-AUX+PART",
"26": "B-AUX+PART+VERB",
"27": "B-AUX+VERB",
"28": "B-CCONJ",
"29": "B-DET",
"30": "B-DET+AUX",
"31": "B-DET+NOUN",
"32": "B-INTJ",
"33": "B-INTJ+PUNCT",
"34": "B-NOUN",
"35": "B-NOUN+ADJ",
"36": "B-NOUN+ADP",
"37": "B-NOUN+AUX",
"38": "B-NOUN+NOUN",
"39": "B-NOUN+NOUN+VERB",
"40": "B-NOUN+PART",
"41": "B-NOUN+PROPN",
"42": "B-NOUN+PUNCT",
"43": "B-NOUN+SCONJ",
"44": "B-NOUN+VERB",
"45": "B-NUM",
"46": "B-PART",
"47": "B-PRON",
"48": "B-PRON+ADJ",
"49": "B-PRON+ADV",
"50": "B-PRON+AUX",
"51": "B-PRON+NOUN",
"52": "B-PRON+PART",
"53": "B-PRON+PRON",
"54": "B-PRON+VERB",
"55": "B-PROPN",
"56": "B-PROPN+ADP",
"57": "B-PROPN+AUX",
"58": "B-PROPN+PART",
"59": "B-PROPN+PROPN",
"60": "B-PROPN+PUNCT",
"61": "B-PROPN+PUNCT+PUNCT",
"62": "B-PROPN+VERB",
"63": "B-PUNCT",
"64": "B-PUNCT+PUNCT",
"65": "B-PUNCT+PUNCT+PUNCT",
"66": "B-PUNCT+SYM+PUNCT",
"67": "B-SCONJ",
"68": "B-SYM",
"69": "B-VERB",
"70": "B-VERB+ADJ",
"71": "B-VERB+ADJ+CCONJ",
"72": "B-VERB+ADP",
"73": "B-VERB+ADV",
"74": "B-VERB+ADV+PUNCT",
"75": "B-VERB+AUX",
"76": "B-VERB+CCONJ",
"77": "B-VERB+DET",
"78": "B-VERB+NOUN",
"79": "B-VERB+NOUN+CCONJ",
"80": "B-VERB+NOUN+NOUN",
"81": "B-VERB+PART",
"82": "B-VERB+PRON",
"83": "B-VERB+PRON+ADP",
"84": "B-VERB+PRON+ADV",
"85": "B-VERB+PROPN",
"86": "B-VERB+SCONJ",
"87": "B-VERB+VERB",
"88": "B-VERB+VERB+NOUN",
"89": "B-X",
"90": "B-X+PUNCT",
"91": "B-X+PUNCT+PUNCT",
"92": "B-X+X",
"93": "B-X+X+PRON",
"94": "CCONJ",
"95": "DET",
"96": "DET+NUM",
"97": "I-ADJ",
"98": "I-ADJ+ADJ",
"99": "I-ADJ+NOUN",
"100": "I-ADJ+NOUN+NOUN",
"101": "I-ADJ+PART",
"102": "I-ADJ+PROPN",
"103": "I-ADJ+PUNCT",
"104": "I-ADP",
"105": "I-ADP+ADJ",
"106": "I-ADP+NOUN",
"107": "I-ADP+PRON",
"108": "I-ADV",
"109": "I-ADV+AUX",
"110": "I-ADV+PUNCT",
"111": "I-AUX",
"112": "I-AUX+ADV",
"113": "I-AUX+PART",
"114": "I-AUX+PART+VERB",
"115": "I-AUX+VERB",
"116": "I-CCONJ",
"117": "I-DET",
"118": "I-DET+AUX",
"119": "I-DET+NOUN",
"120": "I-INTJ",
"121": "I-INTJ+PUNCT",
"122": "I-NOUN",
"123": "I-NOUN+ADJ",
"124": "I-NOUN+ADP",
"125": "I-NOUN+AUX",
"126": "I-NOUN+NOUN",
"127": "I-NOUN+NOUN+VERB",
"128": "I-NOUN+PART",
"129": "I-NOUN+PROPN",
"130": "I-NOUN+PUNCT",
"131": "I-NOUN+SCONJ",
"132": "I-NOUN+VERB",
"133": "I-NUM",
"134": "I-PART",
"135": "I-PRON",
"136": "I-PRON+ADJ",
"137": "I-PRON+ADV",
"138": "I-PRON+AUX",
"139": "I-PRON+NOUN",
"140": "I-PRON+PART",
"141": "I-PRON+PRON",
"142": "I-PRON+VERB",
"143": "I-PROPN",
"144": "I-PROPN+ADP",
"145": "I-PROPN+AUX",
"146": "I-PROPN+PART",
"147": "I-PROPN+PROPN",
"148": "I-PROPN+PUNCT",
"149": "I-PROPN+PUNCT+PUNCT",
"150": "I-PROPN+VERB",
"151": "I-PUNCT",
"152": "I-PUNCT+PUNCT",
"153": "I-PUNCT+PUNCT+PUNCT",
"154": "I-PUNCT+SYM+PUNCT",
"155": "I-SCONJ",
"156": "I-SYM",
"157": "I-VERB",
"158": "I-VERB+ADJ",
"159": "I-VERB+ADJ+CCONJ",
"160": "I-VERB+ADP",
"161": "I-VERB+ADV",
"162": "I-VERB+ADV+PUNCT",
"163": "I-VERB+AUX",
"164": "I-VERB+CCONJ",
"165": "I-VERB+DET",
"166": "I-VERB+NOUN",
"167": "I-VERB+NOUN+CCONJ",
"168": "I-VERB+NOUN+NOUN",
"169": "I-VERB+PART",
"170": "I-VERB+PRON",
"171": "I-VERB+PRON+ADP",
"172": "I-VERB+PRON+ADV",
"173": "I-VERB+PROPN",
"174": "I-VERB+SCONJ",
"175": "I-VERB+VERB",
"176": "I-VERB+VERB+NOUN",
"177": "I-X",
"178": "I-X+PUNCT",
"179": "I-X+PUNCT+PUNCT",
"180": "I-X+X",
"181": "I-X+X+PRON",
"182": "INTJ",
"183": "NOUN",
"184": "NOUN+AUX",
"185": "NOUN+PART",
"186": "NUM",
"187": "PART",
"188": "PRON",
"189": "PRON+AUX",
"190": "PRON+VERB",
"191": "PROPN",
"192": "PROPN+PART",
"193": "PUNCT",
"194": "PUNCT+PUNCT",
"195": "PUNCT+PUNCT+PUNCT",
"196": "PUNCT+SYM",
"197": "SCONJ",
"198": "SYM",
"199": "SYM+PUNCT",
"200": "SYM+SYM",
"201": "VERB",
"202": "VERB+ADP",
"203": "VERB+PART",
"204": "VERB+PRON",
"205": "VERB+VERB",
"206": "X",
"207": "X+X"
},
"initializer_range": 0.02,
"intermediate_size": 4096,
"label2id": {
"ADJ": 0,
"ADP": 1,
"ADP+DET": 2,
"ADP+PRON": 3,
"ADV": 4,
"ADV+AUX": 5,
"ADV+PART": 6,
"AUX": 7,
"AUX+PART": 8,
"B-ADJ": 9,
"B-ADJ+ADJ": 10,
"B-ADJ+NOUN": 11,
"B-ADJ+NOUN+NOUN": 12,
"B-ADJ+PART": 13,
"B-ADJ+PROPN": 14,
"B-ADJ+PUNCT": 15,
"B-ADP": 16,
"B-ADP+ADJ": 17,
"B-ADP+NOUN": 18,
"B-ADP+PRON": 19,
"B-ADV": 20,
"B-ADV+AUX": 21,
"B-ADV+PUNCT": 22,
"B-AUX": 23,
"B-AUX+ADV": 24,
"B-AUX+PART": 25,
"B-AUX+PART+VERB": 26,
"B-AUX+VERB": 27,
"B-CCONJ": 28,
"B-DET": 29,
"B-DET+AUX": 30,
"B-DET+NOUN": 31,
"B-INTJ": 32,
"B-INTJ+PUNCT": 33,
"B-NOUN": 34,
"B-NOUN+ADJ": 35,
"B-NOUN+ADP": 36,
"B-NOUN+AUX": 37,
"B-NOUN+NOUN": 38,
"B-NOUN+NOUN+VERB": 39,
"B-NOUN+PART": 40,
"B-NOUN+PROPN": 41,
"B-NOUN+PUNCT": 42,
"B-NOUN+SCONJ": 43,
"B-NOUN+VERB": 44,
"B-NUM": 45,
"B-PART": 46,
"B-PRON": 47,
"B-PRON+ADJ": 48,
"B-PRON+ADV": 49,
"B-PRON+AUX": 50,
"B-PRON+NOUN": 51,
"B-PRON+PART": 52,
"B-PRON+PRON": 53,
"B-PRON+VERB": 54,
"B-PROPN": 55,
"B-PROPN+ADP": 56,
"B-PROPN+AUX": 57,
"B-PROPN+PART": 58,
"B-PROPN+PROPN": 59,
"B-PROPN+PUNCT": 60,
"B-PROPN+PUNCT+PUNCT": 61,
"B-PROPN+VERB": 62,
"B-PUNCT": 63,
"B-PUNCT+PUNCT": 64,
"B-PUNCT+PUNCT+PUNCT": 65,
"B-PUNCT+SYM+PUNCT": 66,
"B-SCONJ": 67,
"B-SYM": 68,
"B-VERB": 69,
"B-VERB+ADJ": 70,
"B-VERB+ADJ+CCONJ": 71,
"B-VERB+ADP": 72,
"B-VERB+ADV": 73,
"B-VERB+ADV+PUNCT": 74,
"B-VERB+AUX": 75,
"B-VERB+CCONJ": 76,
"B-VERB+DET": 77,
"B-VERB+NOUN": 78,
"B-VERB+NOUN+CCONJ": 79,
"B-VERB+NOUN+NOUN": 80,
"B-VERB+PART": 81,
"B-VERB+PRON": 82,
"B-VERB+PRON+ADP": 83,
"B-VERB+PRON+ADV": 84,
"B-VERB+PROPN": 85,
"B-VERB+SCONJ": 86,
"B-VERB+VERB": 87,
"B-VERB+VERB+NOUN": 88,
"B-X": 89,
"B-X+PUNCT": 90,
"B-X+PUNCT+PUNCT": 91,
"B-X+X": 92,
"B-X+X+PRON": 93,
"CCONJ": 94,
"DET": 95,
"DET+NUM": 96,
"I-ADJ": 97,
"I-ADJ+ADJ": 98,
"I-ADJ+NOUN": 99,
"I-ADJ+NOUN+NOUN": 100,
"I-ADJ+PART": 101,
"I-ADJ+PROPN": 102,
"I-ADJ+PUNCT": 103,
"I-ADP": 104,
"I-ADP+ADJ": 105,
"I-ADP+NOUN": 106,
"I-ADP+PRON": 107,
"I-ADV": 108,
"I-ADV+AUX": 109,
"I-ADV+PUNCT": 110,
"I-AUX": 111,
"I-AUX+ADV": 112,
"I-AUX+PART": 113,
"I-AUX+PART+VERB": 114,
"I-AUX+VERB": 115,
"I-CCONJ": 116,
"I-DET": 117,
"I-DET+AUX": 118,
"I-DET+NOUN": 119,
"I-INTJ": 120,
"I-INTJ+PUNCT": 121,
"I-NOUN": 122,
"I-NOUN+ADJ": 123,
"I-NOUN+ADP": 124,
"I-NOUN+AUX": 125,
"I-NOUN+NOUN": 126,
"I-NOUN+NOUN+VERB": 127,
"I-NOUN+PART": 128,
"I-NOUN+PROPN": 129,
"I-NOUN+PUNCT": 130,
"I-NOUN+SCONJ": 131,
"I-NOUN+VERB": 132,
"I-NUM": 133,
"I-PART": 134,
"I-PRON": 135,
"I-PRON+ADJ": 136,
"I-PRON+ADV": 137,
"I-PRON+AUX": 138,
"I-PRON+NOUN": 139,
"I-PRON+PART": 140,
"I-PRON+PRON": 141,
"I-PRON+VERB": 142,
"I-PROPN": 143,
"I-PROPN+ADP": 144,
"I-PROPN+AUX": 145,
"I-PROPN+PART": 146,
"I-PROPN+PROPN": 147,
"I-PROPN+PUNCT": 148,
"I-PROPN+PUNCT+PUNCT": 149,
"I-PROPN+VERB": 150,
"I-PUNCT": 151,
"I-PUNCT+PUNCT": 152,
"I-PUNCT+PUNCT+PUNCT": 153,
"I-PUNCT+SYM+PUNCT": 154,
"I-SCONJ": 155,
"I-SYM": 156,
"I-VERB": 157,
"I-VERB+ADJ": 158,
"I-VERB+ADJ+CCONJ": 159,
"I-VERB+ADP": 160,
"I-VERB+ADV": 161,
"I-VERB+ADV+PUNCT": 162,
"I-VERB+AUX": 163,
"I-VERB+CCONJ": 164,
"I-VERB+DET": 165,
"I-VERB+NOUN": 166,
"I-VERB+NOUN+CCONJ": 167,
"I-VERB+NOUN+NOUN": 168,
"I-VERB+PART": 169,
"I-VERB+PRON": 170,
"I-VERB+PRON+ADP": 171,
"I-VERB+PRON+ADV": 172,
"I-VERB+PROPN": 173,
"I-VERB+SCONJ": 174,
"I-VERB+VERB": 175,
"I-VERB+VERB+NOUN": 176,
"I-X": 177,
"I-X+PUNCT": 178,
"I-X+PUNCT+PUNCT": 179,
"I-X+X": 180,
"I-X+X+PRON": 181,
"INTJ": 182,
"NOUN": 183,
"NOUN+AUX": 184,
"NOUN+PART": 185,
"NUM": 186,
"PART": 187,
"PRON": 188,
"PRON+AUX": 189,
"PRON+VERB": 190,
"PROPN": 191,
"PROPN+PART": 192,
"PUNCT": 193,
"PUNCT+PUNCT": 194,
"PUNCT+PUNCT+PUNCT": 195,
"PUNCT+SYM": 196,
"SCONJ": 197,
"SYM": 198,
"SYM+PUNCT": 199,
"SYM+SYM": 200,
"VERB": 201,
"VERB+ADP": 202,
"VERB+PART": 203,
"VERB+PRON": 204,
"VERB+VERB": 205,
"X": 206,
"X+X": 207
},
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"task_specific_params": {
"upos_multiword": {
"ADJ+ADJ": {
"bigenough": [
"big",
"enough"
],
"interestingsocial": [
"interesting",
"social"
],
"longeight-inch": [
"long",
"eight-inch"
],
"pressingsocial": [
"pressing",
"social"
]
},
"ADJ+NOUN": {
"bigsource": [
"big",
"source"
],
"contrastingseries": [
"contrasting",
"series"
],
"distractingelements": [
"distracting",
"elements"
],
"fascinatingshop": [
"fascinating",
"shop"
],
"gruelingsanctions": [
"grueling",
"sanctions"
],
"increasingsafety": [
"increasing",
"safety"
],
"longexposures": [
"long",
"exposures"
],
"longhair": [
"long",
"hair"
],
"longhistory": [
"long",
"history"
],
"ongoingsummaries": [
"ongoing",
"summaries"
],
"pre-meetingsite": [
"pre-meeting",
"site"
],
"rallyingsigns": [
"rallying",
"signs"
],
"revenue-raisingservices": [
"revenue-raising",
"services"
],
"self-questioningshrug": [
"self-questioning",
"shrug"
],
"simperingsmile": [
"simpering",
"smile"
],
"stronghints": [
"strong",
"hints"
],
"wizardingsport": [
"wizarding",
"sport"
]
},
"ADJ+PART": {
"elses": [
"else",
"s"
]
},
"ADJ+PROPN": {
"Nationwidetints": [
"Nationwide",
"tints"
]
},
"ADJ+PUNCT": {
"Jr..": [
"Jr.",
"."
],
"e.g.:": [
"e.g.",
":"
],
"i.e.,": [
"i.e.",
","
]
},
"ADP+DET": {
"des": [
"de",
"les"
]
},
"ADP+NOUN": {
"Infact": [
"In",
"fact"
],
"overtime": [
"over",
"time"
]
},
"ADP+PRON": {
"init": [
"in",
"it"
]
},
"ADV+AUX": {
"Heres": [
"Here",
"s"
],
"longhave": [
"long",
"have"
]
},
"ADV+PART": {
"into": [
"in",
"to"
]
},
"ADV+PUNCT": {
"E.g.,": [
"E.g.",
","
],
"e.g.,": [
"e.g.",
","
],
"i.e.,": [
"i.e.",
","
],
"i.e.:": [
"i.e.",
":"
]
},
"AUX+ADV": {
"cannot": [
"can",
"not"
]
},
"AUX+PART": {
"Aren't": [
"Are",
"n't"
],
"Aren\u2019t": [
"Are",
"n\u2019t"
],
"CANT": [
"CA",
"NT"
],
"Can't": [
"Ca",
"n't"
],
"Cannot": [
"Can",
"not"
],
"Can\u2019t": [
"Ca",
"n\u2019t"
],
"DON'T": [
"DO",
"N'T"
],
"DONT": [
"DO",
"NT"
],
"Don't": [
"Do",
"n't"
],
"Dont": [
"Do",
"nt"
],
"Don\u2019t": [
"Do",
"n\u2019t"
],
"Haven't": [
"Have",
"n't"
],
"Isn't": [
"Is",
"n't"
],
"Isn\u2019t": [
"Is",
"n\u2019t"
],
"Won't": [
"Wo",
"n't"
],
"ain't": [
"ai",
"n't"
],
"aint": [
"ai",
"nt"
],
"aren't": [
"are",
"not"
],
"arent": [
"are",
"nt"
],
"aren\u2019t": [
"are",
"n\u2019t"
],
"can't": [
"can",
"not"
],
"cannot": [
"can",
"not"
],
"cant": [
"ca",
"nt"
],
"can\u2019t": [
"ca",
"n\u2019t"
],
"didn't": [
"did",
"n't"
],
"didn\u2019t": [
"did",
"n\u2019t"
],
"doesn't": [
"does",
"n't"
],
"doesn\u2019t": [
"does",
"n\u2019t"
],
"don't": [
"do",
"not"
],
"dont": [
"do",
"nt"
],
"don\u2019t": [
"do",
"n\u2019t"
],
"haven't": [
"have",
"n't"
],
"shan't": [
"sha",
"n't"
],
"shouldn't": [
"should",
"not"
],
"wasent": [
"wase",
"nt"
],
"weren't": [
"were",
"n't"
],
"weren\u2019t": [
"were",
"n\u2019t"
],
"won't": [
"will",
"not"
],
"wont": [
"wo",
"nt"
],
"won\u2019t": [
"wo",
"n\u2019t"
]
},
"AUX+PART+VERB": {
"dunno": [
"du",
"n",
"no"
]
},
"AUX+VERB": {
"beingsaid": [
"being",
"said"
],
"beingsent": [
"being",
"sent"
],
"beingshipped": [
"being",
"shipped"
],
"beingspoken": [
"being",
"spoken"
],
"havingsaid": [
"having",
"said"
]
},
"DET+AUX": {
"thes": [
"the",
"s"
]
},
"DET+NOUN": {
"ALOT": [
"A",
"LOT"
],
"Alot": [
"A",
"lot"
],
"apart": [
"a",
"part"
],
"awhile": [
"a",
"while"
],
"sometime": [
"some",
"time"
]
},
"DET+NUM": {
"everyone": [
"every",
"one"
]
},
"INTJ+PUNCT": {
"etc.'": [
"etc.",
"'"
],
"ta',": [
"ta'",
","
]
},
"NOUN+ADJ": {
"nothingspecial": [
"nothing",
"special"
]
},
"NOUN+ADP": {
"flagon": [
"flag",
"on"
],
"groundsof": [
"grounds",
"of"
],
"hashtagon": [
"hashtag",
"on"
],
"meetingsince": [
"meeting",
"since"
]
},
"NOUN+AUX": {
"breathingshould": [
"breathing",
"should"
],
"doghas": [
"dog",
"has"
],
"mythmakingshould": [
"mythmaking",
"should"
]
},
"NOUN+NOUN": {
"Drivingschool": [
"Driving",
"school"
],
"bakingsheet": [
"baking",
"sheet"
],
"bakingsoda": [
"baking",
"soda"
],
"counselingservices": [
"counseling",
"services"
],
"datingservice": [
"dating",
"service"
],
"doghouse": [
"dog",
"house"
],
"drivingschool": [
"driving",
"school"
],
"engineeringservices": [
"engineering",
"services"
],
"eveningschedule": [
"evening",
"schedule"
],
"kingsnake": [
"king",
"snake"
],
"kingsnakes": [
"king",
"snakes"
],
"lightingshowroom": [
"lighting",
"showroom"
],
"lightingsources": [
"lighting",
"sources"
],
"loggingsites": [
"logging",
"sites"
],
"mpgnumber": [
"mpg",
"number"
],
"plughole": [
"plug",
"hole"
],
"runningshorts": [
"running",
"shorts"
],
"tagsets": [
"tag",
"sets"
],
"testingschedule": [
"testing",
"schedule"
],
"towingservices": [
"towing",
"services"
],
"trainingsession": [
"training",
"session"
],
"writingschedule": [
"writing",
"schedule"
],
"writingsystem": [
"writing",
"system"
]
},
"NOUN+NOUN+VERB": {
"RecruitingMeetingscheduled": [
"Recruiting",
"Meeting",
"scheduled"
]
},
"NOUN+PART": {
"DAUGHTERS": [
"DAUGHTER",
"S"
],
"Kids": [
"Kid",
"s"
],
"Mares": [
"Mare",
"s"
],
"Smokers": [
"Smoker",
"s"
],
"Travelers": [
"Traveler",
"s"
],
"animals": [
"animal",
"s"
],
"bachelors": [
"bachelor",
"s"
],
"bakers": [
"baker",
"s"
],
"beginners": [
"beginner",
"s"
],
"bettas": [
"betta",
"s"
],
"boys": [
"boy",
"s"
],
"cars": [
"car",
"s"
],
"cats": [
"cat",
"s"
],
"cycles": [
"cycle",
"s"
],
"dads": [
"dad",
"s"
],
"doctors": [
"doctor",
"s"
],
"dogs": [
"dog",
"s"
],
"drivers": [
"driver",
"s"
],
"friends": [
"friend",
"s"
],
"grandmas": [
"grandma",
"s"
],
"horses": [
"horse",
"s"
],
"humans": [
"human",
"s"
],
"males": [
"male",
"s"
],
"manufacturers": [
"manufacturer",
"s"
],
"mares": [
"mare",
"s"
],
"nights": [
"night",
"s"
],
"owners": [
"owner",
"s"
],
"peoples": [
"people",
"s"
],
"persons": [
"person",
"s"
],
"scammers": [
"scammer",
"s"
],
"sons": [
"son",
"s"
],
"teams": [
"team",
"s"
],
"todays": [
"today",
"s"
],
"trainers": [
"trainer",
"s"
],
"visitors": [
"visitor",
"s"
],
"wits": [
"wit",
"s"
],
"workers": [
"worker",
"s"
],
"years": [
"year",
"s"
]
},
"NOUN+PUNCT": {
"Ed.:": [
"Ed.",
":"
],
"Fax.(": [
"Fax.",
"("
],
"a.m.,": [
"a.m.",
","
],
"lb.,": [
"lb.",
","
],
"mins.,": [
"mins.",
","
],
"oz.,": [
"oz.",
","
],
"p.m.,": [
"p.m.",
","
]
},
"NOUN+SCONJ": {
"buildingsince": [
"building",
"since"
]
},
"NOUN+VERB": {
"dogeat": [
"dog",
"eat"
],
"morningserves": [
"morning",
"serves"
],
"thingsounded": [
"thing",
"sounded"
]
},
"PRON+ADJ": {
"everythingset": [
"everything",
"set"
],
"somethingsuch": [
"something",
"such"
]
},
"PRON+ADV": {
"somethingsometime": [
"something",
"sometime"
]
},
"PRON+AUX": {
"ITS": [
"IT",
"S"
],
"Im": [
"I",
"m"
],
"Its": [
"It",
"s"
],
"Whats": [
"What",
"s"
],
"Your": [
"You",
"r"
],
"hes": [
"he",
"s"
],
"id": [
"i",
"d"
],
"im": [
"i",
"m"
],
"its": [
"it",
"s"
],
"iv": [
"i",
"v"
],
"ive": [
"i",
"ve"
],
"thats": [
"that",
"s"
],
"their": [
"thei",
"r"
],
"there": [
"the",
"re"
],
"ur": [
"u",
"r"
],
"your": [
"you",
"r"
]
},
"PRON+NOUN": {
"alleconomy": [
"all",
"economy"
]
},
"PRON+PART": {
"anyones": [
"anyone",
"s"
]
},
"PRON+PRON": {
"everythingshe": [
"everything",
"she"
]
},
"PRON+VERB": {
"Thats": [
"That",
"s"
],
"Theres": [
"There",
"s"
],
"everythingset": [
"everything",
"set"
],
"iguz": [
"i",
"guz"
],
"im": [
"i",
"m"
],
"its": [
"it",
"s"
],
"theres": [
"there",
"s"
],
"youthank": [
"you",
"thank"
]
},
"PROPN+ADP": {
"Pagin": [
"Pag",
"in"
],
"Petersburgin": [
"Petersburg",
"in"
]
},
"PROPN+AUX": {
"Hedwighad": [
"Hedwig",
"had"
]
},
"PROPN+PART": {
"BJs": [
"BJ",
"s"
],
"Chilis": [
"Chili",
"s"
],
"Friscos": [
"Frisco",
"s"
],
"Hams": [
"Ham",
"s"
],
"Kobeys": [
"Kobey",
"s"
],
"LWs": [
"LW",
"s"
],
"Leonardos": [
"Leonardo",
"s"
],
"Mortons": [
"Morton",
"s"
],
"Travellers": [
"Traveller",
"s"
],
"Valentines": [
"Valentine",
"s"
],
"Years": [
"Year",
"s"
],
"jacks": [
"jack",
"s"
]
},
"PROPN+PROPN": {
"G&GAutomotive": [
"G&G",
"Automotive"
],
"drivingschool": [
"driving",
"school"
]
},
"PROPN+PUNCT": {
"B.,": [
"B.",
","
],
"B.A.\"": [
"B.A.",
"\""
],
"D.C.,": [
"D.C.",
","
],
"Inc.\"": [
"Inc.",
"\""
],
"M.,": [
"M.",
","
],
"N.O.?": [
"N.O.",
"?"
],
"Que.,": [
"Que.",
","
],
"U.N.,": [
"U.N.",
","
],
"U.S.)": [
"U.S.",
")"
],
"U.S.-": [
"U.S.",
"-"
],
"Va.-": [
"Va.",
"-"
]
},
"PROPN+PUNCT+PUNCT": {
"W.H.\",": [
"W.H.",
"\"",
","
]
},
"PROPN+VERB": {
"Orglive": [
"Org",
"live"
],
"Pagyelped": [
"Pag",
"yelped"
]
},
"PUNCT+PUNCT": {
"!\"": [
"!",
"\""
],
"!'": [
"!",
"'"
],
"!)": [
"!",
")"
],
"\"!": [
"\"",
"!"
],
"\"\"": [
"\"",
"\""
],
"\"(": [
"\"",
"("
],
"\")": [
"\"",
")"
],
"\",": [
"\"",
","
],
"\"-": [
"\"",
"-"
],
"\".": [
"\"",
"."
],
"\"...": [
"\"",
"..."
],
"\":": [
"\"",
":"
],
"\"[": [
"\"",
"["
],
"')": [
"'",
")"
],
"',": [
"'",
","
],
"(\"": [
"(",
"\""
],
"(\"\"": [
"(",
"\"\""
],
"('": [
"(",
"'"
],
"((": [
"(",
"("
],
"([": [
"(",
"["
],
")\"": [
")",
"\""
],
")(": [
")",
"("
],
"))": [
")",
")"
],
"),": [
")",
","
],
").": [
")",
"."
],
")...": [
")",
"..."
],
"):": [
")",
":"
],
");": [
")",
";"
],
"*,": [
"*",
","
],
",\"": [
",",
"\""
],
",'": [
",",
"'"
],
",''": [
",",
"''"
],
",...": [
",",
"..."
],
"-\"": [
"-",
"\""
],
".\"": [
".",
"\""
],
".'": [
".",
"'"
],
"..": [
".",
"."
],
"...\"": [
"...",
"\""
],
"....": [
"...",
"."
],
"?\"": [
"?",
"\""
],
"?'": [
"?",
"'"
],
"?)": [
"?",
")"
],
"?]": [
"?",
"]"
],
"],": [
"]",
","
],
"];": [
"]",
";"
]
},
"PUNCT+PUNCT+PUNCT": {
"!),": [
"!",
")",
","
],
"\"),": [
"\"",
")",
","
],
"?),": [
"?",
")",
","
],
"]),": [
"]",
")",
","
]
},
"PUNCT+SYM": {
"($": [
"(",
"$"
]
},
"PUNCT+SYM+PUNCT": {
"(%)": [
"(",
"%",
")"
]
},
"SYM+PUNCT": {
"$,": [
"$",
","
],
"%)": [
"%",
")"
],
"%,": [
"%",
","
],
"-'": [
"-",
"'"
]
},
"SYM+SYM": {
"-$": [
"-",
"$"
]
},
"VERB+ADJ": {
"alteringspecific": [
"altering",
"specific"
],
"doingshoddy": [
"doing",
"shoddy"
],
"facingserious": [
"facing",
"serious"
],
"legalizingsame": [
"legalizing",
"same"
],
"mixinguppercase": [
"mixing",
"uppercase"
],
"motivatingsyntactic": [
"motivating",
"syntactic"
],
"outsourcingspecial": [
"outsourcing",
"special"
],
"reinforcingsimilar": [
"reinforcing",
"similar"
],
"showingsuperb": [
"showing",
"superb"
],
"usingsimple": [
"using",
"simple"
]
},
"VERB+ADJ+CCONJ": {
"lookingsmugand": [
"looking",
"smug",
"and"
]
},
"VERB+ADP": {
"Login": [
"Log",
"in"
],
"gamingsince": [
"gaming",
"since"
],
"goto": [
"go",
"to"
],
"hummingsince": [
"humming",
"since"
],
"investigatingsince": [
"investigating",
"since"
],
"login": [
"log",
"in"
],
"setup": [
"set",
"up"
]
},
"VERB+ADV": {
"advancingslowly": [
"advancing",
"slowly"
],
"behavingsplendidly": [
"behaving",
"splendidly"
],
"bucklingslightly": [
"buckling",
"slightly"
],
"contributingsubstantially": [
"contributing",
"substantially"
],
"exultingeverywhere": [
"exulting",
"everywhere"
],
"includingspecifically": [
"including",
"specifically"
],
"movingsouthward": [
"moving",
"southward"
],
"proposingspecifically": [
"proposing",
"specifically"
],
"scavengingseriously": [
"scavenging",
"seriously"
],
"swellingslightly": [
"swelling",
"slightly"
],
"totalingsomewhere": [
"totaling",
"somewhere"
],
"walkinguptown": [
"walking",
"uptown"
]
},
"VERB+ADV+PUNCT": {
"studyinge.g.,": [
"studying",
"e.g.",
","
]
},
"VERB+AUX": {
"Winningshall": [
"Winning",
"shall"
],
"copyingshould": [
"copying",
"should"
]
},
"VERB+CCONJ": {
"departingeither": [
"departing",
"either"
]
},
"VERB+DET": {
"basingsome": [
"basing",
"some"
],
"demonstratingsuch": [
"demonstrating",
"such"
],
"discussingsome": [
"discussing",
"some"
],
"doingevery": [
"doing",
"every"
],
"doingsome": [
"doing",
"some"
],
"dumpingsome": [
"dumping",
"some"
],
"experiencingsome": [
"experiencing",
"some"
],
"finishingsome": [
"finishing",
"some"
],
"hostingsome": [
"hosting",
"some"
],
"meetingeach": [
"meeting",
"each"
],
"playingsome": [
"playing",
"some"
],
"rangeach": [
"rang",
"each"
],
"readingsome": [
"reading",
"some"
],
"regardingsome": [
"regarding",
"some"
],
"replacingsome": [
"replacing",
"some"
],
"spendingsome": [
"spending",
"some"
],
"usingsome": [
"using",
"some"
]
},
"VERB+NOUN": {
"continuingsource": [
"continuing",
"source"
],
"differingschedules": [
"differing",
"schedules"
],
"doingscissors": [
"doing",
"scissors"
],
"expandingsystem": [
"expanding",
"system"
],
"expressingsadness": [
"expressing",
"sadness"
],
"followingsuggestion": [
"following",
"suggestion"
],
"formingeggs": [
"forming",
"eggs"
],
"gettingsavage": [
"getting",
"savage"
],
"gleamingsand": [
"gleaming",
"sand"
],
"improvingsurveillance": [
"improving",
"surveillance"
],
"meaningshell": [
"meaning",
"shell"
],
"playingsports": [
"playing",
"sports"
],
"printingerrors": [
"printing",
"errors"
],
"producingshrubs": [
"producing",
"shrubs"
],
"providingservices": [
"providing",
"services"
],
"quittingsmoking": [
"quitting",
"smoking"
],
"rushingslipstream": [
"rushing",
"slipstream"
],
"seeingsomeone": [
"seeing",
"someone"
],
"studyingsymmetry": [
"studying",
"symmetry"
]
},
"VERB+PART": {
"Gonna": [
"Gon",
"na"
],
"Gotta": [
"Got",
"ta"
],
"aren't": [
"are",
"n't"
],
"didn't": [
"did",
"n't"
],
"doesn't": [
"does",
"n't"
],
"don't": [
"do",
"n't"
],
"don\u2019t": [
"do",
"n\u2019t"
],
"gonna": [
"gon",
"na"
],
"gotta": [
"got",
"ta"
],
"haven't": [
"have",
"n't"
],
"wana": [
"wan",
"a"
],
"wanna": [
"wan",
"na"
]
},
"VERB+PRON": {
"Lets": [
"Let",
"s"
],
"callyou": [
"call",
"you"
],
"crossingeach": [
"crossing",
"each"
],
"doingeverything": [
"doing",
"everything"
],
"expectingsomeone": [
"expecting",
"someone"
],
"lets": [
"let",
"s"
],
"slunghis": [
"slung",
"his"
]
},
"VERB+PRON+ADP": {
"seeingeverythingaround": [
"seeing",
"everything",
"around"
]
},
"VERB+PRON+ADV": {
"screwingeverythingup": [
"screwing",
"everything",
"up"
]
},
"VERB+PROPN": {
"arrivingsalt": [
"arriving",
"salt"
],
"departingsan": [
"departing",
"san"
],
"leavingsan": [
"leaving",
"san"
],
"leavingsunday": [
"leaving",
"sunday"
]
},
"VERB+SCONJ": {
"decidewhether": [
"decide",
"whether"
]
},
"VERB+VERB": {
"growingsuspended": [
"growing",
"suspended"
],
"had": [
"h",
"ad"
]
},
"VERB+VERB+NOUN": {
"crushingsleepingflowers": [
"crushing",
"sleeping",
"flowers"
],
"hostingvisitingschool": [
"hosting",
"visiting",
"school"
]
},
"X+PUNCT": {
"al.,": [
"al.",
","
],
"e.g.,": [
"e.g.",
","
],
"etc.)": [
"etc.",
")"
],
"etc.,": [
"etc.",
","
],
"etc..": [
"etc.",
"."
]
},
"X+X": {
"'s": [
"'",
"s"
],
").doc": [
")",
".doc"
]
},
"X+X+PRON": {
"http://i.imgur.com/T2zff.jpghttp://i.imgur.com/Xytex.jpgI": [
"http://i.imgur.com/T2zff.jpg",
"http://i.imgur.com/Xytex.jpg",
"I"
]
}
}
},
"tokenizer_class": "RobertaTokenizer",
"torch_dtype": "float32",
"transformers_version": "4.14.1",
"type_vocab_size": 1,
"use_cache": true,
"vocab_size": 50265
}