|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<unk>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<|start_story|>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "<|end_story|>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "Sequence", |
|
"normalizers": [ |
|
{ |
|
"type": "Prepend", |
|
"prepend": "▁" |
|
}, |
|
{ |
|
"type": "Replace", |
|
"pattern": { |
|
"String": " " |
|
}, |
|
"content": "▁" |
|
} |
|
] |
|
}, |
|
"pre_tokenizer": null, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "<|start_story|>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "<|start_story|>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "<|start_story|>", |
|
"type_id": 1 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"<|start_story|>": { |
|
"id": "<|start_story|>", |
|
"ids": [ |
|
1 |
|
], |
|
"tokens": [ |
|
"<|start_story|>" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": { |
|
"type": "Sequence", |
|
"decoders": [ |
|
{ |
|
"type": "Replace", |
|
"pattern": { |
|
"String": "▁" |
|
}, |
|
"content": " " |
|
}, |
|
{ |
|
"type": "ByteFallback" |
|
}, |
|
{ |
|
"type": "Fuse" |
|
}, |
|
{ |
|
"type": "Strip", |
|
"content": " ", |
|
"start": 1, |
|
"stop": 0 |
|
} |
|
] |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": "<unk>", |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": true, |
|
"byte_fallback": true, |
|
"ignore_merges": false, |
|
"vocab": { |
|
"<unk>": 0, |
|
"<|start_story|>": 1, |
|
"<|end_story|>": 2, |
|
"\n": 3, |
|
"!": 4, |
|
"\"": 5, |
|
"$": 6, |
|
"'": 7, |
|
",": 8, |
|
"-": 9, |
|
".": 10, |
|
"0": 11, |
|
"1": 12, |
|
"2": 13, |
|
"3": 14, |
|
"4": 15, |
|
"5": 16, |
|
"6": 17, |
|
"7": 18, |
|
"8": 19, |
|
"9": 20, |
|
":": 21, |
|
";": 22, |
|
"<": 23, |
|
">": 24, |
|
"?": 25, |
|
"A": 26, |
|
"B": 27, |
|
"C": 28, |
|
"D": 29, |
|
"E": 30, |
|
"F": 31, |
|
"G": 32, |
|
"H": 33, |
|
"I": 34, |
|
"J": 35, |
|
"K": 36, |
|
"L": 37, |
|
"M": 38, |
|
"N": 39, |
|
"O": 40, |
|
"P": 41, |
|
"Q": 42, |
|
"R": 43, |
|
"S": 44, |
|
"T": 45, |
|
"U": 46, |
|
"V": 47, |
|
"W": 48, |
|
"X": 49, |
|
"Y": 50, |
|
"Z": 51, |
|
"_": 52, |
|
"a": 53, |
|
"b": 54, |
|
"c": 55, |
|
"d": 56, |
|
"e": 57, |
|
"f": 58, |
|
"g": 59, |
|
"h": 60, |
|
"i": 61, |
|
"j": 62, |
|
"k": 63, |
|
"l": 64, |
|
"m": 65, |
|
"n": 66, |
|
"o": 67, |
|
"p": 68, |
|
"q": 69, |
|
"r": 70, |
|
"s": 71, |
|
"t": 72, |
|
"u": 73, |
|
"v": 74, |
|
"w": 75, |
|
"x": 76, |
|
"y": 77, |
|
"z": 78, |
|
"|": 79, |
|
"▁": 80, |
|
"e▁": 81, |
|
"d▁": 82, |
|
"th": 83, |
|
".▁": 84, |
|
"▁a": 85, |
|
"t▁": 86, |
|
"y▁": 87, |
|
"to": 88, |
|
"s▁": 89, |
|
"nd▁": 90, |
|
"er": 91, |
|
"ed▁": 92, |
|
"the▁": 93, |
|
",▁": 94, |
|
"wa": 95, |
|
"in": 96, |
|
"he▁": 97, |
|
"to▁": 98, |
|
"ou": 99, |
|
"▁and▁": 100, |
|
"ar": 101, |
|
"en": 102, |
|
".▁T": 103, |
|
"▁a▁": 104, |
|
"ha": 105, |
|
"om": 106, |
|
"sa": 107, |
|
"▁the▁": 108, |
|
"he": 109, |
|
"im": 110, |
|
"on": 111, |
|
"g▁": 112, |
|
"ll": 113, |
|
"st": 114, |
|
"was▁": 115, |
|
"an": 116, |
|
"or": 117, |
|
"ay": 118, |
|
"it": 119, |
|
"er▁": 120, |
|
"id": 121, |
|
".\n": 122, |
|
"re": 123, |
|
"is▁": 124, |
|
"pl": 125, |
|
"ir": 126, |
|
"am": 127, |
|
"ed▁to▁": 128, |
|
"il": 129, |
|
"wi": 130, |
|
"the": 131, |
|
"ri": 132, |
|
"lo": 133, |
|
"a▁": 134, |
|
"and▁": 135, |
|
".▁H": 136, |
|
"hey▁": 137, |
|
".▁S": 138, |
|
"o▁": 139, |
|
".▁He▁": 140, |
|
"▁h": 141, |
|
"no": 142, |
|
"at▁": 143, |
|
".▁They▁": 144, |
|
"ere▁": 145, |
|
"with": 146, |
|
"On": 147, |
|
"un": 148, |
|
"ing▁": 149, |
|
"ver": 150, |
|
"pp": 151, |
|
".▁The▁": 152, |
|
"\"▁": 153, |
|
"ck": 154, |
|
"ry": 155, |
|
"bi": 156, |
|
"art": 157, |
|
"li": 158, |
|
"ll▁": 159, |
|
",▁\"": 160, |
|
"le": 161, |
|
"sto": 162, |
|
".▁She▁": 163, |
|
"be": 164, |
|
"day": 165, |
|
"start": 166, |
|
"it▁": 167, |
|
"ut▁": 168, |
|
"sh": 169, |
|
"om▁": 170, |
|
"ok": 171, |
|
"w▁": 172, |
|
"you": 173, |
|
"said": 174, |
|
"ma": 175, |
|
"ing": 176, |
|
"le▁": 177, |
|
"se": 178, |
|
"bo": 179, |
|
"ld▁": 180, |
|
"happ": 181, |
|
"im▁": 182, |
|
"end": 183, |
|
"fri": 184, |
|
"do": 185, |
|
"gh": 186, |
|
"of": 187, |
|
"ba": 188, |
|
"up": 189, |
|
"ay▁": 190, |
|
"ch": 191, |
|
"ed": 192, |
|
"very▁": 193, |
|
"an▁": 194, |
|
"ne": 195, |
|
"for": 196, |
|
"was": 197, |
|
"had▁": 198, |
|
"wan": 199, |
|
"l▁": 200, |
|
"ce▁": 201, |
|
"big▁": 202, |
|
"en▁": 203, |
|
"said,▁\"": 204, |
|
"story": 205, |
|
"friend": 206, |
|
"itt": 207, |
|
"<|": 208, |
|
"_story": 209, |
|
"|>": 210, |
|
"▁<|": 211, |
|
"start_story": 212, |
|
"▁<|start_story": 213, |
|
"▁<|start_story|>": 214, |
|
"e,▁": 215, |
|
"Lil": 216, |
|
"they▁": 217, |
|
"ve▁": 218, |
|
"ro": 219, |
|
"play": 220, |
|
"not▁": 221, |
|
".▁I": 222, |
|
"One▁": 223, |
|
"ge": 224, |
|
"ittle▁": 225, |
|
"was▁a▁": 226, |
|
"ke▁": 227, |
|
"'s▁": 228, |
|
"little▁": 229, |
|
"tim": 230, |
|
"his▁": 231, |
|
"at": 232, |
|
"es": 233, |
|
"that▁": 234, |
|
"One▁day": 235, |
|
|