gpt2-lichess-uci-2016-01_11 / tokenizer.json
austindavis's picture
Upload tokenizer
1a0e6e2 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Whitespace"
},
{
"type": "Split",
"pattern": {
"Regex": "\\d"
},
"behavior": "MergedWithPrevious",
"invert": false
}
]
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<s>": {
"id": "<s>",
"ids": [
1
],
"tokens": [
"<s>"
]
}
}
},
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"<pad>": 0,
"<s>": 1,
"</s>": 2,
"<unk>": 3,
"a1": 4,
"b1": 5,
"c1": 6,
"d1": 7,
"e1": 8,
"f1": 9,
"g1": 10,
"h1": 11,
"a2": 12,
"b2": 13,
"c2": 14,
"d2": 15,
"e2": 16,
"f2": 17,
"g2": 18,
"h2": 19,
"a3": 20,
"b3": 21,
"c3": 22,
"d3": 23,
"e3": 24,
"f3": 25,
"g3": 26,
"h3": 27,
"a4": 28,
"b4": 29,
"c4": 30,
"d4": 31,
"e4": 32,
"f4": 33,
"g4": 34,
"h4": 35,
"a5": 36,
"b5": 37,
"c5": 38,
"d5": 39,
"e5": 40,
"f5": 41,
"g5": 42,
"h5": 43,
"a6": 44,
"b6": 45,
"c6": 46,
"d6": 47,
"e6": 48,
"f6": 49,
"g6": 50,
"h6": 51,
"a7": 52,
"b7": 53,
"c7": 54,
"d7": 55,
"e7": 56,
"f7": 57,
"g7": 58,
"h7": 59,
"a8": 60,
"b8": 61,
"c8": 62,
"d8": 63,
"e8": 64,
"f8": 65,
"g8": 66,
"h8": 67,
"q": 68,
"r": 69,
"b": 70,
"n": 71
},
"unk_token": "<unk>"
}
}