susnato commited on
Commit
6c5cf1e
1 Parent(s): 87b9a25

Update tokenizer with the right file and added vocab

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. vocab.json +1 -0
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"te": 136, "con": 135, "wh": 72, "ong": 222, "pp": 246, "ough": 202, "from": 154, "there": 172, "in": 41, "ul": 167, "ca": 183, "said": 252, "who": 214, "ou": 45, "id": 83, "ter": 114, "were": 184, "m": 26, "up": 152, "ant": 159, "en": 50, "ind": 168, "ere": 138, "into": 228, "el": 249, "ow": 69, "whi": 230, "y": 38, "th": 40, "n": 27, "od": 207, "ity": 240, "ate": 186, "you": 74, "ag": 143, "as": 55, "for": 87, "ha": 102, "so": 123, "f": 19, "if": 140, "ty": 210, "em": 225, "u": 34, "der": 234, "at": 48, "ous": 206, "but": 128, "have": 148, "li": 75, "co": 174, "back": 241, "r": 31, "way": 180, "e": 18, "j": 23, "ur": 121, "mo": 115, "this": 147, "ab": 109, "k": 24, "me": 80, "ill": 216, "o": 28, "was": 81, "some": 171, "pe": 124, "lo": 99, "res": 175, "we": 100, "bl": 251, "p": 29, "what": 193, "[SPACE]": 2, "ad": 68, "t": 33, "ld": 79, "use": 250, "z": 39, "could": 221, "al": 56, "ve": 76, "ay": 88, "ke": 93, "ts": 192, "ac": 77, "ain": 137, "pl": 178, "ch": 71, "qu": 194, "ight": 196, "w": 36, "ation": 158, "ir": 98, "ine": 203, "ri": 101, "il": 151, "h": 21, "my": 125, "just": 226, "ound": 189, "can": 201, "when": 199, "red": 219, "'": 4, "tion": 181, "do": 134, "any": 229, "had": 116, "go": 119, "oun": 146, "-": 8, "their": 224, "b": 15, "es": 61, "ther": 131, "st": 63, "sh": 120, "ap": 155, "ok": 166, "i": 22, "per": 205, "of": 58, "er": 44, "now": 145, "ak": 213, "ting": 242, "with": 103, "they": 141, "been": 237, "ted": 188, "ll": 84, "ut": 96, "am": 122, "ome": 129, "more": 244, "su": 133, "a": 14, "ack": 157, "them": 217, "fo": 182, "know": 209, "by": 185, "po": 139, "ce": 117, ";": 12, "ally": 195, "then": 247, "s": 32, "no": 95, "se": 66, "about": 215, "im": 106, "ti": 78, "our": 160, "ye": 220, "br": 243, "the": 42, "x": 37, "ard": 177, "gr": 198, "ink": 239, "ge": 112, "own": 190, "ide": 208, "gh": 82, "fe": 132, "ven": 200, "!": 3, "did": 235, "(": 5, "very": 211, "wi": 85, "his": 94, ")": 6, "ne": 111, "sa": 150, "tw": 232, "ly": 70, "fr": 130, "man": 176, "get": 254, "that": 73, "om": 65, "si": 212, "ast": 233, "l": 25, "us": 142, "an": 43, "and": 53, "end": 204, "g": 20, "to": 51, "one": 110, "ould": 113, "ver": 90, "ro": 89, "op": 161, "sp": 248, "on": 47, "him": 156, "c": 16, "ar": 59, "out": 105, "/": 10, "or": 57, "le": 64, "ion": 107, "com": 170, "would": 191, "ed": 49, "all": 108, "ent": 86, "thing": 153, "q": 30, "it": 60, "[UNK]": 1, "her": 92, ",": 7, "ex": 169, "ake": 245, "ers": 173, "d": 17, "ing": 52, "not": 149, "[STOP]": 0, ".": 9, "like": 162, "she": 118, "be": 67, "ace": 238, "un": 97, "tr": 144, "are": 127, "v": 35, "ic": 91, "is": 54, "over": 253, "your": 223, "ght": 104, "?": 13, "ie": 236, "he": 62, "est": 218, "ust": 163, "re": 46, "wor": 179, ":": 11, "de": 126, "other": 227, "ess": 164, "pro": 187, "um": 231, "ck": 197, "bo": 165}