bert-base-cantonese / added_tokens.json
indiejoseph's picture
Training in progress, step 500
f805574
raw
history blame
8 kB
{
"㓟": 21530,
"㚻": 21315,
"㞘": 21533,
"㨃": 21571,
"㨘": 21409,
"㩒": 21330,
"㩧": 21428,
"㷫": 21197,
"䁪": 21492,
"䊦": 21310,
"䌫": 21605,
"䴉": 21417,
"丏": 21622,
"乸": 21146,
"亶": 21582,
"佮": 21305,
"偲": 21252,
"僆": 21340,
"僊": 21584,
"僞": 21555,
"儁": 21363,
"儇": 21346,
"兗": 21382,
"冑": 21594,
"冚": 21140,
"冧": 21138,
"剦": 21386,
"卌": 21385,
"卽": 21175,
"厓": 21484,
"吔": 21286,
"呔": 21179,
"咃": 21580,
"咇": 21439,
"哣": 21444,
"唂": 21235,
"唞": 21181,
"唥": 21151,
"唨": 21302,
"唪": 21159,
"唻": 21216,
"啋": 21525,
"啩": 21198,
"啹": 21321,
"喐": 21144,
"喼": 21283,
"嗌": 21130,
"嗍": 21332,
"嗱": 21189,
"嘥": 21149,
"噃": 21145,
"噅": 21413,
"噉": 21129,
"噍": 21371,
"噏": 21174,
"嚙": 21270,
"嚜": 21536,
"嚡": 21275,
"嚤": 21427,
"嚫": 21581,
"嚿": 21132,
"囘": 21389,
"坭": 21280,
"坼": 21599,
"埐": 21537,
"埞": 21196,
"埲": 21221,
"堊": 21225,
"塱": 21195,
"塹": 21309,
"塽": 21426,
"壙": 21514,
"夀": 21559,
"奀": 21285,
"奭": 21423,
"姖": 21467,
"娸": 21368,
"婄": 21493,
"媺": 21353,
"嫽": 21379,
"嬋": 21422,
"嬲": 21153,
"孭": 21178,
"孲": 21204,
"孻": 21217,
"尐": 21155,
"屘": 21193,
"屙": 21177,
"岃": 21618,
"岋": 21578,
"岜": 21542,
"崢": 21358,
"嶠": 21436,
"巉": 21366,
"巹": 21625,
"幗": 21237,
"幪": 21266,
"廄": 21397,
"廩": 21603,
"廸": 21317,
"徂": 21148,
"怐": 21499,
"惲": 21545,
"愔": 21607,
"愨": 21520,
"慤": 21240,
"懽": 21573,
"戇": 21201,
"戙": 21258,
"戥": 21188,
"扠": 21523,
"扤": 21418,
"扻": 21527,
"扽": 21288,
"抌": 21265,
"拃": 21182,
"拏": 21273,
"挐": 21255,
"捵": 21359,
"捹": 21388,
"捽": 21325,
"掅": 21503,
"掕": 21202,
"掗": 21261,
"掟": 21161,
"掯": 21522,
"掹": 21290,
"揈": 21251,
"揗": 21212,
"揞": 21342,
"揦": 21229,
"揳": 21627,
"揼": 21184,
"揾": 21165,
"搣": 21219,
"搦": 21304,
"搲": 21257,
"搾": 21513,
"摑": 21561,
"摵": 21446,
"摷": 21296,
"摼": 21587,
"撠": 21333,
"撳": 21176,
"撾": 21141,
"擗": 21488,
"擝": 21352,
"擳": 21377,
"擸": 21218,
"攋": 21277,
"攰": 21226,
"攴": 21442,
"攷": 21128,
"旚": 21248,
"旯": 21616,
"旼": 21475,
"昃": 21593,
"昪": 21604,
"昰": 21367,
"昺": 21239,
"曱": 21247,
"曺": 21360,
"朊": 21504,
"枴": 21432,
"柊": 21343,
"栢": 21185,
"桴": 21544,
"梘": 21200,
"棖": 21477,
"棯": 21551,
"椏": 21271,
"椥": 21619,
"椴": 21464,
"榎": 21624,
"樖": 21143,
"樘": 21623,
"樨": 21324,
"橈": 21546,
"橛": 21167,
"櫈": 21186,
"櫟": 21364,
"櫳": 21365,
"殮": 21194,
"殻": 21576,
"殽": 21612,
"毬": 21596,
"氂": 21517,
"氘": 21506,
"氚": 21404,
"氼": 21244,
"沚": 21345,
"泂": 21260,
"淝": 21512,
"淥": 21263,
"淰": 21457,
"淸": 21448,
"湉": 21509,
"湼": 21347,
"溦": 21337,
"滘": 21160,
"漖": 21579,
"潁": 21291,
"潯": 21531,
"澂": 21445,
"澌": 21472,
"澠": 21592,
"濰": 21206,
"瀄": 21460,
"瀡": 21583,
"灕": 21314,
"炆": 21191,
"炩": 21569,
"烚": 21230,
"烴": 21154,
"焓": 21375,
"焫": 21264,
"煇": 21192,
"煠": 21344,
"煬": 21355,
"燶": 21326,
"牀": 21208,
"牘": 21539,
"犂": 21405,
"犛": 21518,
"猢": 21282,
"猻": 21243,
"獴": 21268,
"玗": 21485,
"珓": 21565,
"琤": 21566,
"琿": 21552,
"瑭": 21562,
"璘": 21440,
"璠": 21516,
"璣": 21322,
"瓘": 21407,
"瓚": 21535,
"甂": 21287,
"甑": 21558,
"甴": 21267,
"畧": 21381,
"疋": 21528,
"疎": 21620,
"痾": 21203,
"癆": 21316,
"癐": 21209,
"癩": 21474,
"睄": 21577,
"睚": 21540,
"睺": 21585,
"睼": 21395,
"砵": 21166,
"硃": 21429,
"硏": 21601,
"硤": 21170,
"碲": 21393,
"礐": 21613,
"礬": 21410,
"礮": 21281,
"禕": 21490,
"禤": 21408,
"稈": 21279,
"穏": 21420,
"窰": 21187,
"竈": 21470,
"竉": 21190,
"笊": 21621,
"笪": 21135,
"篋": 21508,
"篸": 21615,
"篾": 21323,
"簋": 21415,
"簒": 21241,
"簕": 21262,
"糭": 21207,
"糴": 21378,
"糶": 21549,
"紥": 21349,
"緡": 21276,
"縉": 21515,
"縞": 21339,
"繑": 21491,
"繙": 21183,
"繯": 21541,
"罅": 21164,
"罉": 21400,
"罘": 21483,
"罟": 21220,
"罨": 21383,
"羋": 21336,
"胐": 21289,
"胵": 21473,
"脧": 21311,
"脷": 21136,
"腍": 21210,
"膥": 21180,
"膶": 21223,
"舘": 21361,
"苴": 21463,
"茛": 21295,
"莨": 21447,
"菢": 21452,
"菫": 21500,
"菴": 21307,
"葶": 21150,
"蒴": 21519,
"蓀": 21611,
"蔴": 21168,
"蕓": 21312,
"薾": 21570,
"藪": 21301,
"藶": 21152,
"藺": 21402,
"蘄": 21478,
"蘅": 21563,
"蚺": 21392,
"蛉": 21543,
"蛺": 21233,
"蜑": 21425,
"蜞": 21507,
"蟧": 21494,
"蠄": 21435,
"蠏": 21465,
"裇": 21224,
"褦": 21211,
"褸": 21171,
"觚": 21387,
"觜": 21294,
"詏": 21228,
"諤": 21376,
"謚": 21172,
"謳": 21370,
"谿": 21399,
"豸": 21521,
"貍": 21412,
"贇": 21556,
"趯": 21297,
"趲": 21595,
"趷": 21232,
"跣": 21306,
"踎": 21259,
"踭": 21173,
"躄": 21137,
"躝": 21313,
"軚": 21250,
"軛": 21357,
"軫": 21231,
"輋": 21169,
"輦": 21602,
"轤": 21547,
"迆": 21338,
"逑": 21610,
"逳": 21449,
"郟": 21617,
"鄕": 21498,
"鄴": 21450,
"醂": 21391,
"釤": 21560,
"釩": 21588,
"釹": 21529,
"鈁": 21511,
"鈧": 21590,
"鈮": 21554,
"鈰": 21534,
"鈷": 21482,
"鈸": 21495,
"鈹": 21586,
"鈿": 21557,
"鉈": 21430,
"鉋": 21591,
"鉍": 21318,
"鉎": 21606,
"鉬": 21348,
"鉭": 21496,
"鉸": 21222,
"銣": 21419,
"銦": 21394,
"銨": 21303,
"銫": 21356,
"銲": 21489,
"銻": 21319,
"銼": 21245,
"鋇": 21398,
"鋨": 21548,
"鋯": 21487,
"鋹": 21437,
"錒": 21236,
"錕": 21572,
"錡": 21242,
"鍔": 21471,
"鍬": 21461,
"鍶": 21278,
"鍼": 21550,
"鎅": 21328,
"鎘": 21469,
"鎢": 21411,
"鎵": 21486,
"鏇": 21510,
"鏌": 21396,
"鏐": 21481,
"鏵": 21597,
"鐖": 21403,
"鐙": 21532,
"鑌": 21589,
"鑪": 21574,
"鑭": 21249,
"閂": 21163,
"閆": 21526,
"閪": 21284,
"閬": 21380,
"閭": 21480,
"闐": 21205,
"闓": 21466,
"闞": 21497,
"隗": 21608,
"鞮": 21600,
"韃": 21213,
"韙": 21454,
"韞": 21341,
"韮": 21501,
"頊": 21384,
"頴": 21234,
"顓": 21524,
"顥": 21308,
"顳": 21327,
"颮": 21575,
"餬": 21254,
"餸": 21139,
"馱": 21401,
"駟": 21456,
"駢": 21479,
"騤": 21468,
"騫": 21156,
"騮": 21158,
"騾": 21458,
"驃": 21335,
"驄": 21300,
"驤": 21538,
"骱": 21567,
"骹": 21256,
"髀": 21147,
"髁": 21614,
"髙": 21421,
"髧": 21374,
"髹": 21334,
"鬅": 21246,
"鬭": 21354,
"魨": 21238,
"魴": 21434,
"鮋": 21416,
"鮓": 21350,
"鮟": 21134,
"鮫": 21476,
"鯁": 21351,
"鯇": 21455,
"鯡": 21406,
"鯥": 21441,
"鯪": 21227,
"鯭": 21269,
"鯷": 21553,
"鰂": 21157,
"鰨": 21451,
"鰹": 21331,
"鱇": 21133,
"鱒": 21424,
"鱟": 21502,
"鱲": 21199,
"鳧": 21568,
"鴒": 21431,
"鴞": 21215,
"鴟": 21453,
"鴣": 21414,
"鴴": 21214,
"鵐": 21253,
"鵞": 21362,
"鵪": 21505,
"鵯": 21564,
"鶇": 21369,
"鶉": 21373,
"鶲": 21459,
"鶺": 21438,
"鶿": 21299,
"鷂": 21390,
"鷄": 21162,
"鷈": 21293,
"鷓": 21433,
"鷸": 21329,
"鷿": 21292,
"鸌": 21609,
"鸏": 21598,
"鸕": 21298,
"鸛": 21274,
"麪": 21131,
"黐": 21142,
"鼆": 21272,
"鼇": 21443,
"鼩": 21626,
"龑": 21372,
"龠": 21462,
"龢": 21320
}