electra-minuscule-generator / tokenizer.json
lgrobol's picture
add model
202bce1
{"version":"1.0","truncation":{"max_length":510,"strategy":"LongestFirst","stride":0},"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true},"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"<unk>","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"!":5,"(":6,")":7,"*":8,",":9,"-":10,".":11,"/":12,"0":13,"1":14,"2":15,"3":16,"4":17,"5":18,"6":19,"7":20,"8":21,":":22,";":23,"A":24,"B":25,"C":26,"D":27,"E":28,"F":29,"G":30,"H":31,"I":32,"J":33,"L":34,"M":35,"N":36,"O":37,"P":38,"Q":39,"R":40,"S":41,"T":42,"U":43,"V":44,"W":45,"Y":46,"[":47,"]":48,"_":49,"a":50,"b":51,"c":52,"d":53,"e":54,"f":55,"g":56,"h":57,"i":58,"j":59,"k":60,"l":61,"m":62,"n":63,"o":64,"p":65,"q":66,"r":67,"s":68,"t":69,"u":70,"v":71,"w":72,"x":73,"y":74,"z":75,"¦":76,"§":77,"¨":78,"©":79,"ª":80,"«":81,"®":82,"¯":83,"´":84,"¶":85,"»":86,"Â":87,"Ã":88,"â":89,"Ċ":90,"Ġ":91,"Ģ":92,"Ĩ":93,"ī":94,"ij":95,"Ķ":96,"Ļ":97,"ł":98,"de":99,"Ġt":100,"Ġl":101,"ou":102,"Ġe":103,"Ġp":104,"Ġm":105,"Ġc":106,"Ġde":107,"âĢ":108,"âĢĻ":109,"nt":110,"Ġs":111,"re":112,"qu":113,"é":114,"es":115,"on":116,"er":117,"ai":118,"Ġet":119,"Ġmo":120,"Ġqu":121,"eu":122,"Ġa":123,"Ġd":124,"Ġpa":125,"me":126,"Ġmode":127,"Ġn":128,"Ġle":129,"Ġtr":130,"le":131,"Ġla":132,"our":133,"in":134,"om":135,"Ġque":136,"ie":137,"ant":138,"Ġtu":139,"Ġou":140,"an":141,"us":142,"Ġen":143,"eur":144,"is":145,"or":146,"Ġv":147,"Ġpl":148,"Ãł":149,"un":150,"os":151,"il":152,"ais":153,"Ġé":154,"au":155,"it":156,"Ġf":157,"Ġse":158,"Ġcom":159,"Ġch":160,"ch":161,"ĠÃł":162,"Ġtou":163,"Ġles":164,"Ġpar":165,"um":166,"Ġj":167,"te":168,"ur":169,"onn":170,"Ġmin":171,"Ġdes":172,"st":173,"Ġ;":174,"ien":175,"ti":176,"Ġpas":177,"en":178,"Ġton":179,"Ġét":180,"Ġcomme":181,"Ġmineur":182,"ium":183,"Ġpour":184,"Ġplus":185,"tre":186,"Ġb":187,"ent":188,"Ġre":189,"ce":190,"Ġg":191,"as":192,"eux":193,"ĠL":194,"ans":195,"è":196,"ain":197,"air":198,"Ġne":199,"ire":200,"pp":201,"que":202,"Ġac":203,"ours":204,"Ġy":205,"Ġma":206,"Ġtrou":207,"Ġtris":208,"Ġchant":209,"ĠM":210,"Ġce":211,"ons":212,"ar":213,"ver":214,"Ġest":215,"lle":216,"Ġo":217,"hor":218,"Ġqui":219,"Ġsou":220,"ment":221,"uran":222,"dre":223,"Ġni":224,"ag":225,"tes":226,"Ġcor":227,"és":228,"ins":229,"Ġver":230,"ois":231,"ux":232,"vant":233,"ys":234,"ĠV":235,"Ġsi":236,"erch":237,"Ġmoins":238,"iel":239,"Ġtout":240,"iè":241,"jours":242,"ne":243,"Ġent":244,"Ġcl":245,"Ġcar":246,"Ġdeux":247,"Ġson":248,"Ġdans":249,"Ġcherch":250,"Ġtoujours":251,"Ġbien":252,"ĠLa":253,"Ġacti":254,"elle":255},"merges":["d e","Ġ t","Ġ l","o u","Ġ e","Ġ p","Ġ m","Ġ c","Ġ de","â Ģ","âĢ Ļ","n t","Ġ s","r e","q u","à ©","e s","o n","e r","a i","Ġe t","Ġm o","Ġ qu","e u","Ġ a","Ġ d","Ġp a","m e","Ġmo de","Ġ n","Ġl e","Ġt r","l e","Ġl a","ou r","i n","o m","Ġqu e","i e","a nt","Ġt u","Ġ ou","a n","u s","Ġe n","eu r","i s","o r","Ġ v","Ġp l","à ł","u n","o s","i l","ai s","Ġ é","a u","i t","Ġ f","Ġs e","Ġc om","Ġc h","c h","Ġ Ãł","Ġt ou","Ġl es","Ġpa r","u m","Ġ j","t e","u r","on n","Ġm in","Ġde s","s t","Ġ ;","ie n","t i","Ġpa s","e n","Ġt on","Ġé t","Ġcom me","Ġmin eur","i um","Ġp our","Ġpl us","t re","Ġ b","e nt","Ġ re","c e","Ġ g","a s","eu x","Ġ L","an s","à ¨","ai n","ai r","Ġn e","i re","p p","qu e","Ġa c","our s","Ġ y","Ġm a","Ġtr ou","Ġtr is","Ġch ant","Ġ M","Ġc e","on s","a r","v er","Ġe st","l le","Ġ o","h or","Ġqu i","Ġs ou","me nt","ur an","d re","Ġn i","a g","t es","Ġc or","é s","in s","Ġv er","o is","u x","v ant","y s","Ġ V","Ġs i","er ch","Ġmo ins","ie l","Ġtou t","i è","j ours","n e","Ġe nt","Ġc l","Ġc ar","Ġde ux","Ġs on","Ġd ans","Ġch erch","Ġtou jours","Ġb ien","ĠL a","Ġac ti","e lle"]}}