"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{930:function(e,t,s){s.d(t,{t2:function(){return e7}});var n=s(8375),i=s(8577),r=s(7488),o=s(5118),a=s(2586),l=s(9215);async function c(e,t){let s=await Promise.all([(0,i.yM)(e,"tokenizer.json",!0,t),(0,i.yM)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(s[1].legacy=t.legacy),s}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(let[e,s]of g)t=t.replaceAll(e,s);return RegExp(t,"gu")}if(void 0===e.String)return console.warn("Unknown pattern type:",e),null;{let s=(0,n.hr)(e.String);return RegExp(t?s:`(${s})`,"gu")}}function u(e){return new Map(Object.entries(e))}function d(e){let t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function _(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function p(e){return e.replace(/[\u0300-\u036f]/g,"")}let f="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",g=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class m{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class k extends n.Ag{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new x(e);case"Unigram":return new w(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new z(e,...t);throw Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=function(e,t,s){let n=[],i=0;for(;ithis.tokens_to_ids.get(e)??this.unk_token_id)}convert_ids_to_tokens(e){return e.map(e=>this.vocab[e]??this.unk_token)}}class x extends k{constructor(e){for(let[t,s]of(super(e),this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t}encode(e){let t=[];for(let s of e){let e=[...s];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let n=!1,i=0,r=[];for(;i0&&(n=this.config.continuing_subword_prefix+n),this.tokens_to_ids.has(n)){s=n;break}--t}if(null===s){n=!0;break}r.push(s),i=t}n?t.push(this.unk_token):t.push(...r)}return t}}class w extends k{constructor(e,t){super(e);let s=e.vocab.length;this.vocab=Array(s),this.scores=Array(s);for(let t=0;t[e,t])),this.bosToken=" ",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,r.VV)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new a.GA,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){let t=e.sentence,s=t.length,n=0;for(;n{let e=[...Array.from({length:94},(e,t)=>t+33),...Array.from({length:12},(e,t)=>t+161),...Array.from({length:82},(e,t)=>t+174)],t=e.slice(),s=0;for(let n=0;n<256;++n)e.includes(n)||(e.push(n),t.push(256+s),s+=1);let n=t.map(e=>String.fromCharCode(e));return Object.fromEntries(e.map((e,t)=>[e,n[t]]))})(),b=(0,n.$2)(y);class v extends k{constructor(e){for(let[t,s]of(super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t;this.bpe_ranks=new Map(e.merges.map((e,t)=>[e,t])),this.merges=e.merges.map(e=>e.split(this.BPE_SPLIT_TOKEN)),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.cache=new Map}bpe(e){if(0===e.length)return[];let t=this.cache.get(e);if(void 0!==t)return t;let s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){let e=new a.Z3((e,t)=>e.score`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`)):t.push(this.unk_token);return t}}class z extends k{constructor(e,t){for(let[s,n]of(super(e),this.tokens_to_ids=u(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[n]=s}encode(e){return e}}class S extends n.Ag{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new eo(e);case"Sequence":return new R(e);case"Replace":return new A(e);case"NFC":return new E(e);case"NFKC":return new T(e);case"NFKD":return new C(e);case"Strip":return new M(e);case"StripAccents":return new P(e);case"Lowercase":return new $(e);case"Prepend":return new j(e);default:throw Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class A extends S{normalize(e){let t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class E extends S{normalize(e){return e=e.normalize("NFC")}}class T extends S{normalize(e){return e=e.normalize("NFKC")}}class C extends S{normalize(e){return e=e.normalize("NFKD")}}class M extends S{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class P extends S{normalize(e){return e=p(e)}}class $ extends S{normalize(e){return e=e.toLowerCase()}}class j extends S{normalize(e){return e=this.config.prepend+e}}class R extends S{constructor(e){super(e),this.normalizers=e.normalizers.map(e=>S.fromConfig(e))}normalize(e){return this.normalizers.reduce((e,t)=>t.normalize(e),e)}}class N extends S{_tokenize_chinese_chars(e){let t=[];for(let s=0;s=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case" ":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){let t=[];for(let s of e){let e=s.charCodeAt(0);0===e||65533===e||this._is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class F extends n.Ag{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new L(e);case"Sequence":return new ea(e);case"Whitespace":return new el(e);case"WhitespaceSplit":return new ec(e);case"Metaspace":return new ei(e);case"ByteLevel":return new U(e);case"Split":return new O(e);case"Punctuation":return new W(e);case"Digits":return new G(e);case"Replace":return new eh(e);default:throw Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map(e=>this.pre_tokenize_text(e,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class L extends F{constructor(e){super(),this.pattern=RegExp(`[^\\s${f}]+|[${f}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class U extends F{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=y,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(e=>Array.from(this.text_encoder.encode(e),e=>this.byte_encoder[e]).join(""))}}class O extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){let s=[],n=0;for(let i of e.matchAll(t)){let t=i[0];n0&&s.push(t),n=i.index+t.length}return ne.replaceAll(t,this.config.content))}}class V extends K{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){let t=[],s=[];for(let n of e){let e=null;if(6===n.length&&n.startsWith("<0x")&&n.endsWith(">")){let t=parseInt(n.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)s.push(e);else{if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}t.push(n)}}if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}return t}}class H extends K{decode_chain(e){return[e.join("")]}}class J extends K{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map(e=>{let t=0;for(let s=0;s(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=_(e)),e))}}class X extends K{constructor(e){super(e),this.byte_decoder=b,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){let t=e.join(""),s=new Uint8Array([...t].map(e=>this.byte_decoder[e]));return this.text_decoder.decode(s)}decode_chain(e){let t=[],s=[];for(let n of e)void 0!==this.added_tokens.find(e=>e.content===n)?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}}class ee extends K{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";let t=[e[0]];for(let s=1;se!==this.pad_token).join("");return this.cleanup&&(s=_(s).replaceAll(this.word_delimiter_token," ").trim()),s}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class et extends K{constructor(e){super(e),this.decoders=e.decoders.map(e=>K.fromConfig(e))}decode_chain(e){return this.decoders.reduce((e,t)=>t.decode_chain(e),e)}}class es extends K{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}}class en extends K{decode_chain(e){let t="";for(let s=1;se.normalize("NFKC")).join("~"):e.normalize("NFKC")}}class ea extends F{constructor(e){super(),this.tokenizers=e.pretokenizers.map(e=>F.fromConfig(e))}pre_tokenize_text(e,t){return this.tokenizers.reduce((e,s)=>s.pre_tokenize(e,t),[e])}}class el extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class ec extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\S+/g)||[]}}class eh extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}let eu=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];class ed extends n.Ag{return_token_type_ids=!1;_default_chat_template=`{% for message in messages %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %}`;constructor(e,t){for(let s of(super(),this._tokenizer_config=t,this.normalizer=S.fromConfig(e.normalizer),this.pre_tokenizer=F.fromConfig(e.pre_tokenizer),this.model=k.fromConfig(e.model,t),this.post_processor=I.fromConfig(e.post_processor),this.decoder=K.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[],e.added_tokens)){let e=new m(s);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.map(e=>`${e.lstrip?"\\s*":""}(${(0,n.hr)(e.content)})${e.rstrip?"\\s*":""}`).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,this.padding_side="right",this.legacy=!1,this.chat_template=t.chat_template??null,this._compiled_template_cache=new Map}getToken(...e){for(let t of e){let e=this._tokenizer_config[t];if(e){if("object"!=typeof e)return e;if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}}return null}static async from_pretrained(e,{progress_callback:t=null,config:s=null,cache_dir:n=null,local_files_only:i=!1,revision:r="main",legacy:o=null}={}){return new this(...await c(e,{progress_callback:t,config:s,cache_dir:n,local_files_only:i,revision:r,legacy:o}))}_call(e,{text_pair:t=null,add_special_tokens:s=!0,padding:i=!1,truncation:a=null,max_length:l=null,return_tensor:c=!0}={}){let h;let u=Array.isArray(e);if(u){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(Array.isArray(t)){if(e.length!==t.length)throw Error("text and text_pair must have the same length")}else throw Error("text_pair must also be an array");h=e.map((e,n)=>this._encode_plus(e,t[n],{add_special_tokens:s}))}else h=e.map(e=>this._encode_plus(e,null,{add_special_tokens:s}))}else{if(null===e)throw Error("text may not be null");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");h=[this._encode_plus(e,t,{add_special_tokens:s})]}if(null===l?l="max_length"===i?this.model_max_length:(0,r.Fp)(h.map(e=>e.input_ids.length))[0]:a||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),l=Math.min(l,this.model_max_length),i||a)for(let e=0;el?a&&function(e,t){for(let s of Object.keys(e))e[s].length=t}(h[e],l):i&&function(e,t,s,i){for(let r of Object.keys(e)){let o=t-e[r].length,a=s(r),l=Array(o).fill(a);e[r]="right"===i?(0,n.eG)(e[r],l):(0,n.eG)(l,e[r])}}(h[e],l,e=>"input_ids"===e?this.pad_token_id:0,this.padding_side));let d={};if(c){if(!(i&&a)&&h.some(e=>{for(let t of Object.keys(e))if(e[t].length!==h[0][t]?.length)return!0;return!1}))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");let e=[h.length,h[0].input_ids.length];for(let t of Object.keys(h[0]))d[t]=new o.es("int64",BigInt64Array.from(h.flatMap(e=>e[t]).map(BigInt)),e)}else{for(let e of Object.keys(h[0]))d[e]=h.map(t=>t[e]);if(!u)for(let e of Object.keys(d))d[e]=d[e][0]}return d}_encode_text(e){return null===e?null:(this.added_tokens_regex?e.split(this.added_tokens_regex).filter(e=>e):[e]).map((e,t)=>{if(void 0!==this.added_tokens.find(t=>t.content===e))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=p(e.toLowerCase())),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];let s=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(s)}}).flat()}_encode_plus(e,t=null,{add_special_tokens:s=!0}={}){let i=this._encode_text(e),r=this._encode_text(t),o=this.post_processor?this.post_processor(i,r,{add_special_tokens:s}):{tokens:(0,n.eG)(i??[],r??[])},a=this.model.convert_tokens_to_ids(o.tokens),l={input_ids:a,attention_mask:Array(a.length).fill(1)};return this.return_token_type_ids&&o.token_type_ids&&(l.token_type_ids=o.token_type_ids),l}encode(e,t=null,{add_special_tokens:s=!0}={}){let{input_ids:n}=this._encode_plus(e,t,{add_special_tokens:s});return n}batch_decode(e,t={}){return e instanceof o.es&&(e=e.tolist()),e.map(e=>this.decode(e,t))}decode(e,t={}){if(e instanceof o.es&&(e=d(e)),!Array.isArray(e)||0===e.length||!(0,n.Wy)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:s=null}){let n=this.model.convert_ids_to_tokens(e);t&&(n=n.filter(e=>!this.special_tokens.includes(e)));let i=this.decoder?this.decoder(n):n.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(s??this.clean_up_tokenization_spaces)&&(i=_(i)),i}get default_chat_template(){return this._warned_about_chat_template||(console.warn("No chat template is defined for this tokenizer - using a default chat template that implements the ChatML format. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information."),this._warned_about_chat_template=!0),this._default_chat_template}apply_chat_template(e,{chat_template:t=null,add_generation_prompt:s=!1,tokenize:n=!0,padding:i=!1,truncation:r=!1,max_length:o=null,return_tensor:a=!0}={}){t??=this.chat_template??this.default_chat_template;let c=this._compiled_template_cache.get(t);void 0===c&&(c=new l.YS(t),this._compiled_template_cache.set(t,c));let h=Object.create(null);for(let e of eu){let t=this.getToken(e);t&&(h[e]=t)}let u=c.render({messages:e,add_generation_prompt:s,...h});return n?this._call(u,{add_special_tokens:!1,padding:i,truncation:r,max_length:o,return_tensor:a}).input_ids:u}}class e_ extends ed{return_token_type_ids=!0}class ep extends ed{return_token_type_ids=!0}class ef extends ed{return_token_type_ids=!0}class eg extends ed{return_token_type_ids=!0}class em extends ed{return_token_type_ids=!0}class ek extends ed{return_token_type_ids=!0}class ex extends ed{return_token_type_ids=!0}class ew extends ed{return_token_type_ids=!0}class ey extends ed{return_token_type_ids=!0}class eb extends ed{}class ev extends ed{}class ez extends ed{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class eS extends ed{return_token_type_ids=!0}class eA extends ed{}class eE extends ed{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}'}class eT extends ed{}class eC extends ed{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eI(this,e,t,s)}}class eM extends eC{}class eP extends ed{}class e$ extends eE{constructor(e,t){let s=".,!?…。,、।۔،",n=e.pre_tokenizer?.pretokenizers[0]?.pattern;n&&n.Regex===` ?[^(\\s|[${s}])]+`&&(n.Regex=` ?[^\\s${s}]+`),super(e,t)}}class ej extends ed{_default_chat_template=`{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<> ' + system_message + ' <> ' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<> ' + content.strip() + ' <> ' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`;DEFAULT_SYSTEM_PROMPT="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.";constructor(e,t){super(e,t),this.use_default_system_prompt=t.use_default_system_prompt??!1,this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new ei({replacement:"▁",add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text("▁"+e.replaceAll("▁"," "));return t.length>1&&"▁"===t[0]&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}get default_chat_template(){return super.default_chat_template.replaceAll("USE_DEFAULT_PROMPT",this.use_default_system_prompt?"true":"false").replaceAll("DEFAULT_SYSTEM_MESSAGE",this.DEFAULT_SYSTEM_PROMPT.replaceAll("\n","\\n").replaceAll("'","\\'"))}}class eR extends ej{}class eN extends ed{}class eF extends ed{}class eL extends ed{}class eU extends ed{}class eO extends ed{}class eW extends ed{}class eG extends ed{_default_chat_template="{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}"}function eI(e,t,s,n){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e)||!(e.languageRegex instanceof RegExp))throw Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");let i=n.src_lang,r=n.tgt_lang;if(!e.language_codes.includes(r))throw Error(`Target language code "${r}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==i){if(!e.language_codes.includes(i))throw Error(`Source language code "${i}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(let t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(i);break}}return n.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(r)])[0],e._call(t,s)}class eB extends ed{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eI(this,e,t,s)}}class eD extends ed{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)).map(e=>e.slice(2,-2)),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,s){return eI(this,e,t,s)}}let eq=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],eY=new Map(eq),eK=new Map([...eq.map(([e,t])=>[t,e]),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);class eZ extends ed{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}';_decode_asr(e,{return_timestamps:t=!1,return_language:s=!1,time_precision:n=null,force_full_sequences:i=!0}={}){if(null===n)throw Error("Must specify time_precision");let o=null,a="word"===t;function l(){return{language:o,timestamp:[null,null],text:""}}let c=[],h=l(),u=0,d=this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1,_=[],p=[],f=!1,g=null,m=new Set(this.all_special_ids);for(let s of e){let e=s.tokens,i=a?s.token_timestamps:null,k=null,x=d;if("stride"in s){let[t,i,r]=s.stride;if(u-=i,g=t-r,i&&(x=i/n+d),r)for(let t=e.length-1;t>=0;--t){let s=e[t];if(s>=d){if(null!==k&&(s-d)*n=d){let e=(g-d)*n+u,t=(0,r.NM)(e,2);if(null!==k&&g>=k)f=!0;else if(f||_.length>0&&g0?(_.push(w),a&&p.push(y)):_.every(e=>0===e.length)&&(h=l(),_=[],w=[],p=[],y=[])}if(_.length>0){if(i&&t)throw Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.");let[e,s]=this.findLongestCommonSequence(_,p),n=this.decode(e);h.text=n,a&&(h.words=this.collateWordTimestamps(e,s,o)),c.push(h)}let k=Object.create(null),x=c.map(e=>e.text).join("");if(t||s){for(let e=0;e0,o=r?[]:null,a=r?t[0]:null;for(let l=1;le===_[t]).length,f=p/e+t;p>1&&f>h&&(h=f,u=[i,r,a,l])}let[_,p,f,g]=u,m=Math.floor((p+_)/2),k=Math.floor((g+f)/2);i.push(...s.slice(0,m)),n=(s=c.slice(k)).length,r&&(o.push(...a.slice(0,m)),a=t[l].slice(k))}return(i.push(...s),r)?(o.push(...a),[i,o]):[i,[]]}collateWordTimestamps(e,t,s){let[n,i,r]=this.combineTokensIntoWords(e,s),o=[];for(let e=0;e=n){let e=(0,r.NM)((t-n)*s,2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return(i=i.map(e=>"string"==typeof e?e:super.decode(e,t))).join("")}splitTokensOnUnicode(e){let t=this.decode(e,{decode_with_timestamps:!0}),s=[],n=[],i=[],r=[],o=[],a=0;for(let l=0;l=this.model.tokens_to_ids.get("<|endoftext|>"),d=l.startsWith(" "),_=l.trim(),p=a.test(_);if(u||d||p||0===i.length)i.push(l),r.push(c),o.push(h);else{let e=i.length-1;i[e]+=l,r[e].push(...c),o[e].push(...h)}}return[i,r,o]}mergePunctuations(e,t,s,i,r){let o=structuredClone(e),a=structuredClone(t),l=structuredClone(s),c=o.length-2,h=o.length-1;for(;c>=0;)o[c].startsWith(" ")&&i.includes(o[c].trim())?(o[h]=o[c]+o[h],a[h]=(0,n.eG)(a[c],a[h]),l[h]=(0,n.eG)(l[c],l[h]),o[c]="",a[c]=[],l[c]=[]):h=c,--c;for(c=0,h=1;he),a.filter(e=>e.length>0),l.filter(e=>e.length>0)]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:s=!0}={}){let n=[];if(e){e=e.toLowerCase();let t=eK.get(e);if(void 0===t){if(eY.has(e))t=e;else{let t=2===e.length?eY.keys():eY.values();throw Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}}let s=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===s)throw Error(`Unable to find language "${t}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(s)}else n.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);let e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(e)}else n.push(null);if(s){let e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw Error('Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.');n.push(e)}return n.map((e,t)=>[t+1,e]).filter(e=>null!==e[1])}}class eV extends ed{}class eH extends ed{}class eJ extends ed{}class eQ extends ed{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter(e=>this.languageRegex.test(e)),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;let[t,...s]=e.trim().split(this.languageRegex);if(0===s.length)return super._encode_text(t);if(2===s.length){let[e,t]=s;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,n.eG)([e],super._encode_text(t))}}}class eX extends ed{}class e0 extends ed{_default_chat_template="{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"}class e1 extends e0{}class e2 extends ed{}class e3 extends ed{}class e8 extends ed{constructor(e,t){super(e,t),this.decoder=new en({})}}class e7{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:eA,DistilBertTokenizer:eb,CamembertTokenizer:ev,DebertaTokenizer:em,DebertaV2Tokenizer:ek,BertTokenizer:e_,HerbertTokenizer:ex,ConvBertTokenizer:ew,RoFormerTokenizer:ey,XLMTokenizer:ez,ElectraTokenizer:eS,MobileBertTokenizer:ef,SqueezeBertTokenizer:eg,AlbertTokenizer:ep,GPT2Tokenizer:eE,BartTokenizer:eT,MBartTokenizer:eC,MBart50Tokenizer:eM,RobertaTokenizer:eP,WhisperTokenizer:eZ,CodeGenTokenizer:eV,CLIPTokenizer:eH,SiglipTokenizer:eJ,MarianTokenizer:eQ,BloomTokenizer:e$,NllbTokenizer:eB,M2M100Tokenizer:eD,LlamaTokenizer:ej,CodeLlamaTokenizer:eR,XLMRobertaTokenizer:eN,MPNetTokenizer:eF,FalconTokenizer:eL,GPTNeoXTokenizer:eU,EsmTokenizer:eO,Wav2Vec2CTCTokenizer:eX,BlenderbotTokenizer:e0,BlenderbotSmallTokenizer:e1,SpeechT5Tokenizer:e2,NougatTokenizer:e3,VitsTokenizer:e8,Qwen2Tokenizer:eW,GemmaTokenizer:eG,PreTrainedTokenizer:ed};static async from_pretrained(e,{quantized:t=!0,progress_callback:s=null,config:n=null,cache_dir:i=null,local_files_only:r=!1,revision:o="main",legacy:a=null}={}){let[l,h]=await c(e,{quantized:t,progress_callback:s,config:n,cache_dir:i,local_files_only:r,revision:o,legacy:a}),u=h.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer",d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=ed),new d(l,h)}}}}]);