AIFunOver commited on
Commit
7bfdfac
1 Parent(s): 43de9a6

Upload openvino_detokenizer.xml with huggingface_hub

Browse files
Files changed (1) hide show
  1. openvino_detokenizer.xml +235 -0
openvino_detokenizer.xml ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_489962" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_489962">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Convert_489978" type="Convert" version="opset1">
14
+ <data destination_type="i32" />
15
+ <input>
16
+ <port id="0" precision="I64">
17
+ <dim>-1</dim>
18
+ <dim>-1</dim>
19
+ </port>
20
+ </input>
21
+ <output>
22
+ <port id="1" precision="I32">
23
+ <dim>-1</dim>
24
+ <dim>-1</dim>
25
+ </port>
26
+ </output>
27
+ </layer>
28
+ <layer id="2" name="Constant_489929" type="Const" version="opset1">
29
+ <data element_type="u8" shape="1351800" offset="0" size="1351800" />
30
+ <output>
31
+ <port id="0" precision="U8">
32
+ <dim>1351800</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="StringTensorUnpack_489930" type="StringTensorUnpack" version="extension">
37
+ <data mode="begins_ends" />
38
+ <input>
39
+ <port id="0" precision="U8">
40
+ <dim>1351800</dim>
41
+ </port>
42
+ </input>
43
+ <output>
44
+ <port id="1" precision="I32">
45
+ <dim>-1</dim>
46
+ </port>
47
+ <port id="2" precision="I32">
48
+ <dim>-1</dim>
49
+ </port>
50
+ <port id="3" precision="U8">
51
+ <dim>-1</dim>
52
+ </port>
53
+ </output>
54
+ </layer>
55
+ <layer id="4" name="VocabDecoder_489963" type="VocabDecoder" version="extension">
56
+ <data skip_tokens="128000, 128001, 128002, 128003, 128004, 128005, 128006, 128007, 128008, 128009, 128010, 128011, 128012, 128013, 128014, 128015, 128016, 128017, 128018, 128019, 128020, 128021, 128022, 128023, 128024, 128025, 128026, 128027, 128028, 128029, 128030, 128031, 128032, 128033, 128034, 128035, 128036, 128037, 128038, 128039, 128040, 128041, 128042, 128043, 128044, 128045, 128046, 128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054, 128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062, 128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070, 128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078, 128079, 128080, 128081, 128082, 128083, 128084, 128085, 128086, 128087, 128088, 128089, 128090, 128091, 128092, 128093, 128094, 128095, 128096, 128097, 128098, 128099, 128100, 128101, 128102, 128103, 128104, 128105, 128106, 128107, 128108, 128109, 128110, 128111, 128112, 128113, 128114, 128115, 128116, 128117, 128118, 128119, 128120, 128121, 128122, 128123, 128124, 128125, 128126, 128127, 128128, 128129, 128130, 128131, 128132, 128133, 128134, 128135, 128136, 128137, 128138, 128139, 128140, 128141, 128142, 128143, 128144, 128145, 128146, 128147, 128148, 128149, 128150, 128151, 128152, 128153, 128154, 128155, 128156, 128157, 128158, 128159, 128160, 128161, 128162, 128163, 128164, 128165, 128166, 128167, 128168, 128169, 128170, 128171, 128172, 128173, 128174, 128175, 128176, 128177, 128178, 128179, 128180, 128181, 128182, 128183, 128184, 128185, 128186, 128187, 128188, 128189, 128190, 128191, 128192, 128193, 128194, 128195, 128196, 128197, 128198, 128199, 128200, 128201, 128202, 128203, 128204, 128205, 128206, 128207, 128208, 128209, 128210, 128211, 128212, 128213, 128214, 128215, 128216, 128217, 128218, 128219, 128220, 128221, 128222, 128223, 128224, 128225, 128226, 128227, 128228, 128229, 128230, 128231, 128232, 128233, 128234, 128235, 128236, 128237, 128238, 128239, 128240, 128241, 128242, 128243, 128244, 128245, 128246, 128247, 128248, 128249, 128250, 128251, 128252, 128253, 128254, 128255" />
57
+ <input>
58
+ <port id="0" precision="I32">
59
+ <dim>-1</dim>
60
+ <dim>-1</dim>
61
+ </port>
62
+ <port id="1" precision="I32">
63
+ <dim>-1</dim>
64
+ </port>
65
+ <port id="2" precision="I32">
66
+ <dim>-1</dim>
67
+ </port>
68
+ <port id="3" precision="U8">
69
+ <dim>-1</dim>
70
+ </port>
71
+ </input>
72
+ <output>
73
+ <port id="4" precision="I32">
74
+ <dim>-1</dim>
75
+ </port>
76
+ <port id="5" precision="I32">
77
+ <dim>-1</dim>
78
+ </port>
79
+ <port id="6" precision="I32">
80
+ <dim>-1</dim>
81
+ </port>
82
+ <port id="7" precision="I32">
83
+ <dim>-1</dim>
84
+ </port>
85
+ <port id="8" precision="U8">
86
+ <dim>-1</dim>
87
+ </port>
88
+ </output>
89
+ </layer>
90
+ <layer id="5" name="FuzeRagged_489964" type="FuzeRagged" version="extension">
91
+ <input>
92
+ <port id="0" precision="I32">
93
+ <dim>-1</dim>
94
+ </port>
95
+ <port id="1" precision="I32">
96
+ <dim>-1</dim>
97
+ </port>
98
+ <port id="2" precision="I32">
99
+ <dim>-1</dim>
100
+ </port>
101
+ <port id="3" precision="I32">
102
+ <dim>-1</dim>
103
+ </port>
104
+ </input>
105
+ <output>
106
+ <port id="4" precision="I32">
107
+ <dim>-1</dim>
108
+ </port>
109
+ <port id="5" precision="I32">
110
+ <dim>-1</dim>
111
+ </port>
112
+ </output>
113
+ </layer>
114
+ <layer id="6" name="Constant_489966" type="Const" version="opset1">
115
+ <data element_type="u8" shape="51" offset="1351800" size="51" />
116
+ <output>
117
+ <port id="0" precision="U8">
118
+ <dim>51</dim>
119
+ </port>
120
+ </output>
121
+ </layer>
122
+ <layer id="7" name="Constant_489968" type="Const" version="opset1">
123
+ <data element_type="u8" shape="2" offset="1351851" size="2" />
124
+ <output>
125
+ <port id="0" precision="U8">
126
+ <dim>2</dim>
127
+ </port>
128
+ </output>
129
+ </layer>
130
+ <layer id="8" name="RegexNormalization_489969" type="RegexNormalization" version="extension">
131
+ <data global_replace="true" />
132
+ <input>
133
+ <port id="0" precision="I32">
134
+ <dim>-1</dim>
135
+ </port>
136
+ <port id="1" precision="I32">
137
+ <dim>-1</dim>
138
+ </port>
139
+ <port id="2" precision="U8">
140
+ <dim>-1</dim>
141
+ </port>
142
+ <port id="3" precision="U8">
143
+ <dim>51</dim>
144
+ </port>
145
+ <port id="4" precision="U8">
146
+ <dim>2</dim>
147
+ </port>
148
+ </input>
149
+ <output>
150
+ <port id="5" precision="I32">
151
+ <dim>-1</dim>
152
+ </port>
153
+ <port id="6" precision="I32">
154
+ <dim>-1</dim>
155
+ </port>
156
+ <port id="7" precision="U8">
157
+ <dim>-1</dim>
158
+ </port>
159
+ </output>
160
+ </layer>
161
+ <layer id="9" name="StringTensorPack_489970" type="StringTensorPack" version="extension">
162
+ <data mode="begins_ends" />
163
+ <input>
164
+ <port id="0" precision="I32">
165
+ <dim>-1</dim>
166
+ </port>
167
+ <port id="1" precision="I32">
168
+ <dim>-1</dim>
169
+ </port>
170
+ <port id="2" precision="U8">
171
+ <dim>-1</dim>
172
+ </port>
173
+ </input>
174
+ <output>
175
+ <port id="3" precision="STRING" names="string_output">
176
+ <dim>-1</dim>
177
+ </port>
178
+ </output>
179
+ </layer>
180
+ <layer id="10" name="Result_489971" type="Result" version="opset1">
181
+ <input>
182
+ <port id="0" precision="STRING">
183
+ <dim>-1</dim>
184
+ </port>
185
+ </input>
186
+ </layer>
187
+ </layers>
188
+ <edges>
189
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
190
+ <edge from-layer="1" from-port="1" to-layer="4" to-port="0" />
191
+ <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
192
+ <edge from-layer="3" from-port="1" to-layer="4" to-port="1" />
193
+ <edge from-layer="3" from-port="2" to-layer="4" to-port="2" />
194
+ <edge from-layer="3" from-port="3" to-layer="4" to-port="3" />
195
+ <edge from-layer="4" from-port="7" to-layer="5" to-port="3" />
196
+ <edge from-layer="4" from-port="8" to-layer="8" to-port="2" />
197
+ <edge from-layer="4" from-port="6" to-layer="5" to-port="2" />
198
+ <edge from-layer="4" from-port="5" to-layer="5" to-port="1" />
199
+ <edge from-layer="4" from-port="4" to-layer="5" to-port="0" />
200
+ <edge from-layer="5" from-port="4" to-layer="8" to-port="0" />
201
+ <edge from-layer="5" from-port="5" to-layer="8" to-port="1" />
202
+ <edge from-layer="6" from-port="0" to-layer="8" to-port="3" />
203
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="4" />
204
+ <edge from-layer="8" from-port="5" to-layer="9" to-port="0" />
205
+ <edge from-layer="8" from-port="6" to-layer="9" to-port="1" />
206
+ <edge from-layer="8" from-port="7" to-layer="9" to-port="2" />
207
+ <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
208
+ </edges>
209
+ <rt_info>
210
+ <add_attention_mask value="True" />
211
+ <add_prefix_space />
212
+ <add_special_tokens value="True" />
213
+ <bos_token_id value="128000" />
214
+ <chat_template value="{{- bos_token }}&#10;{%- if custom_tools is defined %}&#10; {%- set tools = custom_tools %}&#10;{%- endif %}&#10;{%- if not tools_in_user_message is defined %}&#10; {%- set tools_in_user_message = true %}&#10;{%- endif %}&#10;{%- if not date_string is defined %}&#10; {%- set date_string = &quot;26 Jul 2024&quot; %}&#10;{%- endif %}&#10;{%- if not tools is defined %}&#10; {%- set tools = none %}&#10;{%- endif %}&#10;&#10;{#- This block extracts the system message, so we can slot it into the right place. #}&#10;{%- if messages[0]['role'] == 'system' %}&#10; {%- set system_message = messages[0]['content']|trim %}&#10; {%- set messages = messages[1:] %}&#10;{%- else %}&#10; {%- set system_message = &quot;&quot; %}&#10;{%- endif %}&#10;&#10;{#- System message + builtin tools #}&#10;{{- &quot;&lt;|start_header_id|>system&lt;|end_header_id|>\n\n&quot; }}&#10;{%- if builtin_tools is defined or tools is not none %}&#10; {{- &quot;Environment: ipython\n&quot; }}&#10;{%- endif %}&#10;{%- if builtin_tools is defined %}&#10; {{- &quot;Tools: &quot; + builtin_tools | reject('equalto', 'code_interpreter') | join(&quot;, &quot;) + &quot;\n\n&quot;}}&#10;{%- endif %}&#10;{{- &quot;Cutting Knowledge Date: December 2023\n&quot; }}&#10;{{- &quot;Today Date: &quot; + date_string + &quot;\n\n&quot; }}&#10;{%- if tools is not none and not tools_in_user_message %}&#10; {{- &quot;You have access to the following functions. To call a function, please respond with JSON for a function call.&quot; }}&#10; {{- 'Respond in the format {&quot;name&quot;: function name, &quot;parameters&quot;: dictionary of argument name and its value}.' }}&#10; {{- &quot;Do not use variables.\n\n&quot; }}&#10; {%- for t in tools %}&#10; {{- t | tojson(indent=4) }}&#10; {{- &quot;\n\n&quot; }}&#10; {%- endfor %}&#10;{%- endif %}&#10;{{- system_message }}&#10;{{- &quot;&lt;|eot_id|>&quot; }}&#10;&#10;{#- Custom tools are passed in a user message with some extra guidance #}&#10;{%- if tools_in_user_message and not tools is none %}&#10; {#- Extract the first user message so we can plug it in here #}&#10; {%- if messages | length != 0 %}&#10; {%- set first_user_message = messages[0]['content']|trim %}&#10; {%- set messages = messages[1:] %}&#10; {%- else %}&#10; {{- raise_exception(&quot;Cannot put tools in the first user message when there's no first user message!&quot;) }}&#10;{%- endif %}&#10; {{- '&lt;|start_header_id|>user&lt;|end_header_id|>\n\n' -}}&#10; {{- &quot;Given the following functions, please respond with a JSON for a function call &quot; }}&#10; {{- &quot;with its proper arguments that best answers the given prompt.\n\n&quot; }}&#10; {{- 'Respond in the format {&quot;name&quot;: function name, &quot;parameters&quot;: dictionary of argument name and its value}.' }}&#10; {{- &quot;Do not use variables.\n\n&quot; }}&#10; {%- for t in tools %}&#10; {{- t | tojson(indent=4) }}&#10; {{- &quot;\n\n&quot; }}&#10; {%- endfor %}&#10; {{- first_user_message + &quot;&lt;|eot_id|>&quot;}}&#10;{%- endif %}&#10;&#10;{%- for message in messages %}&#10; {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}&#10; {{- '&lt;|start_header_id|>' + message['role'] + '&lt;|end_header_id|>\n\n'+ message['content'] | trim + '&lt;|eot_id|>' }}&#10; {%- elif 'tool_calls' in message %}&#10; {%- if not message.tool_calls|length == 1 %}&#10; {{- raise_exception(&quot;This model only supports single tool-calls at once!&quot;) }}&#10; {%- endif %}&#10; {%- set tool_call = message.tool_calls[0].function %}&#10; {%- if builtin_tools is defined and tool_call.name in builtin_tools %}&#10; {{- '&lt;|start_header_id|>assistant&lt;|end_header_id|>\n\n' -}}&#10; {{- &quot;&lt;|python_tag|>&quot; + tool_call.name + &quot;.call(&quot; }}&#10; {%- for arg_name, arg_val in tool_call.arguments | items %}&#10; {{- arg_name + '=&quot;' + arg_val + '&quot;' }}&#10; {%- if not loop.last %}&#10; {{- &quot;, &quot; }}&#10; {%- endif %}&#10; {%- endfor %}&#10; {{- &quot;)&quot; }}&#10; {%- else %}&#10; {{- '&lt;|start_header_id|>assistant&lt;|end_header_id|>\n\n' -}}&#10; {{- '{&quot;name&quot;: &quot;' + tool_call.name + '&quot;, ' }}&#10; {{- '&quot;parameters&quot;: ' }}&#10; {{- tool_call.arguments | tojson }}&#10; {{- &quot;}&quot; }}&#10; {%- endif %}&#10; {%- if builtin_tools is defined %}&#10; {#- This means we're in ipython mode #}&#10; {{- &quot;&lt;|eom_id|>&quot; }}&#10; {%- else %}&#10; {{- &quot;&lt;|eot_id|>&quot; }}&#10; {%- endif %}&#10; {%- elif message.role == &quot;tool&quot; or message.role == &quot;ipython&quot; %}&#10; {{- &quot;&lt;|start_header_id|>ipython&lt;|end_header_id|>\n\n&quot; }}&#10; {%- if message.content is mapping or message.content is iterable %}&#10; {{- message.content | tojson }}&#10; {%- else %}&#10; {{- message.content }}&#10; {%- endif %}&#10; {{- &quot;&lt;|eot_id|>&quot; }}&#10; {%- endif %}&#10;{%- endfor %}&#10;{%- if add_generation_prompt %}&#10; {{- '&lt;|start_header_id|>assistant&lt;|end_header_id|>\n\n' }}&#10;{%- endif %}&#10;" />
215
+ <clean_up_tokenization_spaces />
216
+ <detokenizer_input_type value="i64" />
217
+ <eos_token_id value="128009" />
218
+ <handle_special_tokens_with_re />
219
+ <number_of_inputs value="1" />
220
+ <openvino_tokenizers_version value="2025.0.0.0.dev20241114" />
221
+ <openvino_version value="2025.0.0.dev20241114" />
222
+ <original_tokenizer_class value="&lt;class 'transformers.tokenization_utils_fast.PreTrainedTokenizerFast'>" />
223
+ <sentencepiece_version value="0.2.0" />
224
+ <skip_special_tokens value="True" />
225
+ <streaming_detokenizer value="False" />
226
+ <tiktoken_version value="0.8.0" />
227
+ <tokenizer_output_type value="i64" />
228
+ <tokenizers_version value="0.20.1" />
229
+ <transformers_version value="4.45.2" />
230
+ <use_max_padding value="False" />
231
+ <use_sentencepiece_backend value="False" />
232
+ <utf8_replace_mode />
233
+ <with_detokenizer value="True" />
234
+ </rt_info>
235
+ </net>