Files changed (1)
README.md +33 -8
README.md CHANGED
@@ -136,14 +136,39 @@ Using uv:
uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly
```

+ ### Model Deploy
+ ```bash
+ vllm serve tencent/HunyuanOCR \
+     --no-enable-prefix-caching \
+     --mm-processor-cache-gb 0 \
+     --gpu-memory-utilization 0.2
+ ```

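For reference (not part of this diff): once the server started by the Model Deploy command above is running, it can be queried through vLLM's OpenAI-compatible API. A minimal sketch, assuming the default endpoint at `http://localhost:8000/v1`, an installed `openai` client, and a placeholder image path and prompt:

```python
import base64
from openai import OpenAI

# Assumption: `vllm serve tencent/HunyuanOCR ...` from the Model Deploy section
# is running locally and exposing vLLM's default OpenAI-compatible endpoint.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Encode a local image (placeholder path) as a data URL for the request.
with open("example.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
    model="tencent/HunyuanOCR",
    messages=[{
        "role": "user",
        "content": [
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
            {"type": "text", "text": "Recognize the text in the image."},  # placeholder prompt
        ],
    }],
)
print(response.choices[0].message.content)
```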
### Model Inference
-
```python
from vllm import LLM, SamplingParams
from PIL import Image
from transformers import AutoProcessor

+ def clean_repeated_substrings(text):
+     """Clean repeated substrings in text"""
+     n = len(text)
+     if n < 8000:
+         return text
+     for length in range(2, n // 10 + 1):
+         candidate = text[-length:]
+         count = 0
+         i = n - length
+
+         while i >= 0 and text[i:i + length] == candidate:
+             count += 1
+             i -= length
+
+         if count >= 10:
+             return text[:n - length * (count - 1)]
+
+     return text
+
model_path = "tencent/HunyuanOCR"
llm = LLM(model=model_path, trust_remote_code=True)
  processor = AutoProcessor.from_pretrained(model_path)

@@ -160,7 +185,7 @@ messages = [
  prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = {"prompt": prompt, "multi_modal_data": {"image": [img]}}
output = llm.generate([inputs], sampling_params)[0]
- print(output.outputs[0].text)
+ print(clean_repeated_substrings(output.outputs[0].text))
```

  ## 💬 Application-oriented Prompts
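The hunk above now routes the raw generation through `clean_repeated_substrings` before printing. A quick sanity check of that helper (not part of the diff; it assumes the function definition from the first hunk is in scope): for outputs of 8000+ characters that end in a short unit repeated at least 10 times, only one copy of the unit is kept, and shorter outputs pass through unchanged.

```python
# Illustrative check of clean_repeated_substrings (definition from the diff above).
degenerate = "A" * 7000 + "ha" * 600          # 8200 chars, ends with "ha" repeated 600 times
cleaned = clean_repeated_substrings(degenerate)

assert cleaned == "A" * 7000 + "ha"           # trailing repetition collapsed to one copy
assert clean_repeated_substrings("short") == "short"  # below the 8000-char threshold: unchanged
```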

@@ -175,12 +200,12 @@ print(output.outputs[0].text)

## 📚 Citation
```
- @software{hunyuanocr2025,
-       author = {Tencent Hunyuan Vision Team},
-       title = {HunyuanOCR Technical Report},
-       year = {2025},
-       url = {https://github.com/Tencent-Hunyuan/HunyuanOCR},
-       publisher = {GitHub}
+ @misc{hunyuanvisionteam2025hunyuanocrtechnicalreport,
+       title={HunyuanOCR Technical Report},
+       author={Hunyuan Vision Team and Pengyuan Lyu and Xingyu Wan and Gengluo Li and Shangpin Peng and Weinong Wang and Liang Wu and Huawen Shen and Yu Zhou and Canhui Tang and Qi Yang and Qiming Peng and Bin Luo and Hower Yang and Xinsong Zhang and Jinnian Zhang and Houwen Peng and Hongming Yang and Senhao Xie and Longsha Zhou and Ge Pei and Binghong Wu and Kan Wu and Mana Yang and Sergey Wang and Raccoon Liu and Dick Zhu and Jie Jiang and Linus and Han Hu and Chengquan Zhang},
+       year={2025},
+       journal={arXiv preprint arXiv:2510.18234},
+       url={https://arxiv.org/abs/2511.19575},
}
```