manaestras committed on
Commit
5bf2a1f
·
verified ·
1 Parent(s): 4be5a16

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +32 -11
README.md CHANGED
@@ -6,6 +6,7 @@ language:
6
  pipeline_tag: image-text-to-text
7
  library_name: transformers
8
  ---
 
9
  <div align="center">
10
 
11
  # HunyuanOCR
@@ -45,6 +46,25 @@ from transformers import HunYuanVLForConditionalGeneration
45
  from PIL import Image
46
  import torch
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  model_name_or_path = "tencent/HunyuanOCR"
49
  processor = AutoProcessor.from_pretrained(model_name_or_path, use_fast=False)
50
  img_path = "path/to/your/image.jpg"
@@ -93,9 +113,9 @@ else:
93
  generated_ids_trimmed = [
94
  out_ids[len(in_ids):] for in_ids, out_ids in zip(input_ids, generated_ids)
95
  ]
96
- output_texts = processor.batch_decode(
97
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
98
- )
99
  print(output_texts)
100
  ```
101
 
@@ -114,9 +134,9 @@ from PIL import Image
114
  from transformers import AutoProcessor
115
 
116
  model_path = "tencent/HunyuanOCR"
117
- llm = LLM(model=model_path)
118
  processor = AutoProcessor.from_pretrained(model_path)
119
- sampling_params = SamplingParams(temperature=0.0, max_tokens=16384)
120
 
121
  img_path = "/path/to/image.jpg"
122
  img = Image.open(img_path)
@@ -143,14 +163,15 @@ print(output.outputs[0].text)
143
 
144
 
145
  ## 📚 Citation
146
- @misc{hunyuanocr2025,
147
- title={HunyuanOCR Technical Report},
148
- author={Tencent Hunyuan Vision Team},
149
- year={2025},
150
- publisher={GitHub},
151
- journal={GitHub repository},
152
- howpublished={\url{https://github.com/Tencent-Hunyuan/HunyuanOCR}}
153
  }
 
154
 
155
  ## 🙏 Acknowledgements
156
  We would like to thank [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [MinerU](https://github.com/opendatalab/MinerU), [MonkeyOCR](https://github.com/Yuliang-Liu/MonkeyOCR), [DeepSeek-OCR](https://github.com/deepseek-ai/DeepSeek-OCR), [dots.ocr](https://github.com/rednote-hilab/dots.ocr) for their valuable models and ideas.
 
6
  pipeline_tag: image-text-to-text
7
  library_name: transformers
8
  ---
9
+
10
  <div align="center">
11
 
12
  # HunyuanOCR
 
46
  from PIL import Image
47
  import torch
48
 
49
+ def clean_repeated_substrings(text):
50
+ """Clean repeated substrings in text"""
51
+ n = len(text)
52
+ if n<8000:
53
+ return text
54
+ for length in range(2, n // 10 + 1):
55
+ candidate = text[-length:]
56
+ count = 0
57
+ i = n - length
58
+
59
+ while i >= 0 and text[i:i + length] == candidate:
60
+ count += 1
61
+ i -= length
62
+
63
+ if count >= 10:
64
+ return text[:n - length * (count - 1)]
65
+
66
+ return text
67
+
68
  model_name_or_path = "tencent/HunyuanOCR"
69
  processor = AutoProcessor.from_pretrained(model_name_or_path, use_fast=False)
70
  img_path = "path/to/your/image.jpg"
 
113
  generated_ids_trimmed = [
114
  out_ids[len(in_ids):] for in_ids, out_ids in zip(input_ids, generated_ids)
115
  ]
116
+ output_texts = clean_repeated_substrings(processor.batch_decode(
117
  generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
118
+ ))
119
  print(output_texts)
120
  ```
121
 
 
134
  from transformers import AutoProcessor
135
 
136
  model_path = "tencent/HunyuanOCR"
137
+ llm = LLM(model=model_path, trust_remote_code=True)
138
  processor = AutoProcessor.from_pretrained(model_path)
139
+ sampling_params = SamplingParams(temperature=0, max_tokens=16384)
140
 
141
  img_path = "/path/to/image.jpg"
142
  img = Image.open(img_path)
 
163
 
164
 
165
  ## 📚 Citation
166
+ ```
167
+ @software{hunyuanocr2025,
168
+ author = {Tencent Hunyuan Vision Team},
169
+ title = {HunyuanOCR Technical Report},
170
+ year = {2025},
171
+ url = {https://github.com/Tencent-Hunyuan/HunyuanOCR},
172
+ publisher = {GitHub}
173
  }
174
+ ```
175
 
176
  ## 🙏 Acknowledgements
177
  We would like to thank [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [MinerU](https://github.com/opendatalab/MinerU), [MonkeyOCR](https://github.com/Yuliang-Liu/MonkeyOCR), [DeepSeek-OCR](https://github.com/deepseek-ai/DeepSeek-OCR), [dots.ocr](https://github.com/rednote-hilab/dots.ocr) for their valuable models and ideas.