Spaces:
Sleeping
Sleeping
Upload ocr_service.py
Browse files — ocr_service.py (+9 −0)
ocr_service.py
CHANGED
|
@@ -259,6 +259,15 @@ async def get_ocr_model():
|
|
| 259 |
else:
|
| 260 |
_ocr_model = _ocr_model.to(dtype=torch.float32)
|
| 261 |
print(" - DeepSeek-OCR on CPU (float32)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
return _ocr_model, _ocr_tokenizer
|
| 263 |
|
| 264 |
|
|
|
|
| 259 |
else:
|
| 260 |
_ocr_model = _ocr_model.to(dtype=torch.float32)
|
| 261 |
print(" - DeepSeek-OCR on CPU (float32)")
|
| 262 |
+
|
| 263 |
+
# Configure generation to silence warnings
|
| 264 |
+
gc = _ocr_model.generation_config
|
| 265 |
+
gc.do_sample = False # Greedy decoding
|
| 266 |
+
gc.temperature = 1.0 # Don't mix temperature=0 with do_sample=False
|
| 267 |
+
if _ocr_tokenizer.pad_token_id is None:
|
| 268 |
+
_ocr_tokenizer.pad_token = _ocr_tokenizer.eos_token or _ocr_tokenizer.unk_token
|
| 269 |
+
_ocr_model.generation_config.pad_token_id = _ocr_tokenizer.pad_token_id
|
| 270 |
+
print(" - Generation config set (do_sample=False, temperature=1.0, pad_token_id set)")
|
| 271 |
return _ocr_model, _ocr_tokenizer
|
| 272 |
|
| 273 |
|