Spaces:
Paused
Paused
implement full page ocr
Browse files
src/parsers/docling_parser.py
CHANGED
|
@@ -142,7 +142,7 @@ class DoclingParser(DocumentParser):
|
|
| 142 |
print(f"Using tesseract at: {tesseract_path}")
|
| 143 |
|
| 144 |
# Configure OCR options
|
| 145 |
-
ocr_options =
|
| 146 |
pipeline_options.ocr_options = ocr_options
|
| 147 |
|
| 148 |
# Set up format options based on file type
|
|
|
|
| 142 |
print(f"Using tesseract at: {tesseract_path}")
|
| 143 |
|
| 144 |
# Configure OCR options
|
| 145 |
+
ocr_options = TesseractCliOcrOptions(force_full_page_ocr=True)  # Tesseract CLI OCR options with full-page OCR forced on
|
| 146 |
pipeline_options.ocr_options = ocr_options
|
| 147 |
|
| 148 |
# Set up format options based on file type
|