AnseMin committed on
Commit
7161f9e
·
1 Parent(s): f1d63ad

Implement full-page OCR

Browse files
Files changed (1) hide show
  1. src/parsers/docling_parser.py +1 -1
src/parsers/docling_parser.py CHANGED
@@ -142,7 +142,7 @@ class DoclingParser(DocumentParser):
142
  print(f"Using tesseract at: {tesseract_path}")
143
 
144
  # Configure OCR options
145
- ocr_options = TesseractOcrOptions(force_full_page_ocr=True) # Using standard options instead of CLI
146
  pipeline_options.ocr_options = ocr_options
147
 
148
  # Set up format options based on file type
 
142
  print(f"Using tesseract at: {tesseract_path}")
143
 
144
  # Configure OCR options
145
+ ocr_options = TesseractCliOcrOptions(force_full_page_ocr=True) # Using standard options instead of CLI
146
  pipeline_options.ocr_options = ocr_options
147
 
148
  # Set up format options based on file type