Query-PDF / scan_pdf_parser.py
Nirav-Khanpara's picture
Upload 2 files
4ad1f12
raw
history blame
No virus
245 Bytes
import pytesseract
from pdf2image import convert_from_path
def get_text_from_scanned_pdf(pdf_path) -> str:
    """Extract text from a scanned (image-only) PDF via OCR.

    The PDF is converted to images with ``pdf2image.convert_from_path``
    and each image is passed to ``pytesseract.image_to_string``. The
    per-image results are concatenated in the order the images are
    returned, with no extra separator inserted by this function.

    Args:
        pdf_path: Location of the PDF file, forwarded unchanged to
            ``convert_from_path``.

    Returns:
        The concatenated OCR text of all images; an empty string when
        the conversion yields no images.

    Raises:
        Whatever ``convert_from_path`` or ``image_to_string`` raise is
        propagated unchanged; nothing is caught here.
    """
    # NOTE(review): convert_from_path returns all images at once, so the
    # whole document is presumably held in memory -- confirm this is
    # acceptable for the PDF sizes expected by callers.
    images = convert_from_path(pdf_path)

    # Join once instead of growing a string with ``+=`` per page.
    return "".join(pytesseract.image_to_string(img) for img in images)