4ad1f12
1
2
3
4
5
6
7
8
9
10
11
import pytesseract
from pdf2image import convert_from_path


def get_text_from_scanned_pdf(pdf_path) -> str:
    """Return the OCR text of every image rendered from a scanned PDF.

    The PDF at *pdf_path* is rendered to images with
    ``pdf2image.convert_from_path`` and each image is passed through
    ``pytesseract.image_to_string``.  The per-image results are
    concatenated in the order the images are returned, with no extra
    separator inserted between them.

    Args:
        pdf_path: Location of the PDF file, forwarded unchanged to
            ``convert_from_path``.

    Returns:
        The concatenated OCR output, or an empty string when the
        conversion yields no images.
    """
    images = convert_from_path(pdf_path)
    # join() builds the result in one pass instead of repeatedly
    # reallocating a growing string with ``+=``.
    return "".join(pytesseract.image_to_string(img) for img in images)