File size: 245 Bytes
4ad1f12
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import pytesseract
from pdf2image import convert_from_path


def get_text_from_scanned_pdf(pdf_path) -> str:
    """Extract text from a PDF by running OCR on each rendered page.

    The PDF is rendered to one image per page with
    ``pdf2image.convert_from_path`` and each image is passed to
    ``pytesseract.image_to_string``. The per-page results are concatenated
    in page order with no extra separator inserted between them.

    Args:
        pdf_path: Path of the PDF file, forwarded unchanged to
            ``convert_from_path``.

    Returns:
        The concatenated OCR text of all pages; an empty string when the
        conversion yields no page images.
    """
    page_images = convert_from_path(pdf_path)
    # Join once instead of repeated ``+=`` so the cost of assembling the
    # result stays linear in the total amount of text.
    return ''.join(
        pytesseract.image_to_string(page_image) for page_image in page_images
    )