Spaces:

blaxx14
/

pdf-parser-api

Sleeping

pdf-parser-api / backend /text_recog.py

add all files

81f6231 5 months ago

539 Bytes

	import cv2
	import pytesseract
	from .file_utils import convert_image_to_word

	def parsing_image(image, filename):
		"""Run OCR on an image file and hand the recognized text to convert_image_to_word.

		Args:
			image: Path of the image file; it is passed directly to ``cv2.imread``.
			filename: Forwarded unchanged as the second argument of
				``convert_image_to_word`` (presumably the output document
				name -- confirm against ``file_utils``).

		Returns:
			An empty dict. The recognized text is not returned; it is only
			passed to ``convert_image_to_word``.

		Raises:
			cv2.error: If ``cv2.imread`` cannot load ``image``.
		"""
		import os

		# Developer-machine location of the Tesseract binary. Only override
		# pytesseract's setting when that file really exists; everywhere else
		# (e.g. a Linux deployment) keep whatever pytesseract is already
		# configured with, which by default resolves "tesseract" from PATH.
		windows_tesseract = r'C:\Users\hp\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'
		if os.path.isfile(windows_tesseract):
			pytesseract.pytesseract.tesseract_cmd = windows_tesseract

		pixels = cv2.imread(image)
		if pixels is None:
			# imread signals failure by returning None instead of raising.
			# Raise the same exception type cvtColor would have produced, but
			# with a message that names the actual problem.
			raise cv2.error(f"Could not read image file: {image!r}")

		# Grayscale + inverse binary threshold at 150: pixels brighter than 150
		# become 0 and all others become 255 before the image is given to OCR.
		gray = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
		_, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)

		# --oem 3: default OCR engine mode; --psm 6: treat the page as a single
		# uniform block of text.
		custom_config = r'--oem 3 --psm 6'
		text = pytesseract.image_to_string(thresh, config=custom_config)
		convert_image_to_word(text, filename)

		return {}