Spaces:

eniafou
/

FactureOCR

Runtime error

App Files Files Community

FactureOCR / doc.py

Soufiane

initial

8565879 5 months ago

raw

history blame contribute delete

No virus

1.8 kB

	import cv2
	import base64
	from utils import *

	# Labels of the fields requested from extract_data when the script below
	# runs (presumably the fields printed on a national identity card -
	# TODO confirm).
	CIN = [
	    "Nom complet",
	    "Date de naissance",
	    "Date de validité",
	    "Lieu de naissance",
	    "Numéro CIN",
	]



	def extract_face(image, scale_factor=1.2):
	    """Crop the widest detected frontal face and return it as a base64 JPEG.

	    Args:
	        image: Image array in BGR channel order (it is converted with
	            ``cv2.COLOR_BGR2GRAY`` before detection).
	        scale_factor: Forwarded to ``detectMultiScale`` as ``scaleFactor``.

	    Returns:
	        A dict with the crop's ``"width"`` and ``"height"`` in pixels and
	        ``"data"``, the JPEG bytes encoded as a base64 UTF-8 string.
	        ``None`` when no face is detected or the crop cannot be encoded.
	    """
	    # Load the pre-trained frontal-face Haar cascade shipped with OpenCV.
	    face_cascade = cv2.CascadeClassifier(
	        cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml'
	    )

	    # The detector runs on a grayscale copy; the crop is taken from `image`.
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	    faces = face_cascade.detectMultiScale(
	        gray, scaleFactor=scale_factor, minNeighbors=5, minSize=(30, 30)
	    )
	    print(len(faces))
	    if len(faces) == 0:
	        print("No faces found")
	        return None

	    # Several detections are possible; keep the widest one. Convert to
	    # plain Python ints for the box arithmetic below.
	    x, y, w, h = (int(v) for v in max(faces, key=lambda rect: rect[2]))

	    # Grow the box by roughly 10% on every side.
	    left = x - int(0.1 * w)
	    top = y - int(0.1 * h)
	    right = left + w + int(0.2 * w)
	    bottom = top + h + int(0.2 * h)

	    # Clamp each edge on its own: clamping only the origin and the size
	    # would let a box that starts outside the image slide inward and keep
	    # its full padded size instead of being cut at the border.
	    img_h, img_w = image.shape[:2]
	    left = max(left, 0)
	    top = max(top, 0)
	    right = min(right, img_w)
	    bottom = min(bottom, img_h)

	    face = image[top:bottom, left:right]

	    # imencode reports success through its first return value.
	    ok, encoded_image = cv2.imencode('.jpg', face)
	    if not ok:
	        print("Failed to encode face image")
	        return None
	    encoded_image_str = base64.b64encode(encoded_image).decode('utf-8')

	    return {
	        "width": face.shape[1],
	        "height": face.shape[0],
	        "data": encoded_image_str,
	    }


	if __name__ == '__main__':
	    # Manual smoke run: extract the CIN text fields and the face crop from
	    # one sample image on disk.
	    lang = "french"
	    to_be_extracted = CIN
	    image_path = "./docs for ocr/CIN 2.png"
	    image = cv2.imread(image_path)
	    # cv2.imread signals a missing or unreadable file by returning None
	    # rather than raising, so stop here with a clear message.
	    if image is None:
	        raise SystemExit("Could not read image: " + image_path)

	    text_data = extract_data(lang, to_be_extracted, image)
	    print(text_data)
	    face_data = extract_face(image)