Spaces:

pawanmau01
/

TestAPI

Sleeping

TestAPI / app /services /panServices /panDataExtractor.py

Pavan+2-at-244075126032

Comment out all ocr services to test uidai

75a02d0 about 1 month ago

4.18 kB

	# import re
	# def filter_array(arr):
	# # Define the regex patterns
	# pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+\|[^.\s\w]+[\w]+')
	# pattern_numeric = re.compile(r'^[0-9]+$')
	# pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')

	# # Filter the array
	# filtered_array = [
	# item for item in arr
	# if not (pattern_alphanumeric_special.search(item) or
	# pattern_numeric.match(item) or
	# pattern_special_chars.search(item))
	# ]
	# return filtered_array

	# def extract_panData(data):
	# unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
	# "पिता का नाम", 'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',
	# "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
	# 'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
	# 'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
	# "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
	# 'ugr Name']




	# # Initialize result object
	# result = {
	# "statusCode": 200,
	# "error": '',
	# "data": {
	# "panNo": '',
	# "name": '',
	# "fatherName": '',
	# "dob": ''
	# }
	# }

	# # Clean the array by removing unwanted words and invalid entries
	# cleaned_data = []
	# combination_pattern = re.compile(r'(?=.[0-9])(?=.[!@#$%^&*(),?":{}\|<>])')

	# for item in data:
	# if item not in unwanted_words and not combination_pattern.search(item):
	# cleaned_data.append(item)


	# # Check and extract PAN number
	# pan_pattern = re.compile(r'^[A-Z]{5}\s[0-9]{4}\s[A-Z]$')
	# for item in cleaned_data:
	# if pan_pattern.match(item):
	# result["data"]["panNo"] = item
	# cleaned_data.remove(item)
	# break

	# # Check and extract date of birth
	# dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
	# for item in cleaned_data:
	# if dob_pattern.match(item):
	# result["data"]["dob"] = item
	# cleaned_data.remove(item)
	# break

	# # If only two values are left, assume they are name and father's name
	# cleaned_data = filter_array(cleaned_data)
	# if len(cleaned_data) == 2:
	# result["data"]["name"] = cleaned_data[0]
	# result["data"]["fatherName"] = cleaned_data[1]
	# else:
	# # Further cleaning of the data array to extract name and father's name
	# cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
	# print(cleaned_data, "after cleaning")
	# # Check and extract name
	# name_pattern = re.compile(r'^[A-Za-z .]+$')
	# if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
	# result["data"]["name"] = cleaned_data[0]
	# else:
	# result["data"]["name"] = ''

	# # Check and extract father's name
	# if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
	# result["data"]["fatherName"] = cleaned_data[1]
	# else:
	# result["data"]["fatherName"] = ''

	# # Check if any value is empty and set error message
	# for key, value in result["data"].items():
	# if value == '':
	# result["statusCode"] = 400
	# result["error"] = f"{key} value is not found due to bad image."
	# break

	# return result