pawandev committed on
Commit
43ca488
1 Parent(s): d62425f

Added new PAN model and changed OCR settings and extraction regex as per the new model

Browse files
app/__init__.py CHANGED
@@ -10,7 +10,7 @@ def create_app():
10
  # Load model once
11
  app.models = {
12
  'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
13
- 'panModel': YOLO('models/PanYolo_v4.pt') # Load additional models as needed
14
  }
15
 
16
  return app
 
10
  # Load model once
11
  app.models = {
12
  'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
13
+ 'panModel': YOLO('models/PanModel_v5.pt') # Load additional models as needed
14
  }
15
 
16
  return app
app/routes/panApi.py CHANGED
@@ -28,6 +28,7 @@ def ocrPan(mode, session):
28
  img_data = base64.b64decode(imgBuffer)
29
  img = Image.open(BytesIO(img_data))
30
  img.verify() # Verify image format
 
31
  img = Image.open(io.BytesIO(img_data)) # Re-open image after verification
32
  except (base64.binascii.Error, ValueError) as decode_err:
33
  return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
@@ -42,6 +43,7 @@ def ocrPan(mode, session):
42
  response = session.get(img_url)
43
  response.raise_for_status()
44
  img = Image.open(BytesIO(response.content))
 
45
  img.verify() # Verify image format
46
  img = Image.open(BytesIO(response.content)) # Re-open image after verification
47
 
@@ -54,7 +56,7 @@ def ocrPan(mode, session):
54
 
55
  # Run detection
56
  model = current_app.models.get('panModel')
57
- results = model.predict(source=img, save=False)
58
  # print(results,"model result")
59
  extracted_data = process_results(results, img)
60
  # print(extracted_data, "extracted data")
 
28
  img_data = base64.b64decode(imgBuffer)
29
  img = Image.open(BytesIO(img_data))
30
  img.verify() # Verify image format
31
+ print(img, "img")
32
  img = Image.open(io.BytesIO(img_data)) # Re-open image after verification
33
  except (base64.binascii.Error, ValueError) as decode_err:
34
  return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
 
43
  response = session.get(img_url)
44
  response.raise_for_status()
45
  img = Image.open(BytesIO(response.content))
46
+ print(img, "img")
47
  img.verify() # Verify image format
48
  img = Image.open(BytesIO(response.content)) # Re-open image after verification
49
 
 
56
 
57
  # Run detection
58
  model = current_app.models.get('panModel')
59
+ results = model.predict(source=img, imgsz=680, iou=0.7, augment=True)
60
  # print(results,"model result")
61
  extracted_data = process_results(results, img)
62
  # print(extracted_data, "extracted data")
app/services/panServices/panDataExtractor.py CHANGED
@@ -3,14 +3,14 @@ def filter_array(arr):
3
  # Define the regex patterns
4
  pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
5
  pattern_numeric = re.compile(r'^[0-9]+$')
6
- pattern_non_alpha = re.compile(r'[^.\s]*[^a-zA-Z\s][^.\s]*')
7
 
8
  # Filter the array
9
  filtered_array = [
10
  item for item in arr
11
  if not (pattern_alphanumeric_special.search(item) or
12
  pattern_numeric.match(item) or
13
- pattern_non_alpha.search(item))
14
  ]
15
  return filtered_array
16
 
@@ -20,7 +20,8 @@ def extract_panData(data):
20
  "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
21
  'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
22
  'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
23
- "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name']
 
24
 
25
 
26
 
@@ -47,7 +48,7 @@ def extract_panData(data):
47
 
48
 
49
  # Check and extract PAN number
50
- pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
51
  for item in cleaned_data:
52
  if pan_pattern.match(item):
53
  result["data"]["panNo"] = item
 
3
  # Define the regex patterns
4
  pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
5
  pattern_numeric = re.compile(r'^[0-9]+$')
6
+ pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')
7
 
8
  # Filter the array
9
  filtered_array = [
10
  item for item in arr
11
  if not (pattern_alphanumeric_special.search(item) or
12
  pattern_numeric.match(item) or
13
+ pattern_special_chars.search(item))
14
  ]
15
  return filtered_array
16
 
 
20
  "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
21
  'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
22
  'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
23
+ "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
24
+ 'ugr Name']
25
 
26
 
27
 
 
48
 
49
 
50
  # Check and extract PAN number
51
+ pan_pattern = re.compile(r'^[A-Z]{5}\s*[0-9]{4}\s*[A-Z]$')
52
  for item in cleaned_data:
53
  if pan_pattern.match(item):
54
  result["data"]["panNo"] = item
app/services/panServices/panOcr.py CHANGED
@@ -6,7 +6,7 @@ from .panDataExtractor import extract_panData
6
 
7
  def process_results(results, img):
8
  label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
9
- confidence_threshold = 0.3
10
  input_image_format = img.format if img.format else "PNG"
11
  valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
12
  input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
 
6
 
7
  def process_results(results, img):
8
  label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
9
+ confidence_threshold = 0.4
10
  input_image_format = img.format if img.format else "PNG"
11
  valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
12
  input_image_format = input_image_format if input_image_format in valid_formats else "PNG"