Soumen committed on
Commit
4372a52
1 Parent(s): f4332f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -36
app.py CHANGED
@@ -48,42 +48,6 @@ import pytesseract
48
 
49
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
50
  from PIL import Image
51
- def mark_region(im):
52
-
53
- #im = cv2.imread(image_path)
54
-
55
- gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
56
- blur = cv2.GaussianBlur(gray, (9,9), 0)
57
- thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,30)
58
-
59
- # Dilate to combine adjacent text contours
60
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
61
- dilate = cv2.dilate(thresh, kernel, iterations=4)
62
-
63
- # Find contours, highlight text areas, and extract ROIs
64
- cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
65
- cnts = cnts[0] if len(cnts) == 2 else cnts[1]
66
-
67
- line_items_coordinates = []
68
- for c in cnts:
69
- area = cv2.contourArea(c)
70
- x,y,w,h = cv2.boundingRect(c)
71
-
72
- if y >= 600 and x <= 1000:
73
- if area > 10000:
74
- image = cv2.rectangle(im, (x,y), (2200, y+h), color=(255,0,255), thickness=3)
75
- line_items_coordinates.append([(x,y), (2200, y+h)])
76
-
77
- if y >= 2400 and x<= 2000:
78
- image = cv2.rectangle(im, (x,y), (2200, y+h), color=(255,0,255), thickness=3)
79
- line_items_coordinates.append([(x,y), (2200, y+h)])
80
-
81
-
82
- return image, line_items_coordinates
83
-
84
-
85
-
86
-
87
  @st.experimental_singleton
88
  def read_pdf(file):
89
  images=pdf2image.convert_from_path(file)
 
48
 
49
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
50
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  @st.experimental_singleton
52
  def read_pdf(file):
53
  images=pdf2image.convert_from_path(file)