allopeap committed on
Commit
8801ece
·
verified ·
1 Parent(s): c10f2b3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.colab.patches import cv2_imshow
2
+ import fitz # PyMuPDF
3
+ from PIL import Image
4
+ import os
5
+ from itertools import islice
6
+ from collections import namedtuple
7
+ import pytesseract
8
+ import argparse
9
+ import imutils
10
+ import cv2
11
+ import shutil
12
+ import os
13
+
14
+
15
def cleanup_text(text: str) -> str:
    """Return *text* with every non-ASCII character removed and the ends trimmed.

    Characters whose code point is 128 or higher are dropped (not replaced),
    then leading and trailing whitespace is stripped from the result.
    """
    return "".join(ch for ch in text if ord(ch) < 128).strip()
17
+
18
+
19
+
20
+
21
def _pixmap_to_bgr(pix):
    """Convert a PyMuPDF pixmap (RGB, no alpha) into a BGR array for OpenCV."""
    import numpy as np  # local import: numpy is not imported at file level

    rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    # The templates are read with cv2.imread (BGR), so pages must match.
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _load_template(path):
    """Read an alignment template from disk, failing loudly if it is missing."""
    template = cv2.imread(path)
    if template is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError(f"Alignment template not found: {path!r}")
    return template


def _ocr_region(image, bbox):
    """Run Tesseract on the (x, y, w, h) region of a BGR image."""
    x, y, w, h = bbox
    roi = image[y:y + h, x:x + w]
    rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    # Trim the surrounding whitespace so the pieces join into one clean line.
    return pytesseract.image_to_string(rgb).strip()


def detectarCatastro(pdf):
    """Read the parcel number and the crops from a two-page cadastre PDF.

    The first two pages are rendered, each is aligned against its template
    image (``alignImage1.png`` / ``alignImage2.png``), smoothed with a
    bilateral filter, and a fixed region of each page is passed to Tesseract.

    Args:
        pdf: Path to the PDF, or an uploaded-file object exposing the path
            through a ``name`` attribute.

    Returns:
        A string of the form
        ``"Numero de la parcela: <text> | Cultivos: <text>"``.

    Raises:
        ValueError: If no file is given, the file is not a ``.pdf``, or the
            document has fewer than two pages.
        FileNotFoundError: If an alignment template cannot be read.
    """
    if pdf is None:
        raise ValueError("No PDF file was provided.")

    # Accept both a plain path and a file wrapper that carries `.name`.
    pdf_path = os.fspath(getattr(pdf, "name", pdf))
    if not pdf_path.lower().endswith(".pdf"):
        raise ValueError(f"Expected a .pdf file, got: {pdf_path!r}")

    # Only the first two pages are ever used, so only those are rendered.
    with fitz.open(pdf_path) as document:
        if len(document) < 2:
            raise ValueError("The PDF must contain at least two pages.")
        pages = [
            _pixmap_to_bgr(document.load_page(index).get_pixmap(alpha=False))
            for index in range(2)
        ]

    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "filter_keywords"])

    # One (template, OCR region) pair per page. `filter_keywords` is carried
    # along but not applied to the OCR output.
    page_specs = [
        (
            "alignImage1.png",
            OCRLocation(
                "Numero de la parcela",
                (385, 33, 225, 20),
                ["numero", "de", "la", "parcela"],
            ),
        ),
        (
            "alignImage2.png",
            OCRLocation(
                "Cultivos",
                (75, 58, 180, 190),
                ["cultivos", "y", "aprovechamientos"],
            ),
        ),
    ]

    texts = []
    for page, (template_path, loc) in zip(pages, page_specs):
        template = _load_template(template_path)
        # NOTE(review): `align_images` is neither defined nor imported in this
        # file; it must be provided before this function can run. `debug=True`
        # is kept from the original call -- confirm it does not try to open
        # GUI windows when running on a headless server.
        aligned = align_images(page, template, debug=True)
        filtered = cv2.bilateralFilter(aligned, 9, 75, 75)
        texts.append(_ocr_region(filtered, loc.bbox))

    # Return the full summary, not just the text of the last region.
    return "Numero de la parcela: " + texts[0] + " | Cultivos: " + texts[1]
88
+
89
+
90
# `gr` is used throughout this section but was never imported in the file.
import gradio as gr

# Input / output components of the demo.
pdf = gr.File(label="Input PDF")
# NOTE(review): this selector is created but is not passed to the interface
# below, so it has no effect on the demo; kept so the module-level name
# remains available.
method = gr.Radio(["PaddleOCR","EasyOCR", "KerasOCR"],value="PaddleOCR")
output = gr.Textbox(label="Output")

# Single-input interface: the uploaded PDF goes to detectarCatastro and the
# returned string is shown in the output textbox.
demo = gr.Interface(
    detectarCatastro,
    [pdf],
    output,
    title="DetectorCatastro",
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
    article = """<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
<a href="mailto:letstalk@pragnakalp.com" target="_blank">letstalk@pragnakalp.com</a>
<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com/" target="_blank">Pragnakalp Techlabs</a></p>""",
)

# Start the web server only when the file is run as a script, so importing
# the module stays free of side effects.
if __name__ == "__main__":
    demo.launch()