Spaces:

allopeap
/

trabajo-ia-equipo-4

Sleeping

App Files Files Community

allopeap committed on May 22, 2024

Commit

8801ece

verified ·

1 Parent(s): c10f2b3

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import argparse
+import os
+import shutil
+from collections import namedtuple
+from itertools import islice
+
+import cv2
+import fitz  # PyMuPDF
+import gradio as gr
+import imutils
+import numpy as np
+import pytesseract
+# NOTE(review): google.colab presumably only resolves inside a Colab runtime,
+# and cv2_imshow is not used by the visible code - confirm before deploying.
+from google.colab.patches import cv2_imshow
+from PIL import Image
+def cleanup_text(text):
+	return "".join([c if ord(c) < 128 else "" for c in text]).strip()
+def detectarCatastro(pdf):
+    if pdf.endswith(".pdf"):
+        images = []
+        # Iterar sobre cada página del PDF
+        for page_num in range(len(pdf)):
+            page = pdf.load_page(page_num)
+            pix = page.get_pixmap()
+            images.append(pix)
+        aligned_images = []
+        template = cv2.imread('alignImage1.png')
+        aligned_image = align_images(images[0], template, debug=True)
+        aligned_images.append(aligned_image)
+        template = cv2.imread('alignImage2.png')
+        aligned_image = align_images(images[1], template, debug=True)
+        aligned_images.append(aligned_image)
+        filtered_image = cv2.bilateralFilter(aligned_images[0], 9, 75, 75)
+        alignedImage = filtered_image
+        alignedImage = cv2.resize(alignedImage, None, fx=1, fy=1, interpolation=cv2.INTER_LINEAR)
+        OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "filter_keywords"])
+        OCR_LOCATIONS = [
+            OCRLocation("Numero de la parcela", (385, 33, 225, 20), ["numero", "de", "la", "parcela"]),
+        ]
+        mostrar = "Numero de la parcela: "
+        for loc in OCR_LOCATIONS:
+            (x, y, w, h) = loc.bbox
+            roi = alignedImage[y:y + h, x:x + w]
+            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
+            text = pytesseract.image_to_string(rgb)
+        mostrar = mostrar + text + " | Cultivos: "
+        filtered_image = cv2.bilateralFilter(aligned_images[1], 9, 75, 75)
+        alignedImage = filtered_image
+        alignedImage = cv2.resize(alignedImage, None, fx=1, fy=1, interpolation=cv2.INTER_LINEAR)
+        OCR_LOCATIONS = [
+            OCRLocation("Cultivos", (75, 58, 180, 190), ["cultivos", "y", "aprovechamientos"]),
+        ]
+        for loc in OCR_LOCATIONS:
+            (x, y, w, h) = loc.bbox
+            roi = alignedImage[y:y + h, x:x + w]
+            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
+            text = pytesseract.image_to_string(rgb)
+        mostrar = mostrar + text
+        return text
+pdf = gr.File(label="Input PDF")
+method = gr.Radio(["PaddleOCR","EasyOCR", "KerasOCR"],value="PaddleOCR")
+output = gr.Textbox(label="Output")
+demo = gr.Interface(
+    detectarCatastro,
+    [pdf],
+    output,
+    title="DetectorCatastro",
+    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
+    article = """<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
+                    <a href="mailto:letstalk@pragnakalp.com" target="_blank">letstalk@pragnakalp.com</a>
+                    <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com/" target="_blank">Pragnakalp Techlabs</a></p>"""
+)