eagle0504 committed
Commit 6738563
1 Parent(s): 8f7024b

app updated

Files changed (3)
  1. app.py +4 -0
  2. utils/cnn_transformer.py +5 -6
  3. utils/helpers.py +32 -29
app.py CHANGED
@@ -156,6 +156,10 @@ def main():
         {"Key": keys, "Values": values}
     )
 
+    # Display table
+    with st.expander("Inspect table (before download)"):
+        st.table(sample_payload_output)
+
     # Convert DataFrame to CSV
     csv = sample_payload_output.to_csv(index=False)
 
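The added block is the usual Streamlit pattern for letting users eyeball a DataFrame before exporting it. A minimal self-contained sketch of the same pattern (the sample data and the download button are illustrative, not the repo's actual main()):

import pandas as pd
import streamlit as st

# Illustrative stand-in for the app's payload table.
sample_payload_output = pd.DataFrame({"Key": ["model", "version"], "Values": ["cnn", "1.0"]})

# Show the table inside a collapsible section so it doesn't crowd the page.
with st.expander("Inspect table (before download)"):
    st.table(sample_payload_output)

# Convert the DataFrame to CSV and offer it for download.
csv = sample_payload_output.to_csv(index=False)
st.download_button("Download CSV", data=csv, file_name="payload.csv", mime="text/csv")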
 
utils/cnn_transformer.py CHANGED
@@ -3,11 +3,11 @@ import os
 os.environ["KERAS_BACKEND"] = "tensorflow"
 
 import re
-import numpy as np
-import matplotlib.pyplot as plt
 
-import tensorflow as tf
 import keras
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
 from keras import layers
 from keras.applications import efficientnet
 from keras.layers import TextVectorization
@@ -319,7 +319,6 @@ class ImageCaptioningModel(keras.Model):
         return [self.loss_tracker, self.acc_tracker]
 
 
-
 strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
 strip_chars = strip_chars.replace("<", "")
 strip_chars = strip_chars.replace(">", "")
@@ -350,7 +349,7 @@ def generate_caption(caption_model: None):
 
     # Pass the image to the CNN
     # img = tf.expand_dims(sample_img, 0)
-    #TOOD
+    # TODO
    img = None
    img = caption_model.cnn_model(img)
 
@@ -376,4 +375,4 @@ def generate_caption(caption_model: None):
 
     decoded_caption = decoded_caption.replace("<start> ", "")
     decoded_caption = decoded_caption.replace(" <end>", "").strip()
-    print("Predicted Caption: ", decoded_caption)
+    print("Predicted Caption: ", decoded_caption)
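The TODO above leaves `img = None`, which would crash `caption_model.cnn_model(img)` as committed. Judging by the commented-out `tf.expand_dims(sample_img, 0)` hint, the missing step is loading and batching an image. A hedged sketch of one way to fill it in (IMAGE_SIZE and the file path are assumptions, not values taken from this repo):

import tensorflow as tf

IMAGE_SIZE = (299, 299)  # assumed CNN input size; the repo may use a different one

def read_image(img_path: str) -> tf.Tensor:
    # Decode a JPEG, resize it, and scale pixels to [0, 1].
    data = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(data, channels=3)
    img = tf.image.resize(img, IMAGE_SIZE)
    return tf.image.convert_image_dtype(img, tf.float32)

sample_img = read_image("sample.jpg")  # placeholder path
img = tf.expand_dims(sample_img, 0)    # add the batch dimension the CNN expects
# img = caption_model.cnn_model(img)   # then proceed as in generate_caption above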
utils/helpers.py CHANGED
@@ -4,22 +4,20 @@ import json
 import os
 from typing import Any, Dict, List
 
+import chromadb
+import google.generativeai as palm
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
 import pandas as pd
 import requests
 import streamlit as st
-from PIL import Image, ImageDraw, ImageFont
-import google.generativeai as palm
-from pypdf import PdfReader
+from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 from langchain.text_splitter import (
     RecursiveCharacterTextSplitter,
     SentenceTransformersTokenTextSplitter,
 )
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import streamlit as st
-import chromadb
-from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
-
+from PIL import Image, ImageDraw, ImageFont
+from pypdf import PdfReader
 
 # API Key (You should set this in your environment variables)
 # api_key = st.secrets["PALM_API_KEY"]
@@ -187,10 +185,10 @@ def displayPDF(file: str) -> None:
     # Opening the PDF file in binary read mode
     with open(file, "rb") as f:
         # Encoding the PDF file content to base64
-        base64_pdf: str = base64.b64encode(f.read()).decode('utf-8')
+        base64_pdf: str = base64.b64encode(f.read()).decode("utf-8")
 
     # Creating an HTML embed string for displaying the PDF
-    pdf_display: str = F'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
+    pdf_display: str = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
 
     # Using Streamlit to display the HTML embed string as unsafe HTML
     st.markdown(pdf_display, unsafe_allow_html=True)
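After the quote normalization, displayPDF is unchanged in behavior: it base64-encodes a PDF from disk and embeds it via a data URI. A usage sketch (the surrounding uploader UI and file names are hypothetical):

import streamlit as st

uploaded = st.file_uploader("Upload a PDF", type="pdf")
if uploaded is not None:
    # displayPDF takes a file path, so persist the upload first.
    with open("uploaded.pdf", "wb") as f:
        f.write(uploaded.getbuffer())
    displayPDF("uploaded.pdf")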
 
@@ -199,16 +197,16 @@ def displayPDF(file: str) -> None:
 def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
     """
     Draws bounding boxes and labels onto an image based on provided predictions.
-
+
     Parameters:
     - image (Any): The image to annotate, which should support the PIL drawing interface.
     - predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
-      containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
+      containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
       'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
-
+
     Returns:
     - Any: The annotated image with bounding boxes and labels drawn on it.
-
+
     Note:
     - This function assumes that the incoming image supports the PIL ImageDraw interface.
     - The function directly modifies the input image and returns it.
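The docstring spells out the prediction schema draw_boxes expects. A minimal call under that schema (label, score, and box values here are fabricated):

from PIL import Image

predictions = [
    {
        "label": "cat",
        "score": 0.98,
        "box": {"xmin": 10, "ymin": 20, "xmax": 200, "ymax": 180},
    }
]

image = Image.new("RGB", (320, 240), "white")  # stand-in for a real photo
annotated = draw_boxes(image, predictions)     # draws in place and returns the image
annotated.save("annotated.png")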
 
@@ -235,7 +233,9 @@ def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
     return image
 
 
-def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, Any]) -> Image.Image:
+def draw_bounding_boxes_for_textract(
+    image: Image.Image, json_data: Dict[str, Any]
+) -> Image.Image:
     """
     Draws bounding boxes on an image based on the provided JSON data from Textract.
 
@@ -248,31 +248,34 @@ def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, Any]) -> Image.Image:
     """
     # Load the image from the provided path
     draw = ImageDraw.Draw(image)
-
+
     # Parse the JSON data
     try:
         data = json_data
-        blocks = json.loads(data['body']) if 'body' in data else None
+        blocks = json.loads(data["body"]) if "body" in data else None
     except json.JSONDecodeError:
-        st.error('Invalid JSON data.')
+        st.error("Invalid JSON data.")
         return image
-
+
     if blocks is None:
-        st.error('No bounding box data found.')
+        st.error("No bounding box data found.")
         return image
-
+
     # Iterate through the elements to find bounding boxes and draw them
     for item in blocks:
-        if 'BlockType' in item and item['BlockType'] in ['LINE', 'WORD']:
-            bbox = item['Geometry']['BoundingBox']
+        if "BlockType" in item and item["BlockType"] in ["LINE", "WORD"]:
+            bbox = item["Geometry"]["BoundingBox"]
             # Extract coordinates and dimensions
-            left, top, width, height = bbox['Left'], bbox['Top'], bbox['Width'], bbox['Height']
+            left, top, width, height = (
+                bbox["Left"],
+                bbox["Top"],
+                bbox["Width"],
+                bbox["Height"],
+            )
             # Calculate bounding box coordinates in image space
             left_top = (left * image.width, top * image.height)
             right_bottom = ((left + width) * image.width, (top + height) * image.height)
             # Draw rectangle
-            draw.rectangle([left_top, right_bottom], outline='red', width=2)
-
-    return image
-
+            draw.rectangle([left_top, right_bottom], outline="red", width=2)
 
+    return image
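A sketch of driving the reformatted function with a Textract-style payload. The JSON body is fabricated and far smaller than a real Textract response, but it has the "body" wrapper, BlockType, and ratio-based Geometry fields the code reads:

import json

from PIL import Image

fake_textract = {
    "body": json.dumps(
        [
            {
                "BlockType": "WORD",
                "Geometry": {
                    "BoundingBox": {"Left": 0.1, "Top": 0.1, "Width": 0.3, "Height": 0.05}
                },
            }
        ]
    )
}

page = Image.new("RGB", (800, 600), "white")  # stand-in for a scanned page
annotated = draw_bounding_boxes_for_textract(page, fake_textract)
annotated.save("boxes.png")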