eagle0504 committed
Commit 6738563
1 Parent(s): 8f7024b

app updated

Files changed (3)
  1. app.py +4 -0
  2. utils/cnn_transformer.py +5 -6
  3. utils/helpers.py +32 -29
app.py CHANGED
@@ -156,6 +156,10 @@ def main():
         {"Key": keys, "Values": values}
     )
 
+    # Display table
+    with st.expander("Inspect table (before download)"):
+        st.table(sample_payload_output)
+
     # Convert DataFrame to CSV
     csv = sample_payload_output.to_csv(index=False)
 
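The added block is the usual Streamlit pattern for letting users eyeball a DataFrame before exporting it. A minimal self-contained sketch of the same pattern (the sample data and the download button are illustrative, not the repo's actual main()):

import pandas as pd
import streamlit as st

# Illustrative stand-in for the app's payload table.
sample_payload_output = pd.DataFrame({"Key": ["model", "version"], "Values": ["cnn", "1.0"]})

# Show the table inside a collapsible section so it doesn't crowd the page.
with st.expander("Inspect table (before download)"):
    st.table(sample_payload_output)

# Convert the DataFrame to CSV and offer it for download.
csv = sample_payload_output.to_csv(index=False)
st.download_button("Download CSV", data=csv, file_name="payload.csv", mime="text/csv")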
 
utils/cnn_transformer.py CHANGED
@@ -3,11 +3,11 @@ import os
 os.environ["KERAS_BACKEND"] = "tensorflow"
 
 import re
-import numpy as np
-import matplotlib.pyplot as plt
 
-import tensorflow as tf
 import keras
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
 from keras import layers
 from keras.applications import efficientnet
 from keras.layers import TextVectorization
@@ -319,7 +319,6 @@ class ImageCaptioningModel(keras.Model):
         return [self.loss_tracker, self.acc_tracker]
 
 
-
 strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
 strip_chars = strip_chars.replace("<", "")
 strip_chars = strip_chars.replace(">", "")
@@ -350,7 +349,7 @@ def generate_caption(caption_model: None):
 
     # Pass the image to the CNN
     # img = tf.expand_dims(sample_img, 0)
-    #TOOD
+    # TODO
    img = None
    img = caption_model.cnn_model(img)
 
@@ -376,4 +375,4 @@ def generate_caption(caption_model: None):
 
     decoded_caption = decoded_caption.replace("<start> ", "")
     decoded_caption = decoded_caption.replace(" <end>", "").strip()
-    print("Predicted Caption: ", decoded_caption)
+    print("Predicted Caption: ", decoded_caption)
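The TODO above leaves `img = None`, which would crash `caption_model.cnn_model(img)` as committed. Judging by the commented-out `tf.expand_dims(sample_img, 0)` hint, the missing step is loading and batching an image. A hedged sketch of one way to fill it in (IMAGE_SIZE and the file path are assumptions, not values taken from this repo):

import tensorflow as tf

IMAGE_SIZE = (299, 299)  # assumed CNN input size; the repo may use a different one

def read_image(img_path: str) -> tf.Tensor:
    # Decode a JPEG, resize it, and scale pixels to [0, 1].
    data = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(data, channels=3)
    img = tf.image.resize(img, IMAGE_SIZE)
    return tf.image.convert_image_dtype(img, tf.float32)

sample_img = read_image("sample.jpg")  # placeholder path
img = tf.expand_dims(sample_img, 0)    # add the batch dimension the CNN expects
# img = caption_model.cnn_model(img)   # then proceed as in generate_caption above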
utils/helpers.py CHANGED
@@ -4,22 +4,20 @@ import json
 import os
 from typing import Any, Dict, List
 
+import chromadb
+import google.generativeai as palm
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
 import pandas as pd
 import requests
 import streamlit as st
-from PIL import Image, ImageDraw, ImageFont
-import google.generativeai as palm
-from pypdf import PdfReader
+from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 from langchain.text_splitter import (
     RecursiveCharacterTextSplitter,
     SentenceTransformersTokenTextSplitter,
 )
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import streamlit as st
-import chromadb
-from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
-
+from PIL import Image, ImageDraw, ImageFont
+from pypdf import PdfReader
 
 # API Key (You should set this in your environment variables)
 # api_key = st.secrets["PALM_API_KEY"]
@@ -187,10 +185,10 @@ def displayPDF(file: str) -> None:
     # Opening the PDF file in binary read mode
     with open(file, "rb") as f:
         # Encoding the PDF file content to base64
-        base64_pdf: str = base64.b64encode(f.read()).decode('utf-8')
+        base64_pdf: str = base64.b64encode(f.read()).decode("utf-8")
 
     # Creating an HTML embed string for displaying the PDF
-    pdf_display: str = F'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
+    pdf_display: str = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
 
     # Using Streamlit to display the HTML embed string as unsafe HTML
     st.markdown(pdf_display, unsafe_allow_html=True)
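After the quote normalization, displayPDF is unchanged in behavior: it base64-encodes a PDF from disk and embeds it via a data URI. A usage sketch (the surrounding uploader UI and file names are hypothetical):

import streamlit as st

uploaded = st.file_uploader("Upload a PDF", type="pdf")
if uploaded is not None:
    # displayPDF takes a file path, so persist the upload first.
    with open("uploaded.pdf", "wb") as f:
        f.write(uploaded.getbuffer())
    displayPDF("uploaded.pdf")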
 
@@ -199,16 +197,16 @@ def displayPDF(file: str) -> None:
 def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
     """
     Draws bounding boxes and labels onto an image based on provided predictions.
-
+
     Parameters:
     - image (Any): The image to annotate, which should support the PIL drawing interface.
     - predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
-      containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
+      containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
       'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
-
+
     Returns:
     - Any: The annotated image with bounding boxes and labels drawn on it.
-
+
     Note:
     - This function assumes that the incoming image supports the PIL ImageDraw interface.
     - The function directly modifies the input image and returns it.
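The docstring spells out the prediction schema draw_boxes expects. A minimal call under that schema (label, score, and box values here are fabricated):

from PIL import Image

predictions = [
    {
        "label": "cat",
        "score": 0.98,
        "box": {"xmin": 10, "ymin": 20, "xmax": 200, "ymax": 180},
    }
]

image = Image.new("RGB", (320, 240), "white")  # stand-in for a real photo
annotated = draw_boxes(image, predictions)     # draws in place and returns the image
annotated.save("annotated.png")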
 
@@ -235,7 +233,9 @@ def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
     return image
 
 
-def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, Any]) -> Image.Image:
+def draw_bounding_boxes_for_textract(
+    image: Image.Image, json_data: Dict[str, Any]
+) -> Image.Image:
     """
     Draws bounding boxes on an image based on the provided JSON data from Textract.
 
@@ -248,31 +248,34 @@ def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, Any]) -> Image.Image:
     """
     # Load the image from the provided path
     draw = ImageDraw.Draw(image)
-
+
     # Parse the JSON data
     try:
         data = json_data
-        blocks = json.loads(data['body']) if 'body' in data else None
+        blocks = json.loads(data["body"]) if "body" in data else None
     except json.JSONDecodeError:
-        st.error('Invalid JSON data.')
+        st.error("Invalid JSON data.")
         return image
-
+
     if blocks is None:
-        st.error('No bounding box data found.')
+        st.error("No bounding box data found.")
         return image
-
+
     # Iterate through the elements to find bounding boxes and draw them
     for item in blocks:
-        if 'BlockType' in item and item['BlockType'] in ['LINE', 'WORD']:
-            bbox = item['Geometry']['BoundingBox']
+        if "BlockType" in item and item["BlockType"] in ["LINE", "WORD"]:
+            bbox = item["Geometry"]["BoundingBox"]
             # Extract coordinates and dimensions
-            left, top, width, height = bbox['Left'], bbox['Top'], bbox['Width'], bbox['Height']
+            left, top, width, height = (
+                bbox["Left"],
+                bbox["Top"],
+                bbox["Width"],
+                bbox["Height"],
+            )
             # Calculate bounding box coordinates in image space
             left_top = (left * image.width, top * image.height)
             right_bottom = ((left + width) * image.width, (top + height) * image.height)
             # Draw rectangle
-            draw.rectangle([left_top, right_bottom], outline='red', width=2)
-
-    return image
-
+            draw.rectangle([left_top, right_bottom], outline="red", width=2)
 
+    return image
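A sketch of driving the reformatted function with a Textract-style payload. The JSON body is fabricated and far smaller than a real Textract response, but it has the "body" wrapper, BlockType, and ratio-based Geometry fields the code reads:

import json

from PIL import Image

fake_textract = {
    "body": json.dumps(
        [
            {
                "BlockType": "WORD",
                "Geometry": {
                    "BoundingBox": {"Left": 0.1, "Top": 0.1, "Width": 0.3, "Height": 0.05}
                },
            }
        ]
    )
}

page = Image.new("RGB", (800, 600), "white")  # stand-in for a scanned page
annotated = draw_bounding_boxes_for_textract(page, fake_textract)
annotated.save("boxes.png")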