3martini committed on
Commit
68b5c0f
1 Parent(s): fd6f0a4

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +37 -106
  3. sandbox/sam_example.py +42 -0
.gitignore CHANGED
@@ -13,4 +13,5 @@ yolo*.pt
13
  **/.jpg
14
  pdf_downloads
15
  node_modules
 
16
 
 
13
  **/.jpg
14
  pdf_downloads
15
  node_modules
16
+ .devcontainer/*.env
17
 
app.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
  import json
4
 
5
  from fastapi import FastAPI, UploadFile
6
- from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
7
  import gradio as gr
8
  from PIL import Image
9
  import PIL
@@ -14,10 +14,15 @@ from ultralytics.engine.results import Results, Masks
14
  import uvicorn
15
  import cv2
16
  import uuid
17
- from functools import partial
18
  from openai import OpenAI
19
 
20
- PROMPT = "You are analyzing the spec sheet of a solar panel. Plese answer the following questions, format them as a JSON dictionary.\n"
 
 
 
 
 
 
21
 
22
  # from solareyes.sam import SAM
23
 
@@ -28,10 +33,6 @@ client = OpenAI(
28
 
29
  app = FastAPI()
30
 
31
- # Load the model
32
- # model: YOLO = YOLO('model/autodistill_best.pt') # Path to trained model
33
- # seg_model: YOLO = YOLO('model/autodistill_best_seg.pt') # Path to trained model
34
-
35
  # Directories
36
  image_dir = './pdf_images/'
37
  cropped_dir = './output/'
@@ -40,31 +41,6 @@ os.makedirs(image_dir, exist_ok=True)
40
  os.makedirs(cropped_dir, exist_ok=True)
41
  os.makedirs(pdf_dir, exist_ok=True)
42
 
43
- HTML = """
44
- <!DOCTYPE html>
45
- <html>
46
- <h1>Gradio Request Demo</h1>
47
- <p>Click the button to be redirected to the gradio app!</p>
48
- <button onclick="window.location.pathname='/gradio'">Redirect</button>
49
- </html>
50
- """
51
-
52
- # sam = SAM()
53
-
54
- # @app.get("/")
55
- # def read_main():
56
- # return HTMLResponse(HTML)
57
-
58
-
59
- # @app.get("/foo")
60
- # def redirect():
61
- # return RedirectResponse("/gradio")
62
-
63
-
64
- # def detect_solar_panel(image) -> Results:
65
- # # Perform inference
66
- # results: Results = model(image)
67
- # return results
68
 
69
  def parse_pdf_text(file):
70
  pdf = pdfium.PdfDocument(file)
@@ -72,7 +48,9 @@ def parse_pdf_text(file):
72
  for page in pdf:
73
  textpage = page.get_textpage()
74
  text_all = textpage.get_text_bounded()
75
- all_text += text_all
 
 
76
 
77
  #use openai to ask questions about text
78
  q1 = "What are module dimensions in L x W x H? Result key should be \"module_dimensions\""
@@ -101,6 +79,7 @@ def segment_solar_panel(image) -> Results:
101
  results: Results = seg_model.predict(image, imgsz=(841, 595), retina_masks=True)
102
  return results
103
 
 
104
  def resize_and_pad(subject_image: Image.Image):
105
  # Resize subject image to 80% of 1200px while maintaining aspect ratio
106
  target_height = int(1200 * 0.8)
@@ -168,14 +147,20 @@ def segment_image_core(img: np.ndarray | Image.Image) -> Image.Image:
168
  return (img, sections), jpg_img
169
 
170
 
171
- def process_pdf_core(pdf) -> Image.Image:
172
  pdf = pdfium.PdfDocument(pdf)
173
- img_input.clear()
 
 
 
 
 
 
 
174
 
175
- # Get just the first page
176
- page = pdf[0]
177
- image = page.render(scale=4).to_pil()
178
- return image
179
 
180
 
181
  with gr.Blocks() as demo:
@@ -185,93 +170,39 @@ with gr.Blocks() as demo:
185
  def segment_image(img):
186
  img_sections, jpg_img = segment_image_core(img)
187
  return img_sections
188
-
189
-
190
- # def process_image(img):
191
- # results = detect_solar_panel(img)
192
- # sections = []
193
- # for result in results:
194
- # result: Results
195
- # # print(result)
196
- # try:
197
- # boxes = result.boxes.xyxy[0].tolist()
198
- # # Unpack boxes
199
- # x1, y1, x2, y2 = boxes
200
- # sections.append(((int(x1), int(y1), int(x2), int(y2)), f"{section_labels[0]} Bounding Box"))
201
- # #Create 4 centroids around the true centroid shifted by a delta value
202
- # delta = 0.3
203
- # delta_x = (x2 - x1) * delta
204
- # delta_y = (y2 - y1) * delta
205
- # x_centroid = (x1 + x2) / 2
206
- # y_centroid = (y1 + y2) / 2
207
- # xtop_centroid = x_centroid
208
- # ytop_centroid = y_centroid + delta_y
209
- # xright_centroid = x_centroid + delta_x
210
- # yright_centroid = y_centroid
211
- # xbottom_centroid = x_centroid
212
- # ybottom_centroid = y_centroid - delta_y
213
- # xleft_centroid = x_centroid - delta_x
214
- # yleft_centroid = y_centroid
215
- # sam_mask, sam_scores = sam.segment(img, [[
216
- # [xtop_centroid, ytop_centroid],
217
- # [xright_centroid, yright_centroid],
218
- # [xbottom_centroid, ybottom_centroid],
219
- # [xleft_centroid, yleft_centroid]
220
- # ]])
221
- # squeezed_sam_mask_tensor = sam_mask[0].squeeze()
222
- # squeezed_sam_scores_tensor = sam_scores[0].squeeze()
223
- # print(f"sqeezed sam mask shape {squeezed_sam_mask_tensor.shape}")
224
- # print(f"sqeezed sam scores shape {squeezed_sam_scores_tensor.shape}")
225
- # for i in range(0, squeezed_sam_mask_tensor.shape[0]):
226
- # flat_mask = squeezed_sam_mask_tensor[i].numpy()
227
- # sections.append((flat_mask, f"{section_labels[0]} Mask {i} - Score: {squeezed_sam_scores_tensor[i]}"))
228
- # i += 1
229
- # except IndexError as e:
230
- # print(f"Error processing image: {e}, probably no boxes.")
231
- # return (img, sections)
232
 
233
 
234
  def process_pdf(pdf):
235
- image = process_pdf_core(pdf)
236
  return segment_image(image)
237
-
238
- with gr.Row():
239
- img_input = gr.Image(label="Upload Image", height=400)
240
- img_output = gr.AnnotatedImage(height=400)
241
-
242
- section_btn = gr.Button("Identify Solar Panel From Image")
243
-
244
- # Choose a random file in input directory
245
- gr.Examples(
246
- inputs = img_input,
247
- # examples = [os.path.join(image_dir, file) for file in random.sample(os.listdir(image_dir), 15)]
248
- examples = [os.path.join(image_dir, file) for file in os.listdir(image_dir)],
249
- )
250
 
 
 
 
 
251
  with gr.Row():
252
- pdf_input = gr.File(label="Upload PDF", file_types=['pdf'], height=200)
253
- img_output_pdf = gr.AnnotatedImage(height=400)
254
  with gr.Row():
255
  text_input = gr.Textbox(label="Enter Text", placeholder=PROMPT)
256
  text_output = gr.Textbox(label="Output", placeholder="Spec analysis will appear here")
257
-
258
- pdf_btn = gr.Button("Identify Solar Panel from PDF")
259
  pdf_text_btn = gr.Button("Extract specs from PDF Text")
 
260
 
261
  gr.Examples(
262
  inputs = pdf_input,
263
  examples = [os.path.join(pdf_dir, file) for file in os.listdir(pdf_dir)],
264
  )
265
- section_btn.click(segment_image, [img_input], img_output)
266
- pdf_btn.click(process_pdf, [pdf_input], img_output_pdf)
267
  pdf_text_btn.click(parse_pdf_text, [pdf_input], text_output)
 
268
 
269
 
270
  #Accept a PDF file, return a jpeg image
271
  @app.post("/uploadPdf", response_class=FileResponse)
272
  def extract_image(uploadFile: UploadFile) -> FileResponse:
273
  file = uploadFile.file.read()
274
- image = process_pdf_core(file)
275
  img_segments, jpeg_image = segment_image_core(image)
276
  id = str(uuid.uuid4())
277
  filename = f"{cropped_dir}/cropped_{id}.jpg"
@@ -282,10 +213,10 @@ def extract_image(uploadFile: UploadFile) -> FileResponse:
282
  #Accept a PDF file, return a text summary
283
  @app.post("/parsePdf")
284
  def parse_info(uploadFile: UploadFile):
 
285
  file = uploadFile.file.read()
286
- logging.info(f"Received file {file}")
287
  answer = parse_pdf_text(file)
288
- logging.info(f"Answer: {answer}")
289
  return {"answer": json.loads(answer)}
290
 
291
 
 
3
  import json
4
 
5
  from fastapi import FastAPI, UploadFile
6
+ from fastapi.responses import FileResponse
7
  import gradio as gr
8
  from PIL import Image
9
  import PIL
 
14
  import uvicorn
15
  import cv2
16
  import uuid
 
17
  from openai import OpenAI
18
 
19
+ logger = logging.getLogger(__name__)
20
+ logging.basicConfig(level=logging.DEBUG)
21
+
22
+ PROMPT = """
23
+ You are analyzing the spec sheet of a solar panel. If there is no text after the line \"PDF Extract Text Contents Below:\" report that there is no spec data provided as dictionary with a field called 'error'.
24
+ If there is text, please answer the following questions, format them as a JSON dictionary. Include the units of dimensions, weight, and cable lengths.\n
25
+ """
26
 
27
  # from solareyes.sam import SAM
28
 
 
33
 
34
  app = FastAPI()
35
 
 
 
 
 
36
  # Directories
37
  image_dir = './pdf_images/'
38
  cropped_dir = './output/'
 
41
  os.makedirs(cropped_dir, exist_ok=True)
42
  os.makedirs(pdf_dir, exist_ok=True)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def parse_pdf_text(file):
46
  pdf = pdfium.PdfDocument(file)
 
48
  for page in pdf:
49
  textpage = page.get_textpage()
50
  text_all = textpage.get_text_bounded()
51
+ all_text += text_all
52
+
53
+ logger.debug(f"Text: {all_text}")
54
 
55
  #use openai to ask questions about text
56
  q1 = "What are module dimensions in L x W x H? Result key should be \"module_dimensions\""
 
79
  results: Results = seg_model.predict(image, imgsz=(841, 595), retina_masks=True)
80
  return results
81
 
82
+
83
  def resize_and_pad(subject_image: Image.Image):
84
  # Resize subject image to 80% of 1200px while maintaining aspect ratio
85
  target_height = int(1200 * 0.8)
 
147
  return (img, sections), jpg_img
148
 
149
 
150
+ def pdf_to_image(pdf, end, start = 0) -> list[Image.Image]:
151
  pdf = pdfium.PdfDocument(pdf)
152
+ page_images = []
153
+ if not end:
154
+ end = len(pdf) # get the number of pages in the document
155
+ for i in range(start, end):
156
+ page = pdf[i]
157
+ page_image = page.render(scale=4).to_pil()
158
+ page_images.append(page_image)
159
+ return page_images
160
 
161
+
162
+ def pdf_first_page_to_image(pdf) -> Image.Image:
163
+ return pdf_to_image(pdf, 1, 0)[0]
 
164
 
165
 
166
  with gr.Blocks() as demo:
 
170
  def segment_image(img):
171
  img_sections, jpg_img = segment_image_core(img)
172
  return img_sections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
 
175
  def process_pdf(pdf):
176
+ image = pdf_first_page_to_image(pdf)
177
  return segment_image(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+
180
+ pdf_input = gr.File(label="Upload PDF", file_types=['pdf'], height=100)
181
+ pdf_image = gr.Gallery(label="PDF Page Images")
182
+ pdf_to_image_btn = gr.Button("Convert PDF to Image")
183
  with gr.Row():
184
+ img_output_pdf = gr.AnnotatedImage(label="Extracted product image", height=400)
185
+ pdf_extract_btn = gr.Button("Identify Solar Panel from PDF")
186
  with gr.Row():
187
  text_input = gr.Textbox(label="Enter Text", placeholder=PROMPT)
188
  text_output = gr.Textbox(label="Output", placeholder="Spec analysis will appear here")
 
 
189
  pdf_text_btn = gr.Button("Extract specs from PDF Text")
190
+
191
 
192
  gr.Examples(
193
  inputs = pdf_input,
194
  examples = [os.path.join(pdf_dir, file) for file in os.listdir(pdf_dir)],
195
  )
196
+ pdf_extract_btn.click(process_pdf, [pdf_input], img_output_pdf)
 
197
  pdf_text_btn.click(parse_pdf_text, [pdf_input], text_output)
198
+ pdf_to_image_btn.click(pdf_to_image, [pdf_input], pdf_image)
199
 
200
 
201
  #Accept a PDF file, return a jpeg image
202
  @app.post("/uploadPdf", response_class=FileResponse)
203
  def extract_image(uploadFile: UploadFile) -> FileResponse:
204
  file = uploadFile.file.read()
205
+ image = pdf_to_image(file)
206
  img_segments, jpeg_image = segment_image_core(image)
207
  id = str(uuid.uuid4())
208
  filename = f"{cropped_dir}/cropped_{id}.jpg"
 
213
  #Accept a PDF file, return a text summary
214
  @app.post("/parsePdf")
215
  def parse_info(uploadFile: UploadFile):
216
+ logger.info(f"Receiving file {uploadFile.filename}")
217
  file = uploadFile.file.read()
218
+ logger.info(f"Received file {uploadFile.filename}")
219
  answer = parse_pdf_text(file)
 
220
  return {"answer": json.loads(answer)}
221
 
222
 
sandbox/sam_example.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # def process_image(img):
2
+ # results = detect_solar_panel(img)
3
+ # sections = []
4
+ # for result in results:
5
+ # result: Results
6
+ # # print(result)
7
+ # try:
8
+ # boxes = result.boxes.xyxy[0].tolist()
9
+ # # Unpack boxes
10
+ # x1, y1, x2, y2 = boxes
11
+ # sections.append(((int(x1), int(y1), int(x2), int(y2)), f"{section_labels[0]} Bounding Box"))
12
+ # #Create 4 centroids around the true centroid shifted by a delta value
13
+ # delta = 0.3
14
+ # delta_x = (x2 - x1) * delta
15
+ # delta_y = (y2 - y1) * delta
16
+ # x_centroid = (x1 + x2) / 2
17
+ # y_centroid = (y1 + y2) / 2
18
+ # xtop_centroid = x_centroid
19
+ # ytop_centroid = y_centroid + delta_y
20
+ # xright_centroid = x_centroid + delta_x
21
+ # yright_centroid = y_centroid
22
+ # xbottom_centroid = x_centroid
23
+ # ybottom_centroid = y_centroid - delta_y
24
+ # xleft_centroid = x_centroid - delta_x
25
+ # yleft_centroid = y_centroid
26
+ # sam_mask, sam_scores = sam.segment(img, [[
27
+ # [xtop_centroid, ytop_centroid],
28
+ # [xright_centroid, yright_centroid],
29
+ # [xbottom_centroid, ybottom_centroid],
30
+ # [xleft_centroid, yleft_centroid]
31
+ # ]])
32
+ # squeezed_sam_mask_tensor = sam_mask[0].squeeze()
33
+ # squeezed_sam_scores_tensor = sam_scores[0].squeeze()
34
+ #         print(f"squeezed sam mask shape {squeezed_sam_mask_tensor.shape}")
35
+ #         print(f"squeezed sam scores shape {squeezed_sam_scores_tensor.shape}")
36
+ # for i in range(0, squeezed_sam_mask_tensor.shape[0]):
37
+ # flat_mask = squeezed_sam_mask_tensor[i].numpy()
38
+ # sections.append((flat_mask, f"{section_labels[0]} Mask {i} - Score: {squeezed_sam_scores_tensor[i]}"))
39
+ # i += 1
40
+ # except IndexError as e:
41
+ # print(f"Error processing image: {e}, probably no boxes.")
42
+ # return (img, sections)