Ankur Goyal committed on
Commit
47b5f74
1 Parent(s): 98e826c

Add better examples and confidence threshold

Browse files
Files changed (7) hide show
  1. acze.png +0 -0
  2. app.py +22 -13
  3. contract.jpeg +0 -0
  4. north_sea.pdf +0 -0
  5. north_sea.png +0 -0
  6. statement.pdf +0 -0
  7. statement.png +0 -0
acze.png ADDED
app.py CHANGED
@@ -73,19 +73,21 @@ def normalize_bbox(box, width, height, padding=0.005):
73
  EXAMPLES = [
74
  [
75
  "invoice.png",
76
- "Invoice 1",
77
  ],
78
  [
79
- "contract.jpeg",
80
- "What is the purchase amount?",
81
  ],
82
  [
83
- "statement.png",
84
- "What are net sales for 2020?",
85
  ],
86
  ]
87
 
88
- QUESTION_FILES = {}
 
 
89
 
90
  FIELDS = {
91
  "Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
@@ -98,6 +100,8 @@ FIELDS = {
98
  "Invoice Total": ["Invoice Total?"],
99
  "Amount Due": ["Amount Due?"],
100
  "Payment Terms": ["Payment Terms?"],
 
 
101
  }
102
 
103
 
@@ -150,7 +154,7 @@ colors = ["#64A087", "green", "black"]
150
 
151
 
152
  def annotate_page(prediction, pages, document):
153
- if "word_ids" in prediction:
154
  image = pages[prediction["page"]]
155
  draw = ImageDraw.Draw(image, "RGBA")
156
  word_boxes = lift_word_boxes(document, prediction["page"])
@@ -192,9 +196,14 @@ def process_fields(document, fields, model=list(CHECKPOINTS.keys())[0]):
192
  table = []
193
 
194
  for (field_name, questions) in fields.items():
195
- answers = [run_pipeline(model, q, document, top_k=1) for q in questions]
 
 
 
 
 
196
  answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
197
- top = answers[0]
198
  annotate_page(top, pages, document)
199
  ret[field_name] = top
200
  table.append([field_name, top.get("answer") if top is not None else None])
@@ -400,8 +409,8 @@ with gr.Blocks(css=CSS) as demo:
400
  None, # document
401
  # {**FIELDS}, # fields
402
  gr.update(visible=False, value=None), # output
403
- # gr.update(**empty_table(FIELDS)), # output_table
404
- # gr.update(visible=False),
405
  None,
406
  None,
407
  None,
@@ -414,8 +423,8 @@ with gr.Blocks(css=CSS) as demo:
414
  document,
415
  # fields,
416
  output,
417
- # output_table,
418
- # img_clear_button,
419
  example_image,
420
  upload,
421
  url,
73
  EXAMPLES = [
74
  [
75
  "invoice.png",
76
+ "East Repair Invoice",
77
  ],
78
  [
79
+ "acze.png",
80
+ "ACZE Invoice",
81
  ],
82
  [
83
+ "north_sea.png",
84
+ "North Sea Invoice",
85
  ],
86
  ]
87
 
88
+ QUESTION_FILES = {
89
+ "North Sea Invoice": "north_sea.pdf",
90
+ }
91
 
92
  FIELDS = {
93
  "Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
100
  "Invoice Total": ["Invoice Total?"],
101
  "Amount Due": ["Amount Due?"],
102
  "Payment Terms": ["Payment Terms?"],
103
+ "Remit To Name": ["Remit To Name?"],
104
+ "Remit To Address": ["Remit To Address?"],
105
  }
106
 
107
 
154
 
155
 
156
  def annotate_page(prediction, pages, document):
157
+ if prediction is not None and "word_ids" in prediction:
158
  image = pages[prediction["page"]]
159
  draw = ImageDraw.Draw(image, "RGBA")
160
  word_boxes = lift_word_boxes(document, prediction["page"])
196
  table = []
197
 
198
  for (field_name, questions) in fields.items():
199
+ answers = [
200
+ a
201
+ for q in questions
202
+ for a in ensure_list(run_pipeline(model, q, document, top_k=1))
203
+ if a.get("score", 1) > 0.5
204
+ ]
205
  answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
206
+ top = answers[0] if len(answers) > 0 else None
207
  annotate_page(top, pages, document)
208
  ret[field_name] = top
209
  table.append([field_name, top.get("answer") if top is not None else None])
409
  None, # document
410
  # {**FIELDS}, # fields
411
  gr.update(visible=False, value=None), # output
412
+ gr.update(**empty_table(fields.value)), # output_table
413
+ gr.update(visible=False),
414
  None,
415
  None,
416
  None,
423
  document,
424
  # fields,
425
  output,
426
+ output_table,
427
+ img_clear_button,
428
  example_image,
429
  upload,
430
  url,
contract.jpeg DELETED
Binary file (124 kB)
north_sea.pdf ADDED
Binary file (70.9 kB). View file
north_sea.png ADDED
statement.pdf DELETED
The diff for this file is too large to render. See raw diff
statement.png DELETED
Binary file (140 kB)