awacke1 committed
Commit 9883346
Parent(s): 213441a

Update app.py

Files changed (1):
  1. app.py +4 -37
app.py CHANGED
@@ -1,17 +1,14 @@
+import gradio as gr
 import os
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-from PIL import Image, ImageDraw
+import torch
 import traceback
 
-import gradio as gr
-
-import torch
 from docquery import pipeline
 from docquery.document import load_document, ImageDocument
 from docquery.ocr_reader import get_ocr_reader
+from PIL import Image, ImageDraw
 
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 def ensure_list(x):
     if isinstance(x, list):
@@ -19,47 +16,36 @@ def ensure_list(x):
     else:
         return [x]
 
-
 CHECKPOINTS = {
     "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
     "LayoutLMv1 for Invoices 💸": "impira/layoutlm-invoices",
     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
 }
-
 PIPELINES = {}
 
-
 def construct_pipeline(task, model):
     global PIPELINES
     if model in PIPELINES:
         return PIPELINES[model]
-
     device = "cuda" if torch.cuda.is_available() else "cpu"
     ret = pipeline(task=task, model=CHECKPOINTS[model], device=device)
     PIPELINES[model] = ret
     return ret
 
-
 def run_pipeline(model, question, document, top_k):
     pipeline = construct_pipeline("document-question-answering", model)
     return pipeline(question=question, **document.context, top_k=top_k)
 
-
-# TODO: Move into docquery
-# TODO: Support words past the first page (or window?)
 def lift_word_boxes(document, page):
     return document.context["image"][page][1]
 
-
 def expand_bbox(word_boxes):
     if len(word_boxes) == 0:
         return None
-
     min_x, min_y, max_x, max_y = zip(*[x[1] for x in word_boxes])
     min_x, min_y, max_x, max_y = [min(min_x), min(min_y), max(max_x), max(max_y)]
     return [min_x, min_y, max_x, max_y]
 
-
 # LayoutLM boxes are normalized to 0, 1000
 def normalize_bbox(box, width, height, padding=0.005):
     min_x, min_y, max_x, max_y = [c / 1000 for c in box]
@@ -70,7 +56,6 @@ def normalize_bbox(box, width, height, padding=0.005):
     max_y = min(max_y + padding, 1)
     return [min_x * width, min_y * height, max_x * width, max_y * height]
 
-
 examples = [
     [
         "invoice.png",
@@ -84,14 +69,6 @@ examples = [
         "statement.png",
         "What are net sales for 2020?",
     ],
-    # [
-    #     "docquery.png",
-    #     "How many likes does the space have?",
-    # ],
-    # [
-    #     "hacker_news.png",
-    #     "What is the title of post number 5?",
-    # ],
 ]
 
 question_files = {
@@ -100,7 +77,6 @@ question_files = {
     "What is the title of post number 5?": "https://news.ycombinator.com",
 }
 
-
 def process_path(path):
     error = None
     if path:
@@ -141,7 +117,6 @@ def process_upload(file):
         None,
     )
 
-
 colors = ["#64A087", "green", "black"]
 
 
@@ -156,8 +131,6 @@ def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
         if i == 0:
             text_value = p["answer"]
         else:
-            # Keep the code around to produce multiple boxes, but only show the top
-            # prediction for now
            break
 
         if "word_ids" in p:
@@ -297,11 +270,9 @@ with gr.Blocks(css=CSS) as demo:
         " click one of the examples to load them."
         " DocQuery is MIT-licensed and available on [Github](https://github.com/impira/docquery)."
     )
-
     document = gr.Variable()
     example_question = gr.Textbox(visible=False)
     example_image = gr.Image(visible=False)
-
     with gr.Row(equal_height=True):
         with gr.Column():
             with gr.Row():
@@ -399,25 +370,21 @@ with gr.Blocks(css=CSS) as demo:
         inputs=[url],
         outputs=[document, image, img_clear_button, output, output_text, url_error],
     )
-
     question.submit(
         fn=process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
-
     submit_button.click(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
-
     model.change(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
-
     example_image.change(
         fn=load_example_document,
         inputs=[example_image, example_question, model],
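
Note: normalize_bbox, left unchanged by this commit, is the one piece of coordinate math in the file; it maps LayoutLM's 0-1000 normalized boxes back to pixel coordinates. Below is a minimal runnable sketch of that behavior. The min_x/min_y clamping lines fall outside the hunk context and are assumed to mirror the max_x/max_y lines shown; the sample box and page size are hypothetical.

# Sketch: mapping a LayoutLM 0-1000 box to pixels, per normalize_bbox above.
def normalize_bbox(box, width, height, padding=0.005):
    # Scale from the 0-1000 range down to 0-1.
    min_x, min_y, max_x, max_y = [c / 1000 for c in box]
    min_x = max(min_x - padding, 0)  # assumed: mirrors the max_* clamps below
    min_y = max(min_y - padding, 0)  # assumed
    max_x = min(max_x + padding, 1)  # assumed
    max_y = min(max_y + padding, 1)
    # Scale up to the page's pixel dimensions.
    return [min_x * width, min_y * height, max_x * width, max_y * height]

# Hypothetical box over an 800x600 page:
print(normalize_bbox([100, 100, 500, 500], width=800, height=600))
# -> approximately [76.0, 57.0, 404.0, 303.0] (up to float rounding)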