Ankur Goyal committed
Commit 8171e8e
1 Parent(s): 6cc15a7

Properly cache pipeline and display

Files changed (1):
  app.py +15 -16
app.py CHANGED
@@ -2,47 +2,46 @@ import os
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
+print("Importing")
+
 import streamlit as st
 
 import torch
 from docquery.pipeline import get_pipeline
 from docquery.document import load_bytes
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipeline = get_pipeline(device=device)
-
-
-def process_document(file, question):
-    # prepare encoder inputs
-    document = load_document(file.name)
-    return pipeline(question=question, **document.context)
-
-
 def ensure_list(x):
     if isinstance(x, list):
         return x
     else:
         return [x]
 
+@st.experimental_singleton
+def construct_pipeline():
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    ret = get_pipeline(device=device)
+    return ret
+
+@st.cache
+def run_pipeline(question, document):
+    return construct_pipeline()(question=question, **document.context)
 
 st.title("DocQuery: Query Documents Using NLP")
 file = st.file_uploader("Upload a PDF or Image document")
 question = st.text_input("QUESTION", "")
 
-document = None
-
 if file is not None:
     col1, col2 = st.columns(2)
 
     document = load_bytes(file, file.name)
     col1.image(document.preview, use_column_width=True)
 
-if document is not None and question is not None and len(question) > 0:
-    predictions = pipeline(question=question, **document.context)
+if file is not None and question is not None and len(question) > 0:
+    predictions = run_pipeline(question=question, document=document)
 
-    col2.header("Probabilities")
+    col2.header("Answers")
     for p in ensure_list(predictions):
-        col2.subheader(f"{ p['answer'] }: { round(p['score'] * 100, 1)}%")
+        col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")
 
 
 "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."