samarthagarwal23 committed on
Commit
bc84182
1 Parent(s): a58a4e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -11,7 +11,7 @@ import torch
11
  from transformers import pipeline
12
  import pdfminer
13
  from pdfminer.high_level import extract_text
14
- from termcolor import colored
15
 
16
  def read_pdf(file):
17
  text = extract_text(file.name)
@@ -61,8 +61,12 @@ def qa_ranker(query, docs_, top_k_ranker):
61
  ans.append(answer)
62
  return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
63
 
 
 
 
64
  def print_colored(text, start_idx, end_idx):
65
- a = colored(text[:start_idx]) + colored(text[start_idx:end_idx], 'red', 'on_yellow') + colored(text[end_idx:])
 
66
  return a
67
 
68
  def final_qa_pipeline(file, query):
@@ -78,7 +82,8 @@ def final_qa_pipeline(file, query):
78
 
79
  if len(lvl1) > 0:
80
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
81
- return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), fnl_rank[0]['doc'])
 
82
  #for fnl_ in fnl_rank:
83
  # print("\n")
84
  # print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
@@ -87,17 +92,18 @@ def final_qa_pipeline(file, query):
87
  return ("No match", 0)
88
 
89
  examples = [
 
 
 
90
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
91
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "How high is shareholders equity ?"],
92
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
93
- [os.path.abspath("dbs-annual-report-2020.pdf"), "how much dividend was paid to shareholders ?"],
94
- [os.path.abspath("dbs-annual-report-2020.pdf"), "what are the key risks ?"],
95
  ]
96
 
97
  iface = gr.Interface(
98
  fn = final_qa_pipeline,
99
  inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
100
- outputs = [gr.outputs.Textbox(label="Answer"), gr.outputs.Textbox(label="Score"), gr.outputs.HTML(label="Reference text")],
101
  examples=examples,
102
  title = "Question Answering on company annual reports",
103
  description = "Simply upload any annual report pdf you are interested in and ask model a question OR load an example from below."
 
11
  from transformers import pipeline
12
  import pdfminer
13
  from pdfminer.high_level import extract_text
14
+ #from termcolor import colored
15
 
16
  def read_pdf(file):
17
  text = extract_text(file.name)
 
61
  ans.append(answer)
62
  return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
63
 
64
def cstr(s, color='black'):
    """Wrap *s* in an inline HTML element that renders it in *color*.

    Args:
        s: Content to wrap. It is inserted verbatim, so it may already
            contain markup (``print_colored`` nests one call in another).
        color: CSS color value placed in the ``style`` attribute.

    Returns:
        An HTML string of the form ``<span style="color:COLOR">S</span>``.
    """
    # <span> is the standard inline container; the attribute value is quoted
    # so color values containing spaces do not break the tag.
    return '<span style="color:{}">{}</span>'.format(color, s)


def print_colored(text, start_idx, end_idx):
    """Return *text* as HTML with ``text[start_idx:end_idx]`` shown in red.

    Args:
        text: Plain text of the passage to display.
        start_idx: Index where the highlighted span begins.
        end_idx: Index just past the end of the highlighted span.

    Returns:
        An HTML string: the whole passage in black, with the selected span
        wrapped in a nested red element.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # Escape each slice separately so characters such as "<" and "&" in the
    # passage are displayed literally instead of being parsed as markup,
    # while the highlight tags added below stay intact.
    before = escape(text[:start_idx])
    answer = escape(text[start_idx:end_idx])
    after = escape(text[end_idx:])

    # Plain concatenation: the three slices already partition the text, so no
    # separator is inserted around the highlighted span.
    a = cstr(before + cstr(answer, color='red') + after, color='black')
    return a
71
 
72
  def final_qa_pipeline(file, query):
 
82
 
83
  if len(lvl1) > 0:
84
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
85
+ #return (fnl_rank[0]["answer"], str(np.round(100*fnl_rank[0]["score"],2))+"%" , fnl_rank[0]['doc'])
86
+ return (fnl_rank[0]["answer"], str(np.round(100*fnl_rank[0]["score"],2))+"%" , print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start_idx'], fnl_rank[0]['end_idx']))
87
  #for fnl_ in fnl_rank:
88
  # print("\n")
89
  # print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
 
92
  return ("No match", 0)
93
 
94
  examples = [
95
+ [os.path.abspath("dbs-annual-report-2020.pdf"), "how much dividend was paid to shareholders ?"],
96
+ [os.path.abspath("dbs-annual-report-2020.pdf"), "what are the key risks ?"],
97
+ [os.path.abspath("dbs-annual-report-2020.pdf"), "what is the sustainability focus ?"],
98
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
99
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "How high is shareholders equity ?"],
100
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
 
 
101
  ]
102
 
103
  iface = gr.Interface(
104
  fn = final_qa_pipeline,
105
  inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
106
+ outputs = [gr.outputs.Textbox(label="Answer"), gr.outputs.Textbox(label="Confidence"), gr.outputs.HTML(label="Reference text")],
107
  examples=examples,
108
  title = "Question Answering on company annual reports",
109
  description = "Simply upload any annual report pdf you are interested in and ask model a question OR load an example from below."