Spaces:
Runtime error
Runtime error
samarthagarwal23
committed on
Commit
•
bc84182
1
Parent(s):
a58a4e4
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import torch
|
|
11 |
from transformers import pipeline
|
12 |
import pdfminer
|
13 |
from pdfminer.high_level import extract_text
|
14 |
-
from termcolor import colored
|
15 |
|
16 |
def read_pdf(file):
|
17 |
text = extract_text(file.name)
|
@@ -61,8 +61,12 @@ def qa_ranker(query, docs_, top_k_ranker):
|
|
61 |
ans.append(answer)
|
62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
63 |
|
|
|
|
|
|
|
64 |
def print_colored(text, start_idx, end_idx):
|
65 |
-
a =
|
|
|
66 |
return a
|
67 |
|
68 |
def final_qa_pipeline(file, query):
|
@@ -78,7 +82,8 @@ def final_qa_pipeline(file, query):
|
|
78 |
|
79 |
if len(lvl1) > 0:
|
80 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
81 |
-
return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],
|
|
|
82 |
#for fnl_ in fnl_rank:
|
83 |
# print("\n")
|
84 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
@@ -87,17 +92,18 @@ def final_qa_pipeline(file, query):
|
|
87 |
return ("No match", 0)
|
88 |
|
89 |
examples = [
|
|
|
|
|
|
|
90 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
|
91 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "How high is shareholders equity ?"],
|
92 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
|
93 |
-
[os.path.abspath("dbs-annual-report-2020.pdf"), "how much dividend was paid to shareholders ?"],
|
94 |
-
[os.path.abspath("dbs-annual-report-2020.pdf"), "what are the key risks ?"],
|
95 |
]
|
96 |
|
97 |
iface = gr.Interface(
|
98 |
fn = final_qa_pipeline,
|
99 |
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
|
100 |
-
outputs = [gr.outputs.Textbox(label="Answer"), gr.outputs.Textbox(label="
|
101 |
examples=examples,
|
102 |
title = "Question Answering on company annual reports",
|
103 |
description = "Simply upload any annual report pdf you are interested in and ask model a question OR load an example from below."
|
|
|
11 |
from transformers import pipeline
|
12 |
import pdfminer
|
13 |
from pdfminer.high_level import extract_text
|
14 |
+
#from termcolor import colored
|
15 |
|
16 |
def read_pdf(file):
|
17 |
text = extract_text(file.name)
|
|
|
61 |
ans.append(answer)
|
62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
63 |
|
64 |
+
def cstr(s, color='black'):
    """Wrap ``s`` in a ``<text>`` tag whose inline style sets the colour.

    ``s`` is inserted verbatim (no HTML escaping) so that already-built
    markup can be nested inside another ``cstr`` call. Callers passing raw
    text are responsible for escaping it first.

    Args:
        s: Content to place inside the tag.
        color: CSS colour value used in the ``style`` attribute.

    Returns:
        The HTML snippet as a string.
    """
    return "<text style=color:{}>{}</text>".format(color, s)


def print_colored(text, start_idx, end_idx):
    """Return ``text`` as HTML with ``text[start_idx:end_idx]`` shown in red.

    Despite the name, nothing is printed: the markup is returned so the
    caller can hand it to an HTML output component.

    Args:
        text: The passage to render.
        start_idx: Index of the first highlighted character.
        end_idx: Index one past the last highlighted character.

    Returns:
        A black ``<text>`` element containing the passage, with the selected
        slice wrapped in a nested red ``<text>`` element.
    """
    # Local import keeps this block self-contained regardless of which
    # modules the rest of the file imports.
    from html import escape

    # Escape each slice on its own: the passage is arbitrary document text
    # and may contain '<', '>' or '&', which would otherwise be parsed as
    # markup. Escaping must happen before wrapping so the tags produced by
    # cstr() are left intact.
    before = escape(text[:start_idx])
    highlighted = cstr(escape(text[start_idx:end_idx]), color='red')
    after = escape(text[end_idx:])

    # Concatenate directly; joining with ' ' would insert spaces that are
    # not in the original passage on both sides of the highlight.
    return cstr(before + highlighted + after, color='black')
|
71 |
|
72 |
def final_qa_pipeline(file, query):
|
|
|
82 |
|
83 |
if len(lvl1) > 0:
|
84 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
85 |
+
#return (fnl_rank[0]["answer"], str(np.round(100*fnl_rank[0]["score"],2))+"%" , fnl_rank[0]['doc'])
|
86 |
+
return (fnl_rank[0]["answer"], str(np.round(100*fnl_rank[0]["score"],2))+"%" , print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start_idx'], fnl_rank[0]['end_idx']))
|
87 |
#for fnl_ in fnl_rank:
|
88 |
# print("\n")
|
89 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
|
|
92 |
return ("No match", 0)
|
93 |
|
94 |
examples = [
|
95 |
+
[os.path.abspath("dbs-annual-report-2020.pdf"), "how much dividend was paid to shareholders ?"],
|
96 |
+
[os.path.abspath("dbs-annual-report-2020.pdf"), "what are the key risks ?"],
|
97 |
+
[os.path.abspath("dbs-annual-report-2020.pdf"), "what is the sustainability focus ?"],
|
98 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
|
99 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "How high is shareholders equity ?"],
|
100 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
|
|
|
|
|
101 |
]
|
102 |
|
103 |
iface = gr.Interface(
|
104 |
fn = final_qa_pipeline,
|
105 |
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
|
106 |
+
outputs = [gr.outputs.Textbox(label="Answer"), gr.outputs.Textbox(label="Confidence"), gr.outputs.HTML(label="Reference text")],
|
107 |
examples=examples,
|
108 |
title = "Question Answering on company annual reports",
|
109 |
description = "Simply upload any annual report pdf you are interested in and ask model a question OR load an example from below."
|