Shredder committed on
Commit 4b31fb8
1 Parent(s): 10f176f

Update app.py

Files changed (1)
  1. app.py +10 -45
app.py CHANGED
@@ -13,8 +13,9 @@ import pandas as pd
 import en_core_web_sm
 from fincat_utils import extract_context_words
 from fincat_utils import bert_embedding_extract
+from score_fincat import score_fincat
 import pickle
-lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle",'rb'))
+#lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle",'rb'))
 
 nlp = en_core_web_sm.load()
 nltk.download('punkt')
@@ -41,35 +42,7 @@ def get_sustainability(text):
 #SUSTAINABILITY ENDS
 
 #CLAIM STARTS
-def score_fincat(txt):
-  li = []
-  highlight = []
-  txt = " " + txt + " "
-  k = ''
-  for word in txt.split():
-    if any(char.isdigit() for char in word):
-      if word[-1] in ['.', ',', ';', ":", "-", "!", "?", ")", '"', "'"]:
-        k = word[-1]
-        word = word[:-1]
-      st = txt.find(" " + word + k + " ")+1
-      k = ''
-      ed = st + len(word)
-      x = {'paragraph' : txt, 'offset_start':st, 'offset_end':ed}
-      context_text = extract_context_words(x)
-      features = bert_embedding_extract(context_text, word)
-      if(features[0]=='None'):
-        highlight.append(('None', ' '))
-        return highlight
-      prediction = lr_clf.predict(features.reshape(1, 768))
-      prediction_probability = '{:.4f}'.format(round(lr_clf.predict_proba(features.reshape(1, 768))[:,1][0], 4))
-      highlight.append((word, ' In-claim' if prediction==1 else 'Out-of-Claim'))
-      # li.append([word,' In-claim' if prediction==1 else 'Out-of-Claim', prediction_probability])
-    else:
-      highlight.append((word, ' '))
-  #headers = ['numeral', 'prediction', 'probability']
-  #dff = pd.DataFrame(li)
-  # dff.columns = headers
-  return highlight
+
 
 
 ##Summarization
@@ -123,6 +96,9 @@ def load_questions_short():
   return questions_short
 questions = load_questions()
 questions_short = load_questions_short()
+
+
+
 def quad(query,file):
   with open(file.name) as f:
     paragraph = f.read()
@@ -132,6 +108,7 @@ def quad(query,file):
   print('getting predictions')
   predictions = run_prediction([query], paragraph, 'marshmellow77/roberta-base-cuad',n_best_size=5)
   answer = ""
+  answer_p=""
   if predictions['0'] == "":
     answer = 'No answer found in document'
   else:
@@ -140,23 +117,11 @@ def quad(query,file):
     for i in range(1):
      raw_answer=data['0'][i]['text']
      answer += f"{data['0'][i]['text']} -- \n"
-     answer += f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"
-  #summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
-  #resp = summarizer(answer)
-  #stext = resp[0]['summary_text']
-
-  # highlight,dff=score_fincat(answer)
-  return answer,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
+     answer_p =answer+ f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"
+  return answer_p,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
 
 
-# b6 = gr.Button("Get Sustainability")
-#b6.click(get_sustainability, inputs = text, outputs = gr.HighlightedText())
-
-
-#iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
-#iface.launch()
-
-iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="SUSTAINABILITY TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
+iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions_short,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="CONTRACT REVIEW TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
 
 
 iface.launch()
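
Note: the hunks above remove the inline claim detector and its pickle-loaded classifier from app.py and instead import score_fincat from a separate module. That module is not part of this commit, so the following is only a minimal sketch of what a score_fincat.py would need to provide for the app to keep working, reconstructed from the inline code removed above; the file name, layout, and dropped dead code are assumptions.

# score_fincat.py -- assumed companion module, not included in this commit.
# Reconstructed from the inline function and classifier load removed from app.py.
import pickle

from fincat_utils import bert_embedding_extract, extract_context_words

# Logistic-regression claim classifier, previously loaded directly in app.py.
lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle", 'rb'))

def score_fincat(txt):
    """Tag each numeral-bearing token as In-claim / Out-of-Claim for gr.HighlightedText."""
    highlight = []
    txt = " " + txt + " "
    k = ''
    for word in txt.split():
        if any(char.isdigit() for char in word):
            # Strip one trailing punctuation mark so the character offsets line up.
            if word[-1] in ['.', ',', ';', ":", "-", "!", "?", ")", '"', "'"]:
                k = word[-1]
                word = word[:-1]
            st = txt.find(" " + word + k + " ") + 1
            k = ''
            ed = st + len(word)
            x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
            context_text = extract_context_words(x)
            features = bert_embedding_extract(context_text, word)
            if features[0] == 'None':
                highlight.append(('None', ' '))
                return highlight
            prediction = lr_clf.predict(features.reshape(1, 768))
            highlight.append((word, ' In-claim' if prediction == 1 else 'Out-of-Claim'))
        else:
            highlight.append((word, ' '))
    return highlight

Whatever the module actually contains, the only interface the updated quad() relies on is score_fincat(answer) accepting a string and returning a list of (token, label) pairs for the 'CLAIM' HighlightedText output.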