Shredder committed on
Commit
3090b4b
1 Parent(s): 1be99c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -6
app.py CHANGED
@@ -1,22 +1,27 @@
1
- import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForTokenClassification,RobertaTokenizer
 
 
 
 
3
  import torch
4
  import nltk
5
  from nltk.tokenize import sent_tokenize
6
  from fin_readability_sustainability import BERTClass, do_predict
7
  import pandas as pd
 
8
 
9
- nltk.download('punkt')
10
 
 
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
 
13
-
14
  tokenizer_sus = RobertaTokenizer.from_pretrained('roberta-base')
15
  model_sustain = BERTClass(2, "sustanability")
16
  model_sustain.to(device)
17
  model_sustain.load_state_dict(torch.load('sustainability_model.bin', map_location=device)['model_state_dict'])
18
 
19
-
20
  def get_sustainability(text):
21
  df = pd.DataFrame({'sentence':sent_tokenize(text)})
22
  actual_predictions_sustainability = do_predict(model_sustain, tokenizer_sus, df)
@@ -29,12 +34,70 @@ def get_sustainability(text):
29
  else:
30
  highlight.append((sent, '-'))
31
  return highlight
 
32
 
 
 
 
 
 
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # b6 = gr.Button("Get Sustainability")
36
  #b6.click(get_sustainability, inputs = text, outputs = gr.HighlightedText())
37
 
38
 
39
- iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
 
 
 
40
  iface.launch()
 
1
import json
from io import StringIO

import en_core_web_sm
import gradio as gr
import nltk
import pandas as pd
import spacy
import torch
from nltk.tokenize import sent_tokenize
from spacy import displacy
from transformers import AutoTokenizer, AutoModelForTokenClassification,RobertaTokenizer,pipeline

from fin_readability_sustainability import BERTClass, do_predict
from predict import run_prediction
13
 
 
14
 
15
# Module-level resources, created once when the app is imported.
nlp = en_core_web_sm.load()
nltk.download('punkt')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#SUSTAINABILITY STARTS
# Tokenizer and classifier handed to do_predict() by get_sustainability().
tokenizer_sus = RobertaTokenizer.from_pretrained('roberta-base')
model_sustain = BERTClass(2, "sustanability")
model_sustain.to(device)
# The checkpoint file stores the weights under the 'model_state_dict' key.
model_sustain.load_state_dict(
    torch.load('sustainability_model.bin', map_location=device)['model_state_dict']
)
24
 
 
25
  def get_sustainability(text):
26
  df = pd.DataFrame({'sentence':sent_tokenize(text)})
27
  actual_predictions_sustainability = do_predict(model_sustain, tokenizer_sus, df)
 
34
  else:
35
  highlight.append((sent, '-'))
36
  return highlight
37
+ #SUSTAINABILITY ENDS
38
 
39
+ ##Summarization
40
def summarize_text(text):
    """Summarize ``text`` with the ``knkarthick/MEETING_SUMMARY`` model.

    A ``summarization`` pipeline is created on every call; the function
    returns the ``summary_text`` field of the first result it produces.
    """
    generate_summary = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
    first_result = generate_summary(text)[0]
    return first_result['summary_text']
45
 
46
+ ##Forward Looking Statement
47
def fls(text):
    """Classify the sentences of ``text`` with ``yiyanghkust/finbert-fls``.

    ``text`` is split by ``split_in_sentences``; the classifier output is
    passed, together with the original text, to ``make_spans`` and that
    function's return value is returned. Both helpers are defined outside
    this block.
    """
    classifier = pipeline(
        "text-classification",
        model="yiyanghkust/finbert-fls",
        tokenizer="yiyanghkust/finbert-fls",
    )
    sentences = split_in_sentences(text)
    predictions = classifier(sentences)
    return make_spans(text, predictions)
51
+
52
+ ##Company Extraction
53
def fin_ner(text):
    """Run the ``Jean-Baptiste/camembert-ner-with-dates`` NER pipeline on ``text``.

    The pipeline is built on every call with ``aggregation_strategy="simple"``
    and its raw output for ``text`` is returned unchanged.
    """
    tagger = pipeline(
        'ner',
        model='Jean-Baptiste/camembert-ner-with-dates',
        tokenizer='Jean-Baptiste/camembert-ner-with-dates',
        aggregation_strategy="simple",
    )
    return tagger(text)
57
+
58
+
59
+ #CUAD STARTS
60
def load_questions():
    """Read ``questions.txt`` and return its lines.

    Returns:
        list[str]: one entry per line of the file, line endings preserved.
    """
    with open('questions.txt') as handle:
        return handle.readlines()
65
 
66
+
67
def load_questions_short():
    """Read ``questionshort.txt`` and return its lines.

    Returns:
        list[str]: one entry per line of the file, line endings preserved.
    """
    with open('questionshort.txt') as handle:
        return handle.readlines()
72
+
73
def quad(query, paragraph):
    """Answer ``query`` against ``paragraph`` and summarize the answer.

    Args:
        query: question text; passed to ``run_prediction`` as a one-item list.
        paragraph: document text in which the answer is searched.

    Returns:
        tuple: ``(stext, answer)``. ``answer`` is the formatted top
        prediction with its probability, or ``'No answer found in document'``
        when the prediction for key ``'0'`` is empty. ``stext`` is the
        ``summary_text`` the summarization pipeline produces for ``answer``.
    """
    # The guard must test the `query` parameter; the previous `question`
    # name was undefined and raised NameError for any non-empty paragraph.
    if paragraph and query:
        print('getting predictions')
    predictions = run_prediction(
        [query], paragraph, 'marshmellow77/roberta-base-cuad', n_best_size=5
    )
    if predictions['0'] == "":
        answer = 'No answer found in document'
    else:
        # presumably run_prediction writes nbest.json as a side effect —
        # TODO confirm against predict.py
        with open("nbest.json") as jf:
            data = json.load(jf)
        # Only the highest-ranked candidate is reported.
        best = data['0'][0]
        answer = (
            f"Answer 1: {best['text']} -- \n"
            f"Probability: {round(best['probability']*100,1)}%\n\n"
        )
    summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
    stext = summarizer(answer)[0]['summary_text']
    return stext, answer
93
+
94
+
95
  # b6 = gr.Button("Get Sustainability")
96
  #b6.click(get_sustainability, inputs = text, outputs = gr.HighlightedText())
97
 
98
 
99
+ #iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
100
+ #iface.launch()
101
+
102
# Gradio front end. The file-upload component class is gr.inputs.File;
# the lower-case `gr.inputs.file` attribute does not exist.
# NOTE(review): two inputs and two outputs are declared here, while
# get_sustainability accepts a single `text` argument — confirm the intended
# callback (quad(query, paragraph) takes two arguments and returns two values).
iface = gr.Interface(
    fn=get_sustainability,
    inputs=[
        gr.inputs.Textbox(label='SEARCH QUERY'),
        gr.inputs.File(label='TXT FILE'),
    ],
    title="CONBERT",
    description="SUSTAINABILITY TOOL",
    theme='hugging face',
    article='Article',
    outputs=[
        gr.outputs.Textbox(label='Answer'),
        gr.outputs.Textbox(label='Summary'),
    ],
    allow_flagging="never",
)
iface.launch()