Shredder committed on
Commit
1330102
1 Parent(s): 99cd595

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -40
app.py CHANGED
@@ -4,42 +4,20 @@ import json
4
  import gradio as gr
5
  import spacy
6
  from spacy import displacy
7
- from transformers import AutoTokenizer, AutoModelForTokenClassification,RobertaTokenizer,pipeline
8
  import torch
9
  import nltk
10
  from nltk.tokenize import sent_tokenize
11
  from fin_readability_sustainability import BERTClass, do_predict
12
  import pandas as pd
13
  import en_core_web_sm
14
- #from fincat_utils import extract_context_words
15
- #from fincat_utils import bert_embedding_extract
16
  from score_fincat import score_fincat
17
- import pickle
18
- #lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle",'rb'))
19
-
20
  nlp = en_core_web_sm.load()
21
  nltk.download('punkt')
22
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
 
24
- #SUSTAINABILITY STARTS
25
- tokenizer_sus = RobertaTokenizer.from_pretrained('roberta-base')
26
- model_sustain = BERTClass(2, "sustanability")
27
- model_sustain.to(device)
28
- model_sustain.load_state_dict(torch.load('sustainability_model.bin', map_location=device)['model_state_dict'])
29
 
30
- def get_sustainability(text):
31
- df = pd.DataFrame({'sentence':sent_tokenize(text)})
32
- actual_predictions_sustainability = do_predict(model_sustain, tokenizer_sus, df)
33
- highlight = []
34
- for sent, prob in zip(df['sentence'].values, actual_predictions_sustainability[1]):
35
- if prob>=4.384316:
36
- highlight.append((sent, 'non-sustainable'))
37
- elif prob<=1.423736:
38
- highlight.append((sent, 'sustainable'))
39
- else:
40
- highlight.append((sent, '-'))
41
- return highlight
42
- #SUSTAINABILITY ENDS
43
 
44
 
45
  ##Summarization
@@ -49,22 +27,6 @@ def summarize_text(text):
49
  stext = resp[0]['summary_text']
50
  return stext
51
 
52
- ##Forward Looking Statement
53
- def split_in_sentences(text):
54
- doc = nlp(text)
55
- return [str(sent).strip() for sent in doc.sents]
56
- def make_spans(text,results):
57
- results_list = []
58
- for i in range(len(results)):
59
- results_list.append(results[i]['label'])
60
- facts_spans = []
61
- facts_spans = list(zip(split_in_sentences(text),results_list))
62
- return facts_spans
63
-
64
- fls_model = pipeline("text-classification", model="yiyanghkust/finbert-fls", tokenizer="yiyanghkust/finbert-fls")
65
- def fls(text):
66
- results = fls_model(split_in_sentences(text))
67
- return make_spans(text,results)
68
 
69
  ##Company Extraction
70
  ner=pipeline('ner',model='Jean-Baptiste/camembert-ner-with-dates',tokenizer='Jean-Baptiste/camembert-ner-with-dates', aggregation_strategy="simple")
 
4
  import gradio as gr
5
  import spacy
6
  from spacy import displacy
7
+ from transformers import RobertaTokenizer,pipeline
8
  import torch
9
  import nltk
10
  from nltk.tokenize import sent_tokenize
11
  from fin_readability_sustainability import BERTClass, do_predict
12
  import pandas as pd
13
  import en_core_web_sm
 
 
14
  from score_fincat import score_fincat
15
+ from sus_fls import get_sustainability,fls
 
 
16
  nlp = en_core_web_sm.load()
17
  nltk.download('punkt')
18
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
 
 
 
 
 
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  ##Summarization
 
27
  stext = resp[0]['summary_text']
28
  return stext
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  ##Company Extraction
32
  ner=pipeline('ner',model='Jean-Baptiste/camembert-ner-with-dates',tokenizer='Jean-Baptiste/camembert-ner-with-dates', aggregation_strategy="simple")