leavoigt committed on
Commit
48bf795
1 Parent(s): e5fe546

Rename appStore/sdg_analysis.py to appStore/vulnerability_analysis.py

Browse files
appStore/{sdg_analysis.py → vulnerability_analysis.py} RENAMED
@@ -10,9 +10,8 @@ import pandas as pd
10
  import streamlit as st
11
  from st_aggrid import AgGrid
12
  from st_aggrid.shared import ColumnsAutoSizeMode
13
- from utils.sdg_classifier import sdg_classification
14
- from utils.sdg_classifier import runSDGPreprocessingPipeline, load_sdgClassifier
15
- from utils.keyword_extraction import textrank
16
  import logging
17
  logger = logging.getLogger(__name__)
18
  from utils.checkconfig import getconfig
@@ -20,21 +19,21 @@ from utils.checkconfig import getconfig
20
 
21
  # Declare all the necessary variables
22
  config = getconfig('paramconfig.cfg')
23
- model_name = config.get('sdg','MODEL')
24
- split_by = config.get('sdg','SPLIT_BY')
25
- split_length = int(config.get('sdg','SPLIT_LENGTH'))
26
- split_overlap = int(config.get('sdg','SPLIT_OVERLAP'))
27
- remove_punc = bool(int(config.get('sdg','REMOVE_PUNC')))
28
- split_respect_sentence_boundary = bool(int(config.get('sdg','RESPECT_SENTENCE_BOUNDARY')))
29
- threshold = float(config.get('sdg','THRESHOLD'))
30
- top_n = int(config.get('sdg','TOP_KEY'))
31
 
32
 
33
  def app():
34
 
35
  #### APP INFO #####
36
  with st.container():
37
- st.markdown("<h1 style='text-align: center; color: black;'> SDG Classification and Keyphrase Extraction </h1>", unsafe_allow_html=True)
38
  st.write(' ')
39
  st.write(' ')
40
 
@@ -106,14 +105,14 @@ def app():
106
 
107
  ### Main app code ###
108
  with st.container():
109
- if st.button("RUN SDG Analysis"):
110
 
111
  if 'filepath' in st.session_state:
112
  file_name = st.session_state['filename']
113
  file_path = st.session_state['filepath']
114
- classifier = load_sdgClassifier(classifier_name=model_name)
115
- st.session_state['sdg_classifier'] = classifier
116
- all_documents = runSDGPreprocessingPipeline(file_name= file_name,
117
  file_path= file_path, split_by= split_by,
118
  split_length= split_length,
119
  split_respect_sentence_boundary= split_respect_sentence_boundary,
@@ -124,18 +123,18 @@ def app():
124
  else:
125
  warning_msg = ""
126
 
127
- with st.spinner("Running SDG Classification{}".format(warning_msg)):
128
 
129
- df, x = sdg_classification(haystack_doc=all_documents['documents'],
130
  threshold= threshold)
131
  df = df.drop(['Relevancy'], axis = 1)
132
- sdg_labels = x.SDG.unique()
133
  textrank_keyword_list = []
134
  for label in sdg_labels:
135
- sdgdata = " ".join(df[df.SDG == label].text.to_list())
136
  textranklist_ = textrank(textdata=sdgdata, words= top_n)
137
  if len(textranklist_) > 0:
138
- textrank_keyword_list.append({'SDG':label, 'TextRank Keywords':",".join(textranklist_)})
139
  textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
140
 
141
 
@@ -151,7 +150,7 @@ def app():
151
  # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
152
 
153
 
154
- st.markdown("#### Anything related to SDGs? ####")
155
 
156
  c4, c5, c6 = st.columns([1,2,2])
157
 
@@ -162,13 +161,13 @@ def app():
162
  labeldf = "<br>".join(labeldf)
163
  st.markdown(labeldf, unsafe_allow_html=True)
164
  st.write("")
165
- st.markdown("###### What keywords are present under SDG classified text? ######")
166
 
167
  AgGrid(textrank_keywords_df, reload_data = False,
168
  update_mode="value_changed",
169
  columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
170
  st.write("")
171
- st.markdown("###### Top few SDG Classified paragraph/text results ######")
172
 
173
  AgGrid(df, reload_data = False, update_mode="value_changed",
174
  columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
 
10
  import streamlit as st
11
  from st_aggrid import AgGrid
12
  from st_aggrid.shared import ColumnsAutoSizeMode
13
+ from utils.vulnerability_classifier import vulnerability_classification
14
+ from utils.vulnerability_classifier import runPreprocessingPipeline, load_Classifier
 
15
  import logging
16
  logger = logging.getLogger(__name__)
17
  from utils.checkconfig import getconfig
 
19
 
20
  # Declare all the necessary variables
21
  config = getconfig('paramconfig.cfg')
22
+ model_name = config.get('vulnerability','MODEL')
23
+ split_by = config.get('vulnerability','SPLIT_BY')
24
+ split_length = int(config.get('vulnerability','SPLIT_LENGTH'))
25
+ split_overlap = int(config.get('vulnerability','SPLIT_OVERLAP'))
26
+ remove_punc = bool(int(config.get('vulnerability','REMOVE_PUNC')))
27
+ split_respect_sentence_boundary = bool(int(config.get('vulnerability','RESPECT_SENTENCE_BOUNDARY')))
28
+ threshold = float(config.get('vulnerability','THRESHOLD'))
29
+ top_n = int(config.get('vulnerability','TOP_KEY'))
30
 
31
 
32
  def app():
33
 
34
  #### APP INFO #####
35
  with st.container():
36
+ st.markdown("<h1 style='text-align: center; color: black;'> Vulnerability Classification </h1>", unsafe_allow_html=True)
37
  st.write(' ')
38
  st.write(' ')
39
 
 
105
 
106
  ### Main app code ###
107
  with st.container():
108
+ if st.button("RUN Vulnerability Analysis"):
109
 
110
  if 'filepath' in st.session_state:
111
  file_name = st.session_state['filename']
112
  file_path = st.session_state['filepath']
113
+ classifier = load_Classifier(classifier_name=model_name)
114
+ st.session_state['vulnerability_classifier'] = classifier
115
+ all_documents = runPreprocessingPipeline(file_name= file_name,
116
  file_path= file_path, split_by= split_by,
117
  split_length= split_length,
118
  split_respect_sentence_boundary= split_respect_sentence_boundary,
 
123
  else:
124
  warning_msg = ""
125
 
126
+ with st.spinner("Running Classification{}".format(warning_msg)):
127
 
128
+ df, x = vulnerability_classification(haystack_doc=all_documents['documents'],
129
  threshold= threshold)
130
  df = df.drop(['Relevancy'], axis = 1)
131
+ vulnerability_labels = x.vulnerability.unique()
132
  textrank_keyword_list = []
133
  for label in sdg_labels:
134
+ vulnerability_data = " ".join(df[df.vulnerability == label].text.to_list())
135
  textranklist_ = textrank(textdata=sdgdata, words= top_n)
136
  if len(textranklist_) > 0:
137
+ textrank_keyword_list.append({'Vulnerability':label, 'TextRank Keywords':",".join(textranklist_)})
138
  textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
139
 
140
 
 
150
  # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
151
 
152
 
153
+ st.markdown("#### Anything related to Vulnerabilities? ####")
154
 
155
  c4, c5, c6 = st.columns([1,2,2])
156
 
 
161
  labeldf = "<br>".join(labeldf)
162
  st.markdown(labeldf, unsafe_allow_html=True)
163
  st.write("")
164
+ st.markdown("###### What keywords are present under vulnerability classified text? ######")
165
 
166
  AgGrid(textrank_keywords_df, reload_data = False,
167
  update_mode="value_changed",
168
  columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
169
  st.write("")
170
+ st.markdown("###### Top few vulnerability Classified paragraph/text results ######")
171
 
172
  AgGrid(df, reload_data = False, update_mode="value_changed",
173
  columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)