Rename utils/indicator_classifier.py to utils/vulnerability_classifier.py
Browse files
utils/{indicator_classifier.py → vulnerability_classifier.py}
RENAMED
@@ -10,7 +10,7 @@ from transformers import pipeline
|
|
10 |
|
11 |
|
12 |
@st.cache_resource
|
13 |
-
def
|
14 |
"""
|
15 |
loads the document classifier using haystack, where the name/path of model
|
16 |
in HF-hub as string is used to fetch the model object. Either configfile or
|
@@ -30,9 +30,9 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
|
|
30 |
return
|
31 |
else:
|
32 |
config = getconfig(config_file)
|
33 |
-
classifier_name = config.get('
|
34 |
|
35 |
-
logging.info("Loading
|
36 |
# we are using the pipeline as the model is multilabel and DocumentClassifier
|
37 |
# from Haystack doesn't support multilabel
|
38 |
# in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
|
@@ -51,7 +51,7 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
|
|
51 |
|
52 |
|
53 |
@st.cache_data
|
54 |
-
def
|
55 |
threshold:float = 0.5,
|
56 |
classifier_model:pipeline= None
|
57 |
)->Tuple[DataFrame,Series]:
|
@@ -74,14 +74,14 @@ def indicator_classification(haystack_doc:pd.DataFrame,
|
|
74 |
x: Series object with the unique SDG covered in the document uploaded and
|
75 |
the number of times it is covered/discussed/count_of_paragraphs.
|
76 |
"""
|
77 |
-
logging.info("Working on
|
78 |
haystack_doc['Indicator Label'] = 'NA'
|
79 |
haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
|
80 |
|
81 |
df1 = haystack_doc[haystack_doc['PA_check'] == True]
|
82 |
df = haystack_doc[haystack_doc['PA_check'] == False]
|
83 |
if not classifier_model:
|
84 |
-
classifier_model = st.session_state['
|
85 |
|
86 |
predictions = classifier_model(list(df1.text))
|
87 |
|
|
|
10 |
|
11 |
|
12 |
@st.cache_resource
|
13 |
+
def load_vulnerabilityClassifier(config_file:str = None, classifier_name:str = None):
|
14 |
"""
|
15 |
loads the document classifier using haystack, where the name/path of model
|
16 |
in HF-hub as string is used to fetch the model object. Either configfile or
|
|
|
30 |
return
|
31 |
else:
|
32 |
config = getconfig(config_file)
|
33 |
+
classifier_name = config.get('vulnerability','MODEL')
|
34 |
|
35 |
+
logging.info("Loading vulnerability classifier")
|
36 |
# we are using the pipeline as the model is multilabel and DocumentClassifier
|
37 |
# from Haystack doesn't support multilabel
|
38 |
# in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
|
|
|
51 |
|
52 |
|
53 |
@st.cache_data
|
54 |
+
def vulnerability_classification(haystack_doc:pd.DataFrame,
|
55 |
threshold:float = 0.5,
|
56 |
classifier_model:pipeline= None
|
57 |
)->Tuple[DataFrame,Series]:
|
|
|
74 |
x: Series object with the unique SDG covered in the document uploaded and
|
75 |
the number of times it is covered/discussed/count_of_paragraphs.
|
76 |
"""
|
77 |
+
logging.info("Working on vulnerability Identification")
|
78 |
haystack_doc['Indicator Label'] = 'NA'
|
79 |
haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
|
80 |
|
81 |
df1 = haystack_doc[haystack_doc['PA_check'] == True]
|
82 |
df = haystack_doc[haystack_doc['PA_check'] == False]
|
83 |
if not classifier_model:
|
84 |
+
classifier_model = st.session_state['vulnerability_classifier']
|
85 |
|
86 |
predictions = classifier_model(list(df1.text))
|
87 |
|