ppsingh committed on
Commit
21a47da
1 Parent(s): 3353eb1

add conditional

Browse files
app.py CHANGED
@@ -4,6 +4,7 @@ import appStore.sector as sector
4
  import appStore.adapmit as adapmit
5
  import appStore.ghg as ghg
6
  import appStore.policyaction as policyaction
 
7
  import appStore.indicator as indicator
8
  import appStore.doc_processing as processing
9
  from utils.uploadAndExample import add_upload
@@ -60,7 +61,7 @@ with st.expander("ℹ️ - About this app", expanded=False):
60
  wide scale. """)
61
  st.write("")
62
  apps = [processing.app, target_extraction.app, netzero.app, ghg.app,
63
- policyaction.app, indicator.app]
64
  # sector.app, adapmit.app]
65
 
66
  # policyaction.app, indicator.app, ]
 
4
  import appStore.adapmit as adapmit
5
  import appStore.ghg as ghg
6
  import appStore.policyaction as policyaction
7
+ import appStore.conditional as conditional
8
  import appStore.indicator as indicator
9
  import appStore.doc_processing as processing
10
  from utils.uploadAndExample import add_upload
 
61
  wide scale. """)
62
  st.write("")
63
  apps = [processing.app, target_extraction.app, netzero.app, ghg.app,
64
+ policyaction.app, indicator.app, conditional.app]
65
  # sector.app, adapmit.app]
66
 
67
  # policyaction.app, indicator.app, ]
appStore/conditional.py CHANGED
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
  import streamlit as st
11
- from utils.netzero_classifier import load_netzeroClassifier, netzero_classification
12
  import logging
13
  logger = logging.getLogger(__name__)
14
  from utils.config import get_classifier_params
@@ -18,7 +18,7 @@ import plotly.express as px
18
 
19
 
20
  # Declare all the necessary variables
21
- classifier_identifier = 'netzero'
22
  params = get_classifier_params(classifier_identifier)
23
 
24
 
@@ -29,7 +29,7 @@ def app():
29
  df = st.session_state.key1
30
 
31
  # Load the classifier model
32
- classifier = load_netzeroClassifier(classifier_name=params['model_name'])
33
  st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
34
 
35
  if sum(df['Target Label'] == 'TARGET') > 100:
@@ -37,7 +37,7 @@ def app():
37
  else:
38
  warning_msg = ""
39
 
40
- df = netzero_classification(haystack_doc=df,
41
  threshold= params['threshold'])
42
  st.session_state.key1 = df
43
 
 
8
  import numpy as np
9
  import pandas as pd
10
  import streamlit as st
11
+ from utils.netzero_classifier import load_conditionalClassifier, conditional_classification
12
  import logging
13
  logger = logging.getLogger(__name__)
14
  from utils.config import get_classifier_params
 
18
 
19
 
20
  # Declare all the necessary variables
21
+ classifier_identifier = 'conditional'
22
  params = get_classifier_params(classifier_identifier)
23
 
24
 
 
29
  df = st.session_state.key1
30
 
31
  # Load the classifier model
32
+ classifier = load_conditionalClassifier(classifier_name=params['model_name'])
33
  st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
34
 
35
  if sum(df['Target Label'] == 'TARGET') > 100:
 
37
  else:
38
  warning_msg = ""
39
 
40
+ df = conditional_classification(haystack_doc=df,
41
  threshold= params['threshold'])
42
  st.session_state.key1 = df
43
 
utils/conditional_classifier.py CHANGED
@@ -16,7 +16,7 @@ _lab_dict = {
16
  }
17
 
18
  @st.cache_resource
19
- def load_netzeroClassifier(config_file:str = None, classifier_name:str = None):
20
  """
21
  loads the document classifier using haystack, where the name/path of model
22
  in HF-hub as string is used to fetch the model object.Either configfile or
@@ -36,9 +36,9 @@ def load_netzeroClassifier(config_file:str = None, classifier_name:str = None):
36
  return
37
  else:
38
  config = getconfig(config_file)
39
- classifier_name = config.get('netzero','MODEL')
40
 
41
- logging.info("Loading netzero classifier")
42
  doc_classifier = pipeline("text-classification",
43
  model=classifier_name,
44
  top_k =1)
@@ -47,7 +47,7 @@ def load_netzeroClassifier(config_file:str = None, classifier_name:str = None):
47
 
48
 
49
  @st.cache_data
50
- def netzero_classification(haystack_doc:pd.DataFrame,
51
  threshold:float = 0.8,
52
  classifier_model:pipeline= None
53
  )->Tuple[DataFrame,Series]:
@@ -68,9 +68,9 @@ def netzero_classification(haystack_doc:pd.DataFrame,
68
  ----------
69
  df: Dataframe
70
  """
71
- logging.info("Working on Netzero Extraction")
72
- haystack_doc['Netzero Label'] = 'NA'
73
- haystack_doc['Netzero Score'] = 'NA'
74
  # we apply Netzero to only paragraphs which are classified as 'Target' related
75
  temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
76
  temp = temp.reset_index(drop=True)
@@ -78,12 +78,12 @@ def netzero_classification(haystack_doc:pd.DataFrame,
78
  df = df.reset_index(drop=True)
79
 
80
  if not classifier_model:
81
- classifier_model = st.session_state['netzero_classifier']
82
 
83
  results = classifier_model(list(temp.text))
84
  labels_= [(l[0]['label'],l[0]['score']) for l in results]
85
- temp['Netzero Label'],temp['Netzero Score'] = zip(*labels_)
86
- temp['Netzero Label'] = temp['Netzero Label'].apply(lambda x: _lab_dict[x])
87
  # merging Target with Non Target dataframe
88
  df = pd.concat([df,temp])
89
  df = df.reset_index(drop =True)
 
16
  }
17
 
18
  @st.cache_resource
19
+ def load_conditionalClassifier(config_file:str = None, classifier_name:str = None):
20
  """
21
  loads the document classifier using haystack, where the name/path of model
22
  in HF-hub as string is used to fetch the model object.Either configfile or
 
36
  return
37
  else:
38
  config = getconfig(config_file)
39
+ classifier_name = config.get('conditional','MODEL')
40
 
41
+ logging.info("Loading conditional classifier")
42
  doc_classifier = pipeline("text-classification",
43
  model=classifier_name,
44
  top_k =1)
 
47
 
48
 
49
  @st.cache_data
50
+ def conditional_classification(haystack_doc:pd.DataFrame,
51
  threshold:float = 0.8,
52
  classifier_model:pipeline= None
53
  )->Tuple[DataFrame,Series]:
 
68
  ----------
69
  df: Dataframe
70
  """
71
+ logging.info("Working on Conditionality Identification")
72
+ haystack_doc['Conditional Label'] = 'NA'
73
+ haystack_doc['Conditional Score'] = 'NA'
74
  # we apply the conditional classifier only to paragraphs which are classified as 'Target' related
75
  temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
76
  temp = temp.reset_index(drop=True)
 
78
  df = df.reset_index(drop=True)
79
 
80
  if not classifier_model:
81
+ classifier_model = st.session_state['conditional_classifier']
82
 
83
  results = classifier_model(list(temp.text))
84
  labels_= [(l[0]['label'],l[0]['score']) for l in results]
85
+ temp['Conditional Label'],temp['Conditional Score'] = zip(*labels_)
86
+ # temp[' Label'] = temp['Netzero Label'].apply(lambda x: _lab_dict[x])
87
  # merging Target with Non Target dataframe
88
  df = pd.concat([df,temp])
89
  df = df.reset_index(drop =True)