Spaces:
GIZ
/
Running on CPU Upgrade

prashant committed on
Commit
1b62a9f
1 Parent(s): fa8823d

coherence results display

Browse files
Files changed (2) hide show
  1. appStore/coherence.py +37 -19
  2. paramconfig.cfg +1 -3
appStore/coherence.py CHANGED
@@ -7,8 +7,10 @@ import ast
7
  import logging
8
  from utils.ndc_explorer import countrySpecificCCA, countrySpecificCCM
9
  from utils.checkconfig import getconfig
10
- from utils.semantic_search import runSemanticPreprocessingPipeline
11
-
 
 
12
 
13
  # Reading data and Declaring necessary variables
14
  with open('docStore/ndcs/countryList.txt') as dfile:
@@ -35,9 +37,9 @@ embedding_model = config.get('coherence','RETRIEVER')
35
  embedding_model_format = config.get('coherence','RETRIEVER_FORMAT')
36
  embedding_layer = int(config.get('coherence','RETRIEVER_EMB_LAYER'))
37
  embedding_dim = int(config.get('coherence','EMBEDDING_DIM'))
 
38
  retriever_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
39
- reader_model = config.get('coherence','READER')
40
- reader_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
41
 
42
 
43
  def app():
@@ -57,7 +59,8 @@ def app():
57
  coherence between a given policy document and a country’s (Intended)\
58
  Nationally Determined Contribution (INDCs/NDCs) using open-source \
59
  data from the German Institute of Development and Sustainability’s \
60
- (IDOS) [NDC Explorer](https://klimalog.idos-research.de/ndc/#NDCExplorer/worldMap?NewAndUpdatedNDC??income???catIncome).\
 
61
  """)
62
  st.write("")
63
  st.write(""" User can select a country context via the drop-down menu \
@@ -81,6 +84,10 @@ def app():
81
  option = st.selectbox('Select Country', (countrynames))
82
  countryCode = countryList[option]
83
  st.markdown("---")
 
 
 
 
84
 
85
  with st.container():
86
  if st.button("Check Coherence"):
@@ -89,14 +96,14 @@ def app():
89
 
90
  if 'filepath' in st.session_state:
91
  allDocuments = runSemanticPreprocessingPipeline(
92
- file_path= st.session_state['filepath'],
93
- file_name = st.session_state['filename'],
94
- split_by=split_by,
95
- split_length= split_length,
96
- split_overlap=split_overlap,
97
- removePunc= remove_punc,
98
- split_respect_sentence_boundary=split_respect_sentence_boundary)
99
- genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
100
  if genre == 'Climate Change Adaptation':
101
  sent_dict = sent_cca
102
  else:
@@ -105,14 +112,25 @@ def app():
105
  for key,sent in sent_dict.items():
106
  sent_labels.append(sent)
107
  if len(allDocuments['documents']) > 100:
108
- warning_msg = ": This might take sometime, please sit back and relax."
109
  else:
110
  warning_msg = ""
111
- logging.info("starting Coherence analysis, country selected {}".format(option))
112
- with st.spinner("Performing Similar/Contextual search{}".format(warning_msg)):
113
- pass
114
-
115
-
 
 
 
 
 
 
 
 
 
 
 
116
  else:
117
  st.info("🤔 No document found, please try to upload it at the sidebar!")
118
  logging.warning("Terminated as no document provided")
 
7
  import logging
8
  from utils.ndc_explorer import countrySpecificCCA, countrySpecificCCM
9
  from utils.checkconfig import getconfig
10
+ from utils.semantic_search import runSemanticPreprocessingPipeline,process_semantic_output
11
+ from utils.semantic_search import semanticSearchPipeline, runSemanticPipeline
12
+ from st_aggrid import AgGrid
13
+ from st_aggrid.shared import ColumnsAutoSizeMode
14
 
15
  # Reading data and Declaring necessary variables
16
  with open('docStore/ndcs/countryList.txt') as dfile:
 
37
  embedding_model_format = config.get('coherence','RETRIEVER_FORMAT')
38
  embedding_layer = int(config.get('coherence','RETRIEVER_EMB_LAYER'))
39
  embedding_dim = int(config.get('coherence','EMBEDDING_DIM'))
40
+ max_seq_len = int(config.get('coherence','MAX_SEQ_LENGTH'))
41
  retriever_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
42
+
 
43
 
44
 
45
  def app():
 
59
  coherence between a given policy document and a country’s (Intended)\
60
  Nationally Determined Contribution (INDCs/NDCs) using open-source \
61
  data from the German Institute of Development and Sustainability’s \
62
+ (IDOS) [NDC Explorer]
63
+ (https://klimalog.idos-research.de/ndc/#NDCExplorer/worldMap?NewAndUpdatedNDC??income???catIncome).\
64
  """)
65
  st.write("")
66
  st.write(""" User can select a country context via the drop-down menu \
 
84
  option = st.selectbox('Select Country', (countrynames))
85
  countryCode = countryList[option]
86
  st.markdown("---")
87
+
88
+ genre = st.radio( "Select Category",('Climate Change Adaptation',
89
+ 'Climate Change Mitigation'))
90
+ st.markdown("---")
91
 
92
  with st.container():
93
  if st.button("Check Coherence"):
 
96
 
97
  if 'filepath' in st.session_state:
98
  allDocuments = runSemanticPreprocessingPipeline(
99
+ file_path= st.session_state['filepath'],
100
+ file_name = st.session_state['filename'],
101
+ split_by=split_by,
102
+ split_length= split_length,
103
+ split_overlap=split_overlap,
104
+ removePunc= remove_punc,
105
+ split_respect_sentence_boundary=split_respect_sentence_boundary)
106
+ # genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
107
  if genre == 'Climate Change Adaptation':
108
  sent_dict = sent_cca
109
  else:
 
112
  for key,sent in sent_dict.items():
113
  sent_labels.append(sent)
114
  if len(allDocuments['documents']) > 100:
115
+ warning_msg = ": This might take sometime, please sit back and relax."
116
  else:
117
  warning_msg = ""
118
+ logging.info("starting Coherence analysis, \
119
+ country selected {}".format(option))
120
+ with st.spinner("Performing Coherence Analysis for {} \
121
+ under {} category{}".format(option,genre,warning_msg)):
122
+ semanticsearch_pipeline, doc_store = semanticSearchPipeline(documents = allDocuments['documents'],
123
+ embedding_model= embedding_model,
124
+ embedding_layer= embedding_layer,
125
+ embedding_model_format= embedding_model_format,
126
+ retriever_top_k= retriever_top_k,
127
+ embedding_dim=embedding_dim,
128
+ max_seq_len=max_seq_len, useQueryCheck=False)
129
+ raw_output = runSemanticPipeline(pipeline=semanticsearch_pipeline,queries=sent_labels)
130
+ results_df = process_semantic_output(raw_output)
131
+ AgGrid(results_df, reload_data = False, update_mode="value_changed",
132
+ columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
133
+
134
  else:
135
  st.info("🤔 No document found, please try to upload it at the sidebar!")
136
  logging.warning("Terminated as no document provided")
paramconfig.cfg CHANGED
@@ -36,13 +36,11 @@ TOP_N = 20
36
 
37
  [coherence]
38
  RETRIEVER_TOP_K = 10
39
- MAX_SEQ_LENGTH = 64
40
  RETRIEVER = all-MiniLM-L6-v2
41
  RETRIEVER_FORMAT = sentence_transformers
42
  RETRIEVER_EMB_LAYER = -1
43
  EMBEDDING_DIM = 384
44
- READER = deepset/tinyroberta-squad2
45
- READER_TOP_K = 10
46
  THRESHOLD = 0.55
47
  SPLIT_BY = sentence
48
  SPLIT_LENGTH = 3
 
36
 
37
  [coherence]
38
  RETRIEVER_TOP_K = 10
39
+ MAX_SEQ_LENGTH = 256
40
  RETRIEVER = all-MiniLM-L6-v2
41
  RETRIEVER_FORMAT = sentence_transformers
42
  RETRIEVER_EMB_LAYER = -1
43
  EMBEDDING_DIM = 384
 
 
44
  THRESHOLD = 0.55
45
  SPLIT_BY = sentence
46
  SPLIT_LENGTH = 3