Spaces: GIZ / Running on CPU Upgrade

prashant committed
Commit cc5c327
Parent: 4a20529

lexical search app update

app.py CHANGED
@@ -1,4 +1,4 @@
- # import appStore.keyword_search as keyword_search
+ import appStore.keyword_search as keyword_search
 import appStore.sdg_analysis as sdg_analysis
 #import appStore.coherence as coherence
 import appStore.info as info
@@ -12,6 +12,6 @@ app = MultiApp()
 
 app.add_app("About","house", info.app)
 app.add_app("SDG Analysis","gear",sdg_analysis.app)
- # app.add_app("Search","search", keyword_search.app)
+ app.add_app("Search","search", keyword_search.app)
 
 app.run()
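Note: app.py relies on a MultiApp helper (visible in the hunk header above) that registers each Streamlit page with a title and an icon. Its implementation is not part of this diff; the sketch below only illustrates the interface those add_app/run calls assume and is not the repository's actual class.

# Minimal, illustrative MultiApp sketch; the real class in this repo may differ.
import streamlit as st

class MultiApp:
    def __init__(self):
        self.apps = []

    def add_app(self, title, icon, func):
        # icon is accepted to mirror the add_app("Search", "search", ...) calls above
        self.apps.append({"title": title, "icon": icon, "function": func})

    def run(self):
        # simple sidebar navigation: show titles, then call the selected page function
        titles = [a["title"] for a in self.apps]
        choice = st.sidebar.radio("Go to", titles)
        for a in self.apps:
            if a["title"] == choice:
                a["function"]()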
appStore/keyword_search.py CHANGED
@@ -1,38 +1,12 @@
 # set path
- import glob, os, sys
- from udfPreprocess.search import semantic_search
- sys.path.append('../udfPreprocess')
+ import glob, os, sys;
+ sys.path.append('../utils')
 
- #import helper
- import udfPreprocess.docPreprocessing as pre
- import udfPreprocess.cleaning as clean
- from udfPreprocess.search import bm25_tokenizer, bm25TokenizeDoc, lexical_search
- #import needed libraries
- import seaborn as sns
- from pandas import DataFrame
- from sentence_transformers import SentenceTransformer, CrossEncoder, util
- # from keybert import KeyBERT
- from transformers import pipeline
- import matplotlib.pyplot as plt
- import numpy as np
 import streamlit as st
- import pandas as pd
- from rank_bm25 import BM25Okapi
- from sklearn.feature_extraction import _stop_words
- import string
- from tqdm.autonotebook import tqdm
- import numpy as np
- import docx
- from docx.shared import Inches
- from docx.shared import Pt
- from docx.enum.style import WD_STYLE_TYPE
- import logging
- logger = logging.getLogger(__name__)
- import tempfile
- import sqlite3
 import json
- import configparser
-
+ import logging
+ from utils.search import runLexicalPreprocessingPipeline, tokenize_lexical_query
+ from utils.search import runSpacyMatcher, lexical_search
 
 def app():
 
@@ -54,11 +28,9 @@ def app():
 """)
 
 st.markdown("")
-
 
-
 with st.sidebar:
- with open('sample/keywordexample.json','r') as json_file:
+ with open('docStore/sample/keywordexample.json','r') as json_file:
 keywordexample = json.load(json_file)
 
 genre = st.radio("Select Keyword Category", list(keywordexample.keys()))
@@ -76,93 +48,32 @@ def app():
 keywordList = None
 
 searchtype = st.selectbox("Do you want to find exact macthes or similar meaning/context", ['Exact Matches', 'Similar context/meaning'])
-
-
+
 with st.container():
 if keywordList is not None:
 queryList = st.text_input("You selcted the {} category we will look for these keywords in document".format(genre),
 value="{}".format(keywordList))
 else:
 queryList = st.text_input("Please enter here your question and we will look \
- for an answer in the document OR enter the keyword you \
- are looking for and we will \
- we will look for similar context \
- in the document.",
+ for an answer in the document OR enter the keyword you \
+ are looking for and we will \
+ we will look for similar context \
+ in the document.",
 placeholder="Enter keyword here")
-
+
 if st.button("Find them"):
 
 if queryList == "":
 st.info("🤔 No keyword provided, if you dont have any, please try example sets from sidebar!")
 logging.warning("Terminated as no keyword provided")
 else:
-
- if 'docs' in st.session_state:
- docs = st.session_state['docs']
- paraList = st.session_state['paraList']
-
+ if 'filepath' in st.session_state:
+ paraList = runLexicalPreprocessingPipeline()
+
 if searchtype == 'Exact Matches':
- queryList = list(queryList.split(","))
+ # queryList = list(queryList.split(","))
 logging.info("performing lexical search")
- tokenized_corpus = bm25TokenizeDoc(paraList)
- # st.write(len(tokenized_corpus))
- document_bm25 = BM25Okapi(tokenized_corpus)
-
+ # token_list = tokenize_lexical_query(queryList)
 with st.spinner("Performing Exact matching search (Lexical search) for you"):
- st.markdown("##### Top few lexical search (BM25) hits #####")
-
- for keyword in queryList:
-
- bm25_hits = lexical_search(keyword,document_bm25)
-
-
- counter = 0
- for hit in bm25_hits:
- if hit['score'] > 0.00:
- counter += 1
- if counter == 1:
- st.markdown("###### Results for keyword: **{}** ######".format(keyword))
- # st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
- st.write("\t {}: {}\t".format(counter, paraList[hit['corpus_id']].replace("\n", " ")))
-
-
- if counter == 0:
- st.write("No results found for '**{}**' ".format(keyword))
-
- st.markdown("---")
- else:
- logging.info("starting semantic search")
- with st.spinner("Performing Similar/Contextual search"):
- query = "Find {} related issues ?".format(queryList)
- config = configparser.ConfigParser()
- config.read_file(open('udfPreprocess/paramconfig.cfg'))
- threshold = float(config.get('semantic_search','THRESHOLD'))
- # st.write(query)
- semantic_hits = semantic_search(query,paraList)
- st.markdown("##### Few Semantic search hits for {} related topics #####".format(queryList))
-
- for i,queryhit in enumerate(semantic_hits):
-
- # st.markdown("###### Results for query: **{}** ######".format(queryList[i]))
- counter = 0
- for hit in queryhit:
- counter += 1
-
-
- if hit['score'] > threshold:
- # st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
- st.write("\t {}: \t {}".format(counter, paraList[hit['corpus_id']].replace("\n", " ")))
-
- # document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
- st.markdown("---")
- # st.write(semantic_hits)
-
-
-
-
- else:
- st.info("🤔 No document found, please try to upload it at the sidebar!")
- logging.warning("Terminated as no keyword provided")
+ lexical_search(queryList,paraList)
 
-
-
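With this rewrite the Search page delegates the heavy lifting to utils/search.py: the uploaded file is preprocessed once into Haystack documents, then the lexical search runs on that list. The sketch below only illustrates that call order; the run_keyword_search wrapper is hypothetical (the page inlines these calls inside app() and takes the file path from st.session_state).

from utils.search import runLexicalPreprocessingPipeline, lexical_search

def run_keyword_search(query):
    # hypothetical wrapper mirroring the call order in the diff above;
    # assumes st.session_state['filepath'] and ['filename'] were set by the upload step
    paraList = runLexicalPreprocessingPipeline()   # List[Document] split per paramconfig
    lexical_search(query, paraList)                # retrieves top-k hits and renders annotations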
appStore/sdg_analysis.py CHANGED
@@ -46,7 +46,12 @@ def app():
 
 if 'filepath' in st.session_state:
 paraList = runSDGPreprocessingPipeline()
- with st.spinner("Running SDG"):
+ if len(paraList) > 150:
+ warning_msg = ": This might take some, please sit back and relax."
+ else:
+ warning_msg = ""
+
+ with st.spinner("Running SDG Classification{}".format(warning_msg)):
 
 df, x = sdg_classification(paraList)
 
paramconfig.cfg CHANGED
@@ -1,6 +1,8 @@
 [lexical_search]
 TOP_K = 10
 THRESHOLD = 0.1
+ SPLIT_BY = sentence
+ SPLIT_LENGTH = 3
 
 [semantic_search]
 TOP_K = 10
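The two new keys control how documents are split before lexical search and are read back with configparser in runLexicalPreprocessingPipeline (utils/search.py below). For example, reading them standalone:

import configparser

# the app reads the file with config.read_file(open(...)); read() behaves the same here
config = configparser.ConfigParser()
config.read('paramconfig.cfg')

split_by = config.get('lexical_search', 'SPLIT_BY')               # 'sentence'
split_length = int(config.get('lexical_search', 'SPLIT_LENGTH'))  # 3
top_k = int(config.get('lexical_search', 'TOP_K'))                # 10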
utils/search.py CHANGED
@@ -7,17 +7,55 @@ from spacy.matcher import Matcher
 import streamlit as st
 from markdown import markdown
 from annotated_text import annotation
+ from haystack.schema import Document
+ from typing import List, Tuple, Text
+ from utils.preprocessing import processingpipeline
 
 config = configparser.ConfigParser()
 config.read_file(open('paramconfig.py'))
 
 
- def tokenize_lexical_query(query):
+ def tokenize_lexical_query(query:str)-> List[str]:
+ """
+ Removes the stop words from query and returns the list of important keywords
+ in query. For the lexical search the relevent paragraphs in document are
+ retreived using TfIDFretreiver from Haystack. However to highlight these
+ keywords we need the tokenized form of query.
+
+ Params
+ --------
+ query: string which represents either list of keywords user is looking for
+ or a query in form of Question.
+
+ Return
+ -----------
+ token_list: list of important keywords in the query.
+
+ """
 nlp = spacy.load("en_core_web_sm")
 token_list = [token.text.lower() for token in nlp(query) if not token.is_stop]
 return token_list
 
- def runSpacyMatcher(token_list, document):
+ def runSpacyMatcher(token_list:List[str], document:Text):
+ """
+ Using the spacy in backend finds the keywords in the document using the
+ Matcher class from spacy. We can alternatively use the regex, but spacy
+ finds all keywords in serialized manner which helps in annotation of answers.
+
+ Params
+ -------
+ token_list: this is token list which tokenize_lexical_query function returns
+ document: text in which we need to find the tokens
+
+ Return
+ --------
+ matches: List of [start_index, end_index] in the spacydoc(at word level not
+ character) for the keywords in token list.
+
+ spacydoc: the keyword index in the spacydoc are at word level and not character,
+ therefore to allow the annotator to work seamlessly we return the spacydoc.
+
+ """
 nlp = spacy.load("en_core_web_sm")
 spacydoc = nlp(document)
 matcher = Matcher(nlp.vocab)
@@ -25,20 +63,47 @@ def runSpacyMatcher(token_list, document):
 matcher.add(",".join(token_list), token_pattern)
 spacymatches = matcher(spacydoc)
 
+ # getting start and end index in spacydoc so that annotator can work seamlessly
 matches = []
 for match_id, start, end in spacymatches:
 matches = matches + [[start, end]]
 
 return matches, spacydoc
 
- def runRegexMatcher(token_list, document):
+ def runRegexMatcher(token_list:List[str], document:Text):
+ """
+ Using the regex in backend finds the keywords in the document.
+
+ Params
+ -------
+ token_list: this is token list which tokenize_lexical_query function returns
+
+ document: text in which we need to find the tokens
+
+ Return
+ --------
+ matches: List of [start_index, end_index] in the document for the keywords
+ in token list at character level.
+
+ document: the keyword index returned by regex are at character level,
+ therefore to allow the annotator to work seamlessly we return the text back.
+
+ """
 matches = []
 for token in token_list:
 matches = matches + [[val.start(), val.start()+ len(token)] for val in re.finditer(token, document)]
 
 return matches, document
 
- def searchAnnotator(matches, document):
+ def searchAnnotator(matches: List[List[int]], document):
+ """
+ Annotates the text in the document defined by list of [start index, end index]
+ Example: "How are you today", if document type is text, matches = [[0,3]]
+ will give answer = "How", however in case we used the spacy matcher then the
+ matches = [[0,3]] will give answer = "How are you". However if spacy is used
+ to find "How" then the matches = [[0,1]] for the string defined above.
+
+ """
 start = 0
 annotated_text = ""
 for match in matches:
@@ -52,10 +117,16 @@ def searchAnnotator(matches, document):
 unsafe_allow_html=True,
 )
 
- def lexical_search(query,documents):
+ def lexical_search(query:Text,documents:List[Document]):
+ """
+ Performs the Lexical search on the List of haystack documents which is
+ returned by preprocessing Pipeline.
+ """
 
 document_store = InMemoryDocumentStore()
 document_store.write_documents(documents)
+
+ # Haystack Retriever works with document stores only.
 retriever = TfidfRetriever(document_store)
 results = retriever.retrieve(query=query,
 top_k= int(config.get('lexical_search','TOP_K')))
@@ -64,5 +135,31 @@ def lexical_search(query,documents):
 matches, doc = runSpacyMatcher(query_tokens,result.content)
 searchAnnotator(matches, doc)
 
-
-
+ def runLexicalPreprocessingPipeline()->List[Document]:
+ """
+ creates the pipeline and runs the preprocessing pipeline,
+ the params for pipeline are fetched from paramconfig
+
+ Return
+ --------------
+ List[Document]: When preprocessing pipeline is run, the output dictionary
+ has four objects. For the Haysatck implementation of SDG classification we,
+ need to use the List of Haystack Document, which can be fetched by
+ key = 'documents' on output.
+
+ """
+ file_path = st.session_state['filepath']
+ file_name = st.session_state['filename']
+ sdg_processing_pipeline = processingpipeline()
+ split_by = config.get('lexical_search','SPLIT_BY')
+ split_length = int(config.get('lexical_search','SPLIT_LENGTH'))
+
+ output_lexical_pre = sdg_processing_pipeline.run(file_paths = file_path,
+ params= {"FileConverter": {"file_path": file_path, \
+ "file_name": file_name},
+ "UdfPreProcessor": {"removePunc": False, \
+ "split_by": split_by, \
+ "split_length":split_length}})
+
+ return output_lexical_pre['documents']
+
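As the new docstrings point out, runSpacyMatcher returns token-level [start, end] indices into the spaCy Doc, while runRegexMatcher returns character-level offsets into the raw string. A small sketch of the difference (assumes en_core_web_sm is installed and the repo's utils package is importable; the example text is made up):

from utils.search import runSpacyMatcher, runRegexMatcher

text = "the keyword search highlights the keyword in the paragraph"

spacy_matches, spacydoc = runSpacyMatcher(["keyword"], text)
# each [start, end] pair indexes *tokens*, so spacydoc[start:end] is the highlighted span

regex_matches, raw_text = runRegexMatcher(["keyword"], text)
# each [start, end] pair indexes *characters*, so raw_text[start:end] is the highlighted span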
ver0.1 scripts/keyword_search.py ADDED
@@ -0,0 +1,169 @@
+ # set path
+ import glob, os, sys
+ from udfPreprocess.search import semantic_search
+ sys.path.append('../udfPreprocess')
+
+ #import helper
+ import udfPreprocess.docPreprocessing as pre
+ import udfPreprocess.cleaning as clean
+ from udfPreprocess.search import bm25_tokenizer, bm25TokenizeDoc, lexical_search
+ #import needed libraries
+ import seaborn as sns
+ from pandas import DataFrame
+ from sentence_transformers import SentenceTransformer, CrossEncoder, util
+ # from keybert import KeyBERT
+ from transformers import pipeline
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import streamlit as st
+ import pandas as pd
+ from rank_bm25 import BM25Okapi
+ from sklearn.feature_extraction import _stop_words
+ import string
+ from tqdm.autonotebook import tqdm
+ import numpy as np
+ import docx
+ from docx.shared import Inches
+ from docx.shared import Pt
+ from docx.enum.style import WD_STYLE_TYPE
+ import logging
+ logger = logging.getLogger(__name__)
+ import tempfile
+ import sqlite3
+ import json
+ import configparser
+
+
+ def app():
+
+ with st.container():
+ st.markdown("<h1 style='text-align: center; \
+ color: black;'> Search</h1>",
+ unsafe_allow_html=True)
+ st.write(' ')
+ st.write(' ')
+
+ with st.expander("ℹ️ - About this app", expanded=False):
+
+ st.write(
+ """
+ The *Keyword Search* app is an easy-to-use interface \
+ built in Streamlit for doing keyword search in \
+ policy document - developed by GIZ Data and the \
+ Sustainable Development Solution Network.
+ """)
+
+ st.markdown("")
+
+
+
+ with st.sidebar:
+ with open('sample/keywordexample.json','r') as json_file:
+ keywordexample = json.load(json_file)
+
+ genre = st.radio("Select Keyword Category", list(keywordexample.keys()))
+ if genre == 'Food':
+ keywordList = keywordexample['Food']
+ elif genre == 'Climate':
+ keywordList = keywordexample['Climate']
+ elif genre == 'Social':
+ keywordList = keywordexample['Social']
+ elif genre == 'Nature':
+ keywordList = keywordexample['Nature']
+ elif genre == 'Implementation':
+ keywordList = keywordexample['Implementation']
+ else:
+ keywordList = None
+
+ searchtype = st.selectbox("Do you want to find exact macthes or similar meaning/context", ['Exact Matches', 'Similar context/meaning'])
+
+
+ with st.container():
+ if keywordList is not None:
+ queryList = st.text_input("You selcted the {} category we will look for these keywords in document".format(genre),
+ value="{}".format(keywordList))
+ else:
+ queryList = st.text_input("Please enter here your question and we will look \
+ for an answer in the document OR enter the keyword you \
+ are looking for and we will \
+ we will look for similar context \
+ in the document.",
+ placeholder="Enter keyword here")
+
+ if st.button("Find them"):
+
+ if queryList == "":
+ st.info("🤔 No keyword provided, if you dont have any, please try example sets from sidebar!")
+ logging.warning("Terminated as no keyword provided")
+ else:
+
+ if 'docs' in st.session_state:
+ docs = st.session_state['docs']
+ paraList = st.session_state['paraList']
+
+ if searchtype == 'Exact Matches':
+ queryList = list(queryList.split(","))
+ logging.info("performing lexical search")
+ tokenized_corpus = bm25TokenizeDoc(paraList)
+ # st.write(len(tokenized_corpus))
+ document_bm25 = BM25Okapi(tokenized_corpus)
+
+ with st.spinner("Performing Exact matching search (Lexical search) for you"):
+ st.markdown("##### Top few lexical search (BM25) hits #####")
+
+ for keyword in queryList:
+
+ bm25_hits = lexical_search(keyword,document_bm25)
+
+
+ counter = 0
+ for hit in bm25_hits:
+ if hit['score'] > 0.00:
+ counter += 1
+ if counter == 1:
+ st.markdown("###### Results for keyword: **{}** ######".format(keyword))
+ # st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
+ st.write("\t {}: {}\t".format(counter, paraList[hit['corpus_id']].replace("\n", " ")))
+
+
+ if counter == 0:
+ st.write("No results found for '**{}**' ".format(keyword))
+
+ st.markdown("---")
+ else:
+ logging.info("starting semantic search")
+ with st.spinner("Performing Similar/Contextual search"):
+ query = "Find {} related issues ?".format(queryList)
+ config = configparser.ConfigParser()
+ config.read_file(open('udfPreprocess/paramconfig.cfg'))
+ threshold = float(config.get('semantic_search','THRESHOLD'))
+ # st.write(query)
+ semantic_hits = semantic_search(query,paraList)
+ st.markdown("##### Few Semantic search hits for {} related topics #####".format(queryList))
+
+ for i,queryhit in enumerate(semantic_hits):
+
+ # st.markdown("###### Results for query: **{}** ######".format(queryList[i]))
+ counter = 0
+ for hit in queryhit:
+ counter += 1
+
+
+ if hit['score'] > threshold:
+ # st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
+ st.write("\t {}: \t {}".format(counter, paraList[hit['corpus_id']].replace("\n", " ")))
+
+ # document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
+ st.markdown("---")
+ # st.write(semantic_hits)
+
+
+
+
+ else:
+ st.info("🤔 No document found, please try to upload it at the sidebar!")
+ logging.warning("Terminated as no keyword provided")
+
+
+
+