ugmSorcero committed on
Commit 6a6afbf
1 Parent(s): 843bc9e

Fixes linting

core/pipelines.py CHANGED
@@ -19,7 +19,7 @@ def keyword_search(index="documents", split_word_length=100):
 
     - Documents that have more lexical overlap with the query are more likely to be relevant
    - Words that occur in fewer documents are more significant than words that occur in many documents
-    
+
    :warning: **(HAYSTACK BUG) Keyword Search doesn't work if you reindex:** Please refresh page in order to reindex
    """
    document_store = InMemoryDocumentStore(index=index)
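
All of the hunks in this commit are consistent with running an autoformatter such as black (double quotes, magic trailing commas, collapsed short calls, stripped trailing whitespace), though the tool itself isn't named; this first one only removes trailing spaces from a blank docstring line. The docstring states the classic lexical-retrieval intuition, which a TF-IDF style retriever implements. A minimal sketch, assuming farm-haystack 1.x; apart from InMemoryDocumentStore(index=...) every name below is illustrative rather than taken from this repo:

# Sketch only: assumes farm-haystack 1.x is installed.
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever
from haystack.pipelines import DocumentSearchPipeline

document_store = InMemoryDocumentStore(index="documents")
document_store.write_documents(
    [{"content": "Keyword search favours documents with high lexical overlap."}]
)
retriever = TfidfRetriever(document_store=document_store)  # rarer terms score higher
pipeline = DocumentSearchPipeline(retriever)
print(pipeline.run(query="lexical overlap"))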
core/search_index.py CHANGED
@@ -21,9 +21,7 @@ def format_docs(documents):
 def index(documents, pipeline, clear_index=True):
     documents, doc_ids = format_docs(documents)
     if clear_index:
-        document_stores = pipeline.get_nodes_by_class(
-            class_type=BaseDocumentStore
-        )
+        document_stores = pipeline.get_nodes_by_class(class_type=BaseDocumentStore)
         for docstore in document_stores:
             docstore.delete_index(docstore.index)
     pipeline.run(documents=documents)
@@ -45,7 +43,7 @@ def search(queries, pipeline):
                     "score": res.score,
                     "id": res.meta["id"],
                     "fragment_id": res.id,
-                    "meta": res.meta
+                    "meta": res.meta,
                 }
             )
             if not score_is_empty:
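
The first hunk merely collapses the get_nodes_by_class(...) call onto one line, and the trailing comma after res.meta in the second is black's magic trailing comma; neither changes runtime behaviour. For orientation, a hypothetical driver for the two functions in this file; the pipelines dict and its "search_pipeline" key are assumptions (only "index_pipeline" appears elsewhere in this commit):

# Hypothetical usage, not part of the repo.
docs = [{"text": "Haystack pipelines wrap retrievers and document stores.", "id": 0}]
index(docs, pipelines["index_pipeline"], clear_index=True)  # deletes the old index first
results = search(["document stores"], pipelines["search_pipeline"])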
interface/components.py CHANGED
@@ -42,7 +42,7 @@ def component_select_pipeline(container):
         "index_pipeline": index_pipeline,
         "doc": pipeline_funcs[index_pipe].__doc__,
     }
-    st.session_state['doc_id'] = 0
+    st.session_state["doc_id"] = 0
 
 
 def component_show_pipeline(pipeline, pipeline_name):
@@ -63,7 +63,7 @@ def component_show_search_result(container, results):
         st.markdown(f"### Match {idx+1}")
         st.markdown(f"**Text**: {document['text']}")
         st.markdown(f"**Document**: {document['id']}")
-        if '_split_id' in document['meta']:
+        if "_split_id" in document["meta"]:
             st.markdown(f"**Document Chunk**: {document['meta']['_split_id']}")
         if document["score"] is not None:
             st.markdown(f"**Score**: {document['score']:.3f}")
@@ -78,14 +78,12 @@ def component_text_input(container, doc_id):
     while True:
         text = st.text_input(f"Document {doc_id}", key=doc_id)
         if text != "":
-            texts.append({"text": text, 'doc_id': doc_id})
+            texts.append({"text": text, "doc_id": doc_id})
             doc_id += 1
             st.markdown("---")
         else:
             break
-    corpus = [
-        {"text": doc["text"], "id": doc["doc_id"]} for doc in texts
-    ]
+    corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in texts]
     return corpus, doc_id
 
 
@@ -97,7 +95,7 @@ def component_article_url(container, doc_id):
     while True:
         url = st.text_input(f"URL {doc_id}", key=doc_id)
         if url != "":
-            urls.append({"text": extract_text_from_url(url), 'doc_id': doc_id})
+            urls.append({"text": extract_text_from_url(url), "doc_id": doc_id})
             doc_id += 1
             st.markdown("---")
         else:
@@ -105,11 +103,9 @@ def component_article_url(container, doc_id):
 
     for idx, doc in enumerate(urls):
         with st.expander(f"Preview URL {idx}"):
-            st.write(doc['text'])
+            st.write(doc["text"])
 
-    corpus = [
-        {"text": doc["text"], "id": doc["doc_id"]} for doc in urls
-    ]
+    corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in urls]
     return corpus, doc_id
 
 
@@ -125,7 +121,7 @@ def component_file_input(container, doc_id):
         if file != None:
             extracted_text = extract_text_from_file(file)
             if extracted_text != None:
-                files.append({"text": extracted_text, 'doc_id': doc_id})
+                files.append({"text": extracted_text, "doc_id": doc_id})
                 doc_id += 1
                 st.markdown("---")
             else:
@@ -135,9 +131,7 @@ def component_file_input(container, doc_id):
 
     for idx, doc in enumerate(files):
         with st.expander(f"Preview File {idx}"):
-            st.write(doc['text'])
+            st.write(doc["text"])
 
-    corpus = [
-        {"text": doc["text"], "id": doc["doc_id"]} for doc in files
-    ]
+    corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in files]
     return corpus, doc_id
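
Beyond the quote normalization, the three corpus = [...] comprehensions are collapsed onto single lines. The pattern these components share is an open-ended Streamlit input loop: each non-empty box increments doc_id, and because doc_id is also the widget key, a fresh empty box appears on the next rerun. A stripped-down sketch of that pattern (a rewrite for illustration, not the file's exact code):

import streamlit as st

def collect_texts(doc_id):
    # Keep offering input boxes until the user leaves one empty.
    texts = []
    while True:
        text = st.text_input(f"Document {doc_id}", key=doc_id)
        if text == "":
            break  # the first empty box ends the loop
        texts.append({"text": text, "doc_id": doc_id})
        doc_id += 1  # new key => Streamlit renders another box
    corpus = [{"text": d["text"], "id": d["doc_id"]} for d in texts]
    return corpus, doc_id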
interface/config.py CHANGED
@@ -4,7 +4,7 @@ from interface.pages import page_landing_page, page_search, page_index
 session_state_variables = {
     "pipeline": None,
     "pipeline_func_parameters": [],
-    "doc_id": 0
+    "doc_id": 0,
 }
 
 # Define Pages for the demo
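
Only a trailing comma is added here, so future entries will diff as one-line changes. These defaults are presumably seeded into st.session_state when the app boots; a common pattern for that (an assumption about the bootstrap code, not shown in this diff):

import streamlit as st

# Seed defaults once; reruns keep whatever the user has already set.
for key, value in session_state_variables.items():
    if key not in st.session_state:
        st.session_state[key] = value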
interface/pages.py CHANGED
@@ -78,20 +78,18 @@ def page_index(container):
         default_index=0,
         orientation="horizontal",
     )
-
-    clear_index = st.sidebar.checkbox('Clear Index', True)
 
-    doc_id = st.session_state['doc_id']
+    clear_index = st.sidebar.checkbox("Clear Index", True)
+
+    doc_id = st.session_state["doc_id"]
     corpus, doc_id = input_funcs[selected_input][0](container, doc_id)
 
     if len(corpus) > 0:
         index_results = None
         if st.button("Index"):
             index_results = index(
-                corpus,
-                st.session_state["pipeline"]["index_pipeline"],
-                clear_index
+                corpus, st.session_state["pipeline"]["index_pipeline"], clear_index
             )
-            st.session_state['doc_id'] = doc_id
+            st.session_state["doc_id"] = doc_id
         if index_results:
             st.write(index_results)
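
Here the index(...) call is reflowed onto one line and the quote style is normalized; behaviour is unchanged. One detail worth noting: in st.checkbox the second positional argument is value, the checkbox's initial state, so "Clear Index" starts checked and reindexing wipes the store unless the user opts out. Equivalent, with the keyword spelled out:

clear_index = st.sidebar.checkbox("Clear Index", value=True)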