Stefano Fiorucci committed on
Commit
592c3d9
•
1 Parent(s): 22d2aff

improve the app

Browse files
Files changed (1) hide show
  1. app.py +41 -21
app.py CHANGED
@@ -1,21 +1,17 @@
1
- import os
2
  import time
3
  import streamlit as st
4
- import subprocess
5
- import sys
6
  import logging
7
  import pandas as pd
8
  from json import JSONDecodeError
9
- from pathlib import Path
10
  from markdown import markdown
11
  import random
12
  from typing import List, Dict, Any, Tuple
13
 
14
- from haystack.document_stores import ElasticsearchDocumentStore, FAISSDocumentStore
15
  from haystack.nodes import EmbeddingRetriever
16
  from haystack.pipelines import ExtractiveQAPipeline
17
- from haystack.preprocessor.preprocessor import PreProcessor
18
- from haystack.nodes import FARMReader, TransformersReader
19
  from haystack.pipelines import ExtractiveQAPipeline
20
  from annotated_text import annotation
21
  import shutil
@@ -48,21 +44,20 @@ def set_state_if_absent(key, value):
48
  if key not in st.session_state:
49
  st.session_state[key] = value
50
 
51
- def get_backlink(result, ip) -> str:
52
- """
53
- Build URL from metadata and Google VM IP
54
- (quick and dirty)
55
- """
56
- meta = result['meta']
57
- fpath = meta['filepath'].rpartition('/')[-1]
58
- fname = fpath.rpartition('.')[0]
59
- return f'http://{ip}:8000/data/final/ner_html/{fname}.html'
60
-
61
-
62
  def query(pipe, question):
63
  """Run query and get answers"""
64
  return (pipe.run(question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}), None)
65
 
 
 
 
 
 
 
 
 
 
 
66
  def main():
67
  # st.set_page_config(page_title='Who killed Laura Palmer?',
68
  # page_icon="https://static.wikia.nocookie.net/twinpeaks/images/4/4a/Site-favicon.ico/revision/latest?cb=20210710003705")
@@ -146,11 +141,28 @@ Ask any question on [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
146
 
147
  # Run button
148
  run_pressed = col1.button("Run")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  run_query = (run_pressed or question != st.session_state.question) and not st.session_state.random_question_requested
151
 
152
  # Get results for query
153
  if run_query and question:
 
154
  reset_results()
155
  st.session_state.question = question
156
 
@@ -160,6 +172,8 @@ Ask any question on [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
160
  ):
161
  try:
162
  st.session_state.results, st.session_state.raw_json = query(pipe, question)
 
 
163
  except JSONDecodeError as je:
164
  st.error("πŸ‘“    An error occurred reading the results. Is the document store working?")
165
  return
@@ -190,7 +204,13 @@ Ask any question on [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
190
  #url = get_backlink(result, my_ip)
191
  # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
192
  st.write(markdown("- ..."+context[:start_idx] + str(annotation(answer, "ANSWER", "#8ef")) + context[end_idx:]+"..."), unsafe_allow_html=True)
193
- #st.write(markdown(f"<a href='{url}'>{title} - <i>{authors}</i></a>"), unsafe_allow_html=True)
194
- #st.write(markdown(f"**Relevance:** {result['score']:.2f}"), unsafe_allow_html=True)
195
-
 
 
 
 
 
 
196
  main()
1
+
2
  import time
3
  import streamlit as st
 
 
4
  import logging
5
  import pandas as pd
6
  from json import JSONDecodeError
 
7
  from markdown import markdown
8
  import random
9
  from typing import List, Dict, Any, Tuple
10
 
11
+ from haystack.document_stores import FAISSDocumentStore
12
  from haystack.nodes import EmbeddingRetriever
13
  from haystack.pipelines import ExtractiveQAPipeline
14
+ from haystack.nodes import FARMReader
 
15
  from haystack.pipelines import ExtractiveQAPipeline
16
  from annotated_text import annotation
17
  import shutil
44
  if key not in st.session_state:
45
  st.session_state[key] = value
46
 
 
 
 
 
 
 
 
 
 
 
 
47
  def query(pipe, question):
48
  """Run query and get answers"""
49
  return (pipe.run(question, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}), None)
50
 
51
def get_backlink(result) -> "str | None":
    """Return the source URL stored in an answer's document metadata, if any.

    Looks up ``result["document"]["meta"]["url"]`` while tolerating a missing,
    empty or non-dict value at every level of the lookup.

    Args:
        result: A single answer entry, expected to be a dict.

    Returns:
        The URL when it is present and non-empty, otherwise ``None``.
    """
    # The return annotation is quoted so that it is not evaluated when the
    # function is defined; the function yields one string or None, never a tuple.
    node = result
    for key in ("document", "meta"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    if not isinstance(node, dict):
        return None
    # An empty URL is treated the same as a missing one.
    return node.get("url") or None
60
+
61
  def main():
62
  # st.set_page_config(page_title='Who killed Laura Palmer?',
63
  # page_icon="https://static.wikia.nocookie.net/twinpeaks/images/4/4a/Site-favicon.ico/revision/latest?cb=20210710003705")
141
 
142
  # Run button
143
  run_pressed = col1.button("Run")
144
+
145
+ df=''
146
+ # Get next random question from the CSV
147
+ if col2.button("Random question"):
148
+ reset_results()
149
+ new_row = df.sample(1)
150
+ while new_row["Question Text"].values[0] == st.session_state.question: # Avoid picking the same question twice (the change is not visible on the UI)
151
+ new_row = df.sample(1)
152
+ st.session_state.question = new_row["Question Text"].values[0]
153
+ st.session_state.answer = new_row["Answer"].values[0]
154
+ st.session_state.random_question_requested = True
155
+ # Re-runs the script setting the random question as the textbox value
156
+ # Unfortunately necessary as the Random Question button is _below_ the textbox
157
+ raise st.script_runner.RerunException(st.script_request_queue.RerunData(None))
158
+ else:
159
+ st.session_state.random_question_requested = False
160
 
161
  run_query = (run_pressed or question != st.session_state.question) and not st.session_state.random_question_requested
162
 
163
  # Get results for query
164
  if run_query and question:
165
+ time_start=time.time()
166
  reset_results()
167
  st.session_state.question = question
168
 
172
  ):
173
  try:
174
  st.session_state.results, st.session_state.raw_json = query(pipe, question)
175
+ time_end=time.time()
176
+ print(f'elapsed time: {time_end - time_start}')
177
  except JSONDecodeError as je:
178
  st.error("πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
179
  return
204
  #url = get_backlink(result, my_ip)
205
  # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
206
  st.write(markdown("- ..."+context[:start_idx] + str(annotation(answer, "ANSWER", "#8ef")) + context[end_idx:]+"..."), unsafe_allow_html=True)
207
+ source = ""
208
+ url = get_backlink(result)
209
+ if url:
210
+ source = f"({result['document']['meta']['url']})"
211
+ else:
212
+ source = f"{result['source']}"
213
+ st.markdown(f"**Score:** {result['score']:.2f} - **Source:** {source}")
214
+ else:
215
+ st.info("πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!")
216
  main()