Stefano Fiorucci committed on
Commit
5bbc60d
1 Parent(s): 4d7c011

refactoring try

Browse files
Files changed (2) hide show
  1. app.py +3 -25
  2. haystack_utils.py +29 -1
app.py CHANGED
@@ -9,7 +9,8 @@ from typing import List, Dict, Any, Tuple, Optional
9
  from annotated_text import annotation
10
  from urllib.parse import unquote
11
 
12
- from haystack_utils import start_haystack, set_state_if_absent, load_questions
 
13
 
14
  INDEX_DIR = 'data/index'
15
  QUESTIONS_PATH = 'data/questions.txt'
@@ -20,30 +21,7 @@ READER_CONFIG_THRESHOLD = 0.15
20
  RETRIEVER_TOP_K = 10
21
  READER_TOP_K = 5
22
 
23
- # the following function is a wrapper for start_haystack,
24
- # which loads document store, retriever, reader and creates pipeline.
25
- # cached to make index and models load only at start
26
- @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
27
- allow_output_mutation=True)
28
- def start_app():
29
- return start_haystack()
30
-
31
-
32
- @st.cache()
33
- def load_questions_wrapper():
34
- return load_questions()
35
-
36
- pipe = start_app()
37
-
38
- # the pipeline is not included as parameter of the following function,
39
- # because it is difficult to cache
40
- @st.cache(persist=True, allow_output_mutation=True)
41
- def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
42
- """Run query and get answers"""
43
- params = {"Retriever": {"top_k": retriever_top_k},
44
- "Reader": {"top_k": reader_top_k}}
45
- results = pipe.run(question, params=params)
46
- return results
47
 
48
 
49
  def main():
9
  from annotated_text import annotation
10
  from urllib.parse import unquote
11
 
12
+ from haystack_utils import (set_state_if_absent, load_questions,
13
+ query)
14
 
15
  INDEX_DIR = 'data/index'
16
  QUESTIONS_PATH = 'data/questions.txt'
21
  RETRIEVER_TOP_K = 10
22
  READER_TOP_K = 5
23
 
24
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
  def main():
haystack_utils.py CHANGED
@@ -8,6 +8,8 @@ import streamlit as st
8
  from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
9
  READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
10
 
 
 
11
  def start_haystack():
12
  """
13
  load document store, retriever, reader and create pipeline
@@ -35,8 +37,34 @@ def set_state_if_absent(key, value):
35
  if key not in st.session_state:
36
  st.session_state[key] = value
37
 
 
38
  def load_questions():
39
  with open(QUESTIONS_PATH) as fin:
40
  questions = [line.strip() for line in fin.readlines()
41
  if not line.startswith('#')]
42
- return questions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
9
  READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
10
 
11
+ @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
12
+ allow_output_mutation=True)
13
  def start_haystack():
14
  """
15
  load document store, retriever, reader and create pipeline
37
  if key not in st.session_state:
38
  st.session_state[key] = value
39
 
40
+ @st.cache()
41
  def load_questions():
42
  with open(QUESTIONS_PATH) as fin:
43
  questions = [line.strip() for line in fin.readlines()
44
  if not line.startswith('#')]
45
+ return questions
46
+
47
+ # # the following function is a wrapper for start_haystack,
48
+ # # which loads document store, retriever, reader and creates pipeline.
49
+ # # cached to make index and models load only at start
50
+ # @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
51
+ # allow_output_mutation=True)
52
+ # def start_app():
53
+ # return start_haystack()
54
+
55
+
56
+ # @st.cache()
57
+ # def load_questions_wrapper():
58
+ # return load_questions()
59
+
60
+ pipe = start_haystack()
61
+
62
+ # the pipeline is not included as parameter of the following function,
63
+ # because it is difficult to cache
64
+ @st.cache(persist=True, allow_output_mutation=True)
65
+ def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
66
+ """Run query and get answers"""
67
+ params = {"Retriever": {"top_k": retriever_top_k},
68
+ "Reader": {"top_k": reader_top_k}}
69
+ results = pipe.run(question, params=params)
70
+ return results