Spaces:
Sleeping
Sleeping
Stefano Fiorucci
committed on
Commit
•
5bbc60d
1
Parent(s):
4d7c011
refactoring try
Browse files
- app.py +3 -25
- haystack_utils.py +29 -1
app.py
CHANGED
@@ -9,7 +9,8 @@ from typing import List, Dict, Any, Tuple, Optional
|
|
9 |
from annotated_text import annotation
|
10 |
from urllib.parse import unquote
|
11 |
|
12 |
-
from haystack_utils import
|
|
|
13 |
|
14 |
INDEX_DIR = 'data/index'
|
15 |
QUESTIONS_PATH = 'data/questions.txt'
|
@@ -20,30 +21,7 @@ READER_CONFIG_THRESHOLD = 0.15
|
|
20 |
RETRIEVER_TOP_K = 10
|
21 |
READER_TOP_K = 5
|
22 |
|
23 |
-
|
24 |
-
# which loads document store, retriever, reader and creates pipeline.
|
25 |
-
# cached to make index and models load only at start
|
26 |
-
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
27 |
-
allow_output_mutation=True)
|
28 |
-
def start_app():
|
29 |
-
return start_haystack()
|
30 |
-
|
31 |
-
|
32 |
-
@st.cache()
|
33 |
-
def load_questions_wrapper():
|
34 |
-
return load_questions()
|
35 |
-
|
36 |
-
pipe = start_app()
|
37 |
-
|
38 |
-
# the pipeline is not included as parameter of the following function,
|
39 |
-
# because it is difficult to cache
|
40 |
-
@st.cache(persist=True, allow_output_mutation=True)
|
41 |
-
def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
|
42 |
-
"""Run query and get answers"""
|
43 |
-
params = {"Retriever": {"top_k": retriever_top_k},
|
44 |
-
"Reader": {"top_k": reader_top_k}}
|
45 |
-
results = pipe.run(question, params=params)
|
46 |
-
return results
|
47 |
|
48 |
|
49 |
def main():
|
9 |
from annotated_text import annotation
|
10 |
from urllib.parse import unquote
|
11 |
|
12 |
+
from haystack_utils import (set_state_if_absent, load_questions,
|
13 |
+
query)
|
14 |
|
15 |
INDEX_DIR = 'data/index'
|
16 |
QUESTIONS_PATH = 'data/questions.txt'
|
21 |
RETRIEVER_TOP_K = 10
|
22 |
READER_TOP_K = 5
|
23 |
|
24 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
|
27 |
def main():
|
haystack_utils.py
CHANGED
@@ -8,6 +8,8 @@ import streamlit as st
|
|
8 |
from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
|
9 |
READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
|
10 |
|
|
|
|
|
11 |
def start_haystack():
|
12 |
"""
|
13 |
load document store, retriever, reader and create pipeline
|
@@ -35,8 +37,34 @@ def set_state_if_absent(key, value):
|
|
35 |
if key not in st.session_state:
|
36 |
st.session_state[key] = value
|
37 |
|
|
|
38 |
def load_questions():
|
39 |
with open(QUESTIONS_PATH) as fin:
|
40 |
questions = [line.strip() for line in fin.readlines()
|
41 |
if not line.startswith('#')]
|
42 |
-
return questions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
|
9 |
READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
|
10 |
|
11 |
+
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
12 |
+
allow_output_mutation=True)
|
13 |
def start_haystack():
|
14 |
"""
|
15 |
load document store, retriever, reader and create pipeline
|
37 |
if key not in st.session_state:
|
38 |
st.session_state[key] = value
|
39 |
|
40 |
+
@st.cache()
|
41 |
def load_questions():
|
42 |
with open(QUESTIONS_PATH) as fin:
|
43 |
questions = [line.strip() for line in fin.readlines()
|
44 |
if not line.startswith('#')]
|
45 |
+
return questions
|
46 |
+
|
47 |
+
# # the following function is a wrapper for start_haystack,
|
48 |
+
# # which loads document store, retriever, reader and creates pipeline.
|
49 |
+
# # cached to make index and models load only at start
|
50 |
+
# @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
51 |
+
# allow_output_mutation=True)
|
52 |
+
# def start_app():
|
53 |
+
# return start_haystack()
|
54 |
+
|
55 |
+
|
56 |
+
# @st.cache()
|
57 |
+
# def load_questions_wrapper():
|
58 |
+
# return load_questions()
|
59 |
+
|
60 |
+
pipe = start_haystack()
|
61 |
+
|
62 |
+
# the pipeline is not included as parameter of the following function,
|
63 |
+
# because it is difficult to cache
|
64 |
+
@st.cache(persist=True, allow_output_mutation=True)
|
65 |
+
def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
|
66 |
+
"""Run query and get answers"""
|
67 |
+
params = {"Retriever": {"top_k": retriever_top_k},
|
68 |
+
"Reader": {"top_k": reader_top_k}}
|
69 |
+
results = pipe.run(question, params=params)
|
70 |
+
return results
|