File size: 2,490 Bytes
aabdf81
 
 
 
 
 
 
 
 
 
5bbc60d
 
aabdf81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bbc60d
aabdf81
 
 
 
5bbc60d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import shutil
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import ExtractiveQAPipeline
from haystack.nodes import FARMReader
import streamlit as st

from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
    READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)

@st.cache(
    hash_funcs={"builtins.SwigPyObject": lambda _: None},
    allow_output_mutation=True,
)
def start_haystack():
    """
    Build the extractive QA pipeline used by the app.

    Copies the FAISS document-store database from INDEX_DIR into the
    current working directory, loads the FAISS index together with its
    config file, prints the number of indexed documents, and then wires
    an embedding retriever and a CPU-only FARM reader into an
    ExtractiveQAPipeline, which is returned.
    """
    # NOTE(review): presumably the saved index config points at the .db file
    # relative to the working directory, hence the copy -- confirm.
    shutil.copy(f'{INDEX_DIR}/faiss_document_store.db', '.')

    store = FAISSDocumentStore(
        faiss_index_path=f'{INDEX_DIR}/my_faiss_index.faiss',
        faiss_config_path=f'{INDEX_DIR}/my_faiss_index.json',
    )
    print(f'Index size: {store.get_document_count()}')

    embedding_retriever = EmbeddingRetriever(
        document_store=store,
        embedding_model=RETRIEVER_MODEL,
        model_format=RETRIEVER_MODEL_FORMAT,
    )
    answer_reader = FARMReader(
        model_name_or_path=READER_MODEL,
        use_gpu=False,
        confidence_threshold=READER_CONFIG_THRESHOLD,
    )
    return ExtractiveQAPipeline(answer_reader, embedding_retriever)

def set_state_if_absent(key, value):
    """Store `value` under `key` in st.session_state unless `key` is already set."""
    if key in st.session_state:
        # An existing entry is left untouched.
        return
    st.session_state[key] = value

@st.cache()
def load_questions():
    """
    Load the questions listed in the file at QUESTIONS_PATH.

    The file is read as UTF-8, one question per line. Lines that start
    with '#' are treated as comments and skipped, and so are lines that
    are empty once surrounding whitespace is stripped.

    Returns:
        The remaining lines, stripped of surrounding whitespace, in file
        order.
    """
    # Explicit encoding: the default for open() depends on the platform
    # locale, so non-ASCII questions could otherwise decode differently
    # (or fail) from one machine to another.
    with open(QUESTIONS_PATH, encoding='utf-8') as fin:
        stripped = (line.strip() for line in fin if not line.startswith('#'))
        # Drop blank lines so callers never receive an empty question.
        return [question for question in stripped if question]

# # the following function is a wrapper for start_haystack,
# # which loads document store, retriever, reader and creates pipeline.
# # cached to make index and models load only at start
# @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
#           allow_output_mutation=True)
# def start_app():
#     return start_haystack()


# @st.cache()
# def load_questions_wrapper():
#     return load_questions()

# Build the QA pipeline when this module is executed and keep it as a
# module-level object; query() below reads it as a global.
# start_haystack is wrapped in st.cache.
pipe = start_haystack()

# The pipeline is deliberately not passed as a parameter to the following
# function, because it is difficult to cache.
@st.cache(persist=True, allow_output_mutation=True)
def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
    """
    Run `question` through the module-level pipeline and return its results.

    `retriever_top_k` and `reader_top_k` are forwarded as the "top_k"
    setting of the "Retriever" and "Reader" nodes respectively.
    """
    return pipe.run(
        question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )