Jai12345 committed on
Commit
25a257c
•
1 Parent(s): f301424

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from haystack.utils import fetch_archive_from_http, clean_wiki_text, convert_files_to_docs
4
+ from haystack.schema import Answer
5
+ from haystack.document_stores import InMemoryDocumentStore
6
+ from haystack.pipelines import ExtractiveQAPipeline
7
+ from haystack.nodes import FARMReader, TfidfRetriever
8
+ import logging
9
+ from markdown import markdown
10
+ from annotated_text import annotation
11
+ from PIL import Image
12
+
13
+ os.environ['TOKENIZERS_PARALLELISM'] = "false"
14
+
15
+
16
+ # Haystack Components
17
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None}, allow_output_mutation=True)
def start_haystack():
    """Build the extractive question-answering pipeline used by the app.

    Creates an in-memory document store, fills it via
    ``load_and_write_data``, and wires a TF-IDF retriever and a FARM reader
    into an ``ExtractiveQAPipeline``.

    The function is wrapped in ``st.cache``; ``SwigPyObject`` instances are
    hashed to a constant so Streamlit does not try to hash them.

    Returns:
        The assembled ``ExtractiveQAPipeline``.
    """
    store = InMemoryDocumentStore()
    # Documents are written before the retriever is constructed, as in the
    # original ordering (presumably the retriever indexes the store when it
    # is created -- confirm against the Haystack docs before reordering).
    load_and_write_data(store)
    tfidf = TfidfRetriever(document_store=store)
    return ExtractiveQAPipeline(
        reader=FARMReader(
            model_name_or_path="deepset/roberta-base-squad2-distilled",
            use_gpu=True,
        ),
        retriever=tfidf,
    )
25
+
26
+
27
def load_and_write_data(document_store):
    """Convert the files under ``./amazon_help_docs`` and store them.

    The files are converted with ``convert_files_to_docs`` (cleaned with
    ``clean_wiki_text`` and split into paragraphs) and the resulting
    documents are written to ``document_store``.

    Args:
        document_store: Store exposing ``write_documents``; it is mutated
            in place.
    """
    documents = convert_files_to_docs(
        dir_path='./amazon_help_docs',
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )
    document_store.write_documents(documents)
32
+
33
+
34
# Module-level pipeline instance; ask_question() reads this global.
pipeline = start_haystack()
35
+
36
+
37
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the Streamlit session state.

    An existing entry for ``key`` is left untouched.

    Args:
        key: Session-state key to initialise.
        value: Value stored only when ``key`` is not present yet.
    """
    if key in st.session_state:
        return
    st.session_state[key] = value
40
+
41
+
42
# Seed the session state: a default question and no results yet.
set_state_if_absent(key="question", value="What is amazon music?")
set_state_if_absent(key="results", value=None)
44
+
45
+
46
def reset_results(*args):
    """Clear the stored results in the Streamlit session state.

    Used as the ``on_change`` callback of the question input; any positional
    arguments are accepted and ignored.
    """
    st.session_state["results"] = None
48
+
49
+
50
# Streamlit App

# Banner image displayed at the top of the page.
image = Image.open('got-haystack.png')
st.image(image)

# NOTE(review): this intro still describes a Game of Thrones corpus, while
# load_and_write_data() indexes './amazon_help_docs' and the default question
# is about Amazon Music -- confirm the intended copy and update it.
st.markdown("""
This QA demo uses a [Haystack Extractive QA Pipeline](https://haystack.deepset.ai/components/ready-made-pipelines#extractiveqapipeline) with
an [InMemoryDocumentStore](https://haystack.deepset.ai/components/document-store) which contains documents about Game of Thrones 👑
Go ahead and ask questions about the marvellous kingdom!
""", unsafe_allow_html=True)

# Editing the question triggers reset_results, which clears the stored results.
question = st.text_input("", value=st.session_state.question, max_chars=100, on_change=reset_results)
62
+
63
+
64
def ask_question(question):
    """Run ``question`` through the global pipeline and shape the answers.

    The pipeline is queried with ``top_k`` 10 for the retriever and 5 for the
    reader. Each entry of ``prediction["answers"]`` is converted with
    ``to_dict()`` and turned into a plain dict.

    Args:
        question: The query string passed to ``pipeline.run``.

    Returns:
        A list of dicts, one per answer. Entries with a non-empty answer
        carry ``context`` (wrapped in ``...``), ``answer``, ``relevance``
        (score as a percentage rounded to two decimals) and
        ``offset_start_in_doc``; entries without an answer carry ``context``
        and ``answer`` set to ``None`` plus ``relevance``.
    """
    run_params = {"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
    prediction = pipeline.run(query=question, params=run_params)

    results = []
    for raw_answer in prediction["answers"]:
        fields = raw_answer.to_dict()
        relevance = round(fields["score"] * 100, 2)

        if not fields["answer"]:
            # Empty answer: keep the score but no text to show.
            results.append({"context": None, "answer": None, "relevance": relevance})
            continue

        results.append(
            {
                "context": "..." + fields["context"] + "...",
                "answer": fields["answer"],
                "relevance": relevance,
                "offset_start_in_doc": fields["offsets_in_document"][0]["start"],
            }
        )
    return results
87
+
88
+
89
# Run the query for the current question and keep the results in session state.
if question:
    with st.spinner("👑    Performing semantic search on royal scripts..."):
        try:
            logging.info("Asked %s", question)
            st.session_state.results = ask_question(question)
        except Exception:
            # Top-level UI boundary: log the traceback, drop any stale results
            # and tell the user, instead of failing silently with a blank page.
            logging.exception("Failed to answer question %r", question)
            st.session_state.results = None
            st.error("An error occurred while answering your question. Please try again.")

# Render each stored result, highlighting the answer inside its context.
if st.session_state.results:
    st.write('## Top Results')
    for result in st.session_state.results:
        answer, context = result["answer"], result["context"]
        if not answer:
            st.info(
                "🤔    Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
            )
            continue

        # partition() splits around the first occurrence of the answer; if the
        # answer text is not found, the whole context is kept and the
        # highlighted answer is appended instead of slicing at index -1.
        before, _, after = context.partition(answer)
        highlighted = str(
            annotation(body=answer, label="ANSWER", background="#964448", color='#ffffff')
        )
        st.write(
            markdown(before + highlighted + after),
            unsafe_allow_html=True,
        )
        st.markdown(f"**Relevance:** {result['relevance']}")
116
+
117
+