anakin87 committed on
Commit
35f0167
·
1 Parent(s): 4c2a969

steps in present output

Browse files
Rock_fact_checker.py CHANGED
@@ -1,30 +1,30 @@
1
  import streamlit as st
2
 
3
  import time
4
- import streamlit as st
5
  import logging
6
  from json import JSONDecodeError
7
  # from markdown import markdown
8
  # from annotated_text import annotation
9
  # from urllib.parse import unquote
10
  import random
 
11
 
12
- from app_utils.backend_utils import load_questions, query
13
- from app_utils.frontend_utils import set_state_if_absent, reset_results
14
  from app_utils.config import RETRIEVER_TOP_K
15
 
16
 
17
  def main():
18
 
19
 
20
- questions = load_questions()
21
 
22
  # Persistent state
23
- set_state_if_absent('question', "Elvis Presley is alive")
24
  set_state_if_absent('answer', '')
25
  set_state_if_absent('results', None)
26
  set_state_if_absent('raw_json', None)
27
- set_state_if_absent('random_question_requested', False)
28
 
29
 
30
  ## MAIN CONTAINER
@@ -34,7 +34,7 @@ def main():
34
  ##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
35
  """)
36
  # Search bar
37
- question = st.text_input("", value=st.session_state.question,
38
  max_chars=100, on_change=reset_results)
39
  col1, col2 = st.columns(2)
40
  col1.markdown(
@@ -43,33 +43,33 @@ def main():
43
  "<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
44
  # Run button
45
  run_pressed = col1.button("Run")
46
- # Random question button
47
- if col2.button("Random question"):
48
  reset_results()
49
- question = random.choice(questions)
50
- # Avoid picking the same question twice (the change is not visible on the UI)
51
- while question == st.session_state.question:
52
- question = random.choice(questions)
53
- st.session_state.question = question
54
- st.session_state.random_question_requested = True
55
- # Re-runs the script setting the random question as the textbox value
56
- # Unfortunately necessary as the Random Question button is _below_ the textbox
57
  # raise st.script_runner.RerunException(
58
  # st.script_request_queue.RerunData(None))
59
  else:
60
- st.session_state.random_question_requested = False
61
- run_query = (run_pressed or question != st.session_state.question) \
62
- and not st.session_state.random_question_requested
63
 
64
  # Get results for query
65
- if run_query and question:
66
  time_start = time.time()
67
  reset_results()
68
- st.session_state.question = question
69
  with st.spinner("🧠 &nbsp;&nbsp; Performing neural search on documents..."):
70
  try:
71
  st.session_state.results = query(
72
- question, RETRIEVER_TOP_K)
73
  time_end = time.time()
74
  print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
75
  print(f'elapsed time: {time_end - time_start}')
@@ -82,10 +82,53 @@ def main():
82
  st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
83
  return
84
 
85
- # # Display results
86
- # if st.session_state.results:
87
- # st.write("## Results:")
88
- # alert_irrelevance = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # if len(st.session_state.results['answers']) == 0:
90
  # st.info("""🤔 &nbsp;&nbsp; Haystack is unsure whether any of
91
  # the documents contain an answer to your question. Try to reformulate it!""")
@@ -114,4 +157,13 @@ def main():
114
  # st.markdown(
115
  # f"**Score:** {result['score']:.2f} - **Source:** {source}")
116
 
 
 
 
 
 
 
 
 
 
117
  main()
 
1
  import streamlit as st
2
 
3
  import time
 
4
  import logging
5
  from json import JSONDecodeError
6
  # from markdown import markdown
7
  # from annotated_text import annotation
8
  # from urllib.parse import unquote
9
  import random
10
+ import pandas as pd
11
 
12
+ from app_utils.backend_utils import load_statements, query
13
+ from app_utils.frontend_utils import set_state_if_absent, reset_results, entailment_html_messages
14
  from app_utils.config import RETRIEVER_TOP_K
15
 
16
 
17
  def main():
18
 
19
 
20
+ statements = load_statements()
21
 
22
  # Persistent state
23
+ set_state_if_absent('statement', "Elvis Presley is alive")
24
  set_state_if_absent('answer', '')
25
  set_state_if_absent('results', None)
26
  set_state_if_absent('raw_json', None)
27
+ set_state_if_absent('random_statement_requested', False)
28
 
29
 
30
  ## MAIN CONTAINER
 
34
  ##### Enter a factual statement about [Rock music](https://en.wikipedia.org/wiki/List_of_mainstream_rock_performers) and let the AI check it out for you...
35
  """)
36
  # Search bar
37
+ statement = st.text_input("", value=st.session_state.statement,
38
  max_chars=100, on_change=reset_results)
39
  col1, col2 = st.columns(2)
40
  col1.markdown(
 
43
  "<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
44
  # Run button
45
  run_pressed = col1.button("Run")
46
+ # Random statement button
47
+ if col2.button("Random statement"):
48
  reset_results()
49
+ statement = random.choice(statements)
50
+ # Avoid picking the same statement twice (the change is not visible on the UI)
51
+ while statement == st.session_state.statement:
52
+ statement = random.choice(statements)
53
+ st.session_state.statement = statement
54
+ st.session_state.random_statement_requested = True
55
+ # Re-runs the script setting the random statement as the textbox value
56
+ # Unfortunately necessary as the Random statement button is _below_ the textbox
57
  # raise st.script_runner.RerunException(
58
  # st.script_request_queue.RerunData(None))
59
  else:
60
+ st.session_state.random_statement_requested = False
61
+ run_query = (run_pressed or statement != st.session_state.statement) \
62
+ and not st.session_state.random_statement_requested
63
 
64
  # Get results for query
65
+ if run_query and statement:
66
  time_start = time.time()
67
  reset_results()
68
+ st.session_state.statement = statement
69
  with st.spinner("🧠 &nbsp;&nbsp; Performing neural search on documents..."):
70
  try:
71
  st.session_state.results = query(
72
+ statement, RETRIEVER_TOP_K)
73
  time_end = time.time()
74
  print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
75
  print(f'elapsed time: {time_end - time_start}')
 
82
  st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
83
  return
84
 
85
+ # Display results
86
+ if st.session_state.results:
87
+ results = st.session_state.results
88
+ docs, agg_entailment_info = results['documents'], results['agg_entailment_info']
89
+ print(results)
90
+
91
+ max_key = max(agg_entailment_info, key=agg_entailment_info.get)
92
+ message = entailment_html_messages[max_key]
93
+ st.markdown(f'<h4>{message}</h4>', unsafe_allow_html=True)
94
+ st.markdown(f'###### Aggregate entailment information:')
95
+ st.write(results['agg_entailment_info'])
96
+ st.markdown(f'###### Relevant snippets:')
97
+
98
+ # colms = st.columns((2, 5, 1, 1, 1, 1))
99
+ # fields = ["Page title",'Content', 'Relevance', 'contradiction', 'neutral', 'entailment']
100
+ # for col, field_name in zip(colms, fields):
101
+ # # header
102
+ # col.write(field_name)
103
+ df = []
104
+ for doc in docs:
105
+ # col1, col2, col3, col4, col5, col6 = st.columns((2, 5, 1, 1, 1, 1))
106
+ # col1.write(f"[{doc.meta['name']}]({doc.meta['url']})")
107
+ # col2.write(f"{doc.content}")
108
+ # col3.write(f"{doc.score:.3f}")
109
+ # col4.write(f"{doc.meta['entailment_info']['contradiction']:.2f}")
110
+ # col5.write(f"{doc.meta['entailment_info']['neutral']:.2f}")
111
+ # col6.write(f"{doc.meta['entailment_info']['entailment']:.2f}")
112
+
113
+ # 'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
114
+ # 'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
115
+ # 'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
116
+ # # 'url': doc.meta['url'],
117
+ # 'Content': doc.content}
118
+ #
119
+ #
120
+ #
121
+ row = {'Title': doc.meta['name'],
122
+ 'Relevance': f"{doc.score:.3f}",
123
+ 'con': f"{doc.meta['entailment_info']['contradiction']:.2f}",
124
+ 'neu': f"{doc.meta['entailment_info']['neutral']:.2f}",
125
+ 'ent': f"{doc.meta['entailment_info']['entailment']:.2f}",
126
+ # 'url': doc.meta['url'],
127
+ 'Content': doc.content}
128
+ df.append(row)
129
+ st.dataframe(pd.DataFrame(df))#.style.apply(highlight))
130
+
131
+
132
  # if len(st.session_state.results['answers']) == 0:
133
  # st.info("""🤔 &nbsp;&nbsp; Haystack is unsure whether any of
134
  # the documents contain an answer to your question. Try to reformulate it!""")
 
157
  # st.markdown(
158
  # f"**Score:** {result['score']:.2f} - **Source:** {source}")
159
 
160
+ # def make_pretty(styler):
161
+ # styler.set_caption("Weather Conditions")
162
+ # # styler.format(rain_condition)
163
+ # styler.format_con(lambda v: v.float(v))
164
+ # styler.background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
165
+ # return styler
166
+
167
def highlight(s):
    """Return a red-background CSS declaration for every element of *s*.

    The result has one entry per element of *s*, so its length always
    matches the row/column being styled (the previous hard-coded length of 5
    did not match the 6-column results table).

    Args:
        s: Any sized collection of cell values (e.g. a row or a column).

    Returns:
        A list of ``'background-color: red'`` strings, ``len(s)`` long.
    """
    return ['background-color: red'] * len(s)
169
  main()
app_utils/backend_utils.py CHANGED
@@ -40,19 +40,33 @@ pipe = start_haystack()
40
  # the pipeline is not included as parameter of the following function,
41
  # because it is difficult to cache
42
  @st.cache(persist=True, allow_output_mutation=True)
43
- def query(question: str, retriever_top_k: int = 5):
44
- """Run query and get answers"""
45
  params = {"retriever": {"top_k": retriever_top_k}}
46
- results = pipe.run(question, params=params)
47
- print(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  return results
49
 
50
  @st.cache()
51
- def load_questions():
52
  """Load statements from file"""
53
  with open(STATEMENTS_PATH) as fin:
54
- questions = [line.strip() for line in fin.readlines()
55
  if not line.startswith('#')]
56
- return questions
57
 
58
 
 
40
  # the pipeline is not included as parameter of the following function,
41
  # because it is difficult to cache
42
  @st.cache(persist=True, allow_output_mutation=True)
43
def query(statement: str, retriever_top_k: int = 5):
    """Run the pipeline on a statement and aggregate the entailment scores.

    The module-level ``pipe`` is run with the retriever limited to
    ``retriever_top_k`` documents. Each returned document carries per-label
    entailment probabilities in ``doc.meta['entailment_info']``; these are
    averaged across documents, weighted by each document's relevance score,
    and stored under ``results['agg_entailment_info']``.

    Args:
        statement: The statement to verify.
        retriever_top_k: Value passed to the retriever as ``top_k``.

    Returns:
        The pipeline results dict, extended with an ``'agg_entailment_info'``
        dict holding ``'contradiction'``, ``'neutral'`` and ``'entailment'``
        values rounded to two decimals.
    """
    params = {"retriever": {"top_k": retriever_top_k}}
    results = pipe.run(statement, params=params)

    total_score = 0
    # Key order matters: it is the order shown to the user and the
    # tie-break order for ``max(..., key=...)`` in the caller.
    weighted = {'contradiction': 0, 'neutral': 0, 'entailment': 0}
    for doc in results['documents']:
        total_score += doc.score
        ent_info = doc.meta['entailment_info']
        for label in weighted:
            weighted[label] += ent_info[label] * doc.score

    if total_score:
        agg_entailment_info = {
            label: round(value / total_score, 2)
            for label, value in weighted.items()}
    else:
        # No documents retrieved (or all relevance scores are zero): there is
        # no evidence either way, so report a neutral verdict instead of
        # raising ZeroDivisionError.
        agg_entailment_info = {
            'contradiction': 0.0, 'neutral': 1.0, 'entailment': 0.0}

    results['agg_entailment_info'] = agg_entailment_info
    return results
63
 
64
  @st.cache()
65
def load_statements():
    """Load statements from file.

    Reads ``STATEMENTS_PATH`` as UTF-8, one statement per line. Surrounding
    whitespace is stripped; blank lines and lines whose first non-blank
    character is ``#`` are skipped, so callers never receive an empty
    statement.

    Returns:
        A list of statement strings, in file order.
    """
    with open(STATEMENTS_PATH, encoding="utf-8") as fin:
        stripped_lines = (line.strip() for line in fin)
        return [line for line in stripped_lines
                if line and not line.startswith('#')]
71
 
72
 
app_utils/frontend_utils.py CHANGED
@@ -11,5 +11,6 @@ def reset_results(*args):
11
  st.session_state.results = None
12
  st.session_state.raw_json = None
13
 
14
-
15
-
 
 
11
  st.session_state.results = None
12
  st.session_state.raw_json = None
13
 
14
# HTML verdict messages, keyed by entailment label.
entailment_html_messages = dict(
    entailment='The knowledge base seems to <span style="color:green">confirm</span> your statement',
    contradiction='The knowledge base seems to <span style="color:red">contradict</span> your statement',
    neutral='The knowledge base is <span style="color:darkgray">neutral</span> about your statement',
)
data/index/faiss_document_store.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91914481925284e0a0e193ad9fd90c820e226304d5ad35b25371afaa046281f4
3
- size 75456512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449d3708074efc81a6c59bbe4164b18f5ac45b28db530fee8eec5eff74504e45
3
+ size 73195520
data/index/my_faiss_index.faiss CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:285c67834753ed442f76e2ea907d984416ca0166aa2d48bf85106d0801fe3113
3
- size 153560109
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b971dc989aec74c8e34d808a4f9c713dc93c3ae247ec5cbefb0e00d98a3fcc56
3
+ size 150488109
data/statements.txt CHANGED
@@ -1,5 +1,20 @@
1
  Kurt Cobain died in 1994
2
  Kurt Cobain died in 2008
3
- Green Day are a heavy metal band
4
- Green Day are a punk rock band
5
- The Beatles' first album was released in 1985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Kurt Cobain died in 1994
2
  Kurt Cobain died in 2008
3
+ Green Day is a heavy metal band
4
+ Green Day is a punk rock band
5
+ The Beatles' first album was released in 1985
6
+ The Offspring is a French punk rock band
7
+ Kurt Cobain was a biker
8
+ Joe Cocker was American
9
+ The Eagles won a Grammy Award
10
+ Mick Jagger was part of the Beatles
11
+ London calling is a hit by The Clash
12
+ Red Hot Chili Peppers were formed in New York
13
+ The Smiths is a very long-lived band
14
+ U2 have participated in philanthropic initiatives
15
+ Sweet Home Alabama is a popular song by Lynyrd Skynyrd
16
+ Steve Vai collaborated with Frank Zappa
17
+ The White Stripes were a trio
18
+ The White Stripes were composed by Jack White and Meg White
19
+ Scorpions is a German trap band
20
+ Sepultura is a heavy metal band