apjanco committed on
Commit
7136222
1 Parent(s): 0126f24

Working on entity buttons that update the search query

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import streamlit as st
2
  import textract
3
  import tempfile
4
- from typing import List
5
  import spacy
6
  from spacy.tokens import DocBin, Doc, Span
7
  from collections import Counter
@@ -13,6 +12,8 @@ with open("style.css") as f:
13
  st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)
14
 
15
  st.title('Index and Search a Collection of Documents')
 
 
16
 
17
  @st.cache
18
  def download_model(select_model:str):
@@ -22,7 +23,7 @@ def download_model(select_model:str):
22
  except Exception as e:
23
  return False
24
 
25
- def search_docs(query:str, documents:List[Doc], nlp) -> List[Span]:
26
  terms = query.split('|')
27
  patterns = [nlp.make_doc(text) for text in terms]
28
  matcher = PhraseMatcher(nlp.vocab)
@@ -36,6 +37,9 @@ def search_docs(query:str, documents:List[Doc], nlp) -> List[Span]:
36
 
37
  return results
38
 
 
 
 
39
  models = srsly.read_json('models.json')
40
  models[''] = [] #require the user to choose a language
41
  languages = models.keys()
@@ -53,8 +57,10 @@ if language:
53
 
54
 
55
  uploaded_files = st.file_uploader("Select files to process", accept_multiple_files=True)
56
- query = st.sidebar.text_input(label="Enter your query (use | to separate search terms)", value="...")
 
57
  documents = []
 
58
  for uploaded_file in uploaded_files:
59
  file_type = uploaded_file.type
60
  file_suffix = '.' + uploaded_file.name.split('.')[-1]
@@ -66,24 +72,24 @@ if language:
66
  doc = nlp(text)
67
  doc.user_data['filename'] = uploaded_file.name
68
  documents.append(doc)
69
- ent_freq = Counter([ent.label_ for ent in doc.ents])
70
- for key, value in ent_freq.items():
71
- if st.sidebar.button(key, key=key):
72
- #There are multiple identical st.button widgets with key='DATE'.
73
- #To fix this, please make sure that the key argument is unique for each st.button you create.
74
- # need to pool results from several files, then create buttons and search links
75
- st.sidebar.write(value)
76
- for ent in doc.ents:
77
- if ent.label_ == key:
78
- #TODO Make clickable button that updates query
79
- #TODO also return distinct ents
80
- st.sidebar.write(ent.text)
81
 
82
  except Exception as e:
83
  st.error(e)
84
-
 
 
 
 
 
 
 
 
 
85
  results_container = st.container()
86
- results = search_docs(query, documents,nlp)
87
  for result in results:
88
  doc = result.doc
89
  sent_before = doc[result.sent.start:result.start]
 
1
  import streamlit as st
2
  import textract
3
  import tempfile
 
4
  import spacy
5
  from spacy.tokens import DocBin, Doc, Span
6
  from collections import Counter
 
12
  st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)
13
 
14
  st.title('Index and Search a Collection of Documents')
15
+ if 'query' not in st.session_state:
16
+ st.session_state['query'] = ''
17
 
18
  @st.cache
19
  def download_model(select_model:str):
 
23
  except Exception as e:
24
  return False
25
 
26
+ def search_docs(query:str, documents:list[Doc], nlp) -> list[Span]:
27
  terms = query.split('|')
28
  patterns = [nlp.make_doc(text) for text in terms]
29
  matcher = PhraseMatcher(nlp.vocab)
 
37
 
38
  return results
39
 
40
+ def update_query(arg:str):
41
+ st.session_state.query = arg
42
+
43
  models = srsly.read_json('models.json')
44
  models[''] = [] #require the user to choose a language
45
  languages = models.keys()
 
57
 
58
 
59
  uploaded_files = st.file_uploader("Select files to process", accept_multiple_files=True)
60
+ st.session_state.query = st.sidebar.text_input(label="Enter your query (use | to separate search terms)", value="...")
61
+
62
  documents = []
63
+ all_ents = []
64
  for uploaded_file in uploaded_files:
65
  file_type = uploaded_file.type
66
  file_suffix = '.' + uploaded_file.name.split('.')[-1]
 
72
  doc = nlp(text)
73
  doc.user_data['filename'] = uploaded_file.name
74
  documents.append(doc)
75
+ for ent in doc.ents:
76
+ all_ents.append(ent)
77
+
 
 
 
 
 
 
 
 
 
78
 
79
  except Exception as e:
80
  st.error(e)
81
+
82
+ ents_container = st.container()
83
+ label_freq = Counter([ent.label_ for ent in all_ents])
84
+ for key, value in label_freq.items():
85
+ if st.sidebar.button(key, key=key):
86
+ st.sidebar.write(value)
87
+ text_freq = Counter([ent.text for ent in all_ents if ent.label_ == key])
88
+ for text in text_freq.keys():
89
+ st.sidebar.button(f'{text} ({text_freq[text]})', on_click=update_query, args=(text, ))
90
+
91
  results_container = st.container()
92
+ results = search_docs(st.session_state.query, documents,nlp)
93
  for result in results:
94
  doc = result.doc
95
  sent_before = doc[result.sent.start:result.start]