apjanco
committed on
Commit
•
4c6bd94
1
Parent(s):
3dfae67
progress on search
Browse files
app.py
CHANGED
@@ -22,9 +22,10 @@ def download_model(select_model:str):
|
|
22 |
except Exception as e:
|
23 |
return False
|
24 |
|
25 |
-
def search_docs(query:str,
|
26 |
terms = query.split('|')
|
27 |
patterns = [nlp.make_doc(text) for text in terms]
|
|
|
28 |
matcher.add(query, patterns)
|
29 |
|
30 |
results = []
|
@@ -49,10 +50,10 @@ if language:
|
|
49 |
nlp = spacy.load(select_model)
|
50 |
|
51 |
nlp.max_length = 1200000
|
52 |
-
|
53 |
|
54 |
uploaded_files = st.file_uploader("Select files to process", accept_multiple_files=True)
|
55 |
-
query = st.sidebar.text_input(label="Enter your query", value="...")
|
56 |
documents = []
|
57 |
for uploaded_file in uploaded_files:
|
58 |
file_type = uploaded_file.type
|
@@ -70,12 +71,25 @@ if language:
|
|
70 |
st.sidebar.write(value)
|
71 |
for ent in doc.ents:
|
72 |
if ent.label_ == key:
|
|
|
|
|
73 |
st.sidebar.write(ent.text)
|
74 |
|
75 |
except Exception as e:
|
76 |
st.error(e)
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
st.write(results)
|
80 |
#st.download_button('Download', '', 'text/plain')
|
81 |
|
|
|
22 |
except Exception as e:
|
23 |
return False
|
24 |
|
25 |
+
def search_docs(query:str, documents:List[Doc], nlp):
|
26 |
terms = query.split('|')
|
27 |
patterns = [nlp.make_doc(text) for text in terms]
|
28 |
+
matcher = PhraseMatcher(nlp.vocab)
|
29 |
matcher.add(query, patterns)
|
30 |
|
31 |
results = []
|
|
|
50 |
nlp = spacy.load(select_model)
|
51 |
|
52 |
nlp.max_length = 1200000
|
53 |
+
|
54 |
|
55 |
uploaded_files = st.file_uploader("Select files to process", accept_multiple_files=True)
|
56 |
+
query = st.sidebar.text_input(label="Enter your query (use | to separate search terms)", value="...")
|
57 |
documents = []
|
58 |
for uploaded_file in uploaded_files:
|
59 |
file_type = uploaded_file.type
|
|
|
71 |
st.sidebar.write(value)
|
72 |
for ent in doc.ents:
|
73 |
if ent.label_ == key:
|
74 |
+
# TODO: make a clickable button that updates the query
|
75 |
+
# TODO: also return distinct entities
|
76 |
st.sidebar.write(ent.text)
|
77 |
|
78 |
except Exception as e:
|
79 |
st.error(e)
|
80 |
+
|
81 |
+
results_container = st.container()
|
82 |
+
results = search_docs(query, documents,nlp)
|
83 |
+
for result in results:
|
84 |
+
doc = result.doc
|
85 |
+
sent_before = doc[result.sent.start:result.start]
|
86 |
+
sent_after = doc[result.end:result.sent.end]
|
87 |
+
results_container.markdown(f"""
|
88 |
+
<div class="card">
|
89 |
+
<div class='text'>{sent_before.text} <span class="result"> {result.text}</span>{sent_after.text}</div>
|
90 |
+
</div>
|
91 |
+
""", unsafe_allow_html=True)
|
92 |
+
|
93 |
st.write(results)
|
94 |
#st.download_button('Download', '', 'text/plain')
|
95 |
|