turn to DEEP work
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import textract
|
|
3 |
import tempfile
|
4 |
from typing import List
|
5 |
import spacy
|
6 |
-
from spacy.tokens import DocBin, Doc
|
7 |
from collections import Counter
|
8 |
import srsly
|
9 |
from spacy.matcher import PhraseMatcher
|
@@ -22,7 +22,7 @@ def download_model(select_model:str):
|
|
22 |
except Exception as e:
|
23 |
return False
|
24 |
|
25 |
-
def search_docs(query:str, documents:List[Doc], nlp):
|
26 |
terms = query.split('|')
|
27 |
patterns = [nlp.make_doc(text) for text in terms]
|
28 |
matcher = PhraseMatcher(nlp.vocab)
|
@@ -34,7 +34,7 @@ def search_docs(query:str, documents:List[Doc], nlp):
|
|
34 |
for match in matches:
|
35 |
results.append(doc[match[1]:match[2]])
|
36 |
|
37 |
-
return results
|
38 |
|
39 |
models = srsly.read_json('models.json')
|
40 |
models[''] = [] #require the user to choose a language
|
@@ -64,10 +64,14 @@ if language:
|
|
64 |
text = textract.process(temp.name)
|
65 |
text = text.decode('utf-8')
|
66 |
doc = nlp(text)
|
|
|
67 |
documents.append(doc)
|
68 |
ent_freq = Counter([ent.label_ for ent in doc.ents])
|
69 |
for key, value in ent_freq.items():
|
70 |
-
if st.sidebar.button(key):
|
|
|
|
|
|
|
71 |
st.sidebar.write(value)
|
72 |
for ent in doc.ents:
|
73 |
if ent.label_ == key:
|
@@ -85,11 +89,10 @@ if language:
|
|
85 |
sent_before = doc[result.sent.start:result.start]
|
86 |
sent_after = doc[result.end:result.sent.end]
|
87 |
results_container.markdown(f"""
|
88 |
-
<div
|
89 |
-
<div class='text'>{sent_before.text} <span class="
|
90 |
</div>
|
91 |
""", unsafe_allow_html=True)
|
92 |
|
93 |
-
st.write(results)
|
94 |
#st.download_button('Download', '', 'text/plain')
|
95 |
|
|
|
3 |
import tempfile
|
4 |
from typing import List
|
5 |
import spacy
|
6 |
+
from spacy.tokens import DocBin, Doc, Span
|
7 |
from collections import Counter
|
8 |
import srsly
|
9 |
from spacy.matcher import PhraseMatcher
|
|
|
22 |
except Exception as e:
|
23 |
return False
|
24 |
|
25 |
+
def search_docs(query:str, documents:List[Doc], nlp) -> List[Span]:
|
26 |
terms = query.split('|')
|
27 |
patterns = [nlp.make_doc(text) for text in terms]
|
28 |
matcher = PhraseMatcher(nlp.vocab)
|
|
|
34 |
for match in matches:
|
35 |
results.append(doc[match[1]:match[2]])
|
36 |
|
37 |
+
return results
|
38 |
|
39 |
models = srsly.read_json('models.json')
|
40 |
models[''] = [] #require the user to choose a language
|
|
|
64 |
text = textract.process(temp.name)
|
65 |
text = text.decode('utf-8')
|
66 |
doc = nlp(text)
|
67 |
+
doc.user_data['filename'] = uploaded_file.name
|
68 |
documents.append(doc)
|
69 |
ent_freq = Counter([ent.label_ for ent in doc.ents])
|
70 |
for key, value in ent_freq.items():
|
71 |
+
if st.sidebar.button(key, key=key):
|
72 |
+
#There are multiple identical st.button widgets with key='DATE'.
|
73 |
+
#To fix this, please make sure that the key argument is unique for each st.button you create.
|
74 |
+
# need to pool results from several files, then create buttons and search links
|
75 |
st.sidebar.write(value)
|
76 |
for ent in doc.ents:
|
77 |
if ent.label_ == key:
|
|
|
89 |
sent_before = doc[result.sent.start:result.start]
|
90 |
sent_after = doc[result.end:result.sent.end]
|
91 |
results_container.markdown(f"""
|
92 |
+
<div style="border: 2px solid #202d89;border-radius: 15px;"><p>{result.doc.user_data['filename']}</p>
|
93 |
+
<div class='text'>{sent_before.text} <span class="text_mark"> {result.text}</span>{sent_after.text}</div>
|
94 |
</div>
|
95 |
""", unsafe_allow_html=True)
|
96 |
|
|
|
97 |
#st.download_button('Download', '', 'text/plain')
|
98 |
|
style.css
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
.card {
|
2 |
text-align: center;
|
3 |
border-radius: 10px;
|
4 |
-
background: linear-gradient(
|
5 |
90deg,
|
6 |
rgba(77, 241, 241, 1) 0%,
|
7 |
rgba(82, 124, 230, 1) 43%,
|
8 |
rgba(219, 0, 132, 1) 100%
|
9 |
-
);
|
10 |
margin-top: 15%;
|
11 |
margin-bottom: 5%;
|
12 |
padding-top: 15%;
|
@@ -70,7 +70,7 @@
|
|
70 |
}
|
71 |
|
72 |
.text_mark {
|
73 |
-
background
|
74 |
color: white;
|
75 |
padding: 0.5%;
|
76 |
border-radius: 5px;
|
|
|
1 |
.card {
|
2 |
text-align: center;
|
3 |
border-radius: 10px;
|
4 |
+
/* background: linear-gradient(
|
5 |
90deg,
|
6 |
rgba(77, 241, 241, 1) 0%,
|
7 |
rgba(82, 124, 230, 1) 43%,
|
8 |
rgba(219, 0, 132, 1) 100%
|
9 |
+
); */
|
10 |
margin-top: 15%;
|
11 |
margin-bottom: 5%;
|
12 |
padding-top: 15%;
|
|
|
70 |
}
|
71 |
|
72 |
.text_mark {
|
73 |
+
background: radial-gradient(circle, rgba(63,94,251,1) 0%, rgba(252,70,107,1) 100%);
|
74 |
color: white;
|
75 |
padding: 0.5%;
|
76 |
border-radius: 5px;
|