# Core Pkgs
import streamlit as st

# NLP Pkgs
import spacy_streamlit
import spacy
import re

# Load the trained DocuScope/CLAWS7 spaCy model
nlp = spacy.load('en_docusco_spacy')


def pre_process(txt):
    # Split "its"/"Its" into "it s" so the tagger handles the clitic separately,
    # then collapse any repeated whitespace
    txt = re.sub(r'\bits\b', 'it s', txt)
    txt = re.sub(r'\bIts\b', 'It s', txt)
    txt = " ".join(txt.split())
    return txt


#import os
#from PIL import Image


def main():
    st.title("DocuScope and Part-of-Speech Tagging with spaCy")
    st.markdown("This demo uses a trained spaCy model ([en_docusco_spacy](https://huggingface.co/browndw/en_docusco_spacy)) to identify DocuScope categories in text.")
    st.markdown("It is also trained on the [CLAWS7](https://ucrel.lancs.ac.uk/claws7tags.html) part-of-speech tagset.")
    st.markdown("NOTE: this demo is public - please don't enter confidential text.")

    #our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
    #st.image(our_image)

    # Sidebar menu: token-level view vs. DocuScope category view
    menu = ["Tokens", "DocuScope"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Tokens":
        st.subheader("Tokenization")
        raw_text = st.text_area('Enter text:', value="Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.")
        docx = pre_process(raw_text)
        docx = nlp(docx)
        if st.button("Tokenize"):
            # Show each token with its CLAWS7 tag and entity (DocuScope) annotation
            spacy_streamlit.visualize_tokens(docx, attrs=['text', 'tag_', 'ent_iob_', 'ent_type_'])
    elif choice == "DocuScope":
        st.subheader("Named Entity Recognition")
        raw_text = st.text_area('Enter text:', value="Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.")
        docx = pre_process(raw_text)
        docx = nlp(docx)
        # Highlight spans using the model's NER labels (the DocuScope categories)
        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)


if __name__ == '__main__':
    main()