Spaces:

browndw
/

docuscope-demo-spacy

Runtime error

App Files Files Community

docuscope-demo-spacy / app.py

browndw

Update app.py

e57c7ed almost 2 years ago

raw history blame contribute delete

No virus

1.94 kB

	# Core Pkgs
	import streamlit as st

	# NLP Pkgs
	import spacy_streamlit
	import spacy
	import re

	# Load the trained spaCy pipeline once at import time; main() uses it both to
	# process the entered text and to list the labels of its 'ner' component.
	nlp = spacy.load('en_docusco_spacy')

	def pre_process(txt: str) -> str:
	    """Normalize raw user text before it is handed to the spaCy pipeline.

	    Two things happen, in order:

	    1. The whole words ``its`` and ``Its`` are split into ``it s`` and
	       ``It s``. Other casings (e.g. ``ITS``) and the contraction ``it's``
	       are left untouched.
	    2. Every run of whitespace (spaces, tabs, newlines) is collapsed to a
	       single space and leading/trailing whitespace is removed.

	    Args:
	        txt: The text to normalize.

	    Returns:
	        The normalized text; an empty or whitespace-only input yields ``""``.
	    """
	    # NOTE(review): the "it s" split presumably mirrors the tokenization the
	    # model was trained on -- confirm against the model's training data.
	    txt = re.sub(r'\bits\b', 'it s', txt)
	    txt = re.sub(r'\bIts\b', 'It s', txt)
	    # str.split() with no arguments splits on any whitespace run and drops
	    # empty fields, so re-joining with one space normalizes all spacing.
	    return " ".join(txt.split())

	#import os
	#from PIL import Image


	def main():
	    """Render the Streamlit demo page.

	    Shows the title and introductory notes, then offers two views selected
	    from a sidebar menu:

	    * "Tokens": a text box plus a "Tokenize" button; when the button is
	      pressed, the text is pre-processed, run through ``nlp`` and shown as a
	      token table (text, tag, entity IOB flag, entity type).
	    * "DocuScope": a text box whose content is pre-processed, run through
	      ``nlp`` and rendered with the entity visualizer, using every label of
	      the pipeline's ``'ner'`` component.
	    """
	    st.title("DocuScope and Part-of-Speech Tagging with spaCy")
	    st.markdown("This demo uses a trained spaCy model ([en_docusco_spacy](https://huggingface.co/browndw/en_docusco_spacy)) to identify DocuScope categories in text.")
	    st.markdown("It is also trained on the [CLAWS7](https://ucrel.lancs.ac.uk/claws7tags.html) part-of-speech tagset.")
	    st.markdown("NOTE: this demo is public - please don't enter confidential text")

	    #our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
	    #st.image(our_image)

	    # The same sample sentence pre-fills the text box in both views.
	    sample_text = "Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it."

	    menu = ["Tokens", "DocuScope"]
	    choice = st.sidebar.selectbox("Menu", menu)

	    if choice == "Tokens":
	        st.subheader("Tokenization")
	        raw_text = st.text_area('Enter text:', value=sample_text)
	        # Only run the pipeline once the user asks for the table; the parsed
	        # document is not used for anything else in this view.
	        if st.button("Tokenize"):
	            docx = nlp(pre_process(raw_text))
	            spacy_streamlit.visualize_tokens(docx, attrs=['text', 'tag_', 'ent_iob_', 'ent_type_'])

	    elif choice == "DocuScope":
	        st.subheader("Named Entity Recognition")
	        raw_text = st.text_area('Enter text:', value=sample_text)
	        docx = nlp(pre_process(raw_text))
	        # Pass every label known to the 'ner' component so all categories
	        # are offered by the visualizer.
	        spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)


	# Run the app only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()