Spaces:

browndw
/

docuscope-demo-spacy

Runtime error

App Files Community

browndw committed on Jul 13, 2022

Commit

e57c7ed

1 Parent(s): b89a8fa

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -53

app.py CHANGED Viewed

@@ -1,59 +1,52 @@
-import streamlit as st
 import spacy
-from spacytextblob.spacytextblob import SpacyTextBlob
-from spacy_streamlit import visualize_ner
-st.header("DocuScope Demo (spaCy)")
-st.markdown("This demo uses a trained spaCy model (**en_docusco_spacy**) to identify DocuScope categories in text.")
-st.markdown("The is one of a family of models trained on DocuScope.")
-st.markdown("NOTE: this demo is public - please don't enter confidential text")
-# Streamlit text boxes
-# Text source: https://www.theguardian.com/film/2016/dec/22/jaws-steven-spielberg-1975-review-derek-malcolm
-text = st.text_area('Enter text:', value="Jaws is a splendidly shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.")
-nlp = spacy.load("en_docusco_spacy")
-if text:
-    doc = nlp(text)
-    visualize_ner(doc, labels=nlp.get_pipe("ner").labels)
-st.header("Label Explanation")
-st.markdown("**Academic Terms:** Abstract, rare, specialized, or disciplinary-specific terms that are indicative of informationally dense writing")
-st.markdown("**Academic Writing Moves:** Phrases and terms that indicate academic writing moves, which are common in research genres and are derived from the work of Swales (1981) and Cotos et al. (2015, 2017)")
-st.markdown("**Character:** References multiple dimensions of a character or human being as a social agent, both individual and collective")
-st.markdown("**Citation:** Language that indicates the attribution of information to, or citation of, another source.")
-st.markdown("**Citation Authorized:** Referencing the citation of another source that is represented as true and not arguable")
-st.markdown("**Citation Hedged:** Referencing the citation of another source that is presented as arguable")
-st.markdown("**Confidence Hedged:** Referencing language that presents a claim as uncertain")
-st.markdown("**Confidence High:** Referencing language that presents a claim with certainty")
-st.markdown("**Confidence Low:** Referencing language that presents a claim as extremely unlikely")
-st.markdown("**Contingent:** Referencing contingency, typically contingency in the world, rather than contingency in one's knowledge")
-st.markdown("**Description:** Language that evokes sights, sounds, smells, touches and tastes, as well as scenes and objects")
-st.markdown("**Facilitate:** Language that enables or directs one through specific tasks and actions")
-st.markdown("**First Person:** This cluster captures first person.")
-st.markdown("**Force Stressed:** Language that is forceful and stressed, often using emphatics, comparative forms, or superlative forms")
-st.markdown("**Future:** Referencing future actions, states, or desires")
-st.markdown("**Information Change:** Referencing changes of information, particularly changes that are more neutral")
-st.markdown("**Information Change Negative:** Referencing negative change")
-st.markdown("**Information Change Positive:** Referencing positive change")
-st.markdown("**Information Exposition:** Information in the form of expository devices, or language that describes or explains, frequently in regards to quantities and comparisons")
-st.markdown("**Information Place:** Language designating places")
-st.markdown("**Information Report Verbs:** Informational verbs and verb phrases of reporting")
-st.markdown("**Information States:** Referencing information states, or states of being")
-st.markdown("**Information Topics:** Referencing topics, usually nominal subjects or objects, that indicate the “aboutness” of a text")
-st.markdown("**Inquiry:** Referencing inquiry, or language that points to some kind of inquiry or investigation")
-st.markdown("**Interactive:** Addresses from the author to the reader or from persons in the text to other persons. The address comes in the language of everyday conversation, colloquy, exchange, questions, attention-getters, feedback, interactive genre markers, and the use of the second person.")
-st.markdown("**Metadiscourse Cohesive:** The use of words to build cohesive markers that help the reader navigate the text and signal linkages in the text, which are often additive or contrastive")
-st.markdown("**Metadiscourse Interactive:** The use of words to build cohesive markers that interact with the reader")
-st.markdown("**Narrative:** Language that involves people, description, and events extending in time")
-st.markdown("**Negative:** Referencing dimensions of negativity, including negative acts, emotions, relations, and values")
-st.markdown("**Positive:** Referencing dimensions of positivity, including actions, emotions, relations, and values")
-st.markdown("**Public Terms:** Referencing public terms, concepts from public language, media, the language of authority, institutions, and responsibility")
-st.markdown("**Reasoning:** Language that has a reasoning focus, supporting inferences about cause, consequence, generalization, concession, and linear inference either from premise to conclusion or conclusion to premise")
-st.markdown("**Responsibility:** Referencing the language of responsibility")
-st.markdown("**Strategic:** This dimension is active when the text structures strategies activism, advantage-seeking, game-playing cognition, plans, and goal-seeking.")
-st.markdown("**Syntactic Complexity:** The features in this category are often what are called “function words,” like determiners and prepositions.")
-st.markdown("**Uncertainty:** References uncertainty, when confidence levels are unknown.")
-st.markdown("**Updates:** References updates that anticipate someone searching for information and receiving it.")

+# Core Pkgs
+import streamlit as st
+# NLP Pkgs
+import spacy_streamlit
 import spacy
+import re
+nlp = spacy.load('en_docusco_spacy')
+def pre_process(txt):
+    txt = re.sub(r'\bits\b', 'it s', txt)
+    txt = re.sub(r'\bIts\b', 'It s', txt)
+    txt = " ".join(txt.split())
+    return(txt)
+#import os
+#from PIL import Image
+def main():
+	st.title("DocuScope and Part-of-Speech Tagging with spaCy")
+	st.markdown("This demo uses a trained spaCy model ([en_docusco_spacy](https://huggingface.co/browndw/en_docusco_spacy)) to identify DocuScope categories in text.")
+	st.markdown("It is also trained on the [CLAWS7](https://ucrel.lancs.ac.uk/claws7tags.html) part-of-speech tagset.")
+	st.markdown("NOTE: this demo is public - please don't enter confidential text")
+        #our_image = Image.open(os.path.join('SpaCy_logo.svg.png'))
+	#st.image(our_image)
+	menu = ["Tokens","DocuScope"]
+	choice = st.sidebar.selectbox("Menu",menu)
+	if choice == "Tokens":
+		st.subheader("Tokenization")
+		raw_text = st.text_area('Enter text:', value="Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.")
+		docx = pre_process(raw_text)
+		docx = nlp(docx)
+		if st.button("Tokenize"):
+			spacy_streamlit.visualize_tokens(docx,attrs=['text','tag_', 'ent_iob_', 'ent_type_'])
+	elif choice == "DocuScope":
+		st.subheader("Named Entity Recognition")
+		raw_text = st.text_area('Enter text:', value="Jaws is a shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.")
+		docx = pre_process(raw_text)
+		docx = nlp(docx)
+		spacy_streamlit.visualize_ner(docx,labels=nlp.get_pipe('ner').labels)
+if __name__ == '__main__':
+	main()