Spaces:

egumasa
/

engagement-analyzer-demo

Running

App Files Files Community

egumasa committed on Feb 4, 2023

Commit

7cf7080

•

1 Parent(s): 2f6a316

updated model

Browse files

Files changed (13) hide show

.gitignore +5 -1
demo.py +1 -1
pipeline/__pycache__/custom_functions.cpython-39.pyc +0 -0
pipeline/__pycache__/post_processors.cpython-39.pyc +0 -0
pipeline/post_processors.py +134 -3
resources/__pycache__/colors.cpython-39.pyc +0 -0
resources/__pycache__/template_list.cpython-39.pyc +0 -0
resources/__pycache__/text_list.cpython-39.pyc +0 -0
resources/colors.py +13 -0
resources/template_list.py +48 -0
resources/text_list.py +0 -0
utils/__pycache__/util.cpython-39.pyc +0 -0
utils/util.py +1 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,6 @@
 test_run.py
-.DS_Store

 test_run.py
+.DS_Store
+analyzer.py
+main.py
+results/*
+inputtexts/*

demo.py CHANGED Viewed

@@ -32,7 +32,7 @@ st.set_page_config(page_title="ENGAGEMENT analyzer (beta ver 0.3)",
 @st.cache(allow_output_mutation=True)
 def load_model():
     # nlp = spacy.load("en_engagement_RoBERTa_context_flz")
-    nlp = spacy.load("en_engagement_spl_RoBERTa_acad")
     return (nlp)

 @st.cache(allow_output_mutation=True)
 def load_model():
     # nlp = spacy.load("en_engagement_RoBERTa_context_flz")
+    nlp = spacy.load("en_engagement_LSTM")
     return (nlp)

pipeline/__pycache__/custom_functions.cpython-39.pyc ADDED Viewed

Binary file (3.61 kB). View file

pipeline/__pycache__/post_processors.cpython-39.pyc CHANGED Viewed

Binary files a/pipeline/__pycache__/post_processors.cpython-39.pyc and b/pipeline/__pycache__/post_processors.cpython-39.pyc differ

pipeline/post_processors.py CHANGED Viewed

@@ -19,12 +19,138 @@ def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
     return data, columns
 def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                 spans_key: str = "sc",
                 attrs: List[str] = SPAN_ATTRS):
-    columns = attrs + ["Conf. score", 'span dep',
-                       "POS", "POS sequence", "head"]
     data = []
     for span, score in zip(doc.spans[spans_key], doc.spans[spans_key].attrs['scores']):
@@ -32,11 +158,16 @@ def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
         span_info.extend([str(getattr(span, attr)) for attr in attrs])
         span_info.append(score)
         span_info.append(span.root.dep_)
         span_info.append(span.root.tag_)
         span_info.append("_".join([t.tag_ for t in span]))
         span_info.append(span.root.head.norm_)
-        # span_info.append(span.root.head.dep_ == "ROOT")
         data.append(span_info)
     return data, columns

     return data, columns
+# def span_info_aggregator()
+def construction_classifier(doc, span):  # NOTE(review): `doc` is never read in this body
+    category = span.root.dep_  # fallback label: dependency relation of the span's root token
+    spanroot = span.root
+    ## Returns one label string for `span`; the checks below are independent `if`s, so a later match overwrites an earlier one.
+    span_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in span]  # "<norm>_<dep>" per token, e.g. "it_nsubj"
+    span_dep = [t.dep_ for t in span]
+    span_token = [t.norm_ for t in span]
+    span_tag = [t.tag_ for t in span]
+    c_dep = [c.dep_ for c in spanroot.children]  # children of the ORIGINAL span root (collected before the re-pointing below)
+    c_pos = [c.pos_ for c in spanroot.children]
+    c_tag = [c.tag_ for c in spanroot.children]  # NOTE(review): c_tag is not used anywhere below
+    ## nesting classifiers: climb from conjuncts, then from possessives, to their head and classify that head instead
+    if spanroot.dep_ == "conj":
+        while spanroot.dep_ == 'conj':
+            spanroot = spanroot.head
+    if spanroot.dep_ == "poss":
+        while spanroot.dep_ == 'poss':
+            spanroot = spanroot.head
+    ## Simple classifier: labels decided by the (possibly re-pointed) root's dependency relation
+    if spanroot.dep_ in ['pcomp']:
+        if str(spanroot.morph) in ["Aspect=Prog|Tense=Pres|VerbForm=Part"]:  # exact match on the full morphology string
+            category = "Gerund"
+    if spanroot.dep_ in ["pobj", "dobj", "obj", "iobj"]:
+        category = "Object"
+    if spanroot.dep_ in ["nsubj", "nsubjpass"]:
+        category = "Subject"
+    if spanroot.dep_ in ["cc"]:
+        category = "Coordinating conjunction"
+    if spanroot.dep_ in ["ROOT", "advcl"]:  # first matching pattern wins within this elif chain
+        if "ccomp" in c_dep and "auxpass" in c_dep and ("it_nsubjpass" in span_t_dep_ or "it_nsubj" in span_t_dep_):
+            category = "It is X that-clause"
+        elif "nsubj" in c_dep and "acomp" in c_dep and ("it_nsubjpass" in span_t_dep_ or "it_nsubj" in span_t_dep_):
+            category = "It is X that-clause"
+        elif "nsubj" in c_dep and "oprd" in c_dep and ("it_nsubjpass" in span_t_dep_ or "it_nsubj" in span_t_dep_):
+            category = "It is X that-clause"
+        elif "nsubj" in c_dep and "it" in span_token and spanroot.pos_ == "VERB":
+            category = "It VERB that-clause"
+        elif "expl" in c_dep and "NOUN" in c_pos:
+            category = "There is/are NOUN"
+        elif spanroot.pos_ in ["AUX", 'VERB']:
+            category = "Main verb"
+        else:
+            category = spanroot.dep_  # no pattern matched: report the dependency relation itself
+    if spanroot.dep_ in ['attr']:
+        c_head = [c.dep_ for c in spanroot.head.children]  # dependency labels of the head's children
+        if "expl" in c_head and "no_det" in span_t_dep_:  # head has an expletive child and the span contains "no" as determiner
+            category = "There is/are no NOUN"
+    # Modal verbs (decided by the fine-grained tag, not the dependency relation)
+    if spanroot.tag_ == "MD":
+        category = "Modal auxiliary"
+    # prep phrases
+    if spanroot.dep_ in ['prep']:
+        category = 'Prepositional Phrase'
+    # adverbial phrases
+    if spanroot.dep_ in ['advmod']:
+        category = "Adverbial modifier"
+        # adjectival complements
+    if spanroot.dep_ in ['acomp']:
+        category = "Adjectival complement"
+    if spanroot.dep_ in ['neg']:
+        category = "Negative particle"
+    # Preconjunctions
+    if spanroot.dep_ in ['preconj']:
+        category = "Conjunction"
+    # Adverbial clauses ('advcl' was also handled in the ROOT/advcl check above; a match here overwrites that label)
+    ## Check the status of the adverbial clauses carefully
+    if spanroot.dep_ in ['advcl', 'mark', 'acl']:
+        if "mark" in span_dep:
+            category = "Finite adverbial clause"
+        if str(spanroot.morph) in ["Aspect=Prog|Tense=Pres|VerbForm=Part"] and "aux" not in c_dep:  # separate `if`: overrides the finite label when both hold
+            category = "Non-finite adv clause"
+        # Check whether it has a subject or not
+        # elif "nsubj" in [c.dep_ for c in spanroot.children]:
+        #     category = "Adverbial clauses"
+        # else:
+        #     category = "Other advcl"
+    if spanroot.dep_ in ['relcl', 'ccomp']:
+        head = spanroot.head
+        if ";" in [t.norm_ for t in head.children]:  # the head has a semicolon among its children
+            category = "Main verb"
+        elif "nsubj" not in span_dep:  # no subject token inside the span
+            category = "Dependent verb"
+    if spanroot.dep_ in ['dep']:
+        if spanroot.head.dep_ in ['ROOT', 'ccomp'] and spanroot.head.pos_ in ['AUX', 'VERB'] and spanroot.pos_ in ['AUX', 'VERB']:
+            if spanroot.morph == spanroot.head.morph:  # same morphology as the head verb
+                category = "Main verb"
+            else:
+                category = "Dependent verb"
+    if span.label_ == "CITATION":  # checked last, so a citation label overrides any grammatical label set above
+        if "NNP" in span_tag or "NNPS" in span_tag:
+            if span_dep[0] == 'punct' and span_dep[-1] == 'punct':  # span starts and ends with punctuation
+                category = "Parenthetical Citation"
+            elif span_tag[0] in ["NNP", "NNPS"]:  # span starts with a proper noun
+                category = "Narrative Citation"
+        else:
+            category = "Other Citation"  # no proper-noun tag anywhere in the span
+    return category
 def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                 spans_key: str = "sc",
                 attrs: List[str] = SPAN_ATTRS):
+    columns = attrs + ["Conf. score", "sent no.", "grammatical realization", 'span dep', "ner",
+                       "POS", 'span dep seq', "POS sequence", "head", "children", "morphology", ]
     data = []
+    # data = span_info_aggregator(doc, columns)
+    sentences = {s: i for i, s in enumerate(doc.sents)}
     for span, score in zip(doc.spans[spans_key], doc.spans[spans_key].attrs['scores']):
         span_info.extend([str(getattr(span, attr)) for attr in attrs])
         span_info.append(score)
+        span_info.append(sentences[span.sent])
+        span_info.append(construction_classifier(doc, span))
         span_info.append(span.root.dep_)
+        span_info.append(span.root.ent_type_)
         span_info.append(span.root.tag_)
+        span_info.append("_".join([t.dep_ for t in span]))
         span_info.append("_".join([t.tag_ for t in span]))
         span_info.append(span.root.head.norm_)
+        span_info.append("_".join([c.dep_ for c in span.root.children]))
+        span_info.append(span.root.morph)
         data.append(span_info)
     return data, columns

resources/__pycache__/colors.cpython-39.pyc ADDED Viewed

Binary file (442 Bytes). View file

resources/__pycache__/template_list.cpython-39.pyc ADDED Viewed

Binary file (2.35 kB). View file

resources/__pycache__/text_list.cpython-39.pyc ADDED Viewed

Binary file (121 kB). View file

resources/colors.py ADDED Viewed

	@@ -0,0 +1,13 @@

+COLORS_1 = {  # label name -> hex color string; presumably keyed by span label (e.g. "CITATION") -- confirm against the renderer
+            "ENTERTAIN": "#82b74b",
+            "DENY": '#c94c4c',
+            "COUNTER": "#eea29a",
+            "PRONOUNCE": "#92a8d1",
+            "ENDORSE": "#034f84",
+            "CITATION": "#b2b2b2",
+            "MONOGLOSS": "#3e4444",
+            "ATTRIBUTE": "#f7786b",
+            "ATTRIBUTION": "#f7786b",  # same color as "ATTRIBUTE"
+            "PROCLAIM": "#92a8d1"  # same color as "PRONOUNCE"
+        }

resources/template_list.py ADDED Viewed

	@@ -0,0 +1,48 @@

+TPL_ENT = """
+<mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
+    {text}
+    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{label}</span>
+</mark>
+"""
+TPL_SPANS = """
+<div class="spans" style="line-height: 4.5;">
+    {text}
+    {span_slices}
+    {span_starts}
+</div>
+"""
+TPL_SPAN = """
+<span style="font-weight: bold; display: inline-block; line-height: 3; padding-bottom: 12px;position: relative;">
+    {text}
+    {span_slices}
+    {span_starts}
+</span>
+"""
+TPL_SPAN_SLICE = """
+<span style="background: {bg}; top: {top_offset}px;  display: inline-block; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
+</span>
+"""
+TPL_SPAN_START = """
+<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
+    <span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
+        {label}{kb_link}
+    </span>
+</span>
+"""
+TPL_SPAN_START_RTL = """
+<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
+    <span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
+        {label}{kb_link}
+    </span>
+</span>
+"""
+DEFAULT_TEXT = """Tickner said regardless of the result, the royal commission was a waste of money and he would proceed with a separate inquiry into the issue headed by Justice Jane Matthews. His attack came as the Aboriginal women involved in the case demanded a female minister examine the religious beliefs they claim are inherent in their fight against a bridge to the island near Goolwa in South Australia."""

resources/text_list.py ADDED Viewed

The diff for this file is too large to render. See raw diff

utils/__pycache__/util.cpython-39.pyc CHANGED Viewed

Binary files a/utils/__pycache__/util.cpython-39.pyc and b/utils/__pycache__/util.cpython-39.pyc differ

utils/util.py CHANGED Viewed

@@ -8,6 +8,7 @@ def preprocess(text):
     text = re.sub('\n', ' ', text)
     text = re.sub('\s+', " ", text)
     text = re.sub('&&&&&&&&#&#&#&#&', '\n\n', text)
     return text

     text = re.sub('\n', ' ', text)
     text = re.sub('\s+', " ", text)
     text = re.sub('&&&&&&&&#&#&#&#&', '\n\n', text)
+    text = re.sub("--- Para SEP ---", '\n', text)
     return text