Spaces:

egumasa
/

engagement-analyzer-demo

Running

App Files Community

egumasa committed on Feb 3, 2023

Commit

63e4e17

•

1 Parent(s): 349cb79

new pipeline

Browse files

Files changed (3) hide show

demo.py +3 -1
requirements.txt +2 -1
utils/util.py +84 -7

demo.py CHANGED Viewed

@@ -8,7 +8,7 @@ from spacy.tokens import Doc
 import streamlit as st
-from utils.util import delete_overlapping_span
 from utils.visualize import visualize_spans
 # nlp = spacy.load(
@@ -261,6 +261,7 @@ with st.form("my_form"):
 # st.write(text)
 # delete_span(doc.spans['sc'])
 delete_overlapping_span(doc.spans['sc'])
 visualize_spans(
@@ -292,6 +293,7 @@ st.subheader("Bibliography")
 st.markdown("""
 * Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
 * Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
 * Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
 """)

 import streamlit as st
+from utils.util import delete_overlapping_span, cleanup_justify
 from utils.visualize import visualize_spans
 # nlp = spacy.load(
 # st.write(text)
 # delete_span(doc.spans['sc'])
+cleanup_justify(doc, doc.spans['sc'])
 delete_overlapping_span(doc.spans['sc'])
 visualize_spans(
 st.markdown("""
 * Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
 * Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
+* Ryshina-Pankova, M. (2014). Exploring academic argumentation in course-related blogs through ENGAGEMENT. In G. Thompson & L. Alba-Juez (Eds.), _Pragmatics & Beyond New Series (Vol. 242, pp. 281–302)_. John Benjamins Publishing Company. https://doi.org/10.1075/pbns.242.14rys
 * Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
 """)

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ spacy_streamlit
 # https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
 # https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
 # https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
-https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl

 # https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
 # https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
 # https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
+# https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl
+https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad2/resolve/main/en_engagement_spl_RoBERTa_acad2-any-py3-none-any.whl

utils/util.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import re
 from collections import Counter
 def preprocess(text):
@@ -10,6 +11,17 @@ def preprocess(text):
     return text
 def delete_overlapping_span(span_sc: dict):
     # print(span_sc)
     start_token_list = [spn.start for spn in span_sc]
@@ -21,7 +33,7 @@ def delete_overlapping_span(span_sc: dict):
     info = {}
     for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
-                                     start=1):
         res = {
             'score': score,
             'spn': spn,
@@ -54,10 +66,75 @@ def delete_overlapping_span(span_sc: dict):
             id_del.append(n)
     # print(id_comp)
-    for n, idx in enumerate(id_del):
-        # print(idx)
-        try:
-            del span_sc[idx - n]
-        except IndexError:
-            continue

 import re
 from collections import Counter
+from spacy.tokens import SpanGroup
 def preprocess(text):
     return text
+def del_spans(span_sc, indexes: list):  # delete entries of span_sc selected by the integer positions listed in indexes
+    indexes.sort(  # NOTE: list.sort() reorders the caller's list in place
+        reverse=True
+    )  # descending order: deleting from the back leaves the smaller, not-yet-processed positions unshifted
+    for idx in indexes:
+        if idx + 1 < len(span_sc):  # targets past the end are skipped silently instead of raising
+            del span_sc[idx + 1]  # NOTE(review): removes position idx + 1, not idx; cleanup_justify passes 0-based enumerate() positions -- confirm this offset is intended
 def delete_overlapping_span(span_sc: dict):
     # print(span_sc)
     start_token_list = [spn.start for spn in span_sc]
     info = {}
     for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
+                                     start=0):
         res = {
             'score': score,
             'spn': spn,
             id_del.append(n)
     # print(id_comp)
+    del_spans(span_sc, id_del)
+    # for n, idx in enumerate(id_del):
+    #     # print(idx)
+    #     try:
+    #         del span_sc[idx - n]
+    #     except IndexError:
+    #         continue
+def cleanup_justify(doc, span_sc: dict):  # NOTE(review): annotated as dict, but only iterated, tested with `in`, and .extend()-ed here -- presumably a spaCy SpanGroup; confirm
+    # Rework the JUSTIFYING spans of span_sc in place: spans whose root is headed by another JUSTIFYING root are dropped,
+    # the remaining ones are re-cut from the root's left_edge to its right_edge. Step 1: index JUSTIFYING spans by root token.
+    justifies = {}  # root token -> bookkeeping record for the JUSTIFYING span that has this root
+    for idx, span in enumerate(span_sc):  # idx is the 0-based position of span inside span_sc
+        # temp_root = span.root
+        # while span.start <= temp_root.head.i <= span.end:
+        #     temp_root = temp_root.head
+        if span.label_ in ['JUSTIFYING']:
+            justifies[span.root] = {  # two spans sharing a root: the later one overwrites the earlier record
+                "span": span,
+                "head": span.root.head,  # stored but not read again in this function
+                "start": span.start,  # stored but not read again in this function
+                "end": span.end,  # stored but not read again in this function
+                "del": False,  # set to True below when the original span should be removed
+                "dependent": False,  # set to True below when the root's head is another JUSTIFYING root
+                "span_idx": idx
+            }
+    # print(justifies)
+    # Step 2: flag every JUSTIFYING span whose root token's head is itself the root of a JUSTIFYING span
+    for spanroot, info in justifies.items():
+        if spanroot.head in justifies:  # NOTE(review): a spaCy sentence root is presumably its own head, which would make this true for it -- confirm
+            info['dependent'] = True
+            info['del'] = True
+    # print(justifies)
+    new_spans = []  # replacement spans, appended to span_sc at the end
+    for spanroot, info in justifies.items():
+        if not info['dependent']:
+            # print("New Justifying candidate span:")
+            # print(doc[spanroot.left_edge.i:spanroot.right_edge.i + 1])
+            new_span = doc[spanroot.left_edge.i:spanroot.right_edge.i + 1]  # slice from the root's left_edge token through its right_edge token, inclusive
+            new_span.label_ = "JUSTIFYING"
+            if new_span not in span_sc:  # when the re-cut span is already present, the original is kept ('del' stays False)
+                new_spans.append(new_span)
+                info['del'] = True  # the original span is superseded by the re-cut one
+        else:
+            info['del'] = True  # redundant with the flagging loop above, which already set it
+    to_delete = [
+        info['span_idx'] for spanroot, info in justifies.items() if info['del']
+    ]
+    to_delete_span = [  # only referenced by the commented-out debug print below
+        info['span'] for spanroot, info in justifies.items() if info['del']
+    ]
+    # print(to_delete)
+    # print(to_delete_span)
+    del_spans(span_sc, to_delete)  # NOTE(review): del_spans deletes position idx + 1 for each idx given -- confirm the flagged spans are the ones removed
+    span_grp = SpanGroup(doc, spans=new_spans)
+    span_sc.extend(span_grp)  # add the re-cut JUSTIFYING spans after the deletions
+    # print(justifies)