egumasa committed on
Commit
63e4e17
•
1 Parent(s): 349cb79

new pipeline

Browse files
Files changed (3) hide show
  1. demo.py +3 -1
  2. requirements.txt +2 -1
  3. utils/util.py +84 -7
demo.py CHANGED
@@ -8,7 +8,7 @@ from spacy.tokens import Doc
8
 
9
  import streamlit as st
10
 
11
- from utils.util import delete_overlapping_span
12
  from utils.visualize import visualize_spans
13
 
14
  # nlp = spacy.load(
@@ -261,6 +261,7 @@ with st.form("my_form"):
261
  # st.write(text)
262
  # delete_span(doc.spans['sc'])
263
 
 
264
  delete_overlapping_span(doc.spans['sc'])
265
 
266
  visualize_spans(
@@ -292,6 +293,7 @@ st.subheader("Bibliography")
292
  st.markdown("""
293
  * Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
294
  * Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
 
295
  * Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
296
 
297
  """)
 
8
 
9
  import streamlit as st
10
 
11
+ from utils.util import delete_overlapping_span, cleanup_justify
12
  from utils.visualize import visualize_spans
13
 
14
  # nlp = spacy.load(
 
261
  # st.write(text)
262
  # delete_span(doc.spans['sc'])
263
 
264
+ cleanup_justify(doc, doc.spans['sc'])
265
  delete_overlapping_span(doc.spans['sc'])
266
 
267
  visualize_spans(
 
293
  st.markdown("""
294
  * Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
295
  * Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
296
+ * Ryshina-Pankova, M. (2014). Exploring academic argumentation in course-related blogs through ENGAGEMENT. In G. Thompson & L. Alba-Juez (Eds.), _Pragmatics & Beyond New Series (Vol. 242, pp. 281–302)_. John Benjamins Publishing Company. https://doi.org/10.1075/pbns.242.14rys
297
  * Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
298
 
299
  """)
requirements.txt CHANGED
@@ -5,4 +5,5 @@ spacy_streamlit
5
  # https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
6
  # https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
7
  # https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
8
- https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl
 
 
5
  # https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
6
  # https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
7
  # https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
8
+ # https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl
9
+ https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad2/resolve/main/en_engagement_spl_RoBERTa_acad2-any-py3-none-any.whl
utils/util.py CHANGED
@@ -1,5 +1,6 @@
1
  import re
2
  from collections import Counter
 
3
 
4
 
5
  def preprocess(text):
@@ -10,6 +11,17 @@ def preprocess(text):
10
  return text
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
13
  def delete_overlapping_span(span_sc: dict):
14
  # print(span_sc)
15
  start_token_list = [spn.start for spn in span_sc]
@@ -21,7 +33,7 @@ def delete_overlapping_span(span_sc: dict):
21
 
22
  info = {}
23
  for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
24
- start=1):
25
  res = {
26
  'score': score,
27
  'spn': spn,
@@ -54,10 +66,75 @@ def delete_overlapping_span(span_sc: dict):
54
  id_del.append(n)
55
 
56
  # print(id_comp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- for n, idx in enumerate(id_del):
59
- # print(idx)
60
- try:
61
- del span_sc[idx - n]
62
- except IndexError:
63
- continue
 
1
  import re
2
  from collections import Counter
3
+ from spacy.tokens import SpanGroup
4
 
5
 
6
  def preprocess(text):
 
11
  return text
12
 
13
 
14
def del_spans(span_sc, indexes: list):
    """Delete the entries selected by ``indexes`` from ``span_sc`` in place.

    For every selected index ``idx`` the element at position ``idx + 1`` is
    removed, provided that position exists; a selection whose ``idx + 1``
    falls beyond the end of ``span_sc`` is skipped silently.

    NOTE(review): the ``+ 1`` offset and the bounds guard are kept exactly as
    they were. They look like compensation for an off-by-one in the installed
    spaCy's ``SpanGroup.__delitem__`` -- confirm against the pinned spaCy
    version before changing the offset or upgrading spaCy.

    Args:
        span_sc: Mutable sequence supporting ``len()`` and ``del seq[i]``
            (presumably a spaCy ``SpanGroup``; verify against callers).
        indexes: Indices selecting the entries to remove. The list itself is
            not modified, and a value listed more than once is applied once.

    Returns:
        None. ``span_sc`` is modified in place.
    """
    # Work on a sorted, de-duplicated copy: the caller's list keeps its
    # order, and a repeated index cannot delete a second, unrelated element.
    # Highest index first, because a deletion only shifts the elements after
    # it, so the smaller indices still to be processed remain valid.
    for idx in sorted(set(indexes), reverse=True):
        if idx + 1 < len(span_sc):
            del span_sc[idx + 1]
23
+
24
+
25
  def delete_overlapping_span(span_sc: dict):
26
  # print(span_sc)
27
  start_token_list = [spn.start for spn in span_sc]
 
33
 
34
  info = {}
35
  for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
36
+ start=0):
37
  res = {
38
  'score': score,
39
  'spn': spn,
 
66
  id_del.append(n)
67
 
68
  # print(id_comp)
69
+ del_spans(span_sc, id_del)
70
+ # for n, idx in enumerate(id_del):
71
+ # # print(idx)
72
+
73
+ # try:
74
+ # del span_sc[idx - n]
75
+ # except IndexError:
76
+ # continue
77
+
78
+
79
+ def cleanup_justify(doc, span_sc: dict):
80
+ # This function adjusts the JUSTIFYING span
81
+
82
+ # First create an index of span with JUSTIFYING tags
83
+ justifies = {}
84
+ for idx, span in enumerate(span_sc):
85
+ # temp_root = span.root
86
+ # while span.start <= temp_root.head.i <= span.end:
87
+ # temp_root = temp_root.head
88
+ if span.label_ in ['JUSTIFYING']:
89
+ justifies[span.root] = {
90
+ "span": span,
91
+ "head": span.root.head,
92
+ "start": span.start,
93
+ "end": span.end,
94
+ "del": False,
95
+ "dependent": False,
96
+ "span_idx": idx
97
+ }
98
+ # print(justifies)
99
+
100
+ # flagging the dependency
101
+ for spanroot, info in justifies.items():
102
+ if spanroot.head in justifies:
103
+ info['dependent'] = True
104
+ info['del'] = True
105
+
106
+ # print(justifies)
107
+ new_spans = []
108
+ for spanroot, info in justifies.items():
109
+
110
+ if not info['dependent']:
111
+ # print("New Justifying candidate span:")
112
+ # print(doc[spanroot.left_edge.i:spanroot.right_edge.i + 1])
113
+
114
+ new_span = doc[spanroot.left_edge.i:spanroot.right_edge.i + 1]
115
+ new_span.label_ = "JUSTIFYING"
116
+
117
+ if new_span not in span_sc:
118
+ new_spans.append(new_span)
119
+ info['del'] = True
120
+
121
+ else:
122
+ info['del'] = True
123
+
124
+ to_delete = [
125
+ info['span_idx'] for spanroot, info in justifies.items() if info['del']
126
+ ]
127
+
128
+ to_delete_span = [
129
+ info['span'] for spanroot, info in justifies.items() if info['del']
130
+ ]
131
+
132
+ # print(to_delete)
133
+ # print(to_delete_span)
134
+
135
+ del_spans(span_sc, to_delete)
136
+
137
+ span_grp = SpanGroup(doc, spans=new_spans)
138
+ span_sc.extend(span_grp)
139
 
140
+ # print(justifies)