musfiqdehan committed on
Commit
6b83453
1 Parent(s): 37dce2a

Add get_alignments_table function to generate a Markdown table of Spacy PoS tags

Browse files
Files changed (1) hide show
  1. helper/alignment_mappers.py +60 -0
helper/alignment_mappers.py CHANGED
@@ -122,4 +122,64 @@ def get_word_index_mapping(source="", target="", model_name=""):
122
  return result
123
 
124
 
 
 
 
 
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return result
123
 
124
 
125
def get_alignments_table(
        source="",
        target="",
        model_name=""):
    """Render the word alignment of two sentences as an HTML table.

    Calls ``get_alignment_mapping`` to obtain the source words, the target
    words and the aligned ``(source_index, target_index)`` pairs, then builds
    a two-column table: one row per aligned pair (sorted by index pair),
    followed by one ``N/A`` row for every distinct source word that never
    appears in an aligned pair.

    Args:
        source: Source sentence, forwarded to ``get_alignment_mapping``.
        target: Target sentence, forwarded to ``get_alignment_mapping``.
        model_name: Model identifier, forwarded to ``get_alignment_mapping``.

    Returns:
        A ``(html_table, pos_accuracy)`` tuple. ``html_table`` is the HTML
        markup of the table. ``pos_accuracy`` is a percentage string with two
        decimals (e.g. ``"87.50%"``) computed as
        ``(len(source words) - number of distinct unaligned words)
        / len(source words)``; it is ``"0.00%"`` when there are no source
        words.
    """
    # Local import: keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    from html import escape

    sent_src, sent_tgt, align_words = get_alignment_mapping(
        source=source, target=target, model_name=model_name
    )

    def render_row(src_word, tgt_word):
        # Words originate from caller-supplied sentences, so escape them
        # before embedding them in markup.
        return (
            "<tr>\n"
            f"<td> {escape(src_word, quote=False)} </td>\n"
            f"<td> {escape(tgt_word, quote=False)} </td>\n"
            "</tr>"
        )

    rows = []
    mapped_words = set()
    for i, j in sorted(align_words):
        mapped_words.add(sent_src[i])
        rows.append(render_row(sent_src[i], sent_tgt[j]))

    # "Unaligned" is decided per distinct word (not per position), as before.
    # dict.fromkeys de-duplicates while keeping sentence order, so the N/A
    # rows come out in a stable order instead of set-iteration order.
    unks = [word for word in dict.fromkeys(sent_src)
            if word not in mapped_words]
    rows.extend(render_row(word, "N/A") for word in unks)

    # <tbody> is opened exactly once, so the markup is balanced no matter
    # which words were aligned.
    html_table = "\n".join([
        "<table>",
        "<thead>",
        "<tr>",
        "<th>Source</th>",
        "<th>Target</th>",
        "</tr>",
        "</thead>",
        "<tbody>",
        *rows,
        "</tbody>",
        "</table>",
    ])

    total = len(sent_src)
    # Guard against empty input, which previously raised ZeroDivisionError.
    pos_accuracy = (total - len(unks)) / total if total else 0.0
    pos_accuracy = f"{pos_accuracy:0.2%}"

    return html_table, pos_accuracy