zklmorales
commited on
Commit
•
9204d3c
1
Parent(s):
1a873d5
Upload 104 files
Browse files- Final.ipynb +27 -38
- POS Tag Automation/POS Tagger.ipynb +62 -28
- test.ipynb +34 -5
Final.ipynb
CHANGED
@@ -2,19 +2,28 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
{
|
9 |
"name": "stdout",
|
10 |
"output_type": "stream",
|
11 |
"text": [
|
12 |
-
"Original sentence:
|
13 |
-
"Sentence: Nagulat
|
14 |
-
"Correctness Probability: 0.
|
15 |
-
"Cosine Similarity: 0.
|
16 |
-
"Levenshtein Score:
|
17 |
-
"[('Nagulat siya kanina dahil sa ingay', 0.9978345036506653, 0.22926439344882965, 82)]\n"
|
18 |
]
|
19 |
}
|
20 |
],
|
@@ -149,7 +158,8 @@
|
|
149 |
" \n",
|
150 |
" # Compute cosine similarity between original masked word and predicted word\n",
|
151 |
" similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
|
152 |
-
" \n",
|
|
|
153 |
" replaced_words = masked_words.copy()\n",
|
154 |
" replaced_words[i] = candidate_mlm\n",
|
155 |
" corrected_sentence = \" \".join(replaced_words).split() # Split and join to remove extra spaces\n",
|
@@ -166,42 +176,21 @@
|
|
166 |
" probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
|
167 |
"\n",
|
168 |
" # Append the corrected sentence along with its probability and cosine similarity\n",
|
169 |
-
" grammar_correction_candidates.append((corrected_sentence, probability, similarity))\n",
|
170 |
"\n",
|
171 |
"\n",
|
172 |
" # Sort the grammar correction candidates by their probabilities and cosine similarities in descending order\n",
|
173 |
-
" grammar_correction_candidates.sort(key=lambda x: (x[1], x[2]), reverse=True)\n",
|
174 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
"\n",
|
176 |
-
" threshold = 60 # Adjust this threshold according to your requirement\n",
|
177 |
-
" # Initialize a list to store the top 5 candidates\n",
|
178 |
-
" top_candidates = []\n",
|
179 |
"\n",
|
180 |
-
"
|
181 |
-
" for candidate, probability, cosine_similarity in grammar_correction_candidates:\n",
|
182 |
-
" fuzzy_match_score = fuzz.ratio(new_sentence, candidate)\n",
|
183 |
-
" \n",
|
184 |
-
" # Check if the current candidate should be included in the top 5\n",
|
185 |
-
" if len(top_candidates) < 1:\n",
|
186 |
-
" top_candidates.append((candidate, probability, cosine_similarity, fuzzy_match_score))\n",
|
187 |
-
" # Sort the top_candidates based on cosine similarity in descending order\n",
|
188 |
-
" top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
|
189 |
-
" else:\n",
|
190 |
-
" # Compare the cosine similarity of the current candidate with the lowest similarity in the top_candidates\n",
|
191 |
-
" min_similarity = min(top_candidates, key=lambda x: x[2])[2]\n",
|
192 |
-
" if cosine_similarity > min_similarity:\n",
|
193 |
-
" # Replace the candidate with the lowest similarity in the top_candidates list\n",
|
194 |
-
" min_index = top_candidates.index(min(top_candidates, key=lambda x: x[2]))\n",
|
195 |
-
" top_candidates[min_index] = (candidate, probability, cosine_similarity, fuzzy_match_score)\n",
|
196 |
-
" # Sort the top_candidates based on cosine similarity in descending order\n",
|
197 |
-
" top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
|
198 |
-
"\n",
|
199 |
-
" for idx, (candidate, probability, cosine_similarity, fuzzy_match_score) in enumerate(top_candidates):\n",
|
200 |
-
" print(\"Sentence:\", candidate)\n",
|
201 |
-
" print(\"Correctness Probability:\", probability)\n",
|
202 |
-
" print(\"Cosine Similarity:\", cosine_similarity)\n",
|
203 |
-
" print(\"Levenshtein Score:\", fuzzy_match_score)\n",
|
204 |
-
" print(top_candidates)\n"
|
205 |
]
|
206 |
}
|
207 |
],
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
14 |
+
"c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fuzzywuzzy\\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n",
|
15 |
+
" warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n"
|
16 |
+
]
|
17 |
+
},
|
18 |
{
|
19 |
"name": "stdout",
|
20 |
"output_type": "stream",
|
21 |
"text": [
|
22 |
+
"Original sentence: Magugulat ako kanina dahil sa pagsabog\n",
|
23 |
+
"Sentence: Nagulat ako kanina dahil sa pagsabog\n",
|
24 |
+
"Correctness Probability: 0.9976696372032166\n",
|
25 |
+
"Cosine Similarity: 0.20241191983222961\n",
|
26 |
+
"Levenshtein Score: 75\n"
|
|
|
27 |
]
|
28 |
}
|
29 |
],
|
|
|
158 |
" \n",
|
159 |
" # Compute cosine similarity between original masked word and predicted word\n",
|
160 |
" similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
|
161 |
+
" fuzzy_match_score = fuzz.ratio(token, candidate_mlm)\n",
|
162 |
+
"\n",
|
163 |
" replaced_words = masked_words.copy()\n",
|
164 |
" replaced_words[i] = candidate_mlm\n",
|
165 |
" corrected_sentence = \" \".join(replaced_words).split() # Split and join to remove extra spaces\n",
|
|
|
176 |
" probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
|
177 |
"\n",
|
178 |
" # Append the corrected sentence along with its probability and cosine similarity\n",
|
179 |
+
" grammar_correction_candidates.append((corrected_sentence, probability, similarity, fuzzy_match_score))\n",
|
180 |
"\n",
|
181 |
"\n",
|
182 |
" # Sort the grammar correction candidates by their probabilities and cosine similarities in descending order\n",
|
183 |
+
" grammar_correction_candidates.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)\n",
|
184 |
"\n",
|
185 |
+
"if grammar_correction_candidates:\n",
|
186 |
+
" candidate, probability, cosine_similarity, fuzzy_match_score = grammar_correction_candidates[0]\n",
|
187 |
+
" print(\"Sentence:\", candidate)\n",
|
188 |
+
" print(\"Correctness Probability:\", probability)\n",
|
189 |
+
" print(\"Cosine Similarity:\", cosine_similarity)\n",
|
190 |
+
" print(\"Levenshtein Score:\", fuzzy_match_score)\n",
|
191 |
"\n",
|
|
|
|
|
|
|
192 |
"\n",
|
193 |
+
"\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
]
|
195 |
}
|
196 |
],
|
POS Tag Automation/POS Tagger.ipynb
CHANGED
@@ -2,16 +2,25 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import transformers "
|
10 |
]
|
11 |
},
|
12 |
{
|
13 |
"cell_type": "code",
|
14 |
-
"execution_count":
|
15 |
"metadata": {},
|
16 |
"outputs": [
|
17 |
{
|
@@ -33,7 +42,7 @@
|
|
33 |
},
|
34 |
{
|
35 |
"cell_type": "code",
|
36 |
-
"execution_count":
|
37 |
"metadata": {},
|
38 |
"outputs": [
|
39 |
{
|
@@ -110,7 +119,7 @@
|
|
110 |
},
|
111 |
{
|
112 |
"cell_type": "code",
|
113 |
-
"execution_count":
|
114 |
"metadata": {},
|
115 |
"outputs": [],
|
116 |
"source": [
|
@@ -134,7 +143,7 @@
|
|
134 |
},
|
135 |
{
|
136 |
"cell_type": "code",
|
137 |
-
"execution_count":
|
138 |
"metadata": {},
|
139 |
"outputs": [],
|
140 |
"source": [
|
@@ -144,7 +153,7 @@
|
|
144 |
},
|
145 |
{
|
146 |
"cell_type": "code",
|
147 |
-
"execution_count":
|
148 |
"metadata": {},
|
149 |
"outputs": [],
|
150 |
"source": [
|
@@ -201,7 +210,7 @@
|
|
201 |
},
|
202 |
{
|
203 |
"cell_type": "code",
|
204 |
-
"execution_count":
|
205 |
"metadata": {},
|
206 |
"outputs": [
|
207 |
{
|
@@ -231,7 +240,7 @@
|
|
231 |
" 'prev2pos': 'PRS'}"
|
232 |
]
|
233 |
},
|
234 |
-
"execution_count":
|
235 |
"metadata": {},
|
236 |
"output_type": "execute_result"
|
237 |
}
|
@@ -244,7 +253,7 @@
|
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
-
"execution_count":
|
248 |
"metadata": {},
|
249 |
"outputs": [],
|
250 |
"source": [
|
@@ -260,7 +269,7 @@
|
|
260 |
},
|
261 |
{
|
262 |
"cell_type": "code",
|
263 |
-
"execution_count":
|
264 |
"metadata": {},
|
265 |
"outputs": [],
|
266 |
"source": [
|
@@ -276,7 +285,7 @@
|
|
276 |
},
|
277 |
{
|
278 |
"cell_type": "code",
|
279 |
-
"execution_count":
|
280 |
"metadata": {},
|
281 |
"outputs": [
|
282 |
{
|
@@ -293,7 +302,7 @@
|
|
293 |
},
|
294 |
{
|
295 |
"cell_type": "code",
|
296 |
-
"execution_count":
|
297 |
"metadata": {},
|
298 |
"outputs": [
|
299 |
{
|
@@ -325,7 +334,7 @@
|
|
325 |
" 'PMP']"
|
326 |
]
|
327 |
},
|
328 |
-
"execution_count":
|
329 |
"metadata": {},
|
330 |
"output_type": "execute_result"
|
331 |
}
|
@@ -336,7 +345,7 @@
|
|
336 |
},
|
337 |
{
|
338 |
"cell_type": "code",
|
339 |
-
"execution_count":
|
340 |
"metadata": {},
|
341 |
"outputs": [],
|
342 |
"source": [
|
@@ -356,33 +365,58 @@
|
|
356 |
},
|
357 |
{
|
358 |
"cell_type": "code",
|
359 |
-
"execution_count":
|
360 |
"metadata": {},
|
361 |
"outputs": [
|
362 |
{
|
363 |
"name": "stderr",
|
364 |
"output_type": "stream",
|
365 |
"text": [
|
366 |
-
"c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1609: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
367 |
-
"
|
368 |
]
|
369 |
},
|
370 |
{
|
371 |
-
"
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
}
|
380 |
],
|
381 |
"source": [
|
|
|
|
|
|
|
382 |
"labels = list(crf.classes_)\n",
|
|
|
|
|
383 |
"y_pred = crf.predict(X_valid)\n",
|
384 |
-
"
|
385 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
]
|
387 |
},
|
388 |
{
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
],
|
17 |
"source": [
|
18 |
"import transformers "
|
19 |
]
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
26 |
{
|
|
|
42 |
},
|
43 |
{
|
44 |
"cell_type": "code",
|
45 |
+
"execution_count": 48,
|
46 |
"metadata": {},
|
47 |
"outputs": [
|
48 |
{
|
|
|
119 |
},
|
120 |
{
|
121 |
"cell_type": "code",
|
122 |
+
"execution_count": 49,
|
123 |
"metadata": {},
|
124 |
"outputs": [],
|
125 |
"source": [
|
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
+
"execution_count": 5,
|
147 |
"metadata": {},
|
148 |
"outputs": [],
|
149 |
"source": [
|
|
|
153 |
},
|
154 |
{
|
155 |
"cell_type": "code",
|
156 |
+
"execution_count": 6,
|
157 |
"metadata": {},
|
158 |
"outputs": [],
|
159 |
"source": [
|
|
|
210 |
},
|
211 |
{
|
212 |
"cell_type": "code",
|
213 |
+
"execution_count": 7,
|
214 |
"metadata": {},
|
215 |
"outputs": [
|
216 |
{
|
|
|
240 |
" 'prev2pos': 'PRS'}"
|
241 |
]
|
242 |
},
|
243 |
+
"execution_count": 7,
|
244 |
"metadata": {},
|
245 |
"output_type": "execute_result"
|
246 |
}
|
|
|
253 |
},
|
254 |
{
|
255 |
"cell_type": "code",
|
256 |
+
"execution_count": 8,
|
257 |
"metadata": {},
|
258 |
"outputs": [],
|
259 |
"source": [
|
|
|
269 |
},
|
270 |
{
|
271 |
"cell_type": "code",
|
272 |
+
"execution_count": 9,
|
273 |
"metadata": {},
|
274 |
"outputs": [],
|
275 |
"source": [
|
|
|
285 |
},
|
286 |
{
|
287 |
"cell_type": "code",
|
288 |
+
"execution_count": 10,
|
289 |
"metadata": {},
|
290 |
"outputs": [
|
291 |
{
|
|
|
302 |
},
|
303 |
{
|
304 |
"cell_type": "code",
|
305 |
+
"execution_count": 11,
|
306 |
"metadata": {},
|
307 |
"outputs": [
|
308 |
{
|
|
|
334 |
" 'PMP']"
|
335 |
]
|
336 |
},
|
337 |
+
"execution_count": 11,
|
338 |
"metadata": {},
|
339 |
"output_type": "execute_result"
|
340 |
}
|
|
|
345 |
},
|
346 |
{
|
347 |
"cell_type": "code",
|
348 |
+
"execution_count": 12,
|
349 |
"metadata": {},
|
350 |
"outputs": [],
|
351 |
"source": [
|
|
|
365 |
},
|
366 |
{
|
367 |
"cell_type": "code",
|
368 |
+
"execution_count": 60,
|
369 |
"metadata": {},
|
370 |
"outputs": [
|
371 |
{
|
372 |
"name": "stderr",
|
373 |
"output_type": "stream",
|
374 |
"text": [
|
375 |
+
"c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_label.py:895: UserWarning: unknown class(es) [\"['CCB', 'PMC', 'VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'VBTS_CCP', 'NNC', 'CCP', 'JJD_CCP', '[PAD]', '[PAD]', 'VBTR', 'PRP', 'DTP', 'NNP', 'CCT', 'CDB', 'PMP']\", \"['CCR', 'JJD', 'DTC', 'NNC', 'CCB', 'NNC', 'PMC', 'VBAF', 'RBI', 'CCB', 'CDB', '[PAD]', 'JJD_CCP', 'NNC', 'JJD', 'PMS', 'NNC', 'PMP']\", \"['CCR', 'VBTS', 'CCP', 'DTC', 'NNC', 'PMC', 'RBF', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'JJD', 'CCP', 'NNC', 'NNP', 'CCP', 'VBTS', 'CCT', 'JJN_CCP', 'NNC', 'PMP']\", \"['CCT', 'CCT', 'CDB', 'CCP', 'NNC', 'CCP', 'VBTS', 'CCB', 'NNP', 'NNP', 'CCP', 'VBTR', 'DTP', 'NNP', 'NNP', 'LM', 'JJD_CCP', 'CDB', 'NNC', 'RBI', 'DTC', 'JJD_CCP', 'NNC_CCP', 'CCT', '[PAD]', 'NNP', 'PMP']\", \"['CCT', 'CDB', 'PMC', 'CDB', 'JJN_CCP', 'VBTS_CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'CCP', 'VBAF', 'RBI', 'CCB', 'PRSP_CCP', 'NNC', 'CCT', 'VBTR', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCB', '[PAD]', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'CCP', 'VBTR', 'CCT', 'PRI_CCP', 'NNC', 'PMC', 'VBTR', 'DTP', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCT', 'CCT', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'NNC', 'PMC', 'VBTS', 'DTP', 'NNP', 'CCP', 'VBAF', 'RBI', 'PRP', 'CCB', 'CDB', 'PMC', 'CDB', 'RBL', 'CCT', 'NNP', 'PMC', 'CCT', 'RBF', 'RBM', 'PRS', 'VBTS', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'PRO', 'CCB', 'JJD_CCP', 'CCR', 'PMS', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTP', 'NNP', 'CCR', 'CCT', 'NNP', 'FW', 'FW', 'PRO', 'CCP', 'VBH', '[PAD]', 'PMS', 'VBW', 'CCT', 'PRI', 'PRO_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNP', 'NNP', 'JJN_CCP', 'VBTS', 'CCB', 'NNC', 'CCB', 'FW', 'PMS', 'FW', 'RBW', 'PRI_CCP', 'FW', 'FW', 'DTC', 'NNC', 'CCT', 'NNP', 'NNP', 'RBW_CCP', 'NNP', 'CDB', 'PMP']\", \"['CCT', 'PMC', 'VBTS', 'CCB', 'NNC', 'CCP', 'DTC', 'FW', 'CDB', 'LM', 'VBTS', 'CCB', 'NNC', 'CCT', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['CCT', 'PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'PMS', 'NNP', 'NNP', 'PMS', 'PMC', 'VBTS', 'PRO', 'CCP', 'RBF', 'PRS', 'VBAF', 'CCB', 'NNP', 'FW', '[PAD]', 'NNC', 'RBI', 'CCT', 'JJD_CCP', 'NNC', 'PMP']\", \"['CCT', 'VBTR', 'CCP', 'NNP', 'NNP', 'LM', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBW', 'CCB', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['CDB', 'RBW', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'RBF', 'JJD', 'CCP', 'DTC', 'CCR', 'PMS', 'NNC', 'CCT', 'NNP', 'LM', 'VBTF', 'CCB', 'JJD_CCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCB', 'NNC', 'PRS', 'LM', 'VBTR_CCP', 'JJD', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'LM', '[PAD]', 'VBTR']\", \"['DTC', 'DTCP', 'NNC', 'LM', 'VBTR', 'RBW', 'JJD', 'CCT', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'PMS', 'NNC', 'CCB', 'NNP', 'LM', 'VBTR', 'RBI', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC_CCP', 'PMS', 'NNC', 'CCT', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'CCB', 'PMS', 'NNP', 'LM', 'CCT', 'RBI', 'CCT', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'FW', 'FW', 'NNP', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTC', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'VBAF', 'DTC', 'NNP', 'CCT', 'FW', 'NNC', 'PMP']\", \"['DTC', 'FW', 'NNP', 'LM', 'CCT', 'CCT', 'JJN', 'CCP', 'VBTS_CCP', 'NNC', 'PMC', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCA', 'PRI_CCP', 'NNC', 'PMP']\", \"['DTC', 'JJD_CCP', 'NNC', 'LM', '[PAD]', 'VBTR', 'PMP']\", \"['DTC', 'NNC', 'LM', 'VBTS', 'CCB', '[PAD]', 'PMS', 'NNC', 'CCB', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['DTC', 'NNC', 'NNC_CCP', 'FW', '[PAD]', 'FW', 'NNP', 'FW', 'PMS', 'NNP', 'NNP', 'CCP', 'NNC', 'PRP']\", \"['DTC', 'NNC_CCP', 'VBTS', 'LM', '[PAD]', 'NNC', 'PMP']\", \"['DTC', 'PRI', 'CCB', 'NNC', 'LM', 'VBW', 'CCB', 'NNC', 'RBL', 'CCT', 'NNC', 'PMP']\", \"['DTC', 'PRSP_CCP', 'JJCC', 'JJD_CCP', '[PAD]', 'JJD_CCP', 'NNC', 'LM', 'VBTR', 'CCT', 'PRSP', 'CCP', 'VBW', 'CCB', 'JJD', 'CCP', 'NNC', 'PMP']\", \"['DTC', 'VBTS_CCP', 'NNC', 'LM', 'JJD', 'RBW', '[PAD]', 'VBTS', 'DTP', 'NNP_CCP', 'NNP', 'CCT', 'PRI', 'CCB', 'NNC', 'RBW', 'VBOF', 'CCT', 'NNPA', 'PMS', '[PAD]', 'PMS', 'NNPA', 'PMP']\", \"['JJCC', 'CCT', 'CDB', 'NNC', 'DTC', 'VBOF', 'CCT', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'CCT', 'PRI', 'RBI_CCP', 'NNC', 'CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'CCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCA', 'RBD_CCP', '[PAD]', 'JJD_CCP', 'CDB', 'PMS', 'CDB', 'CCB', 'RBW', 'PMP']\", \"['JJD', 'CCP', 'VBOF', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'NNC', 'CCB', 'NNC', '[PAD]', '[PAD]', 'VBOF', 'PMC', 'PRI', 'NNP', 'PMP']\", \"['JJD', 'JJN_CCP', 'NNC', 'RBW', 'VBTR', 'DTC', 'NNC', 'CCT', 'NNC', 'CCT', 'RBF', 'RBM', 'PRO', 'VBOF', 'CCB', 'NNC', 'CCA', 'CCT', 'NNC', 'LM', 'VBTR', 'RBI', 'DTC', 'NNC', 'PMS', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'LM', 'VBTR', 'CCB', 'FW', 'NNP', 'CCP', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBOF', 'DTC', 'FW', 'FW', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJD', 'LM', 'VBTS', 'DTP', 'NNP', 'CCP', 'RBF', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'DTC', 'RBW', 'PRS_CCP', 'NNC', 'PMC', 'NNC', 'LM', 'VBS', 'VBAF', 'CCB', 'NNC_CCP', 'VBTR_VBAF', 'CCB', 'PRSP_CCP', 'DTCP', 'VBTS', 'PMP']\", \"['JJD', 'PMS', 'FW', 'PMS', 'RBM', 'DTC', 'NNC', 'PRO', 'PMP']\", \"['JJD', 'PRL', 'PMC', 'JJD', 'CCP', 'VBAF', 'CCT', 'NNC', 'DTP', 'NNP', 'NNP', 'DTC', 'NNC', 'PRO', 'CCA', 'VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCB', 'RBF', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD', 'RBI_CCP', 'VBOF', 'CCB', 'NNC', 'CCP', 'NNC', 'CCB', 'DTCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'CCB', 'RBW', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTF', 'PRP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'VBW', 'DTC', 'NNC', 'CCR', 'VBN', 'RBI', 'PRO_CCP', 'VBAF', 'CCT', 'NNC', 'PMP']\", \"['JJD_CCP', 'VBW', 'CCB', 'NNC_CCP', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'NNC', 'RBW', 'VBW', 'DTC', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'VBTF', 'CCB', 'NNC', 'PMS', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCA', 'CCT', 'NNC', 'NNP', 'CCT', 'VBW', 'CCB', 'NNC', 'PMP']\", \"['NNC', 'RBW', 'VBW', 'DTC', 'JJN_CCP', 'NNC', 'FW', 'FW', 'CCP', 'RBL', 'NNC', 'CCB', 'JJD_CCP', 'NNC', 'CCT', 'NNPA', 'NNC', 'CCT', 'NNP', 'PMP']\", \"['PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'DTC', 'NNP', 'CCA', 'VBTS', 'CCB', 'RBF', 'RBI', 'VBTS_CCP', 'NNC', 'PMP']\", \"['PRI_CCP', 'NNC', 'DTC', '[PAD]', 'VBTR', 'CCP', 'VBW', 'CCB', 'NNP', 'NNP', 'CCT', 'CCT', 'JJD_CCP', 'VBN_CCP', 'NNC', 'PMP']\", \"['PRO', 'DTC', 'JJD', 'VBTS', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['PRO', 'RBI', 'NNP', 'CDB', 'LM', '[PAD]', 'VBW', 'CCB', 'NNP', 'CDB', 'PMP', 'CDB', 'CCP', 'NNC', 'DTC', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['PRS', 'LM', 'VBTS', 'CCR', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'RBD', 'NNC_CCP', 'DTCP', 'NNC', 'PMP']\", \"['PRS_CCP', 'RBM', 'DTC', 'JJCS_JJD_CCP', 'NNC', 'CCP', 'PRSP_CCP', 'VBOF', 'CCT', 'NNC', 'CCP', 'VBAF', 'CCT', 'CDB', '[PAD]', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['RBD_CCP', 'VBOF', 'CCB', 'NNP', 'NNP', 'DTC', 'NNC', 'CCT', 'NNP', 'FW', 'PMS', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'CDB', 'PMC', 'RBW', 'DTC', '[PAD]', 'PMS', 'NNC', 'DTP', 'NNP', 'NNP', 'PMP']\", \"['RBF', 'VBTS', 'DTC', 'PRSP_CCP', 'VBTR', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['RBF', 'VBTS', 'DTP', 'NNP', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 'DTCP', 'NNC', 'NNC', 'CCT', 'PRSP', 'CCP', 'VBTR', 'PRS_CCP', 'VBTR', 'RBM', 'DTC', 'FW', 'FW', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBR', 'CCT', 'NNPA', 'PMC', 'VBH', 'PRQ_CCP', 'DTCP', 'FW', 'NNC', 'DTC', 'VBS', 'CCP', 'VBW', 'CCT', 'VBW', 'CCB', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'CCT', 'PRSP_CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'PMP']\", \"['RBR', 'DTP', 'NNP', 'PMC', 'VBTS', 'CCP', 'RBI', 'CCB', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'DTC', 'NNP', 'FW', 'NNC', 'CCB', 'DTCP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'PMS', 'RBR', 'CCP', 'RBI', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'PRS_CCP', 'VBAF', 'CCB', 'NNC', 'CCR', 'PRS', 'LM', '[PAD]', 'VBTR', 'CCP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'RBI', 'CCT', 'NNC', 'DTP', 'NNP', 'CCP', 'NNC', 'LM', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'VBAF', 'VBH', 'VBTS', 'PRP_CCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['RBW_CCP', 'PRS', 'RBI', 'CCT', 'NNC', 'CCT', 'PRS', 'LM', 'VBTR', 'NNC', 'PMP']\", \"['RBW_CCP', 'RBW', 'VBOF', 'CCB', 'NNP', 'CCP', 'CCR', 'RBF', 'JJD', 'CCP', 'VBW', 'CCB', 'NNC', 'PMC', 'VBS', 'VBOF', 'PRO', 'CCP', 'RBD_CCP', 'VBW', 'DTC', 'RBL', 'NNC', 'PMP']\", \"['VBAF', 'CCP', 'RBI', 'PRS', 'VBOF', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBAF', 'CCT', '[PAD]', 'VBTR', 'CCP', 'RBF', 'CCP', 'VBW', 'DTC', 'NNC', 'PMP']\", \"['VBAF', 'PRS', 'CCT', 'NNC', 'CCT', 'VBAF', 'CCB', 'JJN_CCP', 'NNC', 'PMP']\", \"['VBOF', 'CCB', 'NNC', 'CCB', 'NNP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCT', 'JJN', 'PMS', 'CDB', 'CCB', 'NNPA', 'CCT', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBOF', 'CCB', 'PRI', 'DTC', 'PRSP_CCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBOF', 'PRO', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'NNC', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'NNC', 'RBI', 'PRO', 'CCA', 'VBS', 'PRP_CCP', 'VBOF', 'PRO', 'PMP']\", \"['VBOF', 'PRS', 'DTC', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC']\", \"['VBS', 'RBI', 'RBI', 'CCB', 'NNC', 'RBW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCA', 'NNP', 'NNP', 'CCP', 'VBTS', 'RBW_CCP', 'NNC', 'PMP']\", \"['VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'NNP', 'CCP', 'VBOF', 'DTC', 'PRSP_CCP', 'DTCP', 'NNC', 'CCP', 'VBS', 'VBAF', 'CCT', 'DTCP', 'NNC', 'CCA', 'NNC', 'CCB', 'NNP', 'CCR', 'VBTS', 'PRL', 'PMP']\", \"['VBTR', 'RBI', 'RBI', 'CCT', 'NNP', 'FW', 'FW', 'DTC', 'PRI_CCP', 'NNP', 'NNP', 'VBW', 'DTC', 'PRQ_CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'NNC', 'DTC', 'DTCP', 'FW', 'FW', 'PMP']\", \"['VBTR_CCP', 'RBL', 'CDB', 'PMC', 'CDB', 'NNC', 'DTC', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'CDB', 'PMS', 'JJD', 'CCP', 'NNC', 'CCP', 'VBTR', 'CCP', 'NNC', 'JJD', 'CCT', 'NNC', 'PMP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC_CCP', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'CCP', 'NNC', 'CCT', 'NNP', 'FW', 'DTC', 'NNC', 'CCT', 'VBW', 'CCA', 'VBW', 'CCB', 'NNC', 'DTC', 'NNC', 'CCR', 'VBTS', 'DTC', 'NNC', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'DTC', 'FW', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'DTCP', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'CCT', 'NNC', 'CCB', 'JJD', 'PMS', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'DTC', 'JJN_CCP', 'NNC', 'LM', 'RBL', 'NNPA', 'NNP', 'NNP', 'CCT', 'NNP', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'CCB', 'DTCP', 'NNC', 'CCP', 'NNC', 'RBW', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'DTC', 'NNC', 'CCB', 'DTCP', '[PAD]', 'NNC', 'DTC', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'PRS', 'CCB', 'NNC', 'CCP', 'NNC', 'DTC', 'VBTS_CCP', 'NNC', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'VBTR', 'DTC', 'PRSP_CCP', 'NNC_CCP', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBH', 'JJD_CCP', 'CDB', 'PMS', 'FW', 'CCT', 'NNC', 'CCB', 'PRSP_CCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBOF', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 'CCB', 'PRSP_CCP', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 'CCP', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'JJD_CCP', '[PAD]', 'VBTS', 'CCT', 'NNP', 'NNP', 'CCT', '[PAD]', 'VBW', 'CCT', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'CCP', 'VBS', 'VBTF', 'CCP', 'RBI', 'CCB', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'NNPA', 'PMS', 'DTC', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'FW', 'CCP', 'VBW', 'PRS', 'CCA', 'PRSP_CCP', 'NNC', 'LM', 'RBF', 'VBW', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'NNP', 'CCP', 'NNC', 'CCP', 'RBI', 'DTC', 'FW', 'NNC', 'CCP', 'VBTF', 'RBI_CCP', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'PMP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'DTC', 'NNC', 'CCP', 'DTP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'FW', 'PMC', 'CCP', 'VBTS', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', '[PAD]', 'NNP', 'NNC', 'CCP', 'VBAF', 'DTC', 'VBTS_CCP', 'NNC', 'CCB', 'CDB', 'NNC', 'PMP']\", \"['VBTS', 'PRS', 'CCP', 'RBF', 'PRS', '[PAD]', '[PAD]', 'VBW', 'CCT', 'PRSP_CCP', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'CCB', 'JJD_CCP', 'NNC', 'DTC', 'NNC', 'CCA', 'JJCC', 'JJD', 'CCR', 'VBOF', 'CCP', 'RBI', 'NNC', 'PRO_CCP', 'FW', 'PMP']\", \"['VBTS', 'RBI', 'CCT', 'NNC', 'DTC', 'NNC', 'CCT', 'RBF', 'CCP', 'VBTS', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'PRI', 'PMS', 'NNCA', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTC', 'PRI_CCP', 'NNP', 'PMC', 'RBL', 'JJN', 'CCP', 'NNC', 'PMC', 'CCP', 'VBAF', 'CCT', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTS', 'PRS', 'DTC', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'PMP', 'PMS', 'CCP', 'PMS', 'NNPA', 'PRS', 'CCB', 'DTC', 'NNC', 'CCP', '[PAD]', 'VBTS', 'CCB', 'PRSP_CCP', 'NNC', 'PMP', 'PMS']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMC', 'CCP', 'VBTS', 'PMS', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'PMC', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'VBTR', 'DTC', 'DTCP', 'PRO', 'CCR', 'PMS', 'PRI', 'PMS', 'RBI_CCP', 'NNC', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'FW', 'DTC', 'DTCP', 'NNC', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCA', 'PRI', 'CCT', 'DTCP', 'PRO', 'LM', 'JJD', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCT', 'NNC', 'CCA', 'PRL', '[PAD]', '[PAD]', 'VBTS', 'CCR', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'CCB', 'NNP', 'DTC', 'NNC', 'CCB', 'VBAF', 'CCT', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBW', 'CCP', 'RBL', 'JJN', 'CCP', 'NNC', 'CCP', 'DTC', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['VBW', 'DTC', 'CDB', 'NNC', 'LM', 'VBTR', 'CCP', 'CCT', 'NNC', 'DTC', 'JJD', 'PMS', 'FW', 'NNP', 'FW', 'PMP']\", \"['[PAD]', 'VBAF', 'CCB', 'VBW', 'CCT', 'NNC', 'CCP', 'VBTS', 'RBL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCB', 'NNC', 'JJD', 'CCB', 'NNC', 'CCB', 'NNC', 'DTC', 'DTCP', 'NNC', 'RBL', 'CCT', 'JJD', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNC', 'DTC', 'VBTS', 'CCA', 'CDB', 'PMC', 'CDB', 'RBI', 'DTC', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNP', 'RBL', 'CCT', 'JJN', 'CCP', 'NNC', 'DTC', '[PAD]', 'VBTS', 'CCB', 'NNC', 'CCA', 'NNC', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCT', 'CCT', '[PAD]', 'JJD_CCP', 'NNC', 'PMC', 'CCB', 'DTC', 'JJN_CCP', '[PAD]', 'FW', 'FW', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'DTCP', 'NNC_CCP', '[PAD]', 'VBTR', 'PRL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'VBTS', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTP', 'NNP', 'VBW', 'CCP', '[PAD]', 'VBTS', 'CCP', '[PAD]', '[PAD]', 'VBOF', 'PRS', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCA', 'VBTS', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['[PAD]', 'VBTS', 'RBI', 'DTC', 'VBW', 'CCB', 'NNC', 'PRP', 'CCT', 'JJD', 'CCB', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCT', 'VBOF', 'DTC', 'FW', 'NNC', 'PRO', 'PMP']\", \"['[PAD]', 'VBTS_CCP', 'NNC_CCP', 'RBW', 'PMC', 'VBN', 'RBI', 'RBI', 'RBI', 'DTP', 'PRI_CCP', 'NNC', 'CCP', '[PAD]', 'VBTS', 'JJD', 'PRO', 'PMP']\", \"['[PAD]', '[PAD]', 'NNC', 'CCP', 'VBOF', 'CCP', 'DTC', 'PRSP_CCP', 'NNC', 'LM', 'VBTR', 'RBW', 'VBTR', 'PMP']\", \"['[PAD]', '[PAD]', 'VBOF', 'DTC', 'VBTS', 'CCT', 'PRSP', 'NNC_CCP', 'VBTR', 'PRS', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'CCT', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCR', '[PAD]', '[PAD]', 'PRP', 'DTP', 'NNP', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'NNC', 'NNC', 'DTP', 'NNP', 'NNP', 'CCR', 'CCT', 'RBM', 'PMS', 'FW', 'NNC', 'PRS', 'CCT', 'JJD_CCP', 'NNC', 'CCP', 'DTP', 'NNP', 'NNP', 'CCP', 'RBD_CCP', 'VBAF', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'VBTR', 'NNC_CCP', 'PRO', 'PMP']\"] will be ignored\n",
|
376 |
+
" warnings.warn(\n"
|
377 |
]
|
378 |
},
|
379 |
{
|
380 |
+
"ename": "UFuncTypeError",
|
381 |
+
"evalue": "ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None",
|
382 |
+
"output_type": "error",
|
383 |
+
"traceback": [
|
384 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
385 |
+
"\u001b[1;31mUFuncTypeError\u001b[0m Traceback (most recent call last)",
|
386 |
+
"Cell \u001b[1;32mIn[60], line 21\u001b[0m\n\u001b[0;32m 18\u001b[0m y_pred_bin \u001b[38;5;241m=\u001b[39m mlb\u001b[38;5;241m.\u001b[39mtransform(y_pred_str)\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# Print classification report\u001b[39;00m\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mclassification_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_valid_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m)\n",
|
387 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:2342\u001b[0m, in \u001b[0;36mclassification_report\u001b[1;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[0;32m 2340\u001b[0m headers \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprecision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecall\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mf1-score\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msupport\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 2341\u001b[0m \u001b[38;5;66;03m# compute per-class results without averaging\u001b[39;00m\n\u001b[1;32m-> 2342\u001b[0m p, r, f1, s \u001b[38;5;241m=\u001b[39m \u001b[43mprecision_recall_fscore_support\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2343\u001b[0m \u001b[43m \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2344\u001b[0m \u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2345\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2346\u001b[0m \u001b[43m \u001b[49m\u001b[43maverage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 2347\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2348\u001b[0m \u001b[43m \u001b[49m\u001b[43mzero_division\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mzero_division\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2349\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2350\u001b[0m rows \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(target_names, p, r, f1, s)\n\u001b[0;32m 2352\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
|
388 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1577\u001b[0m, in \u001b[0;36mprecision_recall_fscore_support\u001b[1;34m(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)\u001b[0m\n\u001b[0;32m 1575\u001b[0m \u001b[38;5;66;03m# Calculate tp_sum, pred_sum, true_sum ###\u001b[39;00m\n\u001b[0;32m 1576\u001b[0m samplewise \u001b[38;5;241m=\u001b[39m average \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1577\u001b[0m MCM \u001b[38;5;241m=\u001b[39m \u001b[43mmultilabel_confusion_matrix\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1578\u001b[0m \u001b[43m \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1579\u001b[0m \u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1580\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1581\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1582\u001b[0m \u001b[43m \u001b[49m\u001b[43msamplewise\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msamplewise\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1583\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1584\u001b[0m tp_sum \u001b[38;5;241m=\u001b[39m MCM[:, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 1585\u001b[0m pred_sum \u001b[38;5;241m=\u001b[39m tp_sum \u001b[38;5;241m+\u001b[39m MCM[:, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n",
|
389 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:552\u001b[0m, in \u001b[0;36mmultilabel_confusion_matrix\u001b[1;34m(y_true, y_pred, sample_weight, labels, samplewise)\u001b[0m\n\u001b[0;32m 549\u001b[0m \u001b[38;5;66;03m# All labels are index integers for multilabel.\u001b[39;00m\n\u001b[0;32m 550\u001b[0m \u001b[38;5;66;03m# Select labels:\u001b[39;00m\n\u001b[0;32m 551\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray_equal(labels, present_labels):\n\u001b[1;32m--> 552\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(present_labels):\n\u001b[0;32m 553\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 554\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll labels must be in [0, n labels) for \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 555\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel targets. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 556\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGot \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m > \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (np\u001b[38;5;241m.\u001b[39mmax(labels), np\u001b[38;5;241m.\u001b[39mmax(present_labels))\n\u001b[0;32m 557\u001b[0m )\n\u001b[0;32m 558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mmin(labels) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
|
390 |
+
"File \u001b[1;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
|
391 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:2820\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[0;32m 2703\u001b[0m \u001b[38;5;129m@array_function_dispatch\u001b[39m(_amax_dispatcher)\n\u001b[0;32m 2704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mamax\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue, initial\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue,\n\u001b[0;32m 2705\u001b[0m where\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue):\n\u001b[0;32m 2706\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 2707\u001b[0m \u001b[38;5;124;03m Return the maximum of an array or maximum along an axis.\u001b[39;00m\n\u001b[0;32m 2708\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2818\u001b[0m \u001b[38;5;124;03m 5\u001b[39;00m\n\u001b[0;32m 2819\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2820\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaximum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmax\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2821\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
|
392 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:86\u001b[0m, in \u001b[0;36m_wrapreduction\u001b[1;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpasskwargs)\n\u001b[1;32m---> 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
393 |
+
"\u001b[1;31mUFuncTypeError\u001b[0m: ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None"
|
394 |
+
]
|
395 |
}
|
396 |
],
|
397 |
"source": [
|
398 |
+
"from sklearn.metrics import classification_report\n",
|
399 |
+
"\n",
|
400 |
+
"# Get the list of unique labels\n",
|
401 |
"labels = list(crf.classes_)\n",
|
402 |
+
"\n",
|
403 |
+
"# Predict labels for the validation set\n",
|
404 |
"y_pred = crf.predict(X_valid)\n",
|
405 |
+
"\n",
|
406 |
+
"from sklearn.preprocessing import MultiLabelBinarizer\n",
|
407 |
+
"\n",
|
408 |
+
"# Convert labels to strings\n",
|
409 |
+
"y_valid_str = [[str(label)] for label in y_valid]\n",
|
410 |
+
"y_pred_str = [[str(label)] for label in y_pred]\n",
|
411 |
+
"\n",
|
412 |
+
"# Convert labels to binary array format\n",
|
413 |
+
"mlb = MultiLabelBinarizer()\n",
|
414 |
+
"y_valid_bin = mlb.fit_transform(y_valid_str)\n",
|
415 |
+
"y_pred_bin = mlb.transform(y_pred_str)\n",
|
416 |
+
"\n",
|
417 |
+
"# Print classification report\n",
|
418 |
+
"print(classification_report(y_valid_bin, y_pred_bin, labels=labels))\n",
|
419 |
+
"\n"
|
420 |
]
|
421 |
},
|
422 |
{
|
test.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
@@ -103,8 +103,8 @@
|
|
103 |
"name": "stdout",
|
104 |
"output_type": "stream",
|
105 |
"text": [
|
106 |
-
"Sentence is grammatically
|
107 |
-
"Probabilities: [0.
|
108 |
]
|
109 |
}
|
110 |
],
|
@@ -115,7 +115,7 @@
|
|
115 |
"tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
|
116 |
"model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
|
117 |
"\n",
|
118 |
-
"new_sentence = \"
|
119 |
"\n",
|
120 |
"# Tokenize the input text\n",
|
121 |
"inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
|
@@ -141,7 +141,7 @@
|
|
141 |
},
|
142 |
{
|
143 |
"cell_type": "code",
|
144 |
-
"execution_count":
|
145 |
"metadata": {},
|
146 |
"outputs": [
|
147 |
{
|
@@ -299,6 +299,35 @@
|
|
299 |
" print(candidate, \"Probability:\", probability)\n",
|
300 |
"print(predicted_labels)\n"
|
301 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
302 |
}
|
303 |
],
|
304 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
|
|
103 |
"name": "stdout",
|
104 |
"output_type": "stream",
|
105 |
"text": [
|
106 |
+
"Sentence is grammatically correct.\n",
|
107 |
+
"Probabilities: [0.00594444340094924, 0.9940555095672607]\n"
|
108 |
]
|
109 |
}
|
110 |
],
|
|
|
115 |
"tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
|
116 |
"model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
|
117 |
"\n",
|
118 |
+
"new_sentence = \"Pupunta ako kahapon sa siyudad upang bumili ang mga gamit ko\"\n",
|
119 |
"\n",
|
120 |
"# Tokenize the input text\n",
|
121 |
"inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
|
|
|
141 |
},
|
142 |
{
|
143 |
"cell_type": "code",
|
144 |
+
"execution_count": 3,
|
145 |
"metadata": {},
|
146 |
"outputs": [
|
147 |
{
|
|
|
299 |
" print(candidate, \"Probability:\", probability)\n",
|
300 |
"print(predicted_labels)\n"
|
301 |
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"cell_type": "code",
|
305 |
+
"execution_count": 17,
|
306 |
+
"metadata": {},
|
307 |
+
"outputs": [
|
308 |
+
{
|
309 |
+
"name": "stdout",
|
310 |
+
"output_type": "stream",
|
311 |
+
"text": [
|
312 |
+
"Nagising\n",
|
313 |
+
"67\n"
|
314 |
+
]
|
315 |
+
}
|
316 |
+
],
|
317 |
+
"source": [
|
318 |
+
"from fuzzywuzzy import fuzz\n",
|
319 |
+
"\n",
|
320 |
+
"original_word = \"Gigisingin\"\n",
|
321 |
+
"suggestions = [\"Tatakbo\", \"Nagising\", \"Hihiga\", \"Kakain\"]\n",
|
322 |
+
"\n",
|
323 |
+
"threshold = 60\n",
|
324 |
+
"\n",
|
325 |
+
"for suggestion in suggestions:\n",
|
326 |
+
" similarity_score = fuzz.ratio(original_word, suggestion)\n",
|
327 |
+
" if similarity_score >= threshold:\n",
|
328 |
+
" print(suggestion)\n",
|
329 |
+
" print(fuzz.ratio(original_word, suggestion))\n"
|
330 |
+
]
|
331 |
}
|
332 |
],
|
333 |
"metadata": {
|