zklmorales committed on
Commit
9204d3c
1 Parent(s): 1a873d5

Upload 104 files

Browse files
Files changed (3) hide show
  1. Final.ipynb +27 -38
  2. POS Tag Automation/POS Tagger.ipynb +62 -28
  3. test.ipynb +34 -5
Final.ipynb CHANGED
@@ -2,19 +2,28 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 12,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
 
 
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Original sentence: Magigising siya kanina dahil sa ingay\n",
13
- "Sentence: Nagulat siya kanina dahil sa ingay\n",
14
- "Correctness Probability: 0.9978345036506653\n",
15
- "Cosine Similarity: 0.22926439344882965\n",
16
- "Levenshtein Score: 82\n",
17
- "[('Nagulat siya kanina dahil sa ingay', 0.9978345036506653, 0.22926439344882965, 82)]\n"
18
  ]
19
  }
20
  ],
@@ -149,7 +158,8 @@
149
  " \n",
150
  " # Compute cosine similarity between original masked word and predicted word\n",
151
  " similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
152
- " \n",
 
153
  " replaced_words = masked_words.copy()\n",
154
  " replaced_words[i] = candidate_mlm\n",
155
  " corrected_sentence = \" \".join(replaced_words).split() # Split and join to remove extra spaces\n",
@@ -166,42 +176,21 @@
166
  " probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
167
  "\n",
168
  " # Append the corrected sentence along with its probability and cosine similarity\n",
169
- " grammar_correction_candidates.append((corrected_sentence, probability, similarity))\n",
170
  "\n",
171
  "\n",
172
  " # Sort the grammar correction candidates by their probabilities and cosine similarities in descending order\n",
173
- " grammar_correction_candidates.sort(key=lambda x: (x[1], x[2]), reverse=True)\n",
174
  "\n",
 
 
 
 
 
 
175
  "\n",
176
- " threshold = 60 # Adjust this threshold according to your requirement\n",
177
- " # Initialize a list to store the top 5 candidates\n",
178
- " top_candidates = []\n",
179
  "\n",
180
- " # Iterate over each candidate and keep track of the top 5 based on cosine similarity\n",
181
- " for candidate, probability, cosine_similarity in grammar_correction_candidates:\n",
182
- " fuzzy_match_score = fuzz.ratio(new_sentence, candidate)\n",
183
- " \n",
184
- " # Check if the current candidate should be included in the top 5\n",
185
- " if len(top_candidates) < 1:\n",
186
- " top_candidates.append((candidate, probability, cosine_similarity, fuzzy_match_score))\n",
187
- " # Sort the top_candidates based on cosine similarity in descending order\n",
188
- " top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
189
- " else:\n",
190
- " # Compare the cosine similarity of the current candidate with the lowest similarity in the top_candidates\n",
191
- " min_similarity = min(top_candidates, key=lambda x: x[2])[2]\n",
192
- " if cosine_similarity > min_similarity:\n",
193
- " # Replace the candidate with the lowest similarity in the top_candidates list\n",
194
- " min_index = top_candidates.index(min(top_candidates, key=lambda x: x[2]))\n",
195
- " top_candidates[min_index] = (candidate, probability, cosine_similarity, fuzzy_match_score)\n",
196
- " # Sort the top_candidates based on cosine similarity in descending order\n",
197
- " top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
198
- "\n",
199
- " for idx, (candidate, probability, cosine_similarity, fuzzy_match_score) in enumerate(top_candidates):\n",
200
- " print(\"Sentence:\", candidate)\n",
201
- " print(\"Correctness Probability:\", probability)\n",
202
- " print(\"Cosine Similarity:\", cosine_similarity)\n",
203
- " print(\"Levenshtein Score:\", fuzzy_match_score)\n",
204
- " print(top_candidates)\n"
205
  ]
206
  }
207
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n",
14
+ "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fuzzywuzzy\\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n",
15
+ " warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n"
16
+ ]
17
+ },
18
  {
19
  "name": "stdout",
20
  "output_type": "stream",
21
  "text": [
22
+ "Original sentence: Magugulat ako kanina dahil sa pagsabog\n",
23
+ "Sentence: Nagulat ako kanina dahil sa pagsabog\n",
24
+ "Correctness Probability: 0.9976696372032166\n",
25
+ "Cosine Similarity: 0.20241191983222961\n",
26
+ "Levenshtein Score: 75\n"
 
27
  ]
28
  }
29
  ],
 
158
  " \n",
159
  " # Compute cosine similarity between original masked word and predicted word\n",
160
  " similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
161
+ " fuzzy_match_score = fuzz.ratio(token, candidate_mlm)\n",
162
+ "\n",
163
  " replaced_words = masked_words.copy()\n",
164
  " replaced_words[i] = candidate_mlm\n",
165
  " corrected_sentence = \" \".join(replaced_words).split() # Split and join to remove extra spaces\n",
 
176
  " probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
177
  "\n",
178
  " # Append the corrected sentence along with its probability and cosine similarity\n",
179
+ " grammar_correction_candidates.append((corrected_sentence, probability, similarity, fuzzy_match_score))\n",
180
  "\n",
181
  "\n",
182
  " # Sort the grammar correction candidates by their probabilities and cosine similarities in descending order\n",
183
+ " grammar_correction_candidates.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)\n",
184
  "\n",
185
+ "if grammar_correction_candidates:\n",
186
+ " candidate, probability, cosine_similarity, fuzzy_match_score = grammar_correction_candidates[0]\n",
187
+ " print(\"Sentence:\", candidate)\n",
188
+ " print(\"Correctness Probability:\", probability)\n",
189
+ " print(\"Cosine Similarity:\", cosine_similarity)\n",
190
+ " print(\"Levenshtein Score:\", fuzzy_match_score)\n",
191
  "\n",
 
 
 
192
  "\n",
193
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  ]
195
  }
196
  ],
POS Tag Automation/POS Tagger.ipynb CHANGED
@@ -2,16 +2,25 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 22,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import transformers "
10
  ]
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": 23,
15
  "metadata": {},
16
  "outputs": [
17
  {
@@ -33,7 +42,7 @@
33
  },
34
  {
35
  "cell_type": "code",
36
- "execution_count": 24,
37
  "metadata": {},
38
  "outputs": [
39
  {
@@ -110,7 +119,7 @@
110
  },
111
  {
112
  "cell_type": "code",
113
- "execution_count": 25,
114
  "metadata": {},
115
  "outputs": [],
116
  "source": [
@@ -134,7 +143,7 @@
134
  },
135
  {
136
  "cell_type": "code",
137
- "execution_count": 26,
138
  "metadata": {},
139
  "outputs": [],
140
  "source": [
@@ -144,7 +153,7 @@
144
  },
145
  {
146
  "cell_type": "code",
147
- "execution_count": 27,
148
  "metadata": {},
149
  "outputs": [],
150
  "source": [
@@ -201,7 +210,7 @@
201
  },
202
  {
203
  "cell_type": "code",
204
- "execution_count": 28,
205
  "metadata": {},
206
  "outputs": [
207
  {
@@ -231,7 +240,7 @@
231
  " 'prev2pos': 'PRS'}"
232
  ]
233
  },
234
- "execution_count": 28,
235
  "metadata": {},
236
  "output_type": "execute_result"
237
  }
@@ -244,7 +253,7 @@
244
  },
245
  {
246
  "cell_type": "code",
247
- "execution_count": 29,
248
  "metadata": {},
249
  "outputs": [],
250
  "source": [
@@ -260,7 +269,7 @@
260
  },
261
  {
262
  "cell_type": "code",
263
- "execution_count": 30,
264
  "metadata": {},
265
  "outputs": [],
266
  "source": [
@@ -276,7 +285,7 @@
276
  },
277
  {
278
  "cell_type": "code",
279
- "execution_count": 31,
280
  "metadata": {},
281
  "outputs": [
282
  {
@@ -293,7 +302,7 @@
293
  },
294
  {
295
  "cell_type": "code",
296
- "execution_count": 32,
297
  "metadata": {},
298
  "outputs": [
299
  {
@@ -325,7 +334,7 @@
325
  " 'PMP']"
326
  ]
327
  },
328
- "execution_count": 32,
329
  "metadata": {},
330
  "output_type": "execute_result"
331
  }
@@ -336,7 +345,7 @@
336
  },
337
  {
338
  "cell_type": "code",
339
- "execution_count": 33,
340
  "metadata": {},
341
  "outputs": [],
342
  "source": [
@@ -356,33 +365,58 @@
356
  },
357
  {
358
  "cell_type": "code",
359
- "execution_count": 34,
360
  "metadata": {},
361
  "outputs": [
362
  {
363
  "name": "stderr",
364
  "output_type": "stream",
365
  "text": [
366
- "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1609: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.\n",
367
- " _warn_prf(average, \"true nor predicted\", \"F-score is\", len(true_sum))\n"
368
  ]
369
  },
370
  {
371
- "data": {
372
- "text/plain": [
373
- "0.8595460529092004"
374
- ]
375
- },
376
- "execution_count": 34,
377
- "metadata": {},
378
- "output_type": "execute_result"
 
 
 
 
 
 
 
379
  }
380
  ],
381
  "source": [
 
 
 
382
  "labels = list(crf.classes_)\n",
 
 
383
  "y_pred = crf.predict(X_valid)\n",
384
- "metrics.flat_f1_score(y_valid, y_pred,\n",
385
- " average='weighted', labels= labels)"
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  ]
387
  },
388
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
  "source": [
18
  "import transformers "
19
  ]
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 2,
24
  "metadata": {},
25
  "outputs": [
26
  {
 
42
  },
43
  {
44
  "cell_type": "code",
45
+ "execution_count": 48,
46
  "metadata": {},
47
  "outputs": [
48
  {
 
119
  },
120
  {
121
  "cell_type": "code",
122
+ "execution_count": 49,
123
  "metadata": {},
124
  "outputs": [],
125
  "source": [
 
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 5,
147
  "metadata": {},
148
  "outputs": [],
149
  "source": [
 
153
  },
154
  {
155
  "cell_type": "code",
156
+ "execution_count": 6,
157
  "metadata": {},
158
  "outputs": [],
159
  "source": [
 
210
  },
211
  {
212
  "cell_type": "code",
213
+ "execution_count": 7,
214
  "metadata": {},
215
  "outputs": [
216
  {
 
240
  " 'prev2pos': 'PRS'}"
241
  ]
242
  },
243
+ "execution_count": 7,
244
  "metadata": {},
245
  "output_type": "execute_result"
246
  }
 
253
  },
254
  {
255
  "cell_type": "code",
256
+ "execution_count": 8,
257
  "metadata": {},
258
  "outputs": [],
259
  "source": [
 
269
  },
270
  {
271
  "cell_type": "code",
272
+ "execution_count": 9,
273
  "metadata": {},
274
  "outputs": [],
275
  "source": [
 
285
  },
286
  {
287
  "cell_type": "code",
288
+ "execution_count": 10,
289
  "metadata": {},
290
  "outputs": [
291
  {
 
302
  },
303
  {
304
  "cell_type": "code",
305
+ "execution_count": 11,
306
  "metadata": {},
307
  "outputs": [
308
  {
 
334
  " 'PMP']"
335
  ]
336
  },
337
+ "execution_count": 11,
338
  "metadata": {},
339
  "output_type": "execute_result"
340
  }
 
345
  },
346
  {
347
  "cell_type": "code",
348
+ "execution_count": 12,
349
  "metadata": {},
350
  "outputs": [],
351
  "source": [
 
365
  },
366
  {
367
  "cell_type": "code",
368
+ "execution_count": 60,
369
  "metadata": {},
370
  "outputs": [
371
  {
372
  "name": "stderr",
373
  "output_type": "stream",
374
  "text": [
375
+ "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_label.py:895: UserWarning: unknown class(es) [\"['CCB', 'PMC', 'VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'VBTS_CCP', 'NNC', 'CCP', 'JJD_CCP', '[PAD]', '[PAD]', 'VBTR', 'PRP', 'DTP', 'NNP', 'CCT', 'CDB', 'PMP']\", \"['CCR', 'JJD', 'DTC', 'NNC', 'CCB', 'NNC', 'PMC', 'VBAF', 'RBI', 'CCB', 'CDB', '[PAD]', 'JJD_CCP', 'NNC', 'JJD', 'PMS', 'NNC', 'PMP']\", \"['CCR', 'VBTS', 'CCP', 'DTC', 'NNC', 'PMC', 'RBF', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'JJD', 'CCP', 'NNC', 'NNP', 'CCP', 'VBTS', 'CCT', 'JJN_CCP', 'NNC', 'PMP']\", \"['CCT', 'CCT', 'CDB', 'CCP', 'NNC', 'CCP', 'VBTS', 'CCB', 'NNP', 'NNP', 'CCP', 'VBTR', 'DTP', 'NNP', 'NNP', 'LM', 'JJD_CCP', 'CDB', 'NNC', 'RBI', 'DTC', 'JJD_CCP', 'NNC_CCP', 'CCT', '[PAD]', 'NNP', 'PMP']\", \"['CCT', 'CDB', 'PMC', 'CDB', 'JJN_CCP', 'VBTS_CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'CCP', 'VBAF', 'RBI', 'CCB', 'PRSP_CCP', 'NNC', 'CCT', 'VBTR', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCB', '[PAD]', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'CCP', 'VBTR', 'CCT', 'PRI_CCP', 'NNC', 'PMC', 'VBTR', 'DTP', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCT', 'CCT', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'NNC', 'PMC', 'VBTS', 'DTP', 'NNP', 'CCP', 'VBAF', 'RBI', 'PRP', 'CCB', 'CDB', 'PMC', 'CDB', 'RBL', 'CCT', 'NNP', 'PMC', 'CCT', 'RBF', 'RBM', 'PRS', 'VBTS', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'PRO', 'CCB', 'JJD_CCP', 'CCR', 'PMS', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTP', 'NNP', 'CCR', 'CCT', 'NNP', 'FW', 'FW', 'PRO', 'CCP', 'VBH', '[PAD]', 'PMS', 'VBW', 'CCT', 'PRI', 'PRO_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNP', 'NNP', 'JJN_CCP', 'VBTS', 'CCB', 'NNC', 'CCB', 'FW', 'PMS', 'FW', 'RBW', 'PRI_CCP', 'FW', 'FW', 'DTC', 'NNC', 'CCT', 'NNP', 'NNP', 
'RBW_CCP', 'NNP', 'CDB', 'PMP']\", \"['CCT', 'PMC', 'VBTS', 'CCB', 'NNC', 'CCP', 'DTC', 'FW', 'CDB', 'LM', 'VBTS', 'CCB', 'NNC', 'CCT', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['CCT', 'PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'PMS', 'NNP', 'NNP', 'PMS', 'PMC', 'VBTS', 'PRO', 'CCP', 'RBF', 'PRS', 'VBAF', 'CCB', 'NNP', 'FW', '[PAD]', 'NNC', 'RBI', 'CCT', 'JJD_CCP', 'NNC', 'PMP']\", \"['CCT', 'VBTR', 'CCP', 'NNP', 'NNP', 'LM', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBW', 'CCB', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['CDB', 'RBW', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'RBF', 'JJD', 'CCP', 'DTC', 'CCR', 'PMS', 'NNC', 'CCT', 'NNP', 'LM', 'VBTF', 'CCB', 'JJD_CCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCB', 'NNC', 'PRS', 'LM', 'VBTR_CCP', 'JJD', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'LM', '[PAD]', 'VBTR']\", \"['DTC', 'DTCP', 'NNC', 'LM', 'VBTR', 'RBW', 'JJD', 'CCT', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'PMS', 'NNC', 'CCB', 'NNP', 'LM', 'VBTR', 'RBI', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC_CCP', 'PMS', 'NNC', 'CCT', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'CCB', 'PMS', 'NNP', 'LM', 'CCT', 'RBI', 'CCT', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'FW', 'FW', 'NNP', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTC', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'VBAF', 'DTC', 'NNP', 'CCT', 'FW', 'NNC', 'PMP']\", \"['DTC', 'FW', 'NNP', 'LM', 'CCT', 'CCT', 'JJN', 'CCP', 'VBTS_CCP', 'NNC', 'PMC', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCA', 'PRI_CCP', 'NNC', 'PMP']\", \"['DTC', 'JJD_CCP', 'NNC', 'LM', '[PAD]', 'VBTR', 'PMP']\", \"['DTC', 'NNC', 'LM', 'VBTS', 'CCB', '[PAD]', 'PMS', 'NNC', 'CCB', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['DTC', 'NNC', 'NNC_CCP', 'FW', '[PAD]', 'FW', 'NNP', 'FW', 'PMS', 'NNP', 'NNP', 'CCP', 'NNC', 'PRP']\", \"['DTC', 'NNC_CCP', 'VBTS', 'LM', '[PAD]', 'NNC', 'PMP']\", \"['DTC', 'PRI', 'CCB', 'NNC', 'LM', 'VBW', 'CCB', 'NNC', 'RBL', 
'CCT', 'NNC', 'PMP']\", \"['DTC', 'PRSP_CCP', 'JJCC', 'JJD_CCP', '[PAD]', 'JJD_CCP', 'NNC', 'LM', 'VBTR', 'CCT', 'PRSP', 'CCP', 'VBW', 'CCB', 'JJD', 'CCP', 'NNC', 'PMP']\", \"['DTC', 'VBTS_CCP', 'NNC', 'LM', 'JJD', 'RBW', '[PAD]', 'VBTS', 'DTP', 'NNP_CCP', 'NNP', 'CCT', 'PRI', 'CCB', 'NNC', 'RBW', 'VBOF', 'CCT', 'NNPA', 'PMS', '[PAD]', 'PMS', 'NNPA', 'PMP']\", \"['JJCC', 'CCT', 'CDB', 'NNC', 'DTC', 'VBOF', 'CCT', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'CCT', 'PRI', 'RBI_CCP', 'NNC', 'CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'CCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCA', 'RBD_CCP', '[PAD]', 'JJD_CCP', 'CDB', 'PMS', 'CDB', 'CCB', 'RBW', 'PMP']\", \"['JJD', 'CCP', 'VBOF', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'NNC', 'CCB', 'NNC', '[PAD]', '[PAD]', 'VBOF', 'PMC', 'PRI', 'NNP', 'PMP']\", \"['JJD', 'JJN_CCP', 'NNC', 'RBW', 'VBTR', 'DTC', 'NNC', 'CCT', 'NNC', 'CCT', 'RBF', 'RBM', 'PRO', 'VBOF', 'CCB', 'NNC', 'CCA', 'CCT', 'NNC', 'LM', 'VBTR', 'RBI', 'DTC', 'NNC', 'PMS', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'LM', 'VBTR', 'CCB', 'FW', 'NNP', 'CCP', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBOF', 'DTC', 'FW', 'FW', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJD', 'LM', 'VBTS', 'DTP', 'NNP', 'CCP', 'RBF', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'DTC', 'RBW', 'PRS_CCP', 'NNC', 'PMC', 'NNC', 'LM', 'VBS', 'VBAF', 'CCB', 'NNC_CCP', 'VBTR_VBAF', 'CCB', 'PRSP_CCP', 'DTCP', 'VBTS', 'PMP']\", \"['JJD', 'PMS', 'FW', 'PMS', 'RBM', 'DTC', 'NNC', 'PRO', 'PMP']\", \"['JJD', 'PRL', 'PMC', 'JJD', 'CCP', 'VBAF', 'CCT', 'NNC', 'DTP', 'NNP', 'NNP', 'DTC', 'NNC', 'PRO', 'CCA', 'VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCB', 'RBF', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD', 'RBI_CCP', 'VBOF', 'CCB', 'NNC', 'CCP', 'NNC', 'CCB', 'DTCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'CCB', 'RBW', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 'DTCP', 
'NNC', 'CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTF', 'PRP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'VBW', 'DTC', 'NNC', 'CCR', 'VBN', 'RBI', 'PRO_CCP', 'VBAF', 'CCT', 'NNC', 'PMP']\", \"['JJD_CCP', 'VBW', 'CCB', 'NNC_CCP', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'NNC', 'RBW', 'VBW', 'DTC', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'VBTF', 'CCB', 'NNC', 'PMS', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCA', 'CCT', 'NNC', 'NNP', 'CCT', 'VBW', 'CCB', 'NNC', 'PMP']\", \"['NNC', 'RBW', 'VBW', 'DTC', 'JJN_CCP', 'NNC', 'FW', 'FW', 'CCP', 'RBL', 'NNC', 'CCB', 'JJD_CCP', 'NNC', 'CCT', 'NNPA', 'NNC', 'CCT', 'NNP', 'PMP']\", \"['PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'DTC', 'NNP', 'CCA', 'VBTS', 'CCB', 'RBF', 'RBI', 'VBTS_CCP', 'NNC', 'PMP']\", \"['PRI_CCP', 'NNC', 'DTC', '[PAD]', 'VBTR', 'CCP', 'VBW', 'CCB', 'NNP', 'NNP', 'CCT', 'CCT', 'JJD_CCP', 'VBN_CCP', 'NNC', 'PMP']\", \"['PRO', 'DTC', 'JJD', 'VBTS', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['PRO', 'RBI', 'NNP', 'CDB', 'LM', '[PAD]', 'VBW', 'CCB', 'NNP', 'CDB', 'PMP', 'CDB', 'CCP', 'NNC', 'DTC', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['PRS', 'LM', 'VBTS', 'CCR', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'RBD', 'NNC_CCP', 'DTCP', 'NNC', 'PMP']\", \"['PRS_CCP', 'RBM', 'DTC', 'JJCS_JJD_CCP', 'NNC', 'CCP', 'PRSP_CCP', 'VBOF', 'CCT', 'NNC', 'CCP', 'VBAF', 'CCT', 'CDB', '[PAD]', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['RBD_CCP', 'VBOF', 'CCB', 'NNP', 'NNP', 'DTC', 'NNC', 'CCT', 'NNP', 'FW', 'PMS', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'CDB', 'PMC', 'RBW', 'DTC', '[PAD]', 'PMS', 'NNC', 'DTP', 'NNP', 'NNP', 'PMP']\", \"['RBF', 'VBTS', 'DTC', 'PRSP_CCP', 'VBTR', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['RBF', 'VBTS', 'DTP', 'NNP', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 
'DTCP', 'NNC', 'NNC', 'CCT', 'PRSP', 'CCP', 'VBTR', 'PRS_CCP', 'VBTR', 'RBM', 'DTC', 'FW', 'FW', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBR', 'CCT', 'NNPA', 'PMC', 'VBH', 'PRQ_CCP', 'DTCP', 'FW', 'NNC', 'DTC', 'VBS', 'CCP', 'VBW', 'CCT', 'VBW', 'CCB', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'CCT', 'PRSP_CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'PMP']\", \"['RBR', 'DTP', 'NNP', 'PMC', 'VBTS', 'CCP', 'RBI', 'CCB', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'DTC', 'NNP', 'FW', 'NNC', 'CCB', 'DTCP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'PMS', 'RBR', 'CCP', 'RBI', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'PRS_CCP', 'VBAF', 'CCB', 'NNC', 'CCR', 'PRS', 'LM', '[PAD]', 'VBTR', 'CCP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'RBI', 'CCT', 'NNC', 'DTP', 'NNP', 'CCP', 'NNC', 'LM', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'VBAF', 'VBH', 'VBTS', 'PRP_CCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['RBW_CCP', 'PRS', 'RBI', 'CCT', 'NNC', 'CCT', 'PRS', 'LM', 'VBTR', 'NNC', 'PMP']\", \"['RBW_CCP', 'RBW', 'VBOF', 'CCB', 'NNP', 'CCP', 'CCR', 'RBF', 'JJD', 'CCP', 'VBW', 'CCB', 'NNC', 'PMC', 'VBS', 'VBOF', 'PRO', 'CCP', 'RBD_CCP', 'VBW', 'DTC', 'RBL', 'NNC', 'PMP']\", \"['VBAF', 'CCP', 'RBI', 'PRS', 'VBOF', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBAF', 'CCT', '[PAD]', 'VBTR', 'CCP', 'RBF', 'CCP', 'VBW', 'DTC', 'NNC', 'PMP']\", \"['VBAF', 'PRS', 'CCT', 'NNC', 'CCT', 'VBAF', 'CCB', 'JJN_CCP', 'NNC', 'PMP']\", \"['VBOF', 'CCB', 'NNC', 'CCB', 'NNP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCT', 'JJN', 'PMS', 'CDB', 'CCB', 'NNPA', 'CCT', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBOF', 'CCB', 'PRI', 'DTC', 'PRSP_CCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBOF', 'PRO', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'NNC', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'NNC', 'RBI', 'PRO', 'CCA', 'VBS', 'PRP_CCP', 'VBOF', 'PRO', 'PMP']\", \"['VBOF', 'PRS', 'DTC', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC']\", \"['VBS', 'RBI', 'RBI', 'CCB', 'NNC', 
'RBW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCA', 'NNP', 'NNP', 'CCP', 'VBTS', 'RBW_CCP', 'NNC', 'PMP']\", \"['VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'NNP', 'CCP', 'VBOF', 'DTC', 'PRSP_CCP', 'DTCP', 'NNC', 'CCP', 'VBS', 'VBAF', 'CCT', 'DTCP', 'NNC', 'CCA', 'NNC', 'CCB', 'NNP', 'CCR', 'VBTS', 'PRL', 'PMP']\", \"['VBTR', 'RBI', 'RBI', 'CCT', 'NNP', 'FW', 'FW', 'DTC', 'PRI_CCP', 'NNP', 'NNP', 'VBW', 'DTC', 'PRQ_CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'NNC', 'DTC', 'DTCP', 'FW', 'FW', 'PMP']\", \"['VBTR_CCP', 'RBL', 'CDB', 'PMC', 'CDB', 'NNC', 'DTC', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'CDB', 'PMS', 'JJD', 'CCP', 'NNC', 'CCP', 'VBTR', 'CCP', 'NNC', 'JJD', 'CCT', 'NNC', 'PMP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC_CCP', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'CCP', 'NNC', 'CCT', 'NNP', 'FW', 'DTC', 'NNC', 'CCT', 'VBW', 'CCA', 'VBW', 'CCB', 'NNC', 'DTC', 'NNC', 'CCR', 'VBTS', 'DTC', 'NNC', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'DTC', 'FW', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'DTCP', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'CCT', 'NNC', 'CCB', 'JJD', 'PMS', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'DTC', 'JJN_CCP', 'NNC', 'LM', 'RBL', 'NNPA', 'NNP', 'NNP', 'CCT', 'NNP', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'CCB', 'DTCP', 'NNC', 'CCP', 'NNC', 'RBW', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'DTC', 'NNC', 'CCB', 'DTCP', '[PAD]', 'NNC', 'DTC', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'PRS', 'CCB', 'NNC', 'CCP', 'NNC', 'DTC', 'VBTS_CCP', 'NNC', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'VBTR', 'DTC', 'PRSP_CCP', 'NNC_CCP', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBH', 'JJD_CCP', 'CDB', 'PMS', 'FW', 'CCT', 'NNC', 'CCB', 'PRSP_CCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBOF', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 'CCB', 'PRSP_CCP', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 
'CCP', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'JJD_CCP', '[PAD]', 'VBTS', 'CCT', 'NNP', 'NNP', 'CCT', '[PAD]', 'VBW', 'CCT', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'CCP', 'VBS', 'VBTF', 'CCP', 'RBI', 'CCB', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'NNPA', 'PMS', 'DTC', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'FW', 'CCP', 'VBW', 'PRS', 'CCA', 'PRSP_CCP', 'NNC', 'LM', 'RBF', 'VBW', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'NNP', 'CCP', 'NNC', 'CCP', 'RBI', 'DTC', 'FW', 'NNC', 'CCP', 'VBTF', 'RBI_CCP', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'PMP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'DTC', 'NNC', 'CCP', 'DTP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'FW', 'PMC', 'CCP', 'VBTS', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', '[PAD]', 'NNP', 'NNC', 'CCP', 'VBAF', 'DTC', 'VBTS_CCP', 'NNC', 'CCB', 'CDB', 'NNC', 'PMP']\", \"['VBTS', 'PRS', 'CCP', 'RBF', 'PRS', '[PAD]', '[PAD]', 'VBW', 'CCT', 'PRSP_CCP', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'CCB', 'JJD_CCP', 'NNC', 'DTC', 'NNC', 'CCA', 'JJCC', 'JJD', 'CCR', 'VBOF', 'CCP', 'RBI', 'NNC', 'PRO_CCP', 'FW', 'PMP']\", \"['VBTS', 'RBI', 'CCT', 'NNC', 'DTC', 'NNC', 'CCT', 'RBF', 'CCP', 'VBTS', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'PRI', 'PMS', 'NNCA', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTC', 'PRI_CCP', 'NNP', 'PMC', 'RBL', 'JJN', 'CCP', 'NNC', 'PMC', 'CCP', 'VBAF', 'CCT', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTS', 'PRS', 'DTC', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'PMP', 'PMS', 'CCP', 'PMS', 'NNPA', 'PRS', 'CCB', 'DTC', 'NNC', 'CCP', '[PAD]', 'VBTS', 'CCB', 
'PRSP_CCP', 'NNC', 'PMP', 'PMS']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMC', 'CCP', 'VBTS', 'PMS', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'PMC', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'VBTR', 'DTC', 'DTCP', 'PRO', 'CCR', 'PMS', 'PRI', 'PMS', 'RBI_CCP', 'NNC', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'FW', 'DTC', 'DTCP', 'NNC', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCA', 'PRI', 'CCT', 'DTCP', 'PRO', 'LM', 'JJD', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCT', 'NNC', 'CCA', 'PRL', '[PAD]', '[PAD]', 'VBTS', 'CCR', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'CCB', 'NNP', 'DTC', 'NNC', 'CCB', 'VBAF', 'CCT', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBW', 'CCP', 'RBL', 'JJN', 'CCP', 'NNC', 'CCP', 'DTC', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['VBW', 'DTC', 'CDB', 'NNC', 'LM', 'VBTR', 'CCP', 'CCT', 'NNC', 'DTC', 'JJD', 'PMS', 'FW', 'NNP', 'FW', 'PMP']\", \"['[PAD]', 'VBAF', 'CCB', 'VBW', 'CCT', 'NNC', 'CCP', 'VBTS', 'RBL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCB', 'NNC', 'JJD', 'CCB', 'NNC', 'CCB', 'NNC', 'DTC', 'DTCP', 'NNC', 'RBL', 'CCT', 'JJD', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNC', 'DTC', 'VBTS', 'CCA', 'CDB', 'PMC', 'CDB', 'RBI', 'DTC', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNP', 'RBL', 'CCT', 'JJN', 'CCP', 'NNC', 'DTC', '[PAD]', 'VBTS', 'CCB', 'NNC', 'CCA', 'NNC', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCT', 'CCT', '[PAD]', 'JJD_CCP', 'NNC', 'PMC', 'CCB', 'DTC', 'JJN_CCP', '[PAD]', 'FW', 'FW', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'DTCP', 'NNC_CCP', '[PAD]', 'VBTR', 'PRL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'VBTS', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTP', 'NNP', 'VBW', 'CCP', '[PAD]', 'VBTS', 'CCP', '[PAD]', '[PAD]', 'VBOF', 'PRS', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCA', 'VBTS', 'CCT', 'NNP', 'NNP', 
'PMP']\", \"['[PAD]', 'VBTS', 'RBI', 'DTC', 'VBW', 'CCB', 'NNC', 'PRP', 'CCT', 'JJD', 'CCB', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCT', 'VBOF', 'DTC', 'FW', 'NNC', 'PRO', 'PMP']\", \"['[PAD]', 'VBTS_CCP', 'NNC_CCP', 'RBW', 'PMC', 'VBN', 'RBI', 'RBI', 'RBI', 'DTP', 'PRI_CCP', 'NNC', 'CCP', '[PAD]', 'VBTS', 'JJD', 'PRO', 'PMP']\", \"['[PAD]', '[PAD]', 'NNC', 'CCP', 'VBOF', 'CCP', 'DTC', 'PRSP_CCP', 'NNC', 'LM', 'VBTR', 'RBW', 'VBTR', 'PMP']\", \"['[PAD]', '[PAD]', 'VBOF', 'DTC', 'VBTS', 'CCT', 'PRSP', 'NNC_CCP', 'VBTR', 'PRS', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'CCT', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCR', '[PAD]', '[PAD]', 'PRP', 'DTP', 'NNP', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'NNC', 'NNC', 'DTP', 'NNP', 'NNP', 'CCR', 'CCT', 'RBM', 'PMS', 'FW', 'NNC', 'PRS', 'CCT', 'JJD_CCP', 'NNC', 'CCP', 'DTP', 'NNP', 'NNP', 'CCP', 'RBD_CCP', 'VBAF', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'VBTR', 'NNC_CCP', 'PRO', 'PMP']\"] will be ignored\n",
376
+ " warnings.warn(\n"
377
  ]
378
  },
379
  {
380
+ "ename": "UFuncTypeError",
381
+ "evalue": "ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None",
382
+ "output_type": "error",
383
+ "traceback": [
384
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
385
+ "\u001b[1;31mUFuncTypeError\u001b[0m Traceback (most recent call last)",
386
+ "Cell \u001b[1;32mIn[60], line 21\u001b[0m\n\u001b[0;32m 18\u001b[0m y_pred_bin \u001b[38;5;241m=\u001b[39m mlb\u001b[38;5;241m.\u001b[39mtransform(y_pred_str)\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# Print classification report\u001b[39;00m\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mclassification_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_valid_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m)\n",
387
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:2342\u001b[0m, in \u001b[0;36mclassification_report\u001b[1;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[0;32m 2340\u001b[0m headers \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprecision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecall\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mf1-score\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msupport\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 2341\u001b[0m \u001b[38;5;66;03m# compute per-class results without averaging\u001b[39;00m\n\u001b[1;32m-> 2342\u001b[0m p, r, f1, s \u001b[38;5;241m=\u001b[39m \u001b[43mprecision_recall_fscore_support\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2343\u001b[0m \u001b[43m \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2344\u001b[0m \u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2345\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2346\u001b[0m \u001b[43m \u001b[49m\u001b[43maverage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 2347\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2348\u001b[0m \u001b[43m \u001b[49m\u001b[43mzero_division\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mzero_division\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2349\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2350\u001b[0m rows \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mzip\u001b[39m(target_names, p, r, f1, s)\n\u001b[0;32m 2352\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
388
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1577\u001b[0m, in \u001b[0;36mprecision_recall_fscore_support\u001b[1;34m(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)\u001b[0m\n\u001b[0;32m 1575\u001b[0m \u001b[38;5;66;03m# Calculate tp_sum, pred_sum, true_sum ###\u001b[39;00m\n\u001b[0;32m 1576\u001b[0m samplewise \u001b[38;5;241m=\u001b[39m average \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1577\u001b[0m MCM \u001b[38;5;241m=\u001b[39m \u001b[43mmultilabel_confusion_matrix\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1578\u001b[0m \u001b[43m \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1579\u001b[0m \u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1580\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1581\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1582\u001b[0m \u001b[43m \u001b[49m\u001b[43msamplewise\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msamplewise\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1583\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1584\u001b[0m tp_sum \u001b[38;5;241m=\u001b[39m MCM[:, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 1585\u001b[0m pred_sum \u001b[38;5;241m=\u001b[39m tp_sum \u001b[38;5;241m+\u001b[39m MCM[:, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n",
389
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:552\u001b[0m, in \u001b[0;36mmultilabel_confusion_matrix\u001b[1;34m(y_true, y_pred, sample_weight, labels, samplewise)\u001b[0m\n\u001b[0;32m 549\u001b[0m \u001b[38;5;66;03m# All labels are index integers for multilabel.\u001b[39;00m\n\u001b[0;32m 550\u001b[0m \u001b[38;5;66;03m# Select labels:\u001b[39;00m\n\u001b[0;32m 551\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray_equal(labels, present_labels):\n\u001b[1;32m--> 552\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(present_labels):\n\u001b[0;32m 553\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 554\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll labels must be in [0, n labels) for \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 555\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel targets. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 556\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGot \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m > \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (np\u001b[38;5;241m.\u001b[39mmax(labels), np\u001b[38;5;241m.\u001b[39mmax(present_labels))\n\u001b[0;32m 557\u001b[0m )\n\u001b[0;32m 558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mmin(labels) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
390
+ "File \u001b[1;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
391
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:2820\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[0;32m 2703\u001b[0m \u001b[38;5;129m@array_function_dispatch\u001b[39m(_amax_dispatcher)\n\u001b[0;32m 2704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mamax\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue, initial\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue,\n\u001b[0;32m 2705\u001b[0m where\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue):\n\u001b[0;32m 2706\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 2707\u001b[0m \u001b[38;5;124;03m Return the maximum of an array or maximum along an axis.\u001b[39;00m\n\u001b[0;32m 2708\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2818\u001b[0m \u001b[38;5;124;03m 5\u001b[39;00m\n\u001b[0;32m 2819\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2820\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaximum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmax\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2821\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
392
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:86\u001b[0m, in \u001b[0;36m_wrapreduction\u001b[1;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpasskwargs)\n\u001b[1;32m---> 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n",
393
+ "\u001b[1;31mUFuncTypeError\u001b[0m: ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None"
394
+ ]
395
  }
396
  ],
397
  "source": [
398
+ "from sklearn.metrics import classification_report\n",
399
+ "\n",
400
+ "# Get the list of unique labels\n",
401
  "labels = list(crf.classes_)\n",
402
+ "\n",
403
+ "# Predict labels for the validation set\n",
404
  "y_pred = crf.predict(X_valid)\n",
405
+ "\n",
406
+ "from sklearn.preprocessing import MultiLabelBinarizer\n",
407
+ "\n",
408
+ "# Convert labels to strings\n",
409
+ "y_valid_str = [[str(label)] for label in y_valid]\n",
410
+ "y_pred_str = [[str(label)] for label in y_pred]\n",
411
+ "\n",
412
+ "# Convert labels to binary array format\n",
413
+ "mlb = MultiLabelBinarizer()\n",
414
+ "y_valid_bin = mlb.fit_transform(y_valid_str)\n",
415
+ "y_pred_bin = mlb.transform(y_pred_str)\n",
416
+ "\n",
417
+ "# Print classification report\n",
418
+ "print(classification_report(y_valid_bin, y_pred_bin, labels=labels))\n",
419
+ "\n"
420
  ]
421
  },
422
  {
test.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [
8
  {
@@ -103,8 +103,8 @@
103
  "name": "stdout",
104
  "output_type": "stream",
105
  "text": [
106
- "Sentence is grammatically wrong.\n",
107
- "Probabilities: [0.9901305437088013, 0.009869435802102089]\n"
108
  ]
109
  }
110
  ],
@@ -115,7 +115,7 @@
115
  "tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
116
  "model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
117
  "\n",
118
- "new_sentence = \"Siya ay magigising kanina.\"\n",
119
  "\n",
120
  "# Tokenize the input text\n",
121
  "inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
@@ -141,7 +141,7 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": null,
145
  "metadata": {},
146
  "outputs": [
147
  {
@@ -299,6 +299,35 @@
299
  " print(candidate, \"Probability:\", probability)\n",
300
  "print(predicted_labels)\n"
301
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  }
303
  ],
304
  "metadata": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
 
103
  "name": "stdout",
104
  "output_type": "stream",
105
  "text": [
106
+ "Sentence is grammatically correct.\n",
107
+ "Probabilities: [0.00594444340094924, 0.9940555095672607]\n"
108
  ]
109
  }
110
  ],
 
115
  "tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
116
  "model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
117
  "\n",
118
+ "new_sentence = \"Pupunta ako kahapon sa siyudad upang bumili ang mga gamit ko\"\n",
119
  "\n",
120
  "# Tokenize the input text\n",
121
  "inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
 
141
  },
142
  {
143
  "cell_type": "code",
144
+ "execution_count": 3,
145
  "metadata": {},
146
  "outputs": [
147
  {
 
299
  " print(candidate, \"Probability:\", probability)\n",
300
  "print(predicted_labels)\n"
301
  ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": 17,
306
+ "metadata": {},
307
+ "outputs": [
308
+ {
309
+ "name": "stdout",
310
+ "output_type": "stream",
311
+ "text": [
312
+ "Nagising\n",
313
+ "67\n"
314
+ ]
315
+ }
316
+ ],
317
+ "source": [
318
+ "from fuzzywuzzy import fuzz\n",
319
+ "\n",
320
+ "original_word = \"Gigisingin\"\n",
321
+ "suggestions = [\"Tatakbo\", \"Nagising\", \"Hihiga\", \"Kakain\"]\n",
322
+ "\n",
323
+ "threshold = 60\n",
324
+ "\n",
325
+ "for suggestion in suggestions:\n",
326
+ " similarity_score = fuzz.ratio(original_word, suggestion)\n",
327
+ " if similarity_score >= threshold:\n",
328
+ " print(suggestion)\n",
329
+ " print(fuzz.ratio(original_word, suggestion))\n"
330
+ ]
331
  }
332
  ],
333
  "metadata": {