Upload 104 files

Browse files

Files changed (3) hide show

Final.ipynb +27 -38
POS Tag Automation/POS Tagger.ipynb +62 -28
test.ipynb +34 -5

Final.ipynb CHANGED Viewed

@@ -2,19 +2,28 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Original sentence: Magigising siya kanina dahil sa ingay\n",
-      "Sentence: Nagulat siya kanina dahil sa ingay\n",
-      "Correctness Probability: 0.9978345036506653\n",
-      "Cosine Similarity: 0.22926439344882965\n",
-      "Levenshtein Score: 82\n",
-      "[('Nagulat siya kanina dahil sa ingay', 0.9978345036506653, 0.22926439344882965, 82)]\n"
      ]
     }
    ],
@@ -149,7 +158,8 @@
     "                \n",
     "                # Compute cosine similarity between original masked word and predicted word\n",
     "                similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
-    "                \n",
     "                replaced_words = masked_words.copy()\n",
     "                replaced_words[i] = candidate_mlm\n",
     "                corrected_sentence = \" \".join(replaced_words).split()  # Split and join to remove extra spaces\n",
@@ -166,42 +176,21 @@
     "                probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
     "\n",
     "                # Append the corrected sentence along with its probability and cosine similarity\n",
-    "                grammar_correction_candidates.append((corrected_sentence, probability, similarity))\n",
     "\n",
     "\n",
     "    # Sort the grammar correction candidates by their probabilities and cosine similarities in descending order\n",
-    "    grammar_correction_candidates.sort(key=lambda x: (x[1], x[2]), reverse=True)\n",
     "\n",
     "\n",
-    "    threshold = 60  # Adjust this threshold according to your requirement\n",
-    "    # Initialize a list to store the top 5 candidates\n",
-    "    top_candidates = []\n",
     "\n",
-    "    # Iterate over each candidate and keep track of the top 5 based on cosine similarity\n",
-    "    for candidate, probability, cosine_similarity in grammar_correction_candidates:\n",
-    "        fuzzy_match_score = fuzz.ratio(new_sentence, candidate)\n",
-    "        \n",
-    "        # Check if the current candidate should be included in the top 5\n",
-    "        if len(top_candidates) < 1:\n",
-    "            top_candidates.append((candidate, probability, cosine_similarity, fuzzy_match_score))\n",
-    "            # Sort the top_candidates based on cosine similarity in descending order\n",
-    "            top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
-    "        else:\n",
-    "            # Compare the cosine similarity of the current candidate with the lowest similarity in the top_candidates\n",
-    "            min_similarity = min(top_candidates, key=lambda x: x[2])[2]\n",
-    "            if cosine_similarity > min_similarity:\n",
-    "                # Replace the candidate with the lowest similarity in the top_candidates list\n",
-    "                min_index = top_candidates.index(min(top_candidates, key=lambda x: x[2]))\n",
-    "                top_candidates[min_index] = (candidate, probability, cosine_similarity, fuzzy_match_score)\n",
-    "                # Sort the top_candidates based on cosine similarity in descending order\n",
-    "                top_candidates.sort(key=lambda x: x[2], reverse=True)\n",
-    "\n",
-    "    for idx, (candidate, probability, cosine_similarity, fuzzy_match_score) in enumerate(top_candidates):\n",
-    "        print(\"Sentence:\", candidate)\n",
-    "        print(\"Correctness Probability:\", probability)\n",
-    "        print(\"Cosine Similarity:\", cosine_similarity)\n",
-    "        print(\"Levenshtein Score:\", fuzzy_match_score)\n",
-    "        print(top_candidates)\n"
    ]
   }
  ],

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fuzzywuzzy\\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n",
+      "  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Original sentence: Magugulat ako kanina dahil sa pagsabog\n",
+      "Sentence: Nagulat ako kanina dahil sa pagsabog\n",
+      "Correctness Probability: 0.9976696372032166\n",
+      "Cosine Similarity: 0.20241191983222961\n",
+      "Levenshtein Score: 75\n"
      ]
     }
    ],
     "                \n",
     "                # Compute cosine similarity between original masked word and predicted word\n",
     "                similarity = torch.nn.functional.cosine_similarity(original_embedding.unsqueeze(0), candidate_embedding.unsqueeze(0)).item()\n",
+    "                fuzzy_match_score = fuzz.ratio(token, candidate_mlm)\n",
+    "\n",
     "                replaced_words = masked_words.copy()\n",
     "                replaced_words[i] = candidate_mlm\n",
     "                corrected_sentence = \" \".join(replaced_words).split()  # Split and join to remove extra spaces\n",
     "                probability = torch.softmax(outputs_cls.logits, dim=1).squeeze().tolist()[1]\n",
     "\n",
     "                # Append the corrected sentence along with its probability, cosine similarity, and fuzzy match score\n",
+    "                grammar_correction_candidates.append((corrected_sentence, probability, similarity, fuzzy_match_score))\n",
     "\n",
     "\n",
     "    # Sort the grammar correction candidates by fuzzy match score, then probability, then cosine similarity, in descending order\n",
+    "    grammar_correction_candidates.sort(key=lambda x: (x[3], x[1], x[2]), reverse=True)\n",
     "\n",
+    "if grammar_correction_candidates:\n",
+    "    candidate, probability, cosine_similarity, fuzzy_match_score = grammar_correction_candidates[0]\n",
+    "    print(\"Sentence:\", candidate)\n",
+    "    print(\"Correctness Probability:\", probability)\n",
+    "    print(\"Cosine Similarity:\", cosine_similarity)\n",
+    "    print(\"Levenshtein Score:\", fuzzy_match_score)\n",
     "\n",
     "\n",
+    "\n"
    ]
   }
  ],

POS Tag Automation/POS Tagger.ipynb CHANGED Viewed

@@ -2,16 +2,25 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 22,
    "metadata": {},
-   "outputs": [],
    "source": [
     "import transformers "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -33,7 +42,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -110,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -134,7 +143,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -144,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -201,7 +210,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -231,7 +240,7 @@
        " 'prev2pos': 'PRS'}"
       ]
      },
-     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -244,7 +253,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -260,7 +269,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -276,7 +285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -293,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -325,7 +334,7 @@
        " 'PMP']"
       ]
      },
-     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -336,7 +345,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -356,33 +365,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1609: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.\n",
-      "  _warn_prf(average, \"true nor predicted\", \"F-score is\", len(true_sum))\n"
      ]
     },
     {
-     "data": {
-      "text/plain": [
-       "0.8595460529092004"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
     "labels = list(crf.classes_)\n",
     "y_pred = crf.predict(X_valid)\n",
-    "metrics.flat_f1_score(y_valid, y_pred,\n",
-    "                      average='weighted', labels= labels)"
    ]
   },
   {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "import transformers "
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 48,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 49,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        " 'prev2pos': 'PRS'}"
       ]
      },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
        " 'PMP']"
       ]
      },
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 60,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "c:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\preprocessing\\_label.py:895: UserWarning: unknown class(es) [\"['CCB', 'PMC', 'VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'VBTS_CCP', 'NNC', 'CCP', 'JJD_CCP', '[PAD]', '[PAD]', 'VBTR', 'PRP', 'DTP', 'NNP', 'CCT', 'CDB', 'PMP']\", \"['CCR', 'JJD', 'DTC', 'NNC', 'CCB', 'NNC', 'PMC', 'VBAF', 'RBI', 'CCB', 'CDB', '[PAD]', 'JJD_CCP', 'NNC', 'JJD', 'PMS', 'NNC', 'PMP']\", \"['CCR', 'VBTS', 'CCP', 'DTC', 'NNC', 'PMC', 'RBF', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'JJD', 'CCP', 'NNC', 'NNP', 'CCP', 'VBTS', 'CCT', 'JJN_CCP', 'NNC', 'PMP']\", \"['CCT', 'CCT', 'CDB', 'CCP', 'NNC', 'CCP', 'VBTS', 'CCB', 'NNP', 'NNP', 'CCP', 'VBTR', 'DTP', 'NNP', 'NNP', 'LM', 'JJD_CCP', 'CDB', 'NNC', 'RBI', 'DTC', 'JJD_CCP', 'NNC_CCP', 'CCT', '[PAD]', 'NNP', 'PMP']\", \"['CCT', 'CDB', 'PMC', 'CDB', 'JJN_CCP', 'VBTS_CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'CCP', 'VBAF', 'RBI', 'CCB', 'PRSP_CCP', 'NNC', 'CCT', 'VBTR', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCB', '[PAD]', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'CCP', 'VBTR', 'CCT', 'PRI_CCP', 'NNC', 'PMC', 'VBTR', 'DTP', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCT', 'CCT', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['CCT', 'NNC', 'CCB', 'NNC', 'PMC', 'VBTS', 'DTP', 'NNP', 'CCP', 'VBAF', 'RBI', 'PRP', 'CCB', 'CDB', 'PMC', 'CDB', 'RBL', 'CCT', 'NNP', 'PMC', 'CCT', 'RBF', 'RBM', 'PRS', 'VBTS', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNC', 'PRO', 'CCB', 'JJD_CCP', 'CCR', 'PMS', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTP', 'NNP', 'CCR', 'CCT', 'NNP', 'FW', 'FW', 'PRO', 'CCP', 'VBH', '[PAD]', 'PMS', 'VBW', 'CCT', 'PRI', 'PRO_CCP', 'NNC', 'PMP']\", \"['CCT', 'NNP', 'NNP', 'JJN_CCP', 'VBTS', 'CCB', 'NNC', 'CCB', 'FW', 'PMS', 'FW', 'RBW', 'PRI_CCP', 'FW', 'FW', 'DTC', 'NNC', 'CCT', 'NNP', 'NNP', 
'RBW_CCP', 'NNP', 'CDB', 'PMP']\", \"['CCT', 'PMC', 'VBTS', 'CCB', 'NNC', 'CCP', 'DTC', 'FW', 'CDB', 'LM', 'VBTS', 'CCB', 'NNC', 'CCT', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['CCT', 'PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'PMS', 'NNP', 'NNP', 'PMS', 'PMC', 'VBTS', 'PRO', 'CCP', 'RBF', 'PRS', 'VBAF', 'CCB', 'NNP', 'FW', '[PAD]', 'NNC', 'RBI', 'CCT', 'JJD_CCP', 'NNC', 'PMP']\", \"['CCT', 'VBTR', 'CCP', 'NNP', 'NNP', 'LM', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBW', 'CCB', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['CDB', 'RBW', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'CCP', 'RBF', 'JJD', 'CCP', 'DTC', 'CCR', 'PMS', 'NNC', 'CCT', 'NNP', 'LM', 'VBTF', 'CCB', 'JJD_CCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCB', 'NNC', 'PRS', 'LM', 'VBTR_CCP', 'JJD', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'LM', '[PAD]', 'VBTR']\", \"['DTC', 'DTCP', 'NNC', 'LM', 'VBTR', 'RBW', 'JJD', 'CCT', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC', 'PMS', 'NNC', 'CCB', 'NNP', 'LM', 'VBTR', 'RBI', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'DTCP', 'NNC_CCP', 'PMS', 'NNC', 'CCT', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'CCB', 'PMS', 'NNP', 'LM', 'CCT', 'RBI', 'CCT', 'DTCP', 'NNC', 'PMP']\", \"['DTC', 'FW', 'FW', 'NNP', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'DTC', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'VBAF', 'DTC', 'NNP', 'CCT', 'FW', 'NNC', 'PMP']\", \"['DTC', 'FW', 'NNP', 'LM', 'CCT', 'CCT', 'JJN', 'CCP', 'VBTS_CCP', 'NNC', 'PMC', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCA', 'PRI_CCP', 'NNC', 'PMP']\", \"['DTC', 'JJD_CCP', 'NNC', 'LM', '[PAD]', 'VBTR', 'PMP']\", \"['DTC', 'NNC', 'LM', 'VBTS', 'CCB', '[PAD]', 'PMS', 'NNC', 'CCB', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['DTC', 'NNC', 'NNC_CCP', 'FW', '[PAD]', 'FW', 'NNP', 'FW', 'PMS', 'NNP', 'NNP', 'CCP', 'NNC', 'PRP']\", \"['DTC', 'NNC_CCP', 'VBTS', 'LM', '[PAD]', 'NNC', 'PMP']\", \"['DTC', 'PRI', 'CCB', 'NNC', 'LM', 'VBW', 'CCB', 'NNC', 'RBL', 
'CCT', 'NNC', 'PMP']\", \"['DTC', 'PRSP_CCP', 'JJCC', 'JJD_CCP', '[PAD]', 'JJD_CCP', 'NNC', 'LM', 'VBTR', 'CCT', 'PRSP', 'CCP', 'VBW', 'CCB', 'JJD', 'CCP', 'NNC', 'PMP']\", \"['DTC', 'VBTS_CCP', 'NNC', 'LM', 'JJD', 'RBW', '[PAD]', 'VBTS', 'DTP', 'NNP_CCP', 'NNP', 'CCT', 'PRI', 'CCB', 'NNC', 'RBW', 'VBOF', 'CCT', 'NNPA', 'PMS', '[PAD]', 'PMS', 'NNPA', 'PMP']\", \"['JJCC', 'CCT', 'CDB', 'NNC', 'DTC', 'VBOF', 'CCT', '[PAD]', '[PAD]', 'VBTS', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'CCT', 'PRI', 'RBI_CCP', 'NNC', 'CCP', 'VBH', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'CCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCA', 'RBD_CCP', '[PAD]', 'JJD_CCP', 'CDB', 'PMS', 'CDB', 'CCB', 'RBW', 'PMP']\", \"['JJD', 'CCP', 'VBOF', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'NNC', 'CCB', 'NNC', '[PAD]', '[PAD]', 'VBOF', 'PMC', 'PRI', 'NNP', 'PMP']\", \"['JJD', 'JJN_CCP', 'NNC', 'RBW', 'VBTR', 'DTC', 'NNC', 'CCT', 'NNC', 'CCT', 'RBF', 'RBM', 'PRO', 'VBOF', 'CCB', 'NNC', 'CCA', 'CCT', 'NNC', 'LM', 'VBTR', 'RBI', 'DTC', 'NNC', 'PMS', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['JJD', 'LM', 'VBTR', 'CCB', 'FW', 'NNP', 'CCP', 'DTP', 'NNP', 'NNP', 'CCT', 'NNPA', 'CCP', 'VBOF', 'DTC', 'FW', 'FW', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJD', 'LM', 'VBTS', 'DTP', 'NNP', 'CCP', 'RBF', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'DTC', 'RBW', 'PRS_CCP', 'NNC', 'PMC', 'NNC', 'LM', 'VBS', 'VBAF', 'CCB', 'NNC_CCP', 'VBTR_VBAF', 'CCB', 'PRSP_CCP', 'DTCP', 'VBTS', 'PMP']\", \"['JJD', 'PMS', 'FW', 'PMS', 'RBM', 'DTC', 'NNC', 'PRO', 'PMP']\", \"['JJD', 'PRL', 'PMC', 'JJD', 'CCP', 'VBAF', 'CCT', 'NNC', 'DTP', 'NNP', 'NNP', 'DTC', 'NNC', 'PRO', 'CCA', 'VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCB', 'RBF', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD', 'RBI_CCP', 'VBOF', 'CCB', 'NNC', 'CCP', 'NNC', 'CCB', 'DTCP', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCT', 'DTCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'CCB', 'RBW', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 'DTCP', 
'NNC', 'CCP', 'NNC', 'PMP']\", \"['JJD_CCP', 'NNC', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTF', 'PRP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCB', 'NNC', 'VBW', 'DTC', 'NNC', 'CCR', 'VBN', 'RBI', 'PRO_CCP', 'VBAF', 'CCT', 'NNC', 'PMP']\", \"['JJD_CCP', 'VBW', 'CCB', 'NNC_CCP', 'NNP', 'DTP', 'NNP', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'NNC', 'RBW', 'VBW', 'DTC', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['JJN_CCP', 'NNC', 'DTC', 'VBTF', 'CCB', 'NNC', 'PMS', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCA', 'CCT', 'NNC', 'NNP', 'CCT', 'VBW', 'CCB', 'NNC', 'PMP']\", \"['NNC', 'RBW', 'VBW', 'DTC', 'JJN_CCP', 'NNC', 'FW', 'FW', 'CCP', 'RBL', 'NNC', 'CCB', 'JJD_CCP', 'NNC', 'CCT', 'NNPA', 'NNC', 'CCT', 'NNP', 'PMP']\", \"['PRI_CCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'NNP', 'NNP', 'DTC', 'NNP', 'CCA', 'VBTS', 'CCB', 'RBF', 'RBI', 'VBTS_CCP', 'NNC', 'PMP']\", \"['PRI_CCP', 'NNC', 'DTC', '[PAD]', 'VBTR', 'CCP', 'VBW', 'CCB', 'NNP', 'NNP', 'CCT', 'CCT', 'JJD_CCP', 'VBN_CCP', 'NNC', 'PMP']\", \"['PRO', 'DTC', 'JJD', 'VBTS', 'CCB', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['PRO', 'RBI', 'NNP', 'CDB', 'LM', '[PAD]', 'VBW', 'CCB', 'NNP', 'CDB', 'PMP', 'CDB', 'CCP', 'NNC', 'DTC', '[PAD]', '[PAD]', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['PRS', 'LM', 'VBTS', 'CCR', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'RBD', 'NNC_CCP', 'DTCP', 'NNC', 'PMP']\", \"['PRS_CCP', 'RBM', 'DTC', 'JJCS_JJD_CCP', 'NNC', 'CCP', 'PRSP_CCP', 'VBOF', 'CCT', 'NNC', 'CCP', 'VBAF', 'CCT', 'CDB', '[PAD]', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['RBD_CCP', 'VBOF', 'CCB', 'NNP', 'NNP', 'DTC', 'NNC', 'CCT', 'NNP', 'FW', 'PMS', 'NNC', 'DTP', 'NNP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'CDB', 'PMC', 'RBW', 'DTC', '[PAD]', 'PMS', 'NNC', 'DTP', 'NNP', 'NNP', 'PMP']\", \"['RBF', 'VBTS', 'DTC', 'PRSP_CCP', 'VBTR', 'CCT', 'PRSP_CCP', 'NNC', 'PMP']\", \"['RBF', 'VBTS', 'DTP', 'NNP', 'DTC', 'NNC', 'CCB', 'NNC', 'CCT', 
'DTCP', 'NNC', 'NNC', 'CCT', 'PRSP', 'CCP', 'VBTR', 'PRS_CCP', 'VBTR', 'RBM', 'DTC', 'FW', 'FW', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBR', 'CCT', 'NNPA', 'PMC', 'VBH', 'PRQ_CCP', 'DTCP', 'FW', 'NNC', 'DTC', 'VBS', 'CCP', 'VBW', 'CCT', 'VBW', 'CCB', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'CCT', 'PRSP_CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'PMP']\", \"['RBR', 'DTP', 'NNP', 'PMC', 'VBTS', 'CCP', 'RBI', 'CCB', 'NNP', 'FW', 'NNP', 'FW', 'NNP', 'NNP', 'DTC', 'NNP', 'FW', 'NNC', 'CCB', 'DTCP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'PMS', 'RBR', 'CCP', 'RBI', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'PRS_CCP', 'VBAF', 'CCB', 'NNC', 'CCR', 'PRS', 'LM', '[PAD]', 'VBTR', 'CCP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'RBI', 'CCT', 'NNC', 'DTP', 'NNP', 'CCP', 'NNC', 'LM', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCB', 'NNC', 'PMP']\", \"['RBW', 'VBAF', 'VBH', 'VBTS', 'PRP_CCP', 'JJD_CCP', 'NNC', 'PMP']\", \"['RBW_CCP', 'PRS', 'RBI', 'CCT', 'NNC', 'CCT', 'PRS', 'LM', 'VBTR', 'NNC', 'PMP']\", \"['RBW_CCP', 'RBW', 'VBOF', 'CCB', 'NNP', 'CCP', 'CCR', 'RBF', 'JJD', 'CCP', 'VBW', 'CCB', 'NNC', 'PMC', 'VBS', 'VBOF', 'PRO', 'CCP', 'RBD_CCP', 'VBW', 'DTC', 'RBL', 'NNC', 'PMP']\", \"['VBAF', 'CCP', 'RBI', 'PRS', 'VBOF', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBAF', 'CCT', '[PAD]', 'VBTR', 'CCP', 'RBF', 'CCP', 'VBW', 'DTC', 'NNC', 'PMP']\", \"['VBAF', 'PRS', 'CCT', 'NNC', 'CCT', 'VBAF', 'CCB', 'JJN_CCP', 'NNC', 'PMP']\", \"['VBOF', 'CCB', 'NNC', 'CCB', 'NNP', 'DTC', 'PRI', 'CCB', 'NNC', 'CCT', 'JJN', 'PMS', 'CDB', 'CCB', 'NNPA', 'CCT', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBOF', 'CCB', 'PRI', 'DTC', 'PRSP_CCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBOF', 'PRO', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'NNC', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'NNC', 'RBI', 'PRO', 'CCA', 'VBS', 'PRP_CCP', 'VBOF', 'PRO', 'PMP']\", \"['VBOF', 'PRS', 'DTC', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNC']\", \"['VBS', 'RBI', 'RBI', 'CCB', 'NNC', 
'RBW', 'NNP', 'NNP', 'NNP', 'NNP', 'CCA', 'NNP', 'NNP', 'CCP', 'VBTS', 'RBW_CCP', 'NNC', 'PMP']\", \"['VBTR', 'RBI', 'DTP', 'NNP', 'DTC', 'NNP', 'CCP', 'VBOF', 'DTC', 'PRSP_CCP', 'DTCP', 'NNC', 'CCP', 'VBS', 'VBAF', 'CCT', 'DTCP', 'NNC', 'CCA', 'NNC', 'CCB', 'NNP', 'CCR', 'VBTS', 'PRL', 'PMP']\", \"['VBTR', 'RBI', 'RBI', 'CCT', 'NNP', 'FW', 'FW', 'DTC', 'PRI_CCP', 'NNP', 'NNP', 'VBW', 'DTC', 'PRQ_CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'NNC', 'DTC', 'DTCP', 'FW', 'FW', 'PMP']\", \"['VBTR_CCP', 'RBL', 'CDB', 'PMC', 'CDB', 'NNC', 'DTC', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNC', 'CCT', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'DTCP', 'NNC', 'DTC', 'CDB', 'PMS', 'JJD', 'CCP', 'NNC', 'CCP', 'VBTR', 'CCP', 'NNC', 'JJD', 'CCT', 'NNC', 'PMP', 'NNP', 'PMC', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC_CCP', 'NNC', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'CCP', 'NNC', 'CCT', 'NNP', 'FW', 'DTC', 'NNC', 'CCT', 'VBW', 'CCA', 'VBW', 'CCB', 'NNC', 'DTC', 'NNC', 'CCR', 'VBTS', 'DTC', 'NNC', 'NNC', 'CCT', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCB', 'NNC', 'DTC', 'FW', 'FW', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'DTCP', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'CCT', 'CCT', 'NNC', 'CCB', 'JJD', 'PMS', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'DTC', 'JJN_CCP', 'NNC', 'LM', 'RBL', 'NNPA', 'NNP', 'NNP', 'CCT', 'NNP', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'CCB', 'DTCP', 'NNC', 'CCP', 'NNC', 'RBW', 'CCP', '[PAD]', '[PAD]', 'VBTS', 'DTC', 'NNC', 'CCB', 'DTCP', '[PAD]', 'NNC', 'DTC', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'CCP', 'RBM', 'PRS', 'CCB', 'NNC', 'CCP', 'NNC', 'DTC', 'VBTS_CCP', 'NNC', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'CCP', 'VBTR', 'DTC', 'PRSP_CCP', 'NNC_CCP', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCP', 'VBH', 'JJD_CCP', 'CDB', 'PMS', 'FW', 'CCT', 'NNC', 'CCB', 'PRSP_CCP', 'NNC', 'CCB', 'NNC', 'CCA', 'VBOF', 'CCB', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 'CCB', 'PRSP_CCP', 'DTCP', 'NNC', 'PMP']\", \"['VBTS', 'DTC', 'NNC', 
'CCP', 'DTP', '[PAD]', '[PAD]', 'NNP', 'NNP', 'PMC', 'CDB', 'PMC', 'JJD_CCP', '[PAD]', 'VBTS', 'CCT', 'NNP', 'NNP', 'CCT', '[PAD]', 'VBW', 'CCT', 'NNP', 'NNP', 'CCR', 'CCT', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'CCA', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'CCP', 'VBS', 'VBTF', 'CCP', 'RBI', 'CCB', 'NNC', 'CCT', 'DTCP', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMS', 'NNPA', 'PMS', 'NNPA', 'PMS', 'DTC', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'FW', 'CCP', 'VBW', 'PRS', 'CCA', 'PRSP_CCP', 'NNC', 'LM', 'RBF', 'VBW', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', 'NNP', 'CCP', 'NNC', 'CCP', 'RBI', 'DTC', 'FW', 'NNC', 'CCP', 'VBTF', 'RBI_CCP', 'VBOF', 'DTP', 'NNP', 'NNP', 'NNP', 'PMP', 'NNP', 'NNP', 'PMP']\", \"['VBTS', 'DTP', 'NNP', 'NNP', '[PAD]', '[PAD]', 'NNC', 'CCB', 'NNP', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'DTC', 'NNC', 'CCP', 'DTP', 'NNP', 'PMS', 'NNP', 'NNP', 'NNP', 'PMC', 'CDB', 'PMS', 'FW', 'PMC', 'CCP', 'VBTS', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'DTP', '[PAD]', 'NNP', 'NNC', 'CCP', 'VBAF', 'DTC', 'VBTS_CCP', 'NNC', 'CCB', 'CDB', 'NNC', 'PMP']\", \"['VBTS', 'PRS', 'CCP', 'RBF', 'PRS', '[PAD]', '[PAD]', 'VBW', 'CCT', 'PRSP_CCP', 'DTCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'CCB', 'JJD_CCP', 'NNC', 'DTC', 'NNC', 'CCA', 'JJCC', 'JJD', 'CCR', 'VBOF', 'CCP', 'RBI', 'NNC', 'PRO_CCP', 'FW', 'PMP']\", \"['VBTS', 'RBI', 'CCT', 'NNC', 'DTC', 'NNC', 'CCT', 'RBF', 'CCP', 'VBTS', 'DTC', 'PRSP_CCP', 'NNC', 'CCR', 'CCT', 'NNC', 'CCB', 'DTCP', 'NNC', 'CCP', 'VBTS', 'CCT', 'PRI', 'PMS', 'NNCA', 'PRI_CCP', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTC', 'PRI_CCP', 'NNP', 'PMC', 'RBL', 'JJN', 'CCP', 'NNC', 'PMC', 'CCP', 'VBAF', 'CCT', 'NNP', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'CCP', 'VBTS', 'PRS', 'DTC', 'NNC', 'CCT', 'PRSP_CCP', 'NNC', 'PMP', 'PMS', 'CCP', 'PMS', 'NNPA', 'PRS', 'CCB', 'DTC', 'NNC', 'CCP', '[PAD]', 'VBTS', 'CCB', 
'PRSP_CCP', 'NNC', 'PMP', 'PMS']\", \"['VBTS', 'RBI', 'DTP', 'NNP', 'NNP', 'NNP', 'NNP', 'PMC', 'CCP', 'VBTS', 'PMS', 'FW', 'NNP', 'NNP', 'PMS', 'NNP', 'NNP', 'PMC', 'CCP', 'RBI', 'PMS', 'FW', '[PAD]', 'VBTR', 'DTC', 'DTCP', 'PRO', 'CCR', 'PMS', 'PRI', 'PMS', 'RBI_CCP', 'NNC', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'FW', 'DTC', 'DTCP', 'NNC', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNP', 'PMC', 'CCT', 'DTCP', 'NNC', 'CCP', 'NNC', 'CCA', 'PRI', 'CCT', 'DTCP', 'PRO', 'LM', 'JJD', 'PMP']\", \"['VBTS', 'RBM', 'CCB', 'NNC', 'DTC', 'NNC', 'CCT', 'NNC', 'CCA', 'PRL', '[PAD]', '[PAD]', 'VBTS', 'CCR', '[PAD]', '[PAD]', 'VBTS', 'DTP', 'NNP', 'CCB', 'NNP', 'DTC', 'NNC', 'CCB', 'VBAF', 'CCT', '[PAD]', 'NNC', 'PMP']\", \"['VBTS', 'RBW', 'CCP', 'RBL', 'JJN', 'CCP', 'NNC', 'CCP', 'DTC', 'NNC', 'CCB', 'NNP', 'FW', 'NNP', 'NNP', 'NNP', 'CCP', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['VBW', 'DTC', 'CDB', 'NNC', 'LM', 'VBTR', 'CCP', 'CCT', 'NNC', 'DTC', 'JJD', 'PMS', 'FW', 'NNP', 'FW', 'PMP']\", \"['[PAD]', 'VBAF', 'CCB', 'VBW', 'CCT', 'NNC', 'CCP', 'VBTS', 'RBL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCB', 'NNC', 'JJD', 'CCB', 'NNC', 'CCB', 'NNC', 'DTC', 'DTCP', 'NNC', 'RBL', 'CCT', 'JJD', 'CCP', 'JJD_CCP', 'NNC', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNC', 'DTC', 'VBTS', 'CCA', 'CDB', 'PMC', 'CDB', 'RBI', 'DTC', 'VBTS', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'CCP', 'NNP', 'RBL', 'CCT', 'JJN', 'CCP', 'NNC', 'DTC', '[PAD]', 'VBTS', 'CCB', 'NNC', 'CCA', 'NNC', 'CCT', '[PAD]', '[PAD]', '[PAD]', 'NNP', 'CCT', 'CCT', '[PAD]', 'JJD_CCP', 'NNC', 'PMC', 'CCB', 'DTC', 'JJN_CCP', '[PAD]', 'FW', 'FW', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'DTCP', 'NNC_CCP', '[PAD]', 'VBTR', 'PRL', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTC', 'VBTS', 'CCT', 'NNC', 'PMP']\", \"['[PAD]', 'VBTS', 'DTP', 'NNP', 'VBW', 'CCP', '[PAD]', 'VBTS', 'CCP', '[PAD]', '[PAD]', 'VBOF', 'PRS', 'CCB', 'DTCP', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCA', 'VBTS', 'CCT', 'NNP', 'NNP', 
'PMP']\", \"['[PAD]', 'VBTS', 'RBI', 'DTC', 'VBW', 'CCB', 'NNC', 'PRP', 'CCT', 'JJD', 'CCB', 'NNC', 'CCT', 'NNC', 'CCB', 'NNC', 'CCT', 'VBOF', 'DTC', 'FW', 'NNC', 'PRO', 'PMP']\", \"['[PAD]', 'VBTS_CCP', 'NNC_CCP', 'RBW', 'PMC', 'VBN', 'RBI', 'RBI', 'RBI', 'DTP', 'PRI_CCP', 'NNC', 'CCP', '[PAD]', 'VBTS', 'JJD', 'PRO', 'PMP']\", \"['[PAD]', '[PAD]', 'NNC', 'CCP', 'VBOF', 'CCP', 'DTC', 'PRSP_CCP', 'NNC', 'LM', 'VBTR', 'RBW', 'VBTR', 'PMP']\", \"['[PAD]', '[PAD]', 'VBOF', 'DTC', 'VBTS', 'CCT', 'PRSP', 'NNC_CCP', 'VBTR', 'PRS', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'CCT', 'NNC', 'DTC', 'DTCP', 'NNC', 'CCR', '[PAD]', '[PAD]', 'PRP', 'DTP', 'NNP', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'NNC', 'NNC', 'DTP', 'NNP', 'NNP', 'CCR', 'CCT', 'RBM', 'PMS', 'FW', 'NNC', 'PRS', 'CCT', 'JJD_CCP', 'NNC', 'CCP', 'DTP', 'NNP', 'NNP', 'CCP', 'RBD_CCP', 'VBAF', 'CCT', 'NNC', 'CCB', 'NNC', 'PMP']\", \"['[PAD]', '[PAD]', 'VBTS', 'RBI', 'DTC', 'VBTR', 'NNC_CCP', 'PRO', 'PMP']\"] will be ignored\n",
+      "  warnings.warn(\n"
      ]
     },
     {
+     "ename": "UFuncTypeError",
+     "evalue": "ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mUFuncTypeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[60], line 21\u001b[0m\n\u001b[0;32m     18\u001b[0m y_pred_bin \u001b[38;5;241m=\u001b[39m mlb\u001b[38;5;241m.\u001b[39mtransform(y_pred_str)\n\u001b[0;32m     20\u001b[0m \u001b[38;5;66;03m# Print classification report\u001b[39;00m\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mclassification_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_valid_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred_bin\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:2342\u001b[0m, in \u001b[0;36mclassification_report\u001b[1;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[0;32m   2340\u001b[0m headers \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprecision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecall\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mf1-score\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msupport\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m   2341\u001b[0m \u001b[38;5;66;03m# compute per-class results without averaging\u001b[39;00m\n\u001b[1;32m-> 2342\u001b[0m p, r, f1, s \u001b[38;5;241m=\u001b[39m \u001b[43mprecision_recall_fscore_support\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   2343\u001b[0m \u001b[43m    \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2344\u001b[0m \u001b[43m    \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2345\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2346\u001b[0m \u001b[43m    \u001b[49m\u001b[43maverage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m   2347\u001b[0m \u001b[43m    \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2348\u001b[0m \u001b[43m    \u001b[49m\u001b[43mzero_division\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mzero_division\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2349\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   2350\u001b[0m rows 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(target_names, p, r, f1, s)\n\u001b[0;32m   2352\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
+      "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1577\u001b[0m, in \u001b[0;36mprecision_recall_fscore_support\u001b[1;34m(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)\u001b[0m\n\u001b[0;32m   1575\u001b[0m \u001b[38;5;66;03m# Calculate tp_sum, pred_sum, true_sum ###\u001b[39;00m\n\u001b[0;32m   1576\u001b[0m samplewise \u001b[38;5;241m=\u001b[39m average \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1577\u001b[0m MCM \u001b[38;5;241m=\u001b[39m \u001b[43mmultilabel_confusion_matrix\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1578\u001b[0m \u001b[43m    \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1579\u001b[0m \u001b[43m    \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1580\u001b[0m \u001b[43m    \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1581\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1582\u001b[0m \u001b[43m    \u001b[49m\u001b[43msamplewise\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msamplewise\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1583\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1584\u001b[0m tp_sum \u001b[38;5;241m=\u001b[39m MCM[:, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m   1585\u001b[0m pred_sum \u001b[38;5;241m=\u001b[39m tp_sum \u001b[38;5;241m+\u001b[39m MCM[:, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m]\n",
+      "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:552\u001b[0m, in \u001b[0;36mmultilabel_confusion_matrix\u001b[1;34m(y_true, y_pred, sample_weight, labels, samplewise)\u001b[0m\n\u001b[0;32m    549\u001b[0m \u001b[38;5;66;03m# All labels are index integers for multilabel.\u001b[39;00m\n\u001b[0;32m    550\u001b[0m \u001b[38;5;66;03m# Select labels:\u001b[39;00m\n\u001b[0;32m    551\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39marray_equal(labels, present_labels):\n\u001b[1;32m--> 552\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(present_labels):\n\u001b[0;32m    553\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    554\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll labels must be in [0, n labels) for \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    555\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultilabel targets. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    556\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGot \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m > \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (np\u001b[38;5;241m.\u001b[39mmax(labels), np\u001b[38;5;241m.\u001b[39mmax(present_labels))\n\u001b[0;32m    557\u001b[0m         )\n\u001b[0;32m    558\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mmin(labels) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
+      "File \u001b[1;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
+      "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:2820\u001b[0m, in \u001b[0;36mamax\u001b[1;34m(a, axis, out, keepdims, initial, where)\u001b[0m\n\u001b[0;32m   2703\u001b[0m \u001b[38;5;129m@array_function_dispatch\u001b[39m(_amax_dispatcher)\n\u001b[0;32m   2704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mamax\u001b[39m(a, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, out\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, keepdims\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue, initial\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue,\n\u001b[0;32m   2705\u001b[0m          where\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39m_NoValue):\n\u001b[0;32m   2706\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2707\u001b[0m \u001b[38;5;124;03m    Return the maximum of an array or maximum along an axis.\u001b[39;00m\n\u001b[0;32m   2708\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2818\u001b[0m \u001b[38;5;124;03m    5\u001b[39;00m\n\u001b[0;32m   2819\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2820\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmaximum\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmax\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2821\u001b[0m \u001b[43m                          
\u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\numpy\\core\\fromnumeric.py:86\u001b[0m, in \u001b[0;36m_wrapreduction\u001b[1;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[0;32m     83\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m     84\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mpasskwargs)\n\u001b[1;32m---> 86\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[1;31mUFuncTypeError\u001b[0m: ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U12'), dtype('<U12')) -> None"
+     ]
     }
    ],
    "source": [
+    "from sklearn.metrics import classification_report\n",
+    "\n",
+    "# Get the list of unique labels\n",
     "labels = list(crf.classes_)\n",
+    "\n",
+    "# Predict labels for the validation set\n",
     "y_pred = crf.predict(X_valid)\n",
+    "\n",
+    "from sklearn.preprocessing import MultiLabelBinarizer\n",
+    "\n",
+    "# Convert labels to strings\n",
+    "y_valid_str = [[str(label)] for label in y_valid]\n",
+    "y_pred_str = [[str(label)] for label in y_pred]\n",
+    "\n",
+    "# Convert labels to binary array format\n",
+    "mlb = MultiLabelBinarizer()\n",
+    "y_valid_bin = mlb.fit_transform(y_valid_str)\n",
+    "y_pred_bin = mlb.transform(y_pred_str)\n",
+    "\n",
+    "# Print classification report\n",
+    "print(classification_report(y_valid_bin, y_pred_bin, labels=labels))\n",
+    "\n"
    ]
   },
   {

test.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -103,8 +103,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Sentence is grammatically wrong.\n",
-      "Probabilities: [0.9901305437088013, 0.009869435802102089]\n"
      ]
     }
    ],
@@ -115,7 +115,7 @@
     "tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
     "model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
     "\n",
-    "new_sentence = \"Siya ay magigising kanina.\"\n",
     "\n",
     "# Tokenize the input text\n",
     "inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
@@ -141,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -299,6 +299,35 @@
     "    print(candidate, \"Probability:\", probability)\n",
     "print(predicted_labels)\n"
    ]
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Sentence is grammatically correct.\n",
+      "Probabilities: [0.00594444340094924, 0.9940555095672607]\n"
      ]
     }
    ],
     "tokenizer = AutoTokenizer.from_pretrained(\"zklmorales/bert_finetuned\")\n",
     "model = AutoModelForSequenceClassification.from_pretrained(\"zklmorales/bert_finetuned\")\n",
     "\n",
+    "new_sentence = \"Pupunta ako kahapon sa siyudad upang bumili ang mga gamit ko\"\n",
     "\n",
     "# Tokenize the input text\n",
     "inputs = tokenizer(new_sentence, return_tensors=\"pt\")\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
     "    print(candidate, \"Probability:\", probability)\n",
     "print(predicted_labels)\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Nagising\n",
+      "67\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fuzzywuzzy import fuzz\n",
+    "\n",
+    "original_word = \"Gigisingin\"\n",
+    "suggestions = [\"Tatakbo\", \"Nagising\", \"Hihiga\", \"Kakain\"]\n",
+    "\n",
+    "# Minimum fuzz.ratio score a suggestion must reach to be reported\n",
+    "threshold = 60\n",
+    "\n",
+    "# Print every suggestion whose score against the original word meets the threshold,\n",
+    "# followed by that score. The score is computed once and reused for the print.\n",
+    "for suggestion in suggestions:\n",
+    "    similarity_score = fuzz.ratio(original_word, suggestion)\n",
+    "    if similarity_score >= threshold:\n",
+    "        print(suggestion)\n",
+    "        print(similarity_score)\n"
+   ]
   }
  ],
  "metadata": {