{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Later cells import `utils.get_category` and read `data_test/...` through\n",
    "# relative paths; presumably both live in the parent directory.\n",
    "# NOTE: not idempotent - re-running this cell climbs one more level.\n",
    "import os\n",
    "\n",
    "os.chdir('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from utils.get_category import predict, get_top_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Index</th>\n",
       "      <th>Keyword</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>guide to headphones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>headphone guide</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>buy headphones guide</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>choosing headphones guide</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>sony headphones guide</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>273</td>\n",
       "      <td>guidelines 2022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>274</td>\n",
       "      <td>guidelines 2023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>275</td>\n",
       "      <td>ts guidelines</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>276</td>\n",
       "      <td>guided drawing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>277</td>\n",
       "      <td>guided meditation</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>277 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Index                    Keyword\n",
       "0        1        guide to headphones\n",
       "1        2            headphone guide\n",
       "2        3       buy headphones guide\n",
       "3        4  choosing headphones guide\n",
       "4        5      sony headphones guide\n",
       "..     ...                        ...\n",
       "272    273            guidelines 2022\n",
       "273    274            guidelines 2023\n",
       "274    275              ts guidelines\n",
       "275    276             guided drawing\n",
       "276    277          guided meditation\n",
       "\n",
       "[277 rows x 2 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df= pd.read_csv(\n",
    "    'data_test/keywords-2.csv'\n",
    ")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predicted Class:  Computers_and_Electronics \n",
      "probabilities_scores: 1.0\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'P(Hobbies_and_Leisure)': 0.034,\n",
       " 'P(News)': 0.005,\n",
       " 'P(Science)': 0.008,\n",
       " 'P(Autos_and_Vehicles)': 0.002,\n",
       " 'P(Health)': 0.009,\n",
       " 'P(Pets_and_Animals)': 0.003,\n",
       " 'P(Adult)': 0.017,\n",
       " 'P(Computers_and_Electronics)': 1.0,\n",
       " 'P(Online Communities)': 0.019,\n",
       " 'P(Beauty_and_Fitness)': 0.007,\n",
       " 'P(People_and_Society)': 0.0,\n",
       " 'P(Business_and_Industrial)': 0.001,\n",
       " 'P(Reference)': 0.007,\n",
       " 'P(Shopping)': 0.173,\n",
       " 'P(Travel_and_Transportation)': 0.001,\n",
       " 'P(Food_and_Drink)': 0.012,\n",
       " 'P(Law_and_Government)': 0.022,\n",
       " 'P(Books_and_Literature)': 0.001,\n",
       " 'P(Finance)': 0.01,\n",
       " 'P(Games)': 0.069,\n",
       " 'P(Home_and_Garden)': 0.011,\n",
       " 'P(Jobs_and_Education)': 0.001,\n",
       " 'P(Arts_and_Entertainment)': 0.005,\n",
       " 'P(Sensitive Subjects)': 0.003,\n",
       " 'P(Real Estate)': 0.009,\n",
       " 'P(Internet_and_Telecom)': 0.063,\n",
       " 'P(Sports)': 0.014,\n",
       " 'Predicted Label': 'Computers_and_Electronics',\n",
       " 'Predicted Label Score': 1.0}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predict('cat ear headphones')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label every keyword. Each Category entry is whatever get_top_labels returns\n",
    "# (a list of (label, score) tuples in the outputs saved in this notebook).\n",
    "df['Category'] = df['Keyword'].map(get_top_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Disabled legacy post-processing, kept for reference. It expects each Category\n",
    "# entry to be a dict with 'Predicted Label' / 'Predicted Label Score' keys and\n",
    "# flattens it into a string 'Category' plus a 'Probablity Score' column.\n",
    "# import numpy as np\n",
    "# for i in range(len(df.Category)):\n",
    "#     # print(type(df.Category[i]))\n",
    "#     # df.Category[i]= df.Category[i]['Predicted Label']\n",
    "#     # df.loc[i, 'Category']= f\"{df.Category[i]['Predicted Label']}, {str(np.round(df.Category[i]['Predicted Label Score'],2))}\"\n",
    "#     df.loc[i, 'Probablity Score']= f\"{str(np.round(df.Category[i]['Predicted Label Score'],2))}\"\n",
    "#     df.loc[i, 'Category']= f\"{df.Category[i]['Predicted Label']}\"\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Index</th>\n",
       "      <th>Keyword</th>\n",
       "      <th>Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>guide to headphones</td>\n",
       "      <td>[(Computers_and_Electronics, 1.0)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>headphone guide</td>\n",
       "      <td>[(Computers_and_Electronics, 0.999)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>buy headphones guide</td>\n",
       "      <td>[(Shopping, 0.997), (Computers_and_Electronics...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>choosing headphones guide</td>\n",
       "      <td>[(Computers_and_Electronics, 1.0)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>sony headphones guide</td>\n",
       "      <td>[(Computers_and_Electronics, 1.0)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>273</td>\n",
       "      <td>guidelines 2022</td>\n",
       "      <td>[(Computers_and_Electronics, 1.0)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>274</td>\n",
       "      <td>guidelines 2023</td>\n",
       "      <td>[(Computers_and_Electronics, 1.0)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>275</td>\n",
       "      <td>ts guidelines</td>\n",
       "      <td>[(Computers_and_Electronics, 0.933), (Games, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>276</td>\n",
       "      <td>guided drawing</td>\n",
       "      <td>[(Reference, 0.995)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>277</td>\n",
       "      <td>guided meditation</td>\n",
       "      <td>[(Beauty_and_Fitness, 1.0), (Hobbies_and_Leisu...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>277 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Index                    Keyword  \\\n",
       "0        1        guide to headphones   \n",
       "1        2            headphone guide   \n",
       "2        3       buy headphones guide   \n",
       "3        4  choosing headphones guide   \n",
       "4        5      sony headphones guide   \n",
       "..     ...                        ...   \n",
       "272    273            guidelines 2022   \n",
       "273    274            guidelines 2023   \n",
       "274    275              ts guidelines   \n",
       "275    276             guided drawing   \n",
       "276    277          guided meditation   \n",
       "\n",
       "                                              Category  \n",
       "0                   [(Computers_and_Electronics, 1.0)]  \n",
       "1                 [(Computers_and_Electronics, 0.999)]  \n",
       "2    [(Shopping, 0.997), (Computers_and_Electronics...  \n",
       "3                   [(Computers_and_Electronics, 1.0)]  \n",
       "4                   [(Computers_and_Electronics, 1.0)]  \n",
       "..                                                 ...  \n",
       "272                 [(Computers_and_Electronics, 1.0)]  \n",
       "273                 [(Computers_and_Electronics, 1.0)]  \n",
       "274  [(Computers_and_Electronics, 0.933), (Games, 0...  \n",
       "275                               [(Reference, 0.995)]  \n",
       "276  [(Beauty_and_Fitness, 1.0), (Hobbies_and_Leisu...  \n",
       "\n",
       "[277 rows x 3 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the frame again, now including the Category column.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the labelled keywords; index=False keeps the row index out of the file.\n",
    "df.to_csv('data_test/labelled_data.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Index</th>\n",
       "      <th>Keyword</th>\n",
       "      <th>Category</th>\n",
       "      <th>Probablity Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>93</th>\n",
       "      <td>94</td>\n",
       "      <td>reddit headphones buying guide</td>\n",
       "      <td>Internet_and_Telecom</td>\n",
       "      <td>0.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>117</td>\n",
       "      <td>turtle beach headset guide</td>\n",
       "      <td>Pets_and_Animals</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>121</td>\n",
       "      <td>earbuds buying guide reddit</td>\n",
       "      <td>Online Communities</td>\n",
       "      <td>0.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>208</th>\n",
       "      <td>209</td>\n",
       "      <td>guides</td>\n",
       "      <td>Sports</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>225</td>\n",
       "      <td>guide book</td>\n",
       "      <td>Books_and_Literature</td>\n",
       "      <td>0.86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225</th>\n",
       "      <td>226</td>\n",
       "      <td>guide for sale</td>\n",
       "      <td>Shopping</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226</th>\n",
       "      <td>227</td>\n",
       "      <td>guide for school</td>\n",
       "      <td>Jobs_and_Education</td>\n",
       "      <td>0.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>228</td>\n",
       "      <td>guide for students</td>\n",
       "      <td>Other</td>\n",
       "      <td>0.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>248</th>\n",
       "      <td>249</td>\n",
       "      <td>guidebook</td>\n",
       "      <td>Books_and_Literature</td>\n",
       "      <td>0.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>252</td>\n",
       "      <td>guide reddit</td>\n",
       "      <td>Online Communities</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260</th>\n",
       "      <td>261</td>\n",
       "      <td>hr guide interview questions</td>\n",
       "      <td>Jobs_and_Education</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>262</th>\n",
       "      <td>263</td>\n",
       "      <td>vi beginners guide</td>\n",
       "      <td>Hobbies_and_Leisure</td>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>265</th>\n",
       "      <td>266</td>\n",
       "      <td>guidelines</td>\n",
       "      <td>Sensitive Subjects</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>270</th>\n",
       "      <td>271</td>\n",
       "      <td>guided reading</td>\n",
       "      <td>Books_and_Literature</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>271</th>\n",
       "      <td>272</td>\n",
       "      <td>guided reading level</td>\n",
       "      <td>Books_and_Literature</td>\n",
       "      <td>0.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>276</td>\n",
       "      <td>guided drawing</td>\n",
       "      <td>Other</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>277</td>\n",
       "      <td>guided meditation</td>\n",
       "      <td>Beauty_and_Fitness</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Index                         Keyword              Category  \\\n",
       "93      94  reddit headphones buying guide  Internet_and_Telecom   \n",
       "116    117      turtle beach headset guide      Pets_and_Animals   \n",
       "120    121     earbuds buying guide reddit    Online Communities   \n",
       "208    209                          guides                Sports   \n",
       "224    225                      guide book  Books_and_Literature   \n",
       "225    226                  guide for sale              Shopping   \n",
       "226    227                guide for school    Jobs_and_Education   \n",
       "227    228              guide for students                 Other   \n",
       "248    249                       guidebook  Books_and_Literature   \n",
       "251    252                    guide reddit    Online Communities   \n",
       "260    261    hr guide interview questions    Jobs_and_Education   \n",
       "262    263              vi beginners guide   Hobbies_and_Leisure   \n",
       "265    266                      guidelines    Sensitive Subjects   \n",
       "270    271                  guided reading  Books_and_Literature   \n",
       "271    272            guided reading level  Books_and_Literature   \n",
       "275    276                  guided drawing                 Other   \n",
       "276    277               guided meditation    Beauty_and_Fitness   \n",
       "\n",
       "    Probablity Score  \n",
       "93              0.98  \n",
       "116             0.99  \n",
       "120             0.91  \n",
       "208              0.7  \n",
       "224             0.86  \n",
       "225              1.0  \n",
       "226             0.84  \n",
       "227             0.57  \n",
       "248             0.91  \n",
       "251              1.0  \n",
       "260              1.0  \n",
       "262              0.9  \n",
       "265              1.0  \n",
       "270              1.0  \n",
       "271             0.98  \n",
       "275             0.56  \n",
       "276              1.0  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
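   ],
   "source": [
    "# Disabled: this comparison only works when Category holds a plain label string.\n",
    "# The saved output of this cell appears to come from such a run.\n",
    "# df[df.Category!='Computers_and_Electronics']"
   ]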
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Index</th>\n",
       "      <th>Keyword</th>\n",
       "      <th>Category</th>\n",
       "      <th>Probablity Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>228</td>\n",
       "      <td>guide for students</td>\n",
       "      <td>Other</td>\n",
       "      <td>0.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>276</td>\n",
       "      <td>guided drawing</td>\n",
       "      <td>Other</td>\n",
       "      <td>0.56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Index             Keyword Category Probablity Score\n",
       "227    228  guide for students    Other             0.57\n",
       "275    276      guided drawing    Other             0.56"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.Category=='Other']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Computers_and_Electronics', 1.0)]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_top_labels(\n",
    "    'turtle beach headset guide'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predicted Class:  Computers_and_Electronics \n",
      "probabilities_scores: 0.9980000257492065\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'P(Hobbies_and_Leisure)': 0.221,\n",
       " 'P(News)': 0.001,\n",
       " 'P(Science)': 0.032,\n",
       " 'P(Autos_and_Vehicles)': 0.013,\n",
       " 'P(Health)': 0.004,\n",
       " 'P(Pets_and_Animals)': 0.162,\n",
       " 'P(Adult)': 0.013,\n",
       " 'P(Computers_and_Electronics)': 0.998,\n",
       " 'P(Online Communities)': 0.255,\n",
       " 'P(Beauty_and_Fitness)': 0.016,\n",
       " 'P(People_and_Society)': 0.0,\n",
       " 'P(Business_and_Industrial)': 0.001,\n",
       " 'P(Reference)': 0.003,\n",
       " 'P(Shopping)': 0.083,\n",
       " 'P(Travel_and_Transportation)': 0.006,\n",
       " 'P(Food_and_Drink)': 0.01,\n",
       " 'P(Law_and_Government)': 0.005,\n",
       " 'P(Books_and_Literature)': 0.001,\n",
       " 'P(Finance)': 0.006,\n",
       " 'P(Games)': 0.045,\n",
       " 'P(Home_and_Garden)': 0.021,\n",
       " 'P(Jobs_and_Education)': 0.001,\n",
       " 'P(Arts_and_Entertainment)': 0.004,\n",
       " 'P(Sensitive Subjects)': 0.002,\n",
       " 'P(Real Estate)': 0.009,\n",
       " 'P(Internet_and_Telecom)': 0.029,\n",
       " 'P(Sports)': 0.014,\n",
       " 'Predicted Label': 'Computers_and_Electronics',\n",
       " 'Predicted Label Score': 0.998}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predict(\n",
    "    'turtle beach headphones guide'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first cell of this notebook already steps up one directory; an\n",
    "# unconditional second os.chdir('..') would climb a further level when the\n",
    "# notebook is run top to bottom. Only move when the `utils` package directory\n",
    "# (imported in the next cell) is not visible from the current directory.\n",
    "import os\n",
    "\n",
    "if not os.path.isdir('utils'):\n",
    "    os.chdir('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "from utils.get_category import get_top_labels, predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Food_and_Drink', 0.989),\n",
       " ('Computers_and_Electronics', 0.973),\n",
       " ('Games', 0.172),\n",
       " ('Shopping', 0.134)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_top_labels(\n",
    "    \"apple\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Computers_and_Electronics', 0.999), ('Shopping', 0.993)]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_top_labels(\n",
    "    'amazon  mindkoo headsets with discount'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Home_and_Garden', 0.999), ('Computers_and_Electronics', 0.243)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_top_labels(\n",
    "    'how to use lawn mower'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}