Spaces:

shreyan67
/

Job_Catalyst-AI

Sleeping

File size: 65,652 Bytes

7f21468

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<>:1: SyntaxWarning: invalid escape sequence '\\j'\n",
      "<>:1: SyntaxWarning: invalid escape sequence '\\j'\n",
      "C:\\Users\\HP\\AppData\\Local\\Temp\\ipykernel_17816\\3441334955.py:1: SyntaxWarning: invalid escape sequence '\\j'\n",
      "  Jobs = pd.read_csv(\"dataJobs\\job_descriptions.csv\")\n",
      "C:\\Users\\HP\\AppData\\Local\\Temp\\ipykernel_17816\\3441334955.py:1: SyntaxWarning: invalid escape sequence '\\j'\n",
      "  Jobs = pd.read_csv(\"dataJobs\\job_descriptions.csv\")\n"
     ]
    },
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'dataJobs\\\\job_descriptions.csv'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mFileNotFoundError\u001b[39m                         Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m Jobs = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdataJobs\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mjob_descriptions.csv\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m      2\u001b[39m Jobs.shape\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\HP\\OneDrive\\Desktop\\job-recommendation-system-ai-main\\venv\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m   1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m   1014\u001b[39m     dialect,\n\u001b[32m   1015\u001b[39m     delimiter,\n\u001b[32m   (...)\u001b[39m\u001b[32m   1022\u001b[39m     dtype_backend=dtype_backend,\n\u001b[32m   1023\u001b[39m )\n\u001b[32m   1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\HP\\OneDrive\\Desktop\\job-recommendation-system-ai-main\\venv\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m    617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m    619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m    623\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\HP\\OneDrive\\Desktop\\job-recommendation-system-ai-main\\venv\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m   1617\u001b[39m     \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m   1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\HP\\OneDrive\\Desktop\\job-recommendation-system-ai-main\\venv\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m   1878\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m   1879\u001b[39m         mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   1881\u001b[39m \u001b[43m    \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1882\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1883\u001b[39m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1884\u001b[39m \u001b[43m    \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1885\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1886\u001b[39m \u001b[43m    \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1887\u001b[39m \u001b[43m    \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1888\u001b[39m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m   1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\HP\\OneDrive\\Desktop\\job-recommendation-system-ai-main\\venv\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m    868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m    869\u001b[39m     \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m    870\u001b[39m     \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m    871\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m    872\u001b[39m         \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m         handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m    874\u001b[39m \u001b[43m            \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    875\u001b[39m \u001b[43m            \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    876\u001b[39m \u001b[43m            \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    877\u001b[39m \u001b[43m            \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    878\u001b[39m \u001b[43m            \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m    879\u001b[39m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    880\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    881\u001b[39m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m    882\u001b[39m         handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n",
      "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'dataJobs\\\\job_descriptions.csv'"
     ]
    }
   ],
   "source": [
    "Jobs = pd.read_csv(\"dataJobs\\job_descriptions.csv\")\n",
    "Jobs.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'Jobs' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m Jobs_shuffled = \u001b[43mJobs\u001b[49m.sample(frac=\u001b[32m1\u001b[39m, random_state=\u001b[32m42\u001b[39m).reset_index(drop=\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
      "\u001b[31mNameError\u001b[39m: name 'Jobs' is not defined"
     ]
    }
   ],
   "source": [
    "Jobs_shuffled = Jobs.sample(frac=1, random_state=42).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "FinalJobs = Jobs_shuffled.iloc[:10000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['Job Id', 'Experience', 'Qualifications', 'Salary Range', 'location',\n",
       "        'Country', 'latitude', 'longitude', 'Work Type', 'Company Size',\n",
       "        'Job Posting Date', 'Preference', 'Contact Person', 'Contact',\n",
       "        'Job Title', 'Role', 'Job Portal', 'Job Description', 'Benefits',\n",
       "        'skills', 'Responsibilities', 'Company', 'Company Profile'],\n",
       "       dtype='object'),\n",
       " (10000, 23))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#FinalJobs.to_csv(\"Jobs.csv\" , index= False)\n",
    "FinalJobs.columns, FinalJobs.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Job Id</th>\n",
       "      <th>Experience</th>\n",
       "      <th>Qualifications</th>\n",
       "      <th>Salary Range</th>\n",
       "      <th>location</th>\n",
       "      <th>Country</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>Work Type</th>\n",
       "      <th>Company Size</th>\n",
       "      <th>...</th>\n",
       "      <th>Contact</th>\n",
       "      <th>Job Title</th>\n",
       "      <th>Role</th>\n",
       "      <th>Job Portal</th>\n",
       "      <th>Job Description</th>\n",
       "      <th>Benefits</th>\n",
       "      <th>skills</th>\n",
       "      <th>Responsibilities</th>\n",
       "      <th>Company</th>\n",
       "      <th>Company Profile</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8573</th>\n",
       "      <td>1901955228738870</td>\n",
       "      <td>3 to 10 Years</td>\n",
       "      <td>MBA</td>\n",
       "      <td>$62K-$100K</td>\n",
       "      <td>Kampala</td>\n",
       "      <td>Uganda</td>\n",
       "      <td>1.3733</td>\n",
       "      <td>32.2903</td>\n",
       "      <td>Full-Time</td>\n",
       "      <td>115838</td>\n",
       "      <td>...</td>\n",
       "      <td>+1-597-207-4474</td>\n",
       "      <td>Legal Advisor</td>\n",
       "      <td>In-House Counsel</td>\n",
       "      <td>Dice</td>\n",
       "      <td>In the role of In-House Counsel, you will prov...</td>\n",
       "      <td>{'Casual Dress Code, Social and Recreational A...</td>\n",
       "      <td>In-house counsel Legal advisory Employment law...</td>\n",
       "      <td>Serve as in-house legal counsel for organizati...</td>\n",
       "      <td>Oracle</td>\n",
       "      <td>{\"Sector\":\"Technology\",\"Industry\":\"Computer So...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                Job Id     Experience Qualifications Salary Range location  \\\n",
       "8573  1901955228738870  3 to 10 Years            MBA   $62K-$100K  Kampala   \n",
       "\n",
       "     Country  latitude  longitude  Work Type  Company Size  ...  \\\n",
       "8573  Uganda    1.3733    32.2903  Full-Time        115838  ...   \n",
       "\n",
       "              Contact      Job Title              Role Job Portal  \\\n",
       "8573  +1-597-207-4474  Legal Advisor  In-House Counsel       Dice   \n",
       "\n",
       "                                        Job Description  \\\n",
       "8573  In the role of In-House Counsel, you will prov...   \n",
       "\n",
       "                                               Benefits  \\\n",
       "8573  {'Casual Dress Code, Social and Recreational A...   \n",
       "\n",
       "                                                 skills  \\\n",
       "8573  In-house counsel Legal advisory Employment law...   \n",
       "\n",
       "                                       Responsibilities Company  \\\n",
       "8573  Serve as in-house legal counsel for organizati...  Oracle   \n",
       "\n",
       "                                        Company Profile  \n",
       "8573  {\"Sector\":\"Technology\",\"Industry\":\"Computer So...  \n",
       "\n",
       "[1 rows x 23 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.sample(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"workplace\"]= FinalJobs[\"location\"]+\" \"+FinalJobs[\"Country\"]\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"working_mode\"] = FinalJobs[\"Work Type\"]\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:15: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"salary\"] = FinalJobs[\"Salary Range\"].apply(calculate_avg_salary)\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:18: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"position\"] = FinalJobs[\"Job Title\"]\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:21: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"job_role_and_duties\"] = FinalJobs[\"Responsibilities\"] +\" \" + FinalJobs[\"Role\"] + FinalJobs[\"Job Description\"]\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:24: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"requisite_skill\"] = FinalJobs[\"skills\"] + \" \" + FinalJobs[\"Experience\"] +\" \"+ FinalJobs[\"Qualifications\"]\n",
      "C:\\Users\\LAPSHOP\\AppData\\Local\\Temp\\ipykernel_24448\\857821168.py:27: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  FinalJobs[\"offer_details\"] = FinalJobs[\"Benefits\"]\n"
     ]
    }
   ],
   "source": [
    "# 1. Create \"workplace\" column from \"location\" and \"country\"\n",
    "FinalJobs[\"workplace\"]= FinalJobs[\"location\"]+\" \"+FinalJobs[\"Country\"]\n",
    "# 3. Create \"working_mode\" column from \"Work Type\"\n",
    "FinalJobs[\"working_mode\"] = FinalJobs[\"Work Type\"]\n",
    "\n",
    "# 4. Create \"salary\" column by calculating the average of Salary Range\n",
    "def calculate_avg_salary(salary_range):\n",
    "    if isinstance(salary_range, str) and \"-\" in salary_range:\n",
    "        salary_range = salary_range.replace(\"$\", \"\").replace(\"K\", \"\").split(\"-\")\n",
    "        min_salary = float(salary_range[0]) * 1000\n",
    "        max_salary = float(salary_range[1]) * 1000\n",
    "        return (min_salary + max_salary) / 2\n",
    "    return None\n",
    "\n",
    "FinalJobs[\"salary\"] = FinalJobs[\"Salary Range\"].apply(calculate_avg_salary)\n",
    "\n",
    "# 5. Create \"position\" column from \"Job Title\"\n",
    "FinalJobs[\"position\"] = FinalJobs[\"Job Title\"]\n",
    "\n",
    "# 6. Create \"job_role_and_duties\" from \"Responsibilities\", \"Role\", and \"Job Description\"\n",
    "FinalJobs[\"job_role_and_duties\"] = FinalJobs[\"Responsibilities\"] +\" \" + FinalJobs[\"Role\"] + FinalJobs[\"Job Description\"]\n",
    "\n",
    "# 7. Create \"requisite_skill\" from \"skills\", \"Experience\", and \"Qualifications\"\n",
    "FinalJobs[\"requisite_skill\"] = FinalJobs[\"skills\"] + \" \" + FinalJobs[\"Experience\"] +\" \"+ FinalJobs[\"Qualifications\"]\n",
    "\n",
    "# 8. Create \"offer_details\" from \"Benefits\"\n",
    "FinalJobs[\"offer_details\"] = FinalJobs[\"Benefits\"]\n",
    "\n",
    "# Drop unnecessary columns (optional)\n",
    "FinalJobs = FinalJobs.drop(columns=[\"location\", \"Country\", \"latitude\", \"longitude\", \"Work Type\", \"Salary Range\",\n",
    "                                      \"Job Title\", \"Responsibilities\", \"Role\", \"Job Description\",\n",
    "                                      \"skills\", \"Experience\", \"Qualifications\", \"Benefits\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Job Id', 'Company Size', 'Job Posting Date', 'Preference',\n",
       "       'Contact Person', 'Contact', 'Job Portal', 'Company', 'Company Profile',\n",
       "       'workplace', 'working_mode', 'salary', 'position',\n",
       "       'job_role_and_duties', 'requisite_skill', 'offer_details'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "FinalJobs = FinalJobs.drop(columns=['Company Size', 'Job Posting Date', 'Preference','Contact Person', 'Contact', 'Job Portal', 'Company', 'Company Profile'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Job Id', 'workplace', 'working_mode', 'salary', 'position',\n",
       "       'job_role_and_duties', 'requisite_skill', 'offer_details'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Job Id</th>\n",
       "      <th>workplace</th>\n",
       "      <th>working_mode</th>\n",
       "      <th>salary</th>\n",
       "      <th>position</th>\n",
       "      <th>job_role_and_duties</th>\n",
       "      <th>requisite_skill</th>\n",
       "      <th>offer_details</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9713</th>\n",
       "      <td>2049802603799610</td>\n",
       "      <td>Jerusalem Israel</td>\n",
       "      <td>Part-Time</td>\n",
       "      <td>75500.0</td>\n",
       "      <td>Process Engineer</td>\n",
       "      <td>Specialize in chemical processes, including de...</td>\n",
       "      <td>Chemical engineering Process design Chemical r...</td>\n",
       "      <td>{'Casual Dress Code, Social and Recreational A...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                Job Id         workplace working_mode   salary  \\\n",
       "9713  2049802603799610  Jerusalem Israel    Part-Time  75500.0   \n",
       "\n",
       "              position                                job_role_and_duties  \\\n",
       "9713  Process Engineer  Specialize in chemical processes, including de...   \n",
       "\n",
       "                                        requisite_skill  \\\n",
       "9713  Chemical engineering Process design Chemical r...   \n",
       "\n",
       "                                          offer_details  \n",
       "9713  {'Casual Dress Code, Social and Recreational A...  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.sample(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Job Id is    1017340707950150\n",
      "workplace is    Panama City Panama\n",
      "working_mode is    Contract\n",
      "salary is    69500.0\n",
      "position is    Procurement Manager\n",
      "job_role_and_duties is    Promote supplier diversity initiatives and inclusion. Identify and onboard diverse suppliers. Monitor compliance with diversity goals and regulations. Supplier Diversity ManagerPromote diversity and inclusion in the supply chain, manage supplier diversity programs, and assess supplier performance.\n",
      "requisite_skill is    Supplier diversity programs Diversity and inclusion initiatives Supplier assessment and certification Data collection and reporting Vendor outreach and engagement Strategic planning Communication skills Relationship building Attention to diversity and inclusion principles 5 to 10 Years BBA\n",
      "offer_details is    {'Transportation Benefits, Professional Development, Bonuses and Incentive Programs, Profit-Sharing, Employee Discounts'}\n"
     ]
    }
   ],
   "source": [
    "for i in FinalJobs.columns:\n",
    "  print( i , \"is   \", FinalJobs[i][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to\n",
      "[nltk_data]     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n",
      "[nltk_data] Downloading package stopwords to\n",
      "[nltk_data]     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]   Unzipping corpora\\stopwords.zip.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tokenize import word_tokenize\n",
    "\n",
    "# Download NLTK resources (if not already downloaded)\n",
    "nltk.download('punkt')\n",
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading collection 'all'\n",
      "[nltk_data]    | \n",
      "[nltk_data]    | Downloading package abc to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package abc is already up-to-date!\n",
      "[nltk_data]    | Downloading package alpino to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package alpino is already up-to-date!\n",
      "[nltk_data]    | Downloading package averaged_perceptron_tagger to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package averaged_perceptron_tagger is already up-\n",
      "[nltk_data]    |       to-date!\n",
      "[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package averaged_perceptron_tagger_eng is already\n",
      "[nltk_data]    |       up-to-date!\n",
      "[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package averaged_perceptron_tagger_ru is already\n",
      "[nltk_data]    |       up-to-date!\n",
      "[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package averaged_perceptron_tagger_rus is already\n",
      "[nltk_data]    |       up-to-date!\n",
      "[nltk_data]    | Downloading package basque_grammars to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package basque_grammars is already up-to-date!\n",
      "[nltk_data]    | Downloading package bcp47 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package bcp47 is already up-to-date!\n",
      "[nltk_data]    | Downloading package biocreative_ppi to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package biocreative_ppi is already up-to-date!\n",
      "[nltk_data]    | Downloading package bllip_wsj_no_aux to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!\n",
      "[nltk_data]    | Downloading package book_grammars to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package book_grammars is already up-to-date!\n",
      "[nltk_data]    | Downloading package brown to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package brown is already up-to-date!\n",
      "[nltk_data]    | Downloading package brown_tei to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package brown_tei is already up-to-date!\n",
      "[nltk_data]    | Downloading package cess_cat to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package cess_cat is already up-to-date!\n",
      "[nltk_data]    | Downloading package cess_esp to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package cess_esp is already up-to-date!\n",
      "[nltk_data]    | Downloading package chat80 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package chat80 is already up-to-date!\n",
      "[nltk_data]    | Downloading package city_database to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package city_database is already up-to-date!\n",
      "[nltk_data]    | Downloading package cmudict to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package cmudict is already up-to-date!\n",
      "[nltk_data]    | Downloading package comparative_sentences to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package comparative_sentences is already up-to-\n",
      "[nltk_data]    |       date!\n",
      "[nltk_data]    | Downloading package comtrans to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package comtrans is already up-to-date!\n",
      "[nltk_data]    | Downloading package conll2000 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package conll2000 is already up-to-date!\n",
      "[nltk_data]    | Downloading package conll2002 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package conll2002 is already up-to-date!\n",
      "[nltk_data]    | Downloading package conll2007 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package conll2007 is already up-to-date!\n",
      "[nltk_data]    | Downloading package crubadan to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package crubadan is already up-to-date!\n",
      "[nltk_data]    | Downloading package dependency_treebank to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package dependency_treebank is already up-to-date!\n",
      "[nltk_data]    | Downloading package dolch to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package dolch is already up-to-date!\n",
      "[nltk_data]    | Downloading package europarl_raw to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package europarl_raw is already up-to-date!\n",
      "[nltk_data]    | Downloading package extended_omw to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package extended_omw is already up-to-date!\n",
      "[nltk_data]    | Downloading package floresta to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package floresta is already up-to-date!\n",
      "[nltk_data]    | Downloading package framenet_v15 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package framenet_v15 is already up-to-date!\n",
      "[nltk_data]    | Downloading package framenet_v17 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package framenet_v17 is already up-to-date!\n",
      "[nltk_data]    | Downloading package gazetteers to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package gazetteers is already up-to-date!\n",
      "[nltk_data]    | Downloading package genesis to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package genesis is already up-to-date!\n",
      "[nltk_data]    | Downloading package gutenberg to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package gutenberg is already up-to-date!\n",
      "[nltk_data]    | Downloading package ieer to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package ieer is already up-to-date!\n",
      "[nltk_data]    | Downloading package inaugural to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Unzipping corpora\\inaugural.zip.\n",
      "[nltk_data]    | Downloading package indian to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package indian is already up-to-date!\n",
      "[nltk_data]    | Downloading package jeita to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package jeita is already up-to-date!\n",
      "[nltk_data]    | Downloading package kimmo to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package kimmo is already up-to-date!\n",
      "[nltk_data]    | Downloading package knbc to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package knbc is already up-to-date!\n",
      "[nltk_data]    | Downloading package large_grammars to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package large_grammars is already up-to-date!\n",
      "[nltk_data]    | Downloading package lin_thesaurus to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package lin_thesaurus is already up-to-date!\n",
      "[nltk_data]    | Downloading package mac_morpho to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package mac_morpho is already up-to-date!\n",
      "[nltk_data]    | Downloading package machado to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package machado is already up-to-date!\n",
      "[nltk_data]    | Downloading package masc_tagged to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package masc_tagged is already up-to-date!\n",
      "[nltk_data]    | Downloading package maxent_ne_chunker to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!\n",
      "[nltk_data]    | Downloading package maxent_ne_chunker_tab to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package maxent_ne_chunker_tab is already up-to-\n",
      "[nltk_data]    |       date!\n",
      "[nltk_data]    | Downloading package maxent_treebank_pos_tagger to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-\n",
      "[nltk_data]    |       to-date!\n",
      "[nltk_data]    | Downloading package maxent_treebank_pos_tagger_tab to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package maxent_treebank_pos_tagger_tab is already\n",
      "[nltk_data]    |       up-to-date!\n",
      "[nltk_data]    | Downloading package moses_sample to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package moses_sample is already up-to-date!\n",
      "[nltk_data]    | Downloading package movie_reviews to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package movie_reviews is already up-to-date!\n",
      "[nltk_data]    | Downloading package mte_teip5 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package mte_teip5 is already up-to-date!\n",
      "[nltk_data]    | Downloading package mwa_ppdb to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package mwa_ppdb is already up-to-date!\n",
      "[nltk_data]    | Downloading package names to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package names is already up-to-date!\n",
      "[nltk_data]    | Downloading package nombank.1.0 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package nombank.1.0 is already up-to-date!\n",
      "[nltk_data]    | Downloading package nonbreaking_prefixes to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!\n",
      "[nltk_data]    | Downloading package nps_chat to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package nps_chat is already up-to-date!\n",
      "[nltk_data]    | Downloading package omw to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package omw is already up-to-date!\n",
      "[nltk_data]    | Downloading package omw-1.4 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package omw-1.4 is already up-to-date!\n",
      "[nltk_data]    | Downloading package opinion_lexicon to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package opinion_lexicon is already up-to-date!\n",
      "[nltk_data]    | Downloading package panlex_swadesh to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package panlex_swadesh is already up-to-date!\n",
      "[nltk_data]    | Downloading package paradigms to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package paradigms is already up-to-date!\n",
      "[nltk_data]    | Downloading package pe08 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package pe08 is already up-to-date!\n",
      "[nltk_data]    | Downloading package perluniprops to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package perluniprops is already up-to-date!\n",
      "[nltk_data]    | Downloading package pil to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package pil is already up-to-date!\n",
      "[nltk_data]    | Downloading package pl196x to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package pl196x is already up-to-date!\n",
      "[nltk_data]    | Downloading package porter_test to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package porter_test is already up-to-date!\n",
      "[nltk_data]    | Downloading package ppattach to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package ppattach is already up-to-date!\n",
      "[nltk_data]    | Downloading package problem_reports to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package problem_reports is already up-to-date!\n",
      "[nltk_data]    | Downloading package product_reviews_1 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package product_reviews_1 is already up-to-date!\n",
      "[nltk_data]    | Downloading package product_reviews_2 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package product_reviews_2 is already up-to-date!\n",
      "[nltk_data]    | Downloading package propbank to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package propbank is already up-to-date!\n",
      "[nltk_data]    | Downloading package pros_cons to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package pros_cons is already up-to-date!\n",
      "[nltk_data]    | Downloading package ptb to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package ptb is already up-to-date!\n",
      "[nltk_data]    | Downloading package punkt to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package punkt is already up-to-date!\n",
      "[nltk_data]    | Downloading package punkt_tab to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Unzipping tokenizers\\punkt_tab.zip.\n",
      "[nltk_data]    | Downloading package qc to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package qc is already up-to-date!\n",
      "[nltk_data]    | Downloading package reuters to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package reuters is already up-to-date!\n",
      "[nltk_data]    | Downloading package rslp to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package rslp is already up-to-date!\n",
      "[nltk_data]    | Downloading package rte to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package rte is already up-to-date!\n",
      "[nltk_data]    | Downloading package sample_grammars to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package sample_grammars is already up-to-date!\n",
      "[nltk_data]    | Downloading package semcor to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package semcor is already up-to-date!\n",
      "[nltk_data]    | Downloading package senseval to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package senseval is already up-to-date!\n",
      "[nltk_data]    | Downloading package sentence_polarity to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package sentence_polarity is already up-to-date!\n",
      "[nltk_data]    | Downloading package sentiwordnet to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package sentiwordnet is already up-to-date!\n",
      "[nltk_data]    | Downloading package shakespeare to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package shakespeare is already up-to-date!\n",
      "[nltk_data]    | Downloading package sinica_treebank to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package sinica_treebank is already up-to-date!\n",
      "[nltk_data]    | Downloading package smultron to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package smultron is already up-to-date!\n",
      "[nltk_data]    | Downloading package snowball_data to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package snowball_data is already up-to-date!\n",
      "[nltk_data]    | Downloading package spanish_grammars to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package spanish_grammars is already up-to-date!\n",
      "[nltk_data]    | Downloading package state_union to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package state_union is already up-to-date!\n",
      "[nltk_data]    | Downloading package stopwords to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package stopwords is already up-to-date!\n",
      "[nltk_data]    | Downloading package subjectivity to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package subjectivity is already up-to-date!\n",
      "[nltk_data]    | Downloading package swadesh to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package swadesh is already up-to-date!\n",
      "[nltk_data]    | Downloading package switchboard to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package switchboard is already up-to-date!\n",
      "[nltk_data]    | Downloading package tagsets to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package tagsets is already up-to-date!\n",
      "[nltk_data]    | Downloading package tagsets_json to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package tagsets_json is already up-to-date!\n",
      "[nltk_data]    | Downloading package timit to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package timit is already up-to-date!\n",
      "[nltk_data]    | Downloading package toolbox to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package toolbox is already up-to-date!\n",
      "[nltk_data]    | Downloading package treebank to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package treebank is already up-to-date!\n",
      "[nltk_data]    | Downloading package twitter_samples to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package twitter_samples is already up-to-date!\n",
      "[nltk_data]    | Downloading package udhr to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package udhr is already up-to-date!\n",
      "[nltk_data]    | Downloading package udhr2 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package udhr2 is already up-to-date!\n",
      "[nltk_data]    | Downloading package unicode_samples to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package unicode_samples is already up-to-date!\n",
      "[nltk_data]    | Downloading package universal_tagset to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package universal_tagset is already up-to-date!\n",
      "[nltk_data]    | Downloading package universal_treebanks_v20 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package universal_treebanks_v20 is already up-to-\n",
      "[nltk_data]    |       date!\n",
      "[nltk_data]    | Downloading package vader_lexicon to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package vader_lexicon is already up-to-date!\n",
      "[nltk_data]    | Downloading package verbnet to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package verbnet is already up-to-date!\n",
      "[nltk_data]    | Downloading package verbnet3 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package verbnet3 is already up-to-date!\n",
      "[nltk_data]    | Downloading package webtext to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package webtext is already up-to-date!\n",
      "[nltk_data]    | Downloading package wmt15_eval to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wmt15_eval is already up-to-date!\n",
      "[nltk_data]    | Downloading package word2vec_sample to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package word2vec_sample is already up-to-date!\n",
      "[nltk_data]    | Downloading package wordnet to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wordnet is already up-to-date!\n",
      "[nltk_data]    | Downloading package wordnet2021 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wordnet2021 is already up-to-date!\n",
      "[nltk_data]    | Downloading package wordnet2022 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wordnet2022 is already up-to-date!\n",
      "[nltk_data]    | Downloading package wordnet31 to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wordnet31 is already up-to-date!\n",
      "[nltk_data]    | Downloading package wordnet_ic to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package wordnet_ic is already up-to-date!\n",
      "[nltk_data]    | Downloading package words to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package words is already up-to-date!\n",
      "[nltk_data]    | Downloading package ycoe to\n",
      "[nltk_data]    |     C:\\Users\\LAPSHOP\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]    |   Package ycoe is already up-to-date!\n",
      "[nltk_data]    | \n",
      "[nltk_data]  Done downloading collection all\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import nltk\n",
    "nltk.download('all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Job Id is    1017340707950150\n",
    "workplace is    Panama City Panama\n",
    "working_mode is    Contract\n",
    "salary is    69500.0\n",
    "position is    Procurement Manager\n",
    "job_role_and_duties is    Promote supplier diversity initiatives and inclusion. Identify and onboard diverse suppliers. Monitor compliance with diversity goals and regulations. Supplier Diversity ManagerPromote diversity and inclusion in the supply chain, manage supplier diversity programs, and assess supplier performance.\n",
    "requisite_skill is    Supplier diversity programs Diversity and inclusion initiatives Supplier assessment and certification Data collection and reporting Vendor outreach and engagement Strategic planning Communication skills Relationship building Attention to diversity and inclusion principles 5 to 10 Years BBA\n",
    "offer_details is    {'Transportation Benefits, Professional Development, Bonuses and Incentive Programs, Profit-Sharing, Employee Discounts'}\n",
    "'''\n",
    "# convert workplace to lowercase\n",
    "FinalJobs[\"workplace\"] = FinalJobs[\"workplace\"].str.lower()\n",
    "# convert working_mode to lowercase\n",
    "FinalJobs[\"working_mode\"] = FinalJobs[\"working_mode\"].str.lower()\n",
    "# convert position to lowercase\n",
    "FinalJobs[\"position\"] = FinalJobs[\"position\"].str.lower()\n",
    "# convert job_role_and_duties to lowercase and remove extra spaces\n",
    "FinalJobs[\"job_role_and_duties\"] = FinalJobs[\"job_role_and_duties\"].str.lower().str.strip()\n",
    "# convert requisite_skill to lowercase and remove extra spaces\n",
    "FinalJobs[\"requisite_skill\"] = FinalJobs[\"requisite_skill\"].str.lower().str.strip()\n",
    "# convert offer_details to lowercase and remove extra spaces and curly brackets\n",
    "FinalJobs[\"offer_details\"] = FinalJobs[\"offer_details\"].str.lower().str.strip(\"{}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Job Id</th>\n",
       "      <th>workplace</th>\n",
       "      <th>working_mode</th>\n",
       "      <th>salary</th>\n",
       "      <th>position</th>\n",
       "      <th>job_role_and_duties</th>\n",
       "      <th>requisite_skill</th>\n",
       "      <th>offer_details</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3184</th>\n",
       "      <td>2380596380494743</td>\n",
       "      <td>valletta malta</td>\n",
       "      <td>part-time</td>\n",
       "      <td>80500.0</td>\n",
       "      <td>network technician</td>\n",
       "      <td>focus on network security, implementing measur...</td>\n",
       "      <td>network security cybersecurity intrusion detec...</td>\n",
       "      <td>'childcare assistance, paid time off (pto), re...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                Job Id       workplace working_mode   salary  \\\n",
       "3184  2380596380494743  valletta malta    part-time  80500.0   \n",
       "\n",
       "                position                                job_role_and_duties  \\\n",
       "3184  network technician  focus on network security, implementing measur...   \n",
       "\n",
       "                                        requisite_skill  \\\n",
       "3184  network security cybersecurity intrusion detec...   \n",
       "\n",
       "                                          offer_details  \n",
       "3184  'childcare assistance, paid time off (pto), re...  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.sample(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "# Remove special characters from \"job_role_and_duties\" and \"requisite_skill\" and offer_details\n",
    "def remove_special_characters(text):\n",
    "    return re.sub(r\"[^a-zA-Z0-9 ]\", \"\", text)\n",
    "\n",
    "FinalJobs[\"job_role_and_duties\"] = FinalJobs[\"job_role_and_duties\"].apply(remove_special_characters)\n",
    "FinalJobs[\"requisite_skill\"] = FinalJobs[\"requisite_skill\"].apply(remove_special_characters)\n",
    "FinalJobs[\"offer_details\"] = FinalJobs[\"offer_details\"].apply(remove_special_characters)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Job Id</th>\n",
       "      <th>workplace</th>\n",
       "      <th>working_mode</th>\n",
       "      <th>salary</th>\n",
       "      <th>position</th>\n",
       "      <th>job_role_and_duties</th>\n",
       "      <th>requisite_skill</th>\n",
       "      <th>offer_details</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9327</th>\n",
       "      <td>2334880320049787</td>\n",
       "      <td>manama bahrain</td>\n",
       "      <td>intern</td>\n",
       "      <td>93500.0</td>\n",
       "      <td>research analyst</td>\n",
       "      <td>conduct research in social sciences gathering ...</td>\n",
       "      <td>social science research methods qualitative an...</td>\n",
       "      <td>flexible spending accounts fsas relocation ass...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                Job Id       workplace working_mode   salary  \\\n",
       "9327  2334880320049787  manama bahrain       intern  93500.0   \n",
       "\n",
       "              position                                job_role_and_duties  \\\n",
       "9327  research analyst  conduct research in social sciences gathering ...   \n",
       "\n",
       "                                        requisite_skill  \\\n",
       "9327  social science research methods qualitative an...   \n",
       "\n",
       "                                          offer_details  \n",
       "9327  flexible spending accounts fsas relocation ass...  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.sample(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 8)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FinalJobs.to_csv(\"JobsFE.csv\" , index= False)\n",
    "FinalJobs.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}