{ "cells": [ { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import nltk\n", "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package vader_lexicon to C:\\Users\\Rikhil\n", "[nltk_data] Nellimarla\\AppData\\Roaming\\nltk_data...\n", "[nltk_data] Package vader_lexicon is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Download VADER lexicon for sentiment analysis\n", "nltk.download('vader_lexicon')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: '../Datasets/IMDB Dataset.csv'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[38], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../Datasets/IMDB Dataset.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 1014\u001b[0m dialect,\n\u001b[0;32m 1015\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m 1023\u001b[0m )\n\u001b[0;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_engine(f, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m 1881\u001b[0m f,\n\u001b[0;32m 1882\u001b[0m mode,\n\u001b[0;32m 1883\u001b[0m encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1884\u001b[0m compression\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1885\u001b[0m memory_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmemory_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m),\n\u001b[0;32m 1886\u001b[0m is_text\u001b[38;5;241m=\u001b[39mis_text,\n\u001b[0;32m 1887\u001b[0m errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding_errors\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstrict\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m 1888\u001b[0m storage_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstorage_options\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m 1889\u001b[0m )\n\u001b[0;32m 1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m 874\u001b[0m handle,\n\u001b[0;32m 875\u001b[0m ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m 876\u001b[0m encoding\u001b[38;5;241m=\u001b[39mioargs\u001b[38;5;241m.\u001b[39mencoding,\n\u001b[0;32m 877\u001b[0m errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[0;32m 878\u001b[0m newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 879\u001b[0m )\n\u001b[0;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../Datasets/IMDB Dataset.csv'" ] } ], "source": [ "data = pd.read_csv('../Datasets/IMDB Dataset.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data=data.head(500)\n", "sia = SentimentIntensityAnalyzer()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[23], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mcolumns:\n\u001b[1;32m----> 2\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msentiment_score\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: sia\u001b[38;5;241m.\u001b[39mpolarity_scores(x)[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompound\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumn \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m not found.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\core\\series.py:4924\u001b[0m, in \u001b[0;36mSeries.apply\u001b[1;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[0;32m 4789\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[0;32m 4790\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 4791\u001b[0m func: AggFuncType,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4796\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 4797\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[0;32m 4798\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4799\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[0;32m 4800\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4915\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[0;32m 4916\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m 4917\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m SeriesApply(\n\u001b[0;32m 4918\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 4919\u001b[0m func,\n\u001b[0;32m 4920\u001b[0m convert_dtype\u001b[38;5;241m=\u001b[39mconvert_dtype,\n\u001b[0;32m 4921\u001b[0m by_row\u001b[38;5;241m=\u001b[39mby_row,\n\u001b[0;32m 4922\u001b[0m args\u001b[38;5;241m=\u001b[39margs,\n\u001b[0;32m 4923\u001b[0m kwargs\u001b[38;5;241m=\u001b[39mkwargs,\n\u001b[1;32m-> 4924\u001b[0m )\u001b[38;5;241m.\u001b[39mapply()\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\core\\apply.py:1427\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1424\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[0;32m 1426\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[1;32m-> 1427\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_standard()\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\core\\apply.py:1507\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1501\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[0;32m 1502\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[0;32m 1503\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[0;32m 1504\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[0;32m 1505\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[0;32m 1506\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1507\u001b[0m mapped \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_map_values(\n\u001b[0;32m 1508\u001b[0m mapper\u001b[38;5;241m=\u001b[39mcurried, na_action\u001b[38;5;241m=\u001b[39maction, convert\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconvert_dtype\n\u001b[0;32m 1509\u001b[0m )\n\u001b[0;32m 1511\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[0;32m 1512\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[0;32m 1513\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[0;32m 1514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\core\\base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[1;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[0;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[0;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[1;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m algorithms\u001b[38;5;241m.\u001b[39mmap_array(arr, mapper, na_action\u001b[38;5;241m=\u001b[39mna_action, convert\u001b[38;5;241m=\u001b[39mconvert)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\pandas\\core\\algorithms.py:1743\u001b[0m, in \u001b[0;36mmap_array\u001b[1;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[0;32m 1741\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 1743\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer(values, mapper, convert\u001b[38;5;241m=\u001b[39mconvert)\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[0;32m 1746\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[0;32m 1747\u001b[0m )\n", "File \u001b[1;32mlib.pyx:2972\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[1;34m()\u001b[0m\n", "Cell \u001b[1;32mIn[23], line 2\u001b[0m, in \u001b[0;36m\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mcolumns:\n\u001b[1;32m----> 2\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msentiment_score\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: sia\u001b[38;5;241m.\u001b[39mpolarity_scores(x)[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompound\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumn \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreview\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m not found.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\nltk\\sentiment\\vader.py:366\u001b[0m, in \u001b[0;36mSentimentIntensityAnalyzer.polarity_scores\u001b[1;34m(self, text)\u001b[0m\n\u001b[0;32m 355\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 356\u001b[0m \u001b[38;5;124;03mReturn a float for sentiment strength based on the input text.\u001b[39;00m\n\u001b[0;32m 357\u001b[0m \u001b[38;5;124;03mPositive values are positive valence, negative value are negative\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 363\u001b[0m \u001b[38;5;124;03m matched as if it was a normal word in the sentence.\u001b[39;00m\n\u001b[0;32m 364\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 365\u001b[0m \u001b[38;5;66;03m# text, words_and_emoticons, is_cap_diff = self.preprocess(text)\u001b[39;00m\n\u001b[1;32m--> 366\u001b[0m sentitext \u001b[38;5;241m=\u001b[39m SentiText(\n\u001b[0;32m 367\u001b[0m text, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconstants\u001b[38;5;241m.\u001b[39mPUNC_LIST, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconstants\u001b[38;5;241m.\u001b[39mREGEX_REMOVE_PUNCTUATION\n\u001b[0;32m 368\u001b[0m )\n\u001b[0;32m 369\u001b[0m sentiments \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m 370\u001b[0m words_and_emoticons \u001b[38;5;241m=\u001b[39m sentitext\u001b[38;5;241m.\u001b[39mwords_and_emoticons\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\nltk\\sentiment\\vader.py:274\u001b[0m, in \u001b[0;36mSentiText.__init__\u001b[1;34m(self, text, punc_list, regex_remove_punctuation)\u001b[0m\n\u001b[0;32m 272\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mPUNC_LIST \u001b[38;5;241m=\u001b[39m punc_list\n\u001b[0;32m 273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mREGEX_REMOVE_PUNCTUATION \u001b[38;5;241m=\u001b[39m regex_remove_punctuation\n\u001b[1;32m--> 274\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwords_and_emoticons \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_words_and_emoticons()\n\u001b[0;32m 275\u001b[0m \u001b[38;5;66;03m# doesn't separate words from\u001b[39;00m\n\u001b[0;32m 276\u001b[0m \u001b[38;5;66;03m# adjacent punctuation (keeps emoticons & contractions)\u001b[39;00m\n\u001b[0;32m 277\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_cap_diff \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mallcap_differential(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwords_and_emoticons)\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\nltk\\sentiment\\vader.py:306\u001b[0m, in \u001b[0;36mSentiText._words_and_emoticons\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 300\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 301\u001b[0m \u001b[38;5;124;03mRemoves leading and trailing puncutation\u001b[39;00m\n\u001b[0;32m 302\u001b[0m \u001b[38;5;124;03mLeaves contractions and most emoticons\u001b[39;00m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;124;03m Does not preserve punc-plus-letter emoticons (e.g. :D)\u001b[39;00m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 305\u001b[0m wes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;241m.\u001b[39msplit()\n\u001b[1;32m--> 306\u001b[0m words_punc_dict \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_words_plus_punc()\n\u001b[0;32m 307\u001b[0m wes \u001b[38;5;241m=\u001b[39m [we \u001b[38;5;28;01mfor\u001b[39;00m we \u001b[38;5;129;01min\u001b[39;00m wes \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(we) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 308\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, we \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(wes):\n", "File \u001b[1;32mc:\\Users\\Rikhil Nellimarla\\.conda\\envs\\NLP_env\\Lib\\site-packages\\nltk\\sentiment\\vader.py:293\u001b[0m, in \u001b[0;36mSentiText._words_plus_punc\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 291\u001b[0m words_only \u001b[38;5;241m=\u001b[39m {w \u001b[38;5;28;01mfor\u001b[39;00m w \u001b[38;5;129;01min\u001b[39;00m words_only \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(w) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m}\n\u001b[0;32m 292\u001b[0m \u001b[38;5;66;03m# the product gives ('cat', ',') and (',', 'cat')\u001b[39;00m\n\u001b[1;32m--> 293\u001b[0m punc_before \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(p): p[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m product(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mPUNC_LIST, words_only)}\n\u001b[0;32m 294\u001b[0m punc_after \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(p): p[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m product(words_only, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mPUNC_LIST)}\n\u001b[0;32m 295\u001b[0m words_punc_dict \u001b[38;5;241m=\u001b[39m punc_before\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "if 'review' in data.columns:\n", " data['sentiment_score'] = data['review'].apply(lambda x: sia.polarity_scores(x)['compound'])\n", "else:\n", " print(\"Column 'review' not found.\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This oatmeal is not good. Its mushy, soft, I don't like it. Quaker Oats is the way to go.\n" ] } ], "source": [ "print(data['sentiment'].value_counts())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['This', 'oatmeal', 'is', 'not', 'good', '.', 'Its', 'mushy', ',', 'soft']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.figure(figsize=(8, 6))\n", "sns.countplot(x='sentiment', data=data, palette='Set1')\n", "plt.title('Sentiment Distribution')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('This', 'DT'),\n", " ('oatmeal', 'NN'),\n", " ('is', 'VBZ'),\n", " ('not', 'RB'),\n", " ('good', 'JJ'),\n", " ('.', '.'),\n", " ('Its', 'PRP$'),\n", " ('mushy', 'NN'),\n", " (',', ','),\n", " ('soft', 'JJ')]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.to_csv('sentiment_analysis_results.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(S\n", " This/DT\n", " oatmeal/NN\n", " is/VBZ\n", " not/RB\n", " good/JJ\n", " ./.\n", " Its/PRP$\n", " mushy/NN\n", " ,/,\n", " soft/JJ\n", " ,/,\n", " I/PRP\n", " do/VBP\n", " n't/RB\n", " like/VB\n", " it/PRP\n", " ./.\n", " (ORGANIZATION Quaker/NNP Oats/NNPS)\n", " is/VBZ\n", " the/DT\n", " way/NN\n", " to/TO\n", " go/VB\n", " ./.)\n" ] } ], "source": [ "print(data[['review', 'sentiment_score', 'sentiment']].head())" ] } ], "metadata": { "kernelspec": { "display_name": "NLP_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }