{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Sentiment Analysis using BERT\n\nBERT (Bidirectional Encoder Representations from Transformers) is a “new method of pre-training language representations” developed by Google and released in late 2018.","metadata":{"id":"QbM7x-5UEzUR"}},{"cell_type":"markdown","source":"### Import Libraries and Set the initial variables","metadata":{"id":"Q6hKNfAlEzUS"}},{"cell_type":"code","source":"!pip install transformers","metadata":{"id":"vezpsX-7GphM","outputId":"a7164402-8a7b-4e4c-e118-ce6498ba4f2e","execution":{"iopub.status.busy":"2023-07-10T16:50:49.494748Z","iopub.execute_input":"2023-07-10T16:50:49.495093Z","iopub.status.idle":"2023-07-10T16:50:58.493961Z","shell.execute_reply.started":"2023-07-10T16:50:49.495058Z","shell.execute_reply":"2023-07-10T16:50:58.493016Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Requirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (3.5.1)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2020.4.4)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from transformers) (2.23.0)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from transformers) (1.18.5)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.7/site-packages (from transformers) (4.45.0)\nRequirement already satisfied: sacremoses in /opt/conda/lib/python3.7/site-packages (from transformers) (0.0.43)\nRequirement already satisfied: protobuf in /opt/conda/lib/python3.7/site-packages (from transformers) 
(3.14.0)\nRequirement already satisfied: sentencepiece==0.1.91 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.1.91)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers) (3.0.10)\nRequirement already satisfied: packaging in /opt/conda/lib/python3.7/site-packages (from transformers) (20.1)\nRequirement already satisfied: tokenizers==0.9.3 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.9.3)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\nRequirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (2.4.7)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\nRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2.9)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2020.12.5)\nRequirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (3.0.4)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (1.25.9)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\nRequirement already satisfied: joblib in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (0.14.1)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2020.4.4)\nRequirement already satisfied: click in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (7.1.1)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.7/site-packages (from transformers) 
(4.45.0)\n\u001b[33mWARNING: You are using pip version 20.3.1; however, version 23.1.2 is available.\nYou should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n","output_type":"stream"}]},{"cell_type":"code","source":"# Import necessary libraries\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\nfrom pylab import rcParams\nimport matplotlib.pyplot as plt\nfrom matplotlib import rc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import confusion_matrix, classification_report\nfrom collections import defaultdict\nfrom textwrap import wrap\n\n# Torch ML libraries\nimport transformers\nfrom transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\nimport torch\nfrom torch import nn, optim\nfrom torch.utils.data import Dataset, DataLoader\n\n# Misc.\nimport warnings\nwarnings.filterwarnings('ignore')","metadata":{"id":"WtQykqrfEzUT","execution":{"iopub.status.busy":"2023-07-10T16:50:58.497019Z","iopub.execute_input":"2023-07-10T16:50:58.497414Z","iopub.status.idle":"2023-07-10T16:51:07.643083Z","shell.execute_reply.started":"2023-07-10T16:50:58.497372Z","shell.execute_reply":"2023-07-10T16:51:07.642100Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"# Set initial variables and constants\n%config InlineBackend.figure_format='retina'\n\n# Graph Designs\nsns.set(style='whitegrid', palette='muted', font_scale=1.2)\nHAPPY_COLORS_PALETTE = [\"#01BEFE\", \"#FFDD00\", \"#FF7D00\", \"#FF006D\", \"#ADFF02\", \"#8F00FF\"]\nsns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))\nrcParams['figure.figsize'] = 12, 8\n\n# Random seed for reproducibility\nRANDOM_SEED = 42\nnp.random.seed(RANDOM_SEED)\ntorch.manual_seed(RANDOM_SEED)\n\n# Set GPU\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else 
\"cpu\")","metadata":{"id":"gRINjFWWEzUb","execution":{"iopub.status.busy":"2023-07-10T16:51:07.644736Z","iopub.execute_input":"2023-07-10T16:51:07.645102Z","iopub.status.idle":"2023-07-10T16:51:07.673480Z","shell.execute_reply.started":"2023-07-10T16:51:07.645062Z","shell.execute_reply":"2023-07-10T16:51:07.672727Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"markdown","source":"### Load the data","metadata":{"id":"LQOFO5MSEzUf"}},{"cell_type":"code","source":"df = pd.read_csv('../input/google-play-store-reviews/reviews.csv')\ndf.shape","metadata":{"id":"g6b5ajqzEzUg","outputId":"a7888891-5d69-42b7-a1e0-ccdbe75f04e4","execution":{"iopub.status.busy":"2023-07-10T16:51:07.675332Z","iopub.execute_input":"2023-07-10T16:51:07.675860Z","iopub.status.idle":"2023-07-10T16:51:07.906167Z","shell.execute_reply.started":"2023-07-10T16:51:07.675813Z","shell.execute_reply":"2023-07-10T16:51:07.905252Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"(12495, 12)"},"metadata":{}}]},{"cell_type":"code","source":"# Let's have a look at the data \ndf.head()","metadata":{"id":"eQ5Uwg8xEzUk","outputId":"2b23324b-d208-462f-d011-95c2f7847390","execution":{"iopub.status.busy":"2023-07-10T16:51:07.909702Z","iopub.execute_input":"2023-07-10T16:51:07.909988Z","iopub.status.idle":"2023-07-10T16:51:07.939908Z","shell.execute_reply.started":"2023-07-10T16:51:07.909959Z","shell.execute_reply":"2023-07-10T16:51:07.939226Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":" reviewId \\\n0 gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-... \n1 gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3... \n2 gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azu... \n3 gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z... \n4 gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZE... 
\n\n userName \\\n0 Eric Tie \n1 john alpha \n2 Sudhakar .S \n3 SKGflorida@bellsouth.net DAVID S \n4 Louann Stoker \n\n userImage \\\n0 https://play-lh.googleusercontent.com/a-/AOh14... \n1 https://play-lh.googleusercontent.com/a-/AOh14... \n2 https://play-lh.googleusercontent.com/a-/AOh14... \n3 https://play-lh.googleusercontent.com/-75aK0WF... \n4 https://play-lh.googleusercontent.com/-pBcY_Z-... \n\n content score thumbsUpCount \\\n0 I cannot open the app anymore 1 0 \n1 I have been begging for a refund from this app... 1 0 \n2 Very costly for the premium version (approx In... 1 0 \n3 Used to keep me organized, but all the 2020 UP... 1 0 \n4 Dan Birthday Oct 28 1 0 \n\n reviewCreatedVersion at \\\n0 5.4.0.6 2020-10-27 21:24:41 \n1 NaN 2020-10-27 14:03:28 \n2 NaN 2020-10-27 08:18:40 \n3 NaN 2020-10-26 13:28:07 \n4 5.6.0.7 2020-10-26 06:10:50 \n\n replyContent repliedAt \\\n0 NaN NaN \n1 Please note that from checking our records, yo... 2020-10-27 15:05:52 \n2 NaN NaN \n3 What do you find troublesome about the update?... 2020-10-26 14:58:29 \n4 NaN NaN \n\n sortOrder appId \n0 newest com.anydo \n1 newest com.anydo \n2 newest com.anydo \n3 newest com.anydo \n4 newest com.anydo ","text/html":"
\n | reviewId | \nuserName | \nuserImage | \ncontent | \nscore | \nthumbsUpCount | \nreviewCreatedVersion | \nat | \nreplyContent | \nrepliedAt | \nsortOrder | \nappId | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \ngp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-... | \nEric Tie | \nhttps://play-lh.googleusercontent.com/a-/AOh14... | \nI cannot open the app anymore | \n1 | \n0 | \n5.4.0.6 | \n2020-10-27 21:24:41 | \nNaN | \nNaN | \nnewest | \ncom.anydo | \n
1 | \ngp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3... | \njohn alpha | \nhttps://play-lh.googleusercontent.com/a-/AOh14... | \nI have been begging for a refund from this app... | \n1 | \n0 | \nNaN | \n2020-10-27 14:03:28 | \nPlease note that from checking our records, yo... | \n2020-10-27 15:05:52 | \nnewest | \ncom.anydo | \n
2 | \ngp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azu... | \nSudhakar .S | \nhttps://play-lh.googleusercontent.com/a-/AOh14... | \nVery costly for the premium version (approx In... | \n1 | \n0 | \nNaN | \n2020-10-27 08:18:40 | \nNaN | \nNaN | \nnewest | \ncom.anydo | \n
3 | \ngp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z... | \nSKGflorida@bellsouth.net DAVID S | \nhttps://play-lh.googleusercontent.com/-75aK0WF... | \nUsed to keep me organized, but all the 2020 UP... | \n1 | \n0 | \nNaN | \n2020-10-26 13:28:07 | \nWhat do you find troublesome about the update?... | \n2020-10-26 14:58:29 | \nnewest | \ncom.anydo | \n
4 | \ngp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZE... | \nLouann Stoker | \nhttps://play-lh.googleusercontent.com/-pBcY_Z-... | \nDan Birthday Oct 28 | \n1 | \n0 | \n5.6.0.7 | \n2020-10-26 06:10:50 | \nNaN | \nNaN | \nnewest | \ncom.anydo | \n