{ "cells": [ { "cell_type": "markdown", "source": [ "# Loading of Data" ], "metadata": { "id": "3O2gRML1CxuY" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ClXaJHz3skxq", "outputId": "620e7537-0423-4b59-c9fd-9146b2062a4a" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "# prompt: load the dataset from a Google Drive\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from google.colab import drive\n", "\n", "# Load the dataset from a Google Drive file\n", "drive.mount('/content/drive')\n", "data = pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/Twitter_Analysis.csv')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "y4xKC0BJVd-o", "outputId": "01958e58-4379-487a-c1ef-aabad115e6a6" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 707 }, "id": "EsspYImeyQeS", "outputId": "7c076ae0-2533-4439-c347-994aa7d50418" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Unnamed: 0 majority_target \\\n", "0 0 True \n", "1 1 True \n", "2 2 True \n", "3 3 True \n", "4 4 True \n", "\n", " statement BinaryNumTarget \\\n", "0 End of eviction moratorium means millions of A... 1.0 \n", "1 End of eviction moratorium means millions of A... 1.0 \n", "2 End of eviction moratorium means millions of A... 
1.0 \n", "3 End of eviction moratorium means millions of A... 1.0 \n", "4 End of eviction moratorium means millions of A... 1.0 \n", "\n", " tweet followers_count \\\n", "0 @POTUS Biden Blunders - 6 Month Update\\n\\nInfl... 4262.0 \n", "1 @S0SickRick @Stairmaster_ @6d6f636869 Not as m... 1393.0 \n", "2 THE SUPREME COURT is siding with super rich pr... 9.0 \n", "3 @POTUS Biden Blunders\\n\\nBroken campaign promi... 4262.0 \n", "4 @OhComfy I agree. The confluence of events rig... 70.0 \n", "\n", " friends_count favourites_count statuses_count listed_count ... \\\n", "0 3619.0 34945.0 16423.0 44.0 ... \n", "1 1621.0 31436.0 37184.0 64.0 ... \n", "2 84.0 219.0 1184.0 0.0 ... \n", "3 3619.0 34945.0 16423.0 44.0 ... \n", "4 166.0 15282.0 2194.0 0.0 ... \n", "\n", " determiners conjunctions dots exclamation questions ampersand \\\n", "0 0 0 5 0 1 0 \n", "1 0 2 1 0 0 0 \n", "2 0 1 0 0 0 0 \n", "3 0 1 3 0 0 1 \n", "4 0 1 3 0 1 0 \n", "\n", " capitals digits long_word_freq short_word_freq \n", "0 33 3 5 19 \n", "1 14 0 2 34 \n", "2 3 0 4 10 \n", "3 6 8 1 30 \n", "4 11 3 2 19 \n", "\n", "[5 rows x 64 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0majority_targetstatementBinaryNumTargettweetfollowers_countfriends_countfavourites_countstatuses_countlisted_count...determinersconjunctionsdotsexclamationquestionsampersandcapitalsdigitslong_word_freqshort_word_freq
00TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders - 6 Month Update\\n\\nInfl...4262.03619.034945.016423.044.0...005010333519
11TrueEnd of eviction moratorium means millions of A...1.0@S0SickRick @Stairmaster_ @6d6f636869 Not as m...1393.01621.031436.037184.064.0...021000140234
22TrueEnd of eviction moratorium means millions of A...1.0THE SUPREME COURT is siding with super rich pr...9.084.0219.01184.00.0...01000030410
33TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders\\n\\nBroken campaign promi...4262.03619.034945.016423.044.0...01300168130
44TrueEnd of eviction moratorium means millions of A...1.0@OhComfy I agree. The confluence of events rig...70.0166.015282.02194.00.0...013010113219
\n", "

5 rows × 64 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "data" } }, "metadata": {}, "execution_count": 3 } ], "source": [ "data.head()" ] }, { "cell_type": "markdown", "source": [ "# Pre-Processing" ], "metadata": { "id": "SyzmIOQqCaky" } }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7VRJMRynmTB5", "outputId": "2cea5c6a-91a3-4d6b-e3c2-2309a9c9fc60" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: pycaret in /usr/local/lib/python3.10/dist-packages (3.3.2)\n", "Requirement already satisfied: ipython>=5.5.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (7.34.0)\n", "Requirement already satisfied: ipywidgets>=7.6.5 in /usr/local/lib/python3.10/dist-packages (from pycaret) (7.7.1)\n", "Requirement already satisfied: tqdm>=4.62.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (4.66.4)\n", "Requirement already satisfied: numpy<1.27,>=1.21 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.25.2)\n", "Requirement already satisfied: pandas<2.2.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.0.3)\n", "Requirement already satisfied: jinja2>=3 in /usr/local/lib/python3.10/dist-packages (from pycaret) (3.1.4)\n", "Requirement already satisfied: scipy<=1.11.4,>=1.6.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.11.4)\n", "Requirement already satisfied: joblib<1.4,>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.3.2)\n", "Requirement already satisfied: scikit-learn>1.4.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.4.2)\n", "Requirement already satisfied: pyod>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.0.0)\n", "Requirement already satisfied: imbalanced-learn>=0.12.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.12.3)\n", "Requirement already satisfied: 
category-encoders>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.6.3)\n", "Requirement already satisfied: lightgbm>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (4.1.0)\n", "Requirement already satisfied: numba>=0.55.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.58.1)\n", "Requirement already satisfied: requests>=2.27.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.31.0)\n", "Requirement already satisfied: psutil>=5.9.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (5.9.5)\n", "Requirement already satisfied: markupsafe>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.1.5)\n", "Requirement already satisfied: importlib-metadata>=4.12.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (7.1.0)\n", "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (5.10.4)\n", "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.2.1)\n", "Requirement already satisfied: deprecation>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.1.0)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from pycaret) (3.4.1)\n", "Requirement already satisfied: matplotlib<3.8.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (3.7.1)\n", "Requirement already satisfied: scikit-plot>=0.3.7 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.3.7)\n", "Requirement already satisfied: yellowbrick>=1.4 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.5)\n", "Requirement already satisfied: plotly>=5.14.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (5.15.0)\n", "Requirement already satisfied: kaleido>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.2.1)\n", "Requirement already satisfied: schemdraw==0.15 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.15)\n", "Requirement 
already satisfied: plotly-resampler>=0.8.3.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.10.0)\n", "Requirement already satisfied: statsmodels>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.14.2)\n", "Requirement already satisfied: sktime==0.26.0 in /usr/local/lib/python3.10/dist-packages (from pycaret) (0.26.0)\n", "Requirement already satisfied: tbats>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from pycaret) (1.1.3)\n", "Requirement already satisfied: pmdarima>=2.0.4 in /usr/local/lib/python3.10/dist-packages (from pycaret) (2.0.4)\n", "Requirement already satisfied: wurlitzer in /usr/local/lib/python3.10/dist-packages (from pycaret) (3.1.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from sktime==0.26.0->pycaret) (24.1)\n", "Requirement already satisfied: scikit-base<0.8.0 in /usr/local/lib/python3.10/dist-packages (from sktime==0.26.0->pycaret) (0.7.8)\n", "Requirement already satisfied: patsy>=0.5.1 in /usr/local/lib/python3.10/dist-packages (from category-encoders>=2.4.0->pycaret) (0.5.6)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn>=0.12.0->pycaret) (3.5.0)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=4.12.0->pycaret) (3.19.2)\n", "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (67.7.2)\n", "Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (0.19.1)\n", "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (4.4.2)\n", "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (0.7.5)\n", "Requirement already satisfied: traitlets>=4.2 in 
/usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (5.7.1)\n", "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (3.0.47)\n", "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (2.16.1)\n", "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (0.2.0)\n", "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (0.1.7)\n", "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.5.0->pycaret) (4.9.0)\n", "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets>=7.6.5->pycaret) (5.5.6)\n", "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets>=7.6.5->pycaret) (0.2.0)\n", "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets>=7.6.5->pycaret) (3.6.6)\n", "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets>=7.6.5->pycaret) (3.0.11)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (1.2.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (4.53.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (1.4.5)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from 
matplotlib<3.8.0->pycaret) (9.4.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (3.1.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8.0->pycaret) (2.8.2)\n", "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat>=4.2.0->pycaret) (2.19.1)\n", "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat>=4.2.0->pycaret) (4.19.2)\n", "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /usr/local/lib/python3.10/dist-packages (from nbformat>=4.2.0->pycaret) (5.7.2)\n", "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.55.0->pycaret) (0.41.1)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->pycaret) (2023.4)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->pycaret) (2024.1)\n", "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly>=5.14.0->pycaret) (8.3.0)\n", "Requirement already satisfied: dash>=2.9.0 in /usr/local/lib/python3.10/dist-packages (from plotly-resampler>=0.8.3.1->pycaret) (2.17.1)\n", "Requirement already satisfied: orjson<4.0.0,>=3.8.0 in /usr/local/lib/python3.10/dist-packages (from plotly-resampler>=0.8.3.1->pycaret) (3.10.5)\n", "Requirement already satisfied: tsdownsample>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from plotly-resampler>=0.8.3.1->pycaret) (0.1.3)\n", "Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in /usr/local/lib/python3.10/dist-packages (from pmdarima>=2.0.4->pycaret) (3.0.10)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from pmdarima>=2.0.4->pycaret) 
(2.0.7)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.27.1->pycaret) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.27.1->pycaret) (3.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.27.1->pycaret) (2024.6.2)\n", "Requirement already satisfied: Flask<3.1,>=1.0.4 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.2.5)\n", "Requirement already satisfied: Werkzeug<3.1 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (3.0.3)\n", "Requirement already satisfied: dash-html-components==2.0.0 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", "Requirement already satisfied: dash-core-components==2.0.0 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.0.0)\n", "Requirement already satisfied: dash-table==5.0.0 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (5.0.0)\n", "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (4.12.2)\n", "Requirement already satisfied: retrying in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.3.4)\n", "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.10/dist-packages (from dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (1.6.0)\n", "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets>=7.6.5->pycaret) (6.1.12)\n", "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from 
ipykernel>=4.5.1->ipywidgets>=7.6.5->pycaret) (6.3.3)\n", "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=5.5.0->pycaret) (0.8.4)\n", "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (23.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.35.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->pycaret) (0.18.1)\n", "Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core!=5.0.*,>=4.12->nbformat>=4.2.0->pycaret) (4.2.2)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.1->category-encoders>=2.4.0->pycaret) (1.16.0)\n", "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=5.5.0->pycaret) (0.7.0)\n", "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.5.0->pycaret) (0.2.13)\n", "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (6.5.5)\n", "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from Flask<3.1,>=1.0.4->dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (2.2.0)\n", "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from Flask<3.1,>=1.0.4->dash>=2.9.0->plotly-resampler>=0.8.3.1->pycaret) (8.1.7)\n", 
"Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (24.0.1)\n", "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (23.1.0)\n", "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (6.5.4)\n", "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.8.3)\n", "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.18.1)\n", "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.20.0)\n", "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.1.0)\n", "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.2.4)\n", "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (4.9.4)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (4.12.3)\n", "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from 
nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (6.1.0)\n", "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.7.1)\n", "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.4)\n", "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.3.0)\n", "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.8.4)\n", "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.10.0)\n", "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.5.1)\n", "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.3.0)\n", "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (21.2.0)\n", "Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.24.0)\n", "Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from 
argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.16.0)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (2.5)\n", "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (0.5.1)\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (2.22)\n", "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (3.7.1)\n", "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.8.0)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.3.1)\n", "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets>=7.6.5->pycaret) (1.2.1)\n" ] } ], "source": [ "# Pinned to the version recorded in this cell's output so reruns resolve the same dependency set;\n", "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "%pip install pycaret==3.3.2" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "HIFgZcfWlC8F" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import pycaret\n", "import
transformers\n", "from transformers import AutoModel, BertTokenizerFast\n", "import matplotlib.pyplot as plt\n", "import sklearn\n", "import sklearn.metrics\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import classification_report\n", "import torch\n", "import torch.nn as nn\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "# Use the GPU when one is available; otherwise fall back to the CPU so the\n", "# notebook still runs on CPU-only runtimes instead of failing on the first .to(device)\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 707 }, "id": "Fiqo7nAUm3I2", "outputId": "d5753cd8-4dac-4ecc-f53c-ffd5b9c04943" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "data" }, "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0majority_targetstatementBinaryNumTargettweetfollowers_countfriends_countfavourites_countstatuses_countlisted_count...determinersconjunctionsdotsexclamationquestionsampersandcapitalsdigitslong_word_freqshort_word_freq
00TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders - 6 Month Update\\n\\nInfl...4262.03619.034945.016423.044.0...005010333519
11TrueEnd of eviction moratorium means millions of A...1.0@S0SickRick @Stairmaster_ @6d6f636869 Not as m...1393.01621.031436.037184.064.0...021000140234
22TrueEnd of eviction moratorium means millions of A...1.0THE SUPREME COURT is siding with super rich pr...9.084.0219.01184.00.0...01000030410
33TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders\\n\\nBroken campaign promi...4262.03619.034945.016423.044.0...01300168130
44TrueEnd of eviction moratorium means millions of A...1.0@OhComfy I agree. The confluence of events rig...70.0166.015282.02194.00.0...013010113219
\n", "

5 rows × 64 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "text/plain": [ " Unnamed: 0 majority_target \\\n", "0 0 True \n", "1 1 True \n", "2 2 True \n", "3 3 True \n", "4 4 True \n", "\n", " statement BinaryNumTarget \\\n", "0 End of eviction moratorium means millions of A... 1.0 \n", "1 End of eviction moratorium means millions of A... 1.0 \n", "2 End of eviction moratorium means millions of A... 1.0 \n", "3 End of eviction moratorium means millions of A... 1.0 \n", "4 End of eviction moratorium means millions of A... 1.0 \n", "\n", " tweet followers_count \\\n", "0 @POTUS Biden Blunders - 6 Month Update\\n\\nInfl... 4262.0 \n", "1 @S0SickRick @Stairmaster_ @6d6f636869 Not as m... 1393.0 \n", "2 THE SUPREME COURT is siding with super rich pr... 9.0 \n", "3 @POTUS Biden Blunders\\n\\nBroken campaign promi... 4262.0 \n", "4 @OhComfy I agree. The confluence of events rig... 70.0 \n", "\n", " friends_count favourites_count statuses_count listed_count ... \\\n", "0 3619.0 34945.0 16423.0 44.0 ... \n", "1 1621.0 31436.0 37184.0 64.0 ... \n", "2 84.0 219.0 1184.0 0.0 ... \n", "3 3619.0 34945.0 16423.0 44.0 ... \n", "4 166.0 15282.0 2194.0 0.0 ... 
\n", "\n", " determiners conjunctions dots exclamation questions ampersand \\\n", "0 0 0 5 0 1 0 \n", "1 0 2 1 0 0 0 \n", "2 0 1 0 0 0 0 \n", "3 0 1 3 0 0 1 \n", "4 0 1 3 0 1 0 \n", "\n", " capitals digits long_word_freq short_word_freq \n", "0 33 3 5 19 \n", "1 14 0 2 34 \n", "2 3 0 4 10 \n", "3 6 8 1 30 \n", "4 11 3 2 19 \n", "\n", "[5 rows x 64 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# prompt: view the head of data\n", "\n", "data.head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 707 }, "id": "nPLgUP-NvLrW", "outputId": "557c7500-286f-465d-842e-adc214422f55" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Unnamed: 0 majority_target \\\n", "0 0 True \n", "1 1 True \n", "2 2 True \n", "3 3 True \n", "4 4 True \n", "\n", " statement BinaryNumTarget \\\n", "0 End of eviction moratorium means millions of A... 1.0 \n", "1 End of eviction moratorium means millions of A... 1.0 \n", "2 End of eviction moratorium means millions of A... 1.0 \n", "3 End of eviction moratorium means millions of A... 1.0 \n", "4 End of eviction moratorium means millions of A... 1.0 \n", "\n", " tweet followers_count \\\n", "0 @POTUS Biden Blunders - 6 Month Update\\n\\nInfl... 4262.0 \n", "1 @S0SickRick @Stairmaster_ @6d6f636869 Not as m... 1393.0 \n", "2 THE SUPREME COURT is siding with super rich pr... 9.0 \n", "3 @POTUS Biden Blunders\\n\\nBroken campaign promi... 4262.0 \n", "4 @OhComfy I agree. The confluence of events rig... 70.0 \n", "\n", " friends_count favourites_count statuses_count listed_count ... \\\n", "0 3619.0 34945.0 16423.0 44.0 ... \n", "1 1621.0 31436.0 37184.0 64.0 ... \n", "2 84.0 219.0 1184.0 0.0 ... \n", "3 3619.0 34945.0 16423.0 44.0 ... \n", "4 166.0 15282.0 2194.0 0.0 ... 
\n", "\n", " determiners conjunctions dots exclamation questions ampersand \\\n", "0 0 0 5 0 1 0 \n", "1 0 2 1 0 0 0 \n", "2 0 1 0 0 0 0 \n", "3 0 1 3 0 0 1 \n", "4 0 1 3 0 1 0 \n", "\n", " capitals digits long_word_freq short_word_freq \n", "0 33 3 5 19 \n", "1 14 0 2 34 \n", "2 3 0 4 10 \n", "3 6 8 1 30 \n", "4 11 3 2 19 \n", "\n", "[5 rows x 64 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0majority_targetstatementBinaryNumTargettweetfollowers_countfriends_countfavourites_countstatuses_countlisted_count...determinersconjunctionsdotsexclamationquestionsampersandcapitalsdigitslong_word_freqshort_word_freq
00TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders - 6 Month Update\\n\\nInfl...4262.03619.034945.016423.044.0...005010333519
11TrueEnd of eviction moratorium means millions of A...1.0@S0SickRick @Stairmaster_ @6d6f636869 Not as m...1393.01621.031436.037184.064.0...021000140234
22TrueEnd of eviction moratorium means millions of A...1.0THE SUPREME COURT is siding with super rich pr...9.084.0219.01184.00.0...01000030410
33TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders\\n\\nBroken campaign promi...4262.03619.034945.016423.044.0...01300168130
44TrueEnd of eviction moratorium means millions of A...1.0@OhComfy I agree. The confluence of events rig...70.0166.015282.02194.00.0...013010113219
\n", "

5 rows × 64 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
# %% Clean-up step 1: strip URLs from the tweet text.
# Only URL removal happens in this cell; no lower-casing is performed here
# (the earlier header comment mentioned it, but the code never did it).

import re  # kept in this cell: the contraction-expansion cell further down uses `re`

# Vectorised replacement: every run of non-whitespace characters starting with
# "http" becomes a single space. Unlike `.apply(lambda x: re.sub(...))`, the
# `.str` accessor leaves missing values as NaN instead of raising TypeError
# when an entry is not a string.
data['tweet'] = data['tweet'].str.replace(r'http\S+', ' ', regex=True)

data.head()
\n", "\n", " determiners conjunctions dots exclamation questions ampersand \\\n", "0 0 0 5 0 1 0 \n", "1 0 2 1 0 0 0 \n", "2 0 1 0 0 0 0 \n", "3 0 1 3 0 0 1 \n", "4 0 1 3 0 1 0 \n", "\n", " capitals digits long_word_freq short_word_freq \n", "0 33 3 5 19 \n", "1 14 0 2 34 \n", "2 3 0 4 10 \n", "3 6 8 1 30 \n", "4 11 3 2 19 \n", "\n", "[5 rows x 64 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0majority_targetstatementBinaryNumTargettweetfollowers_countfriends_countfavourites_countstatuses_countlisted_count...determinersconjunctionsdotsexclamationquestionsampersandcapitalsdigitslong_word_freqshort_word_freq
00TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders - 6 Month Update\\n\\nInfl...4262.03619.034945.016423.044.0...005010333519
11TrueEnd of eviction moratorium means millions of A...1.0@S0SickRick @Stairmaster_ @6d6f636869 Not as m...1393.01621.031436.037184.064.0...021000140234
22TrueEnd of eviction moratorium means millions of A...1.0THE SUPREME COURT is siding with super rich pr...9.084.0219.01184.00.0...01000030410
33TrueEnd of eviction moratorium means millions of A...1.0@POTUS Biden Blunders\\n\\nBroken campaign promi...4262.03619.034945.016423.044.0...01300168130
44TrueEnd of eviction moratorium means millions of A...1.0@OhComfy I agree. The confluence of events rig...70.0166.015282.02194.00.0...013010113219
\n", "

5 rows × 64 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
# %% Clean-up step 2: expand contractions in the tweet texts
import re

# Matches both the ASCII apostrophe and the typographic one.
_APOSTROPHE = "['’]"

# Ordered (compiled pattern, replacement) rules.
#  * Order matters: the specific "can't"/"won't" rules must run before the
#    generic "n't" rule.
#  * Every suffix replacement starts with a space so the expanded word is not
#    glued to its stem ("don't" -> "do not", not "donot").
#  * `(?<=\w)` / `\b` restrict suffix rules to real word endings, so an opening
#    quote such as 'solid is left untouched.
_CONTRACTION_RULES = [
    (re.compile(r"\b([cC])an" + _APOSTROPHE + r"t\b"), r"\1annot"),
    (re.compile(r"\b([wW])on" + _APOSTROPHE + r"t\b"), r"\1ill not"),
    (re.compile(r"(?<=\w)n" + _APOSTROPHE + r"t\b"), " not"),
    (re.compile(r"\b([iI])" + _APOSTROPHE + r"m\b"), r"\1 am"),
    (re.compile(r"(?<=\w)" + _APOSTROPHE + r"re\b"), " are"),
    # NOTE(review): "'s" is also the possessive marker ("Biden's plan"); it is
    # expanded to " is" here to keep the original cell's intent.
    (re.compile(r"(?<=\w)" + _APOSTROPHE + r"s\b"), " is"),
    (re.compile(r"(?<=\w)" + _APOSTROPHE + r"d\b"), " would"),
    (re.compile(r"(?<=\w)" + _APOSTROPHE + r"ll\b"), " will"),
    (re.compile(r"(?<=\w)" + _APOSTROPHE + r"ve\b"), " have"),
]


def expand_contractions(text):
    """Return `text` with common English contractions written out.

    Example: "I don't think it's over" -> "I do not think it is over".
    Non-string values (e.g. NaN for a missing tweet) are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    for pattern, replacement in _CONTRACTION_RULES:
        text = pattern.sub(replacement, text)
    return text


# One pass over the column instead of one pass per rule.
data['tweet'] = data['tweet'].apply(expand_contractions)

data.head()

# %% Training / validation / test split (70 : 15 : 15)
# Imported here so this cell does not depend on a later cell having been run.
from sklearn.model_selection import train_test_split

# Train vs. temporary hold-out (30 %).
# NOTE(review): the split is stratified on `majority_target` while the labels
# are `BinaryNumTarget`; in the rows displayed above the two columns agree
# (True <-> 1.0) - confirm this holds for the whole file.
train_text, temp_text, train_labels, temp_labels = train_test_split(
    data['tweet'], data['BinaryNumTarget'],
    random_state=2018,
    test_size=0.3,
    stratify=data['majority_target'],
)

# Hold-out split in half: validation vs. test.
val_text, test_text, val_labels, test_labels = train_test_split(
    temp_text, temp_labels,
    random_state=2018,
    test_size=0.5,
    stratify=temp_labels,
)
temp_labels.head()

# %% Histogram of the number of whitespace-separated words per training tweet
seq_len = [len(tweet.split()) for tweet in train_text]

fig, ax = plt.subplots()
pd.Series(seq_len).hist(bins=100, color='firebrick', ax=ax)
ax.set_xlabel('Number of Words')
ax.set_ylabel('Number of texts')
plt.show()
# %% Tokenise and encode the three splits
# The original note on the histogram says most texts are under 60 words, so
# every sequence is padded / truncated to 60.
# NOTE(review): `max_length` counts tokenizer tokens (word pieces plus special
# tokens), not whitespace words - confirm 60 is still adequate.
MAX_LENGTH = 60
MAX_LENGHT = MAX_LENGTH  # original (misspelled) name kept as an alias for any cell still using it


def encode_texts(texts, max_length=MAX_LENGTH):
    """Encode a pandas Series of strings with the global BERT `tokenizer`.

    Every sequence is padded or truncated to `max_length`. Returns the
    tokenizer's batch encoding (provides 'input_ids' and 'attention_mask').
    """
    return tokenizer.batch_encode_plus(
        texts.tolist(),
        max_length=max_length,
        padding='max_length',   # replaces the deprecated pad_to_max_length=True
        truncation=True,
    )


tokens_train = encode_texts(train_text)
tokens_val = encode_texts(val_text)
tokens_test = encode_texts(test_text)

# %% Convert lists to tensors


def to_tensors(tokens, labels):
    """Return (input_ids, attention_mask, labels) tensors for one split.

    Labels are converted to int64 because NLLLoss expects integer class indices
    (the label column is stored as float 0.0 / 1.0).
    """
    return (
        torch.tensor(tokens['input_ids']),
        torch.tensor(tokens['attention_mask']),
        torch.tensor(labels.astype('int64').tolist(), dtype=torch.long),
    )


train_seq, train_mask, train_y = to_tensors(tokens_train, train_labels)
val_seq, val_mask, val_y = to_tensors(tokens_val, val_labels)
test_seq, test_mask, test_y = to_tensors(tokens_test, test_labels)

# %% Data loaders
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

batch_size = 32

# Training batches are drawn in random order.
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

# Validation batches are read sequentially.
val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

# %% Freeze the pretrained encoder: only the classification head is trained
for param in bert.parameters():
    param.requires_grad = False  # no gradients are computed for BERT's weights

# %% Model, optimiser, loss


class BERT_Arch(nn.Module):
    """BERT encoder followed by a two-layer classification head.

    The head maps BERT's 768-dimensional pooled output to 512 units
    (ReLU + dropout) and then to 2 classes; `forward` returns log-probabilities
    (LogSoftmax over dim 1), which is what NLLLoss expects.
    """

    def __init__(self, bert):
        super().__init__()
        self.bert = bert
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(768, 512)       # dense layer 1
        self.fc2 = nn.Linear(512, 2)         # dense layer 2 (output layer)
        self.softmax = nn.LogSoftmax(dim=1)  # log-softmax, despite the attribute name

    def forward(self, sent_id, mask):
        """Return per-class log-probabilities for a batch of token ids and attention masks."""
        cls_hs = self.bert(sent_id, attention_mask=mask)['pooler_output']
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return self.softmax(x)


model = BERT_Arch(bert)

# torch.optim.AdamW replaces the deprecated transformers.AdamW.
# weight_decay=0.0 keeps the deprecated optimiser's default; only parameters
# that still require gradients (the head) are handed to the optimiser.
optimizer = torch.optim.AdamW(
    [p for p in model.parameters() if p.requires_grad],
    lr=1e-5,
    weight_decay=0.0,
)

# Negative log-likelihood on the model's log-probabilities.
cross_entropy = torch.nn.NLLLoss()

# Number of training epochs
epochs = 2
# %% Training and evaluation functions


def train():
    """Run one training epoch over `train_dataloader`.

    Uses the notebook globals `model`, `optimizer` and `cross_entropy`.
    Returns the mean batch loss of the epoch.
    """
    model.train()
    device = next(model.parameters()).device  # run the batch wherever the model lives
    total_loss = 0.0
    n_batches = len(train_dataloader)

    for step, batch in enumerate(train_dataloader):
        if step % 50 == 0 and not step == 0:  # progress update every 50 batches
            print(' Batch {:>5,} of {:>5,}.'.format(step, n_batches))

        sent_id, mask, labels = (t.to(device) for t in batch)
        labels = labels.long()                 # NLLLoss needs int64 class indices

        model.zero_grad()                      # clear previously calculated gradients
        preds = model(sent_id, mask)           # log-probabilities for the batch
        loss = cross_entropy(preds, labels)
        total_loss += loss.item()

        loss.backward()
        # Clip the gradient norm to 1.0 to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    return total_loss / n_batches


def evaluate():
    """Compute the mean batch loss over `val_dataloader` without updating weights."""
    print("\nEvaluating...")
    model.eval()  # deactivate dropout
    device = next(model.parameters()).device
    total_loss = 0.0
    n_batches = len(val_dataloader)

    for step, batch in enumerate(val_dataloader):
        if step % 50 == 0 and not step == 0:  # progress update every 50 batches
            print(' Batch {:>5,} of {:>5,}.'.format(step, n_batches))

        sent_id, mask, labels = (t.to(device) for t in batch)
        labels = labels.long()

        with torch.no_grad():  # no autograd bookkeeping during evaluation
            preds = model(sent_id, mask)
            loss = cross_entropy(preds, labels)

        total_loss += loss.item()

    return total_loss / n_batches


# %% Train, keeping the weights of the epoch with the lowest validation loss
BEST_WEIGHTS_PATH = 'c2_new_models2_weights.pt'
DRIVE_WEIGHTS_PATH = '/content/drive/MyDrive/Colab_Notebooks/c2_new_models2_weights.pt'

best_valid_loss = float('inf')
train_losses = []  # per-epoch training loss
valid_losses = []  # per-epoch validation loss

for epoch in range(epochs):
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
    train_loss = train()
    valid_loss = evaluate()
    if valid_loss < best_valid_loss:  # checkpoint the best model so far
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), BEST_WEIGHTS_PATH)
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')

# %% Persist the best weights to Google Drive
import torch

# Reload the best checkpoint into the *trained* model and copy it to Drive.
# The model must not be re-instantiated here: a fresh BERT_Arch(bert) would have
# a randomly initialised head, and saving it would discard the training above.
model.load_state_dict(torch.load(BEST_WEIGHTS_PATH))
torch.save(model.state_dict(), DRIVE_WEIGHTS_PATH)

# %% Prediction on unseen text
import joblib
import torch
from transformers import BertTokenizer

# Load the tokenizer and the trained weights.
# A dedicated name is used for the BERT tokenizer because the Bi-LSTM section
# below rebinds the global `tokenizer` to a Keras tokenizer.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_tokenizer = tokenizer
# The state dict is loaded (not dumped): writing joblib.dump([model, bert], ...)
# to this path would overwrite the saved weights with a pickle of the objects.
model.load_state_dict(
    torch.load(DRIVE_WEIGHTS_PATH, map_location=next(model.parameters()).device)
)


def predict_fake_news(text, max_length=60):
    """Classify a single text with the trained model.

    Parameters
    ----------
    text : str
        Text to classify.
    max_length : int, default 60
        Length the text is padded / truncated to.

    Returns
    -------
    str
        'Real' or 'Fake'.
    """
    inputs = bert_tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding='max_length',   # replaces the deprecated pad_to_max_length=True
        truncation=True,
        return_tensors="pt",
    )

    device = next(model.parameters()).device
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    model.eval()  # disable dropout for inference
    with torch.no_grad():
        log_probs = model(input_ids, attention_mask)  # one row of 2 class log-probabilities

    prediction = torch.argmax(log_probs, dim=1).item()

    # Class index follows BinaryNumTarget. In the rows displayed earlier,
    # BinaryNumTarget 1.0 goes with majority_target True, so 1 is the "true"
    # class and 0 the "false" one.
    # NOTE(review): confirm against the dataset description.
    label_map = {0: 'Fake', 1: 'Real'}
    return label_map[prediction]


# Test the model with a sample example
test_example = "Donald Trump Sends Out Embarrassing New Year’s Eve Message; This is Disturbing"
prediction = predict_fake_news(test_example)
print(f'The prediction for the test example is: {prediction}')

# %% Bi-LSTM section: list the available columns
print(data.columns.tolist())

# %% Bi-LSTM section: data preparation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Two numeric columns plus the tweet text are used as inputs.
features = ['cred', 'BotScore', 'tweet']
target = 'BinaryNumTarget'

X = data[features]
y = data[target]

# 70 : 15 : 15 split, stratified on the target.
# NOTE: these assignments rebind `train_data`, `train_labels`, `val_labels` and
# `test_labels`, replacing the BERT section's objects of the same names.
train_data, temp_data, train_labels, temp_labels = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=2018)
val_data, test_data, val_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, stratify=temp_labels, random_state=2018)

# Keras word-index tokenizer, fitted on the training tweets only.
keras_tokenizer = Tokenizer(num_words=10000)
keras_tokenizer.fit_on_texts(train_data['tweet'])
tokenizer = keras_tokenizer  # original global name kept for any later cell that uses it

train_sequences = keras_tokenizer.texts_to_sequences(train_data['tweet'])
val_sequences = keras_tokenizer.texts_to_sequences(val_data['tweet'])
test_sequences = keras_tokenizer.texts_to_sequences(test_data['tweet'])

# Pad / truncate every sequence to the same length.
max_sequence_length = 100
train_padded = pad_sequences(train_sequences, maxlen=max_sequence_length)
val_padded = pad_sequences(val_sequences, maxlen=max_sequence_length)
test_padded = pad_sequences(test_sequences, maxlen=max_sequence_length)

# Combine the two numeric columns with the padded token ids, column-wise.
numeric_cols = ['cred', 'BotScore']
train_features = np.hstack((train_data[numeric_cols].values, train_padded))
val_features = np.hstack((val_data[numeric_cols].values, val_padded))
test_features = np.hstack((test_data[numeric_cols].values, test_padded))
\u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'accuracy'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mbi_lstm_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_padded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_labels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval_padded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 65\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 66\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0m_process_traceback_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1805\u001b[0m ):\n\u001b[1;32m 1806\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1807\u001b[0;31m \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1808\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1809\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/util/traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 150\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 151\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_process_traceback_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 833\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 834\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 903\u001b[0m \u001b[0;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[0;31m# no_variable_creation function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 905\u001b[0;31m return tracing_compilation.call_function(\n\u001b[0m\u001b[1;32m 906\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_no_variable_creation_config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 907\u001b[0m )\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py\u001b[0m in \u001b[0;36mcall_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0mbound_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0mflat_inputs\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mfunction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munpack_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbound_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m return function._call_flat( # pylint: disable=protected-access\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0mflat_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m )\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/concrete_function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, tensor_inputs, captured_inputs)\u001b[0m\n\u001b[1;32m 1321\u001b[0m and executing_eagerly):\n\u001b[1;32m 1322\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1323\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inference_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall_preflattened\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1324\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m 1325\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py\u001b[0m in \u001b[0;36mcall_preflattened\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcall_preflattened\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0margs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0;34m\"\"\"Calls with flattened tensor inputs and returns the structured output.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 216\u001b[0;31m \u001b[0mflat_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall_flat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 217\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpack_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflat_outputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py\u001b[0m in \u001b[0;36mcall_flat\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mrecord\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop_recording\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_bound_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecuting_eagerly\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 251\u001b[0;31m outputs = 
self._bound_context.call_function(\n\u001b[0m\u001b[1;32m 252\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/context.py\u001b[0m in \u001b[0;36mcall_function\u001b[0;34m(self, name, tensor_inputs, num_outputs)\u001b[0m\n\u001b[1;32m 1484\u001b[0m \u001b[0mcancellation_context\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcancellation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1485\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcancellation_context\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1486\u001b[0;31m outputs = execute.execute(\n\u001b[0m\u001b[1;32m 1487\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1488\u001b[0m \u001b[0mnum_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_outputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m 
\u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[1;32m 54\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 55\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [
 "from tensorflow.keras.models import Sequential\n",
 "from tensorflow.keras.layers import Bidirectional, Embedding, LSTM, Dense, Dropout\n",
 "\n",
 "# Define the Bi-LSTM model: embedding -> two stacked bidirectional LSTM layers -> dropout -> sigmoid.\n",
 "# Both recurrent layers are wrapped in Bidirectional so each tweet is read in both directions;\n",
 "# without the wrapper the network is a plain unidirectional LSTM despite its name.\n",
 "bi_lstm_model = Sequential()\n",
 "# input_dim=10000 matches the Tokenizer(num_words=10000) cap used when the tweets are tokenized\n",
 "bi_lstm_model.add(Embedding(input_dim=10000, output_dim=128, input_length=max_sequence_length))\n",
 "# return_sequences=True hands the whole sequence to the next recurrent layer\n",
 "bi_lstm_model.add(Bidirectional(LSTM(128, return_sequences=True)))\n",
 "bi_lstm_model.add(Bidirectional(LSTM(64)))\n",
 "bi_lstm_model.add(Dropout(0.5))\n",
 "bi_lstm_model.add(Dense(1, activation='sigmoid'))\n",
 "\n",
 "# Compile and train Bi-LSTM model\n",
 "bi_lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
 "bi_lstm_model.fit(train_padded, train_labels, epochs=5, batch_size=32, validation_data=(val_padded, val_labels))\n",
 "\n",
 "# Save the trained model to the path the evaluation and ensemble cells load it from\n",
 "bi_lstm_model.save('/content/drive/MyDrive/Colab_Notebooks/Bi-LSTM-Model-1.h5')"
 ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 748 }, "id": "IBAPtxWr3yKz", "outputId": "81c00575-cacc-4b52-ab0d-a6cce01767e3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "630/630 [==============================] - 4s 5ms/step\n", "Accuracy: 0.9805\n", "F1 Score: 0.9811\n", "Recall: 0.9873\n", "Precision: 0.9750\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ], "source": [
 "# prompt: Write a code to evaluate the performance of the saved model above based on accuracy, f1-score, recall and precision parameters in 4 decimal places. Add a confusion matrix to it. let the confusion matrix highlight percentage in addition to values\n",
 "\n",
 "import numpy as np\n",
 "import tensorflow as tf\n",
 "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix\n",
 "import seaborn as sns\n",
 "\n",
 "# Load the saved model\n",
 "model = tf.keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/Bi-LSTM-Model-1.h5')\n",
 "\n",
 "# Predict on the test data and round the sigmoid outputs to a flat vector of 0/1 integer labels\n",
 "predictions = model.predict(test_padded)\n",
 "predicted_labels = np.round(predictions).astype(int).ravel()\n",
 "\n",
 "# Calculate the performance metrics.\n",
 "# The F1 value is kept in `f1` so the imported sklearn f1_score function is not overwritten.\n",
 "accuracy = accuracy_score(test_labels, predicted_labels)\n",
 "f1 = f1_score(test_labels, predicted_labels)\n",
 "recall = recall_score(test_labels, predicted_labels)\n",
 "precision = precision_score(test_labels, predicted_labels)\n",
 "\n",
 "# Print the performance metrics\n",
 "print(f'Accuracy: {accuracy:.4f}')\n",
 "print(f'F1 Score: {f1:.4f}')\n",
 "print(f'Recall: {recall:.4f}')\n",
 "print(f'Precision: {precision:.4f}')\n",
 "\n",
 "# Create a confusion matrix (rows are true labels, columns are predicted labels)\n",
 "cm = confusion_matrix(test_labels, predicted_labels)\n",
 "\n",
 "# Normalize each row by its total; the divisor is floored at 1 so an empty row gives 0% instead of NaN\n",
 "row_totals = cm.sum(axis=1, keepdims=True)\n",
 "cm_normalized = cm / np.maximum(row_totals, 1)\n",
 "\n",
 "# Build one annotation per cell showing the count and the row percentage\n",
 "annot = np.array([\n",
 "    [f'{count}\\n({share:.2%})' for count, share in zip(count_row, share_row)]\n",
 "    for count_row, share_row in zip(cm, cm_normalized)\n",
 "])\n",
 "\n",
 "# Plot the confusion matrix with annotations\n",
 "plt.figure(figsize=(10, 7))\n",
 "ax = sns.heatmap(cm_normalized, annot=annot, fmt='', cmap='Blues', cbar=False)\n",
 "\n",
 "ax.set_xlabel('Predicted labels')\n",
 "ax.set_ylabel('True labels')\n",
 "ax.set_title('Confusion Matrix')\n",
 "\n",
 "# Adjust tick labels based on the shape of the confusion matrix\n",
 "class_names = ['Class {}'.format(i) for i in range(cm.shape[0])]\n",
 "ax.xaxis.set_ticklabels(class_names)\n",
 "ax.yaxis.set_ticklabels(class_names)\n",
 "\n",
 "plt.show()"
 ] }, { "cell_type": "markdown", "source": [ "# CNN Model" ], "metadata": { "id": "qQWYo9voBOS-" } }, { "cell_type": "code", "execution_count": 33, "metadata": { "id": "5srutGy9ZplO", "colab": { "base_uri": "https://localhost:8080/", "height": 349 }, "outputId": "c4c4e1de-4788-4855-f42d-a06e03facc36" }, "outputs": [ { "output_type": "error", "ename": "KeyboardInterrupt", "evalue": "", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# Tokenize and pad tweet texts for Bi-LSTM and CNN models\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mtokenizer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTokenizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_words\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_on_texts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tweet'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mtrain_sequences\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtexts_to_sequences\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tweet'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mval_sequences\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtexts_to_sequences\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tweet'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/keras/src/preprocessing/text.py\u001b[0m in \u001b[0;36mfit_on_texts\u001b[0;34m(self, texts)\u001b[0m\n\u001b[1;32m 301\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mseq\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mword_counts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 303\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mword_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 304\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mword_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [
 "# CNN Model: data preparation\n",
 "\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "from sklearn.model_selection import train_test_split\n",
 "from tensorflow.keras.preprocessing.text import Tokenizer\n",
 "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
 "\n",
 "# Input columns and label column, read from the DataFrame `data` loaded at the top of the notebook\n",
 "features = ['cred', 'BotScore', 'tweet']\n",
 "target = 'BinaryNumTarget'\n",
 "\n",
 "# Separate the inputs from the label\n",
 "X = data[features]\n",
 "y = data[target]\n",
 "\n",
 "# Stratified 70:15:15 split: hold out 30% first, then cut that hold-out in half for validation and test\n",
 "train_data, temp_data, train_labels, temp_labels = train_test_split(\n",
 "    X, y, test_size=0.3, stratify=y, random_state=2018\n",
 ")\n",
 "val_data, test_data, val_labels, test_labels = train_test_split(\n",
 "    temp_data, temp_labels, test_size=0.5, stratify=temp_labels, random_state=2018\n",
 ")\n",
 "\n",
 "# Fit the vocabulary on the training tweets only, then encode every split with it\n",
 "tokenizer = Tokenizer(num_words=10000)\n",
 "tokenizer.fit_on_texts(train_data['tweet'])\n",
 "train_sequences, val_sequences, test_sequences = (\n",
 "    tokenizer.texts_to_sequences(split['tweet'])\n",
 "    for split in (train_data, val_data, test_data)\n",
 ")\n",
 "\n",
 "# Bring every encoded tweet to the same fixed length\n",
 "max_sequence_length = 100\n",
 "train_padded, val_padded, test_padded = (\n",
 "    pad_sequences(sequences, maxlen=max_sequence_length)\n",
 "    for sequences in (train_sequences, val_sequences, test_sequences)\n",
 ")\n",
 "\n",
 "# Put the 'cred' and 'BotScore' columns in front of the padded token ids, one row per tweet\n",
 "train_features, val_features, test_features = (\n",
 "    np.concatenate((split[['cred', 'BotScore']].values, padded), axis=1)\n",
 "    for split, padded in ((train_data, train_padded), (val_data, val_padded), (test_data, test_padded))\n",
 ")"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Af_3bfeXnkbZ" }, "outputs": [], "source": [ "\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.models 
import Sequential\n",
 "from tensorflow.keras.layers import Embedding, Dense, Dropout\n",
 "from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten\n",
 "\n",
 "# Define CNN model: embedding -> 1D convolution -> max pooling -> flatten -> dropout -> sigmoid\n",
 "cnn_model = Sequential()\n",
 "# input_dim=10000 matches the Tokenizer(num_words=10000) cap used when the tweets are tokenized\n",
 "cnn_model.add(Embedding(input_dim=10000, output_dim=128, input_length=max_sequence_length))\n",
 "cnn_model.add(Conv1D(128, kernel_size=5, activation='relu'))\n",
 "cnn_model.add(MaxPooling1D(pool_size=2))\n",
 "cnn_model.add(Flatten())\n",
 "cnn_model.add(Dropout(0.5))\n",
 "cnn_model.add(Dense(1, activation='sigmoid'))\n",
 "\n",
 "# Compile and train CNN model\n",
 "cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
 "cnn_model.fit(train_padded, train_labels, epochs=5, batch_size=32, validation_data=(val_padded, val_labels))\n",
 "\n",
 "# Save the trained model to the Drive path the ensemble cell loads it from\n",
 "cnn_model.save('/content/drive/MyDrive/Colab_Notebooks/CNN-Model-1.h5')"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3iy50OM31iU8" }, "outputs": [], "source": [ "# prompt: Write a code to evaluate the performance of the saved model above based on accuracy, f1-score, recall and precision parameters in 4 decimal places. Add a confusion matrix to it. 
let the confusion matrix highlight percentage in addition to values\n",
 "\n",
 "import numpy as np\n",
 "import tensorflow as tf\n",
 "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix\n",
 "import seaborn as sns\n",
 "\n",
 "# Load the saved model from the same Drive folder the ensemble cell reads it from\n",
 "model = tf.keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/CNN-Model-1.h5')\n",
 "\n",
 "# Predict on the test data and round the sigmoid outputs to a flat vector of 0/1 integer labels\n",
 "predictions = model.predict(test_padded)\n",
 "predicted_labels = np.round(predictions).astype(int).ravel()\n",
 "\n",
 "# Calculate the performance metrics.\n",
 "# The F1 value is kept in `f1` so the imported sklearn f1_score function is not overwritten.\n",
 "accuracy = accuracy_score(test_labels, predicted_labels)\n",
 "f1 = f1_score(test_labels, predicted_labels)\n",
 "recall = recall_score(test_labels, predicted_labels)\n",
 "precision = precision_score(test_labels, predicted_labels)\n",
 "\n",
 "# Print the performance metrics\n",
 "print(f'Accuracy: {accuracy:.4f}')\n",
 "print(f'F1 Score: {f1:.4f}')\n",
 "print(f'Recall: {recall:.4f}')\n",
 "print(f'Precision: {precision:.4f}')\n",
 "\n",
 "# Create a confusion matrix (rows are true labels, columns are predicted labels)\n",
 "cm = confusion_matrix(test_labels, predicted_labels)\n",
 "\n",
 "# Normalize each row by its total; the divisor is floored at 1 so an empty row gives 0% instead of NaN\n",
 "row_totals = cm.sum(axis=1, keepdims=True)\n",
 "cm_normalized = cm / np.maximum(row_totals, 1)\n",
 "\n",
 "# Build one annotation per cell showing the count and the row percentage\n",
 "annot = np.array([\n",
 "    [f'{count}\\n({share:.2%})' for count, share in zip(count_row, share_row)]\n",
 "    for count_row, share_row in zip(cm, cm_normalized)\n",
 "])\n",
 "\n",
 "# Plot the confusion matrix with annotations\n",
 "plt.figure(figsize=(10, 7))\n",
 "ax = sns.heatmap(cm_normalized, annot=annot, fmt='', cmap='Blues', cbar=False)\n",
 "\n",
 "ax.set_xlabel('Predicted labels')\n",
 "ax.set_ylabel('True labels')\n",
 "ax.set_title('Confusion Matrix')\n",
 "\n",
 "# Adjust tick labels based on the shape of the confusion matrix\n",
 "class_names = ['Class {}'.format(i) for i in range(cm.shape[0])]\n",
 "ax.xaxis.set_ticklabels(class_names)\n",
 "ax.yaxis.set_ticklabels(class_names)\n",
 "\n",
 "plt.show()\n"
 ] }, { "cell_type": "markdown", "source": [ "# Ensemble Model" ], "metadata": { "id": "TG5m9sUvAyHb" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "hMh-Ui3N2YVO", "outputId": "c0ba32a7-541c-4c9b-9c78-bb507e2cf716" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "2936/2936 [==============================] - 1159s 389ms/step - loss: 0.3232 - accuracy: 0.9037 - val_loss: 0.1407 - val_accuracy: 0.9787\n", "Epoch 2/5\n", "2936/2936 [==============================] - 1130s 385ms/step - loss: 0.0648 - accuracy: 0.9944 - val_loss: 0.0722 - val_accuracy: 0.9824\n", "Epoch 3/5\n", "2936/2936 [==============================] - 1112s 379ms/step - loss: 0.0204 - accuracy: 0.9981 - val_loss: 0.0620 - val_accuracy: 0.9831\n", "Epoch 4/5\n", "2936/2936 [==============================] - 1086s 370ms/step - loss: 0.0100 - accuracy: 0.9981 - val_loss: 0.0663 - val_accuracy: 0.9820\n", "Epoch 5/5\n", "2936/2936 [==============================] - 1095s 373ms/step - loss: 0.0055 - accuracy: 0.9988 - val_loss: 0.0697 - val_accuracy: 0.9821\n" ] } ], "source": [
 "# prompt: Generate an ensemble model for combining only BiLSTM and CNN for training and evaluation from saved file\n",
 "\n",
 "import tensorflow as tf\n",
 "from tensorflow.keras.models import Model\n",
 "from tensorflow.keras.layers import Input, Concatenate, Dense, GlobalAveragePooling1D\n",
 "\n",
 "# Define input layers for each model; the length follows max_sequence_length, which the tweets are padded to\n",
 "input_bilstm = Input(shape=(max_sequence_length,), name='input_bilstm')\n",
 "input_cnn = Input(shape=(max_sequence_length,), name='input_cnn')\n",
 "\n",
 "# Load the saved models\n",
 "bilstm_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/Bi-LSTM-Model-1.h5')\n",
 "cnn_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/CNN-Model-1.h5')\n",
 "\n",
 "# Get outputs from each model\n",
 "# NOTE(review): the loaded sub-models are not frozen, so the fit below presumably also updates their weights - confirm this is intended\n",
 "bilstm_output = bilstm_model(input_bilstm)\n",
 "cnn_output = cnn_model(input_cnn)\n",
 "\n",
 "# Concatenate the two sigmoid outputs\n",
 "ensemble_output = Concatenate()([bilstm_output, cnn_output])\n",
 "\n",
 "# Add a dense layer with sigmoid activation that combines them into one prediction\n",
 "ensemble_output = Dense(1, activation='sigmoid')(ensemble_output)\n",
 "\n",
 "# Define the ensemble model\n",
 "ensemble_model = Model(inputs=[input_bilstm, input_cnn], outputs=ensemble_output)\n",
 "\n",
 "# Compile the model\n",
 "ensemble_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
 "\n",
 "# Train the model; both inputs receive the same padded sequences\n",
 "ensemble_model.fit([train_padded, train_padded], train_labels, epochs=5, batch_size=32, validation_data=([val_padded, val_padded], val_labels))\n",
 "\n",
 "# Save the model\n",
 "ensemble_model.save('/content/drive/MyDrive/Colab_Notebooks/ensemble_model_bilstm_cnn.h5')\n"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 659 }, "id": "mEMr7onjtKEd", "outputId": "e6a12422-24cf-4a9d-b1d1-756120cc3137" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "630/630 [==============================] - 6s 8ms/step\n", "Accuracy: 0.9824\n", "F1 Score: 0.9828\n", "Recall: 0.9814\n", "Precision: 0.9842\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# prompt: write a code to generate the performance evaluation for the ensemble model saved in file based on accuracy, precision, recall and f1-score with both value and percentage based confusion matrix\n",
 "import numpy as np\n",
 "import tensorflow as tf\n",
 "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix\n",
 "import seaborn as sns\n",
 "# Load the saved ensemble model\n",
 "ensemble_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/ensemble_model_bilstm_cnn.h5')\n",
 "\n",
 "# Predict on the test data (the same padded sequences go to both inputs) and round to flat 0/1 integer labels\n",
 "predictions = ensemble_model.predict([test_padded, test_padded])\n",
 "predicted_labels = np.round(predictions).astype(int).ravel()\n",
 "\n",
 "# Calculate the performance metrics.\n",
 "# The F1 value is kept in `f1` so the imported sklearn f1_score function is not overwritten.\n",
 "accuracy = accuracy_score(test_labels, predicted_labels)\n",
 "f1 = f1_score(test_labels, predicted_labels)\n",
 "recall = recall_score(test_labels, predicted_labels)\n",
 "precision = precision_score(test_labels, predicted_labels)\n",
 "\n",
 "# Print the performance metrics\n",
 "print(f'Accuracy: {accuracy:.4f}')\n",
 "print(f'F1 Score: {f1:.4f}')\n",
 "print(f'Recall: {recall:.4f}')\n",
 "print(f'Precision: {precision:.4f}')\n",
 "\n",
 "# Create a confusion matrix (rows are true labels, columns are predicted labels)\n",
 "cm = confusion_matrix(test_labels, predicted_labels)\n",
 "\n",
 "# Normalize each row by its total; the divisor is floored at 1 so an empty row gives 0% instead of NaN\n",
 "row_totals = cm.sum(axis=1, keepdims=True)\n",
 "cm_normalized = cm / np.maximum(row_totals, 1)\n",
 "\n",
 "# Build one annotation per cell showing the count and the row percentage\n",
 "annot = np.array([\n",
 "    [f'{count}\\n({share:.2%})' for count, share in zip(count_row, share_row)]\n",
 "    for count_row, share_row in zip(cm, cm_normalized)\n",
 "])\n",
 "\n",
 "# Plot the confusion matrix with annotations\n",
 "plt.figure(figsize=(10, 7))\n",
 "ax = sns.heatmap(cm_normalized, annot=annot, fmt='', cmap='Blues', cbar=False)\n",
 "\n",
 "ax.set_xlabel('Predicted labels')\n",
 "ax.set_ylabel('True labels')\n",
 "ax.set_title('Confusion Matrix')\n",
 "\n",
 "# Adjust tick labels based on the shape of the confusion matrix\n",
 "class_names = ['Class {}'.format(i) for i in range(cm.shape[0])]\n",
 "ax.xaxis.set_ticklabels(class_names)\n",
 "ax.yaxis.set_ticklabels(class_names)\n",
 "\n",
 "plt.show()\n"
 ] }, { "cell_type": "markdown", "source": [ "# Prediction Interface" ], "metadata": { "id": "nmWkCSQD__44" } }, { "cell_type": "code", "execution_count": 37, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gd1Mwr9kh7kC", "outputId": "7a68b8aa-a31d-4d68-c820-3bbaf7f38ba3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting gradio\n", " Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m28.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)\n", " Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n", "Collecting fastapi (from gradio)\n", " Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting ffmpy (from gradio)\n", " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting gradio-client==1.0.1 (from gradio)\n", " Downloading gradio_client-1.0.1-py3-none-any.whl (318 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.1/318.1 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting httpx>=0.24.1 (from gradio)\n", " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.23.3)\n", "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.0)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n", "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n", "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.25.2)\n", "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.10.5)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (24.1)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.3)\n", "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n", "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.7.3)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", 
"Collecting python-multipart>=0.0.9 (from gradio)\n", " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n", "Collecting ruff>=0.2.2 (from gradio)\n", " Downloading ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Collecting tomlkit==0.12.0 (from gradio)\n", " Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n", "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.12.3)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.12.2)\n", "Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.7)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.30.1-py3-none-any.whl (62 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.0.1->gradio) (2023.6.0)\n", "Collecting websockets<12.0,>=10.0 (from gradio-client==1.0.1->gradio)\n", " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: entrypoints in 
/usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n", "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.2)\n", "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n", "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7.1)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2024.6.2)\n", "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n", " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (3.7)\n", "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (1.3.1)\n", "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n", " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (3.14.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (4.66.4)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.2.1)\n", 
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.53.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.5)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.4)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n", "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (2.18.4)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n", "Collecting starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n", " Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n", " 
Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n", "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi->gradio)\n", " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting email_validator>=2.0.0 (from fastapi->gradio)\n", " Downloading email_validator-2.1.2-py3-none-any.whl (30 kB)\n", "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n", " Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.35.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.16.1)\n", 
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx>=0.24.1->gradio) (1.2.1)\n", "Collecting httptools>=0.5.0 (from uvicorn>=0.14.0->gradio)\n", " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn>=0.14.0->gradio)\n", " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn>=0.14.0->gradio)\n", " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn>=0.14.0->gradio)\n", " Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m68.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.3->gradio) (3.3.2)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", "Building wheels for collected packages: ffmpy\n", " Building wheel for ffmpy (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=3cacb542c5f87008c0848f723ad0b2db06b1937716a3d3fc5025dc91b7ca770a\n", " Stored in directory: /root/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n", "Successfully built ffmpy\n", "Installing collected packages: pydub, ffmpy, websockets, uvloop, ujson, tomlkit, semantic-version, ruff, python-multipart, python-dotenv, httptools, h11, dnspython, aiofiles, watchfiles, uvicorn, starlette, httpcore, email_validator, httpx, gradio-client, fastapi-cli, fastapi, gradio\n", "Successfully installed aiofiles-23.2.1 dnspython-2.6.1 email_validator-2.1.2 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 gradio-4.36.1 gradio-client-1.0.1 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 pydub-0.25.1 python-dotenv-1.0.1 python-multipart-0.0.9 ruff-0.4.9 semantic-version-2.10.0 starlette-0.37.2 tomlkit-0.12.0 ujson-5.10.0 uvicorn-0.30.1 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n" ] } ], "source": [ "# prompt: write a code to install and import Gradio, to run the function predict_fake_news(text)\n", "\n", "!pip install gradio" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 646 }, "id": "nrmUxArzwTQ0", "outputId": "51d162aa-a3cb-4f01-dee0-a985f1177470" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n", "\n", "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", "Running on public URL: https://f02208f8032e00faae.gradio.live\n", "\n", "This share link expires in 72 hours. 
For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "
import html
import os

import gradio as gr
import tweepy

# Names of the environment variables that must hold the Twitter/X API credentials.
# Secrets are deliberately NOT stored in the notebook: set these variables (for
# example from your platform's secret store) before running this cell.
# SECURITY: credentials that were ever committed in a notebook must be revoked and
# regenerated in the Twitter developer portal.
_TWITTER_ENV_VARS = (
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_TOKEN_SECRET",
    "TWITTER_BEARER_TOKEN",
)


def _load_twitter_credentials():
    """Return the Twitter credentials read from the environment.

    Returns:
        dict mapping each name in ``_TWITTER_ENV_VARS`` to its value.

    Raises:
        RuntimeError: if any variable is unset or empty. The message lists only
            the missing variable *names*, never any secret value.
    """
    credentials = {name: os.environ.get(name, "") for name in _TWITTER_ENV_VARS}
    missing = [name for name, value in credentials.items() if not value]
    if missing:
        raise RuntimeError(
            "missing Twitter credentials; set environment variable(s): "
            + ", ".join(missing)
        )
    return credentials


def update_on_Twitter(tweet_text, prediction):
    """Post the analysed text and its prediction from the configured Twitter account.

    Args:
        tweet_text: The news text taken from the "News Tweet" textbox.
        prediction: The label taken from the "Prediction" textbox.

    Returns:
        A status string for the ``gr.HTML`` output: a fixed confirmation message
        on success, or ``"Error: ..."`` (HTML-escaped) when the inputs are empty,
        credentials are missing, or the Twitter API call fails.
    """
    # Refuse to publish a meaningless tweet ("The news:  is ") when the user has
    # not entered text or has not run the prediction yet.
    if not (tweet_text or "").strip() or not (prediction or "").strip():
        return "Error: enter a news Tweet and run Predict before posting."

    try:
        creds = _load_twitter_credentials()
    except RuntimeError as e:
        print(e)
        return f"Error: {html.escape(str(e))}"

    # OAuth 1.0a user-context handler, used with the v1.1 API object below.
    auth = tweepy.OAuthHandler(
        creds["TWITTER_CONSUMER_KEY"], creds["TWITTER_CONSUMER_SECRET"]
    )
    auth.set_access_token(
        creds["TWITTER_ACCESS_TOKEN"], creds["TWITTER_ACCESS_TOKEN_SECRET"]
    )

    # API object: only used to verify the credentials before posting.
    api = tweepy.API(auth)

    # Client object: used to create the tweet.
    client = tweepy.Client(
        bearer_token=creds["TWITTER_BEARER_TOKEN"],
        consumer_key=creds["TWITTER_CONSUMER_KEY"],
        consumer_secret=creds["TWITTER_CONSUMER_SECRET"],
        access_token=creds["TWITTER_ACCESS_TOKEN"],
        access_token_secret=creds["TWITTER_ACCESS_TOKEN_SECRET"],
        wait_on_rate_limit=True,
    )

    postText = f"The news: {tweet_text} is {prediction}"

    # Broad catch is intentional: this is a UI callback, so any failure is
    # reported back to the page instead of surfacing as an unhandled exception.
    try:
        api.verify_credentials()
        print("Authentication OK")
        client.create_tweet(text=postText)
        return "Detect Fake News on Twitter Bot Account"
    except Exception as e:
        print(e)
        # The result is rendered by gr.HTML, so escape the exception text.
        return f"Error: {html.escape(str(e))}"


# Use Gradio Blocks to create a more flexible interface
with gr.Blocks() as demo:
    gr.Markdown("# Fake News Detection")
    text_input = gr.Textbox(placeholder="Enter a news Tweet here...", label="News Tweet")
    text_output = gr.Textbox(label="Prediction")
    link_output = gr.HTML(label="Twitter Bot Account")

    # Button to get prediction (predict_fake_news is defined in an earlier cell)
    gr.Button("Predict").click(predict_fake_news, inputs=text_input, outputs=text_output)

    # Button to post the text and its prediction to the Twitter bot account
    gr.Button("Detect on Twitter").click(update_on_Twitter, inputs=[text_input, text_output], outputs=link_output)

# Launch the interface
demo.launch()
"_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c3d27b7643194755979e3b231a7c7359", "max": 570, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_13a985113cb040b79027ed3b59ecfb51", "value": 570 } }, "55b4b9ea6dd34aecb7213964c47e250b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8041895128094c65b1e65760067bb5bd", "placeholder": "​", "style": "IPY_MODEL_f8465e5d2654402fbf9aa789ee39948c", "value": " 570/570 [00:00<00:00, 4.04kB/s]" } }, "5dec5fbe4ae941a581cea79167270bf6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, 
"order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3085ba25f51f49ae95930564e2d54a0f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1b4ec4fc682b488caee3ac9ef0b197cd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c3d27b7643194755979e3b231a7c7359": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "13a985113cb040b79027ed3b59ecfb51": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8041895128094c65b1e65760067bb5bd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, 
"grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f8465e5d2654402fbf9aa789ee39948c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "42778e0f00ee4099845365a4129a9b07": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ee8a393f6f7a43c0af95f13bb1d8d1b4", "IPY_MODEL_46a9f330d01b43ec9323b0866ef1960e", "IPY_MODEL_f5ff7383a64b435096ebc0654c063311" ], "layout": "IPY_MODEL_7ed0cdc589ab4bccb301a7ac2a4cacc6" } }, "ee8a393f6f7a43c0af95f13bb1d8d1b4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_f76cc9fee554462992ae333536ac3e78", "placeholder": "​", "style": "IPY_MODEL_9bc6e00fb6dc450e8643dd97bd093b2a", "value": "model.safetensors: 100%" } }, "46a9f330d01b43ec9323b0866ef1960e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7f76440815a94593ae2d93a87b418dad", "max": 440449768, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8a992dca377b4736970d717dd9e41e49", "value": 440449768 } }, "f5ff7383a64b435096ebc0654c063311": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2b72f679299d4049864f1daea16b9a77", "placeholder": "​", "style": "IPY_MODEL_5589161d795941edb2fdb804bfd815b5", "value": " 440M/440M [00:08<00:00, 53.4MB/s]" } }, "7ed0cdc589ab4bccb301a7ac2a4cacc6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, 
"bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f76cc9fee554462992ae333536ac3e78": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9bc6e00fb6dc450e8643dd97bd093b2a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7f76440815a94593ae2d93a87b418dad": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8a992dca377b4736970d717dd9e41e49": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "2b72f679299d4049864f1daea16b9a77": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": 
"1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5589161d795941edb2fdb804bfd815b5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b58b34df1fac4232b1a392fadabe3446": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0efb7ee658794030b36ca44d79d4b236", "IPY_MODEL_d1eeefd866074f50be6ffd9d6a199d24", 
"IPY_MODEL_e78ef5b5c8f149dabfd4fb1445251a88" ], "layout": "IPY_MODEL_494b1cb203cf441b8eff14b64371213e" } }, "0efb7ee658794030b36ca44d79d4b236": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_87ed154eea034d2180ccd5bab5229155", "placeholder": "​", "style": "IPY_MODEL_d599ca19c22943d6aecfeb13fc16f5de", "value": "tokenizer_config.json: 100%" } }, "d1eeefd866074f50be6ffd9d6a199d24": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a1ade1b5205b429894d5374b29490bb8", "max": 48, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f3f2ec20971140b99c3be6bdf21ffa43", "value": 48 } }, "e78ef5b5c8f149dabfd4fb1445251a88": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_af52e94cb5c446ca972b7a7837293b03", "placeholder": "​", "style": 
"IPY_MODEL_d26f83118504442b8e8bdd7e33d30b03", "value": " 48.0/48.0 [00:00<00:00, 302B/s]" } }, "494b1cb203cf441b8eff14b64371213e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "87ed154eea034d2180ccd5bab5229155": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": 
null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d599ca19c22943d6aecfeb13fc16f5de": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a1ade1b5205b429894d5374b29490bb8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f3f2ec20971140b99c3be6bdf21ffa43": { 
"model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "af52e94cb5c446ca972b7a7837293b03": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d26f83118504442b8e8bdd7e33d30b03": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, 
"88439fa69b2d4b41a20dffe1c3c62258": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a64be3a4ebe34c0085c256c5dcaa2a09", "IPY_MODEL_64b358189e6a4c47a622846aeb0c7566", "IPY_MODEL_e9feed4a39f64631b2dfe746a583a03c" ], "layout": "IPY_MODEL_1f77f1a77fa4486a937b3a33c994e348" } }, "a64be3a4ebe34c0085c256c5dcaa2a09": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_237c63d8f3cd4794a61ab360fff10ac2", "placeholder": "​", "style": "IPY_MODEL_506133fbc79541d2936f8efc78dd7dde", "value": "vocab.txt: 100%" } }, "64b358189e6a4c47a622846aeb0c7566": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_30c34b5a4eef43cda64fb5568cb7dc4f", "max": 231508, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_073eb0f4e8f440acadeb1a326a96a178", "value": 231508 } }, "e9feed4a39f64631b2dfe746a583a03c": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9258c3a11e4b4c4b98e433d44cea9554", "placeholder": "​", "style": "IPY_MODEL_3c32d976a9314b0bb6d74d5cf2488134", "value": " 232k/232k [00:00<00:00, 1.55MB/s]" } }, "1f77f1a77fa4486a937b3a33c994e348": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "237c63d8f3cd4794a61ab360fff10ac2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": 
null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "506133fbc79541d2936f8efc78dd7dde": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "30c34b5a4eef43cda64fb5568cb7dc4f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "073eb0f4e8f440acadeb1a326a96a178": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9258c3a11e4b4c4b98e433d44cea9554": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, 
"top": null, "visibility": null, "width": null } }, "3c32d976a9314b0bb6d74d5cf2488134": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a7e453f3b0494ba7a7ddbd703468b377": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a2f8e2390258408899d6ead630fe2037", "IPY_MODEL_8951cf1b3ef24ce6bb857f70929444e1", "IPY_MODEL_f776aaf69f1c452daf5b441b85aac33e" ], "layout": "IPY_MODEL_12cc0ac549734af28451abb30e1cda83" } }, "a2f8e2390258408899d6ead630fe2037": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4b50bdc9ce454a3f914452480613bfe9", "placeholder": "​", "style": "IPY_MODEL_5cdd170770a04b72af140314eb64b505", "value": "tokenizer.json: 100%" } }, "8951cf1b3ef24ce6bb857f70929444e1": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6b711f56c8d549d1b11790071647c7ad", "max": 466062, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_99c9a4bc768e40a582bd73b88f21eb96", "value": 466062 } }, "f776aaf69f1c452daf5b441b85aac33e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_28e9d0f866a1405d94f8c9b2a2f18490", "placeholder": "​", "style": "IPY_MODEL_385c6ece940b42c8b53bf9a42775a1d7", "value": " 466k/466k [00:00<00:00, 2.37MB/s]" } }, "12cc0ac549734af28451abb30e1cda83": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, 
"min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4b50bdc9ce454a3f914452480613bfe9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5cdd170770a04b72af140314eb64b505": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6b711f56c8d549d1b11790071647c7ad": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "99c9a4bc768e40a582bd73b88f21eb96": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "28e9d0f866a1405d94f8c9b2a2f18490": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "385c6ece940b42c8b53bf9a42775a1d7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }