{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xJYU-39CIa93", "outputId": "00f0cfb8-6f80-41ba-8327-f7401f3e6011" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: tensorflow in /usr/local/lib/python3.10/dist-packages (2.12.0)\n", "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0)\n", "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3)\n", "Requirement already satisfied: flatbuffers>=2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (23.5.26)\n", "Requirement already satisfied: gast<=0.4.0,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.0)\n", "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0)\n", "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.56.0)\n", "Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.8.0)\n", "Requirement already satisfied: jax>=0.3.15 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.13)\n", "Requirement already satisfied: keras<2.13,>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.0)\n", "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (16.0.0)\n", "Requirement already satisfied: numpy<1.24,>=1.22 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.22.4)\n", "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.3.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorflow) (23.1)\n", "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.20.3)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow) (67.7.2)\n", "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0)\n", "Requirement already satisfied: tensorboard<2.13,>=2.12 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.3)\n", "Requirement already satisfied: tensorflow-estimator<2.13,>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.0)\n", "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.3.0)\n", "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (4.7.1)\n", "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.14.1)\n", "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.32.0)\n", "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow) (0.40.0)\n", "Requirement already satisfied: ml-dtypes>=0.1.0 in 
/usr/local/lib/python3.10/dist-packages (from jax>=0.3.15->tensorflow) (0.2.0)\n", "Requirement already satisfied: scipy>=1.7 in /usr/local/lib/python3.10/dist-packages (from jax>=0.3.15->tensorflow) (1.10.1)\n", "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.17.3)\n", "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (1.0.0)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (3.4.3)\n", "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.27.1)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (0.7.1)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.3.6)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (5.3.1)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (0.3.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (4.9)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow) (1.3.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (2023.5.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (3.4)\n", "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.13,>=2.12->tensorflow) (2.1.3)\n", "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (0.5.0)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow) (3.2.2)\n", "Collecting tensorflow-gpu\n", " Downloading tensorflow-gpu-2.12.0.tar.gz (2.6 kB)\n", " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", " \n", " \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n", " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", " \u001b[31m╰─>\u001b[0m See above for output.\n", " \n", " \u001b[1;35mnote\u001b[0m: This error originates from a 
subprocess, and is likely not a problem with pip.\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n", "\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n", "\n", "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n", "\u001b[31m╰─>\u001b[0m See above for output.\n", "\n", "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n", "\u001b[1;36mhint\u001b[0m: See above for details.\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2022.7.1)\n", "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.22.4)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.1.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.41.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (23.1)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (8.4.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.1.0)\n", "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.10.1)\n", "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.3.1)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.1.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ], "source": [ "!pip install tensorflow\n", "!pip install tensorflow-gpu\n", "!pip install pandas matplotlib scikit-learn\n" ] }, { "cell_type": "code", "source": [ "import os\n", "import pandas as pd\n", "import numpy as np\n", "import tensorflow as tf\n" ], "metadata": { "id": "HpSljCmKI2MU" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df = pd.read_csv('/content/drive/MyDrive/Data-for-ML-projects/comment_toxicity/jigsaw-toxic-comment-classification-challenge/train.csv/train.csv')" ], "metadata": { "id": "VheoMBumKcvv" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df.sample(6)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 237 }, "id": "_iRqFMgMLX7l", "outputId": "f43e5eb9-173d-4b18-abe8-c8b774c458a8" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id comment_text \\\n", "88124 ebbccab2b7350f91 I have removed your 3 unsourced edits. Feel fr... \n", "72052 c0e58fe6aaf5efe6 . Back to edits. 
In the coming days I will wor... \n", "13275 231c58ea2f2ff9bf \" \\n\\nDave1185 is just an insulting, rude edit... \n", "46559 7c66f44d1ae93fc5 Alright, and also \\n\\nI want to make Mileena a... \n", "15342 288bbd602abedf63 \"\\n\\n My talk page \\nI kind of feel silly, I'v... \n", "115375 68ffaeaeb144964b (Presumably still active, last edit was in mid... \n", "\n", " toxic severe_toxic obscene threat insult identity_hate \n", "88124 0 0 0 0 0 0 \n", "72052 0 0 0 0 0 0 \n", "13275 1 0 0 0 0 0 \n", "46559 0 0 0 0 0 0 \n", "15342 0 0 0 0 0 0 \n", "115375 0 0 0 0 0 0 " ], "text/html": [ "\n", "\n", "
\n" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "source": [ "df.iloc[0]['comment_text']" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 53 }, "id": "FfDaL2m0Lp7U", "outputId": "5fb4d767-ccdf-4b87-9f02-936a46e8b2ec" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "\"Explanation\\nWhy the edits made under my username Hardcore Metallica Fan were reverted? They weren't vandalisms, just closure on some GAs after I voted at New York Dolls FAC. And please don't remove the template from the talk page since I'm retired now.89.205.38.27\"" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "df[df['toxic'] == 1].sample(8) # looking at some comments for which toxic == 1 (i.e. they're indeed toxic)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "H5i7cEgaL_c_", "outputId": "c36b6cbd-6d9f-4f4c-95e6-2f3d65c605e8" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id comment_text \\\n", "125434 9ef2277ac833af5b \"\\nWikipedia needs you to stop repeatedly fuck... \n", "116669 6fb2bf5cbcb25c24 Well instead of 'it's up to you' 'ing' me do ... \n", "74556 c7757e065e806ed9 because you fucking retard, the original ova i... \n", "136882 dc51c66ff7e78396 your all a bunch of idiots, rename the page ha... \n", "39364 690a488e39190fca Hi \\n\\nAfter writing to ANI...What next. YOU C... \n", "67311 b41b79873203cfce FUCKIN' HELL! \\nYou are such an idiot. Stop ca... \n", "44098 75c4c4d2ea6094a1 \"\\n\\n Reply to \"\"On gender\"\" \\n\\nYour posted: ... \n", "128536 af72187c9602430e \"\\n\\n you're a munter \\n\\n spastic special me... \n", "\n", " toxic severe_toxic obscene threat insult identity_hate \n", "125434 1 0 1 0 0 0 \n", "116669 1 0 0 0 0 0 \n", "74556 1 0 1 0 1 0 \n", "136882 1 0 0 0 1 0 \n", "39364 1 0 0 0 0 0 \n", "67311 1 0 1 0 1 0 \n", "44098 1 0 1 0 1 0 \n", "128536 1 0 0 0 0 0 " ], "text/html": [ "\n", "\n", "
\n" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "df.iloc[88807]['comment_text']" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "m2TIoGysMmKs", "outputId": "61e85dfb-ae39-477a-e3c8-7603807acca0" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'OIH!` \\n\\nWhy u delete my edit????\\n\\nU idiot!~~ Explain..'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "df[df['toxic'] == 1].head(10) # looking at some more comments that are toxic" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "G6oIzutjM4gu", "outputId": "01eabe2e-ff2a-406b-ec9a-42a1f3d9a6a8" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id comment_text \\\n", "6 0002bcb3da6cb337 COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK \n", "12 0005c987bdfc9d4b Hey... what is it..\\n@ | talk .\\nWhat is it...... \n", "16 0007e25b2121310b Bye! \\n\\nDon't look, come or think of comming ... \n", "42 001810bf8c45bf5f You are gay or antisemmitian? \\n\\nArchangel WH... \n", "43 00190820581d90ce FUCK YOUR FILTHY MOTHER IN THE ASS, DRY! \n", "44 001956c382006abd I'm Sorry \\n\\nI'm sorry I screwed around with ... \n", "51 001dc38a83d420cf GET FUCKED UP. GET FUCKEEED UP. GOT A DRINK T... \n", "55 0020e7119b96eeeb Stupid peace of shit stop deleting my stuff as... \n", "56 0020fd96ed3b8c8b =Tony Sidaway is obviously a fistfuckee. He lo... \n", "58 0021fe88bc4da3e6 My Band Page's deletion. You thought I was gon... \n", "\n", " toxic severe_toxic obscene threat insult identity_hate \n", "6 1 1 1 0 1 0 \n", "12 1 0 0 0 0 0 \n", "16 1 0 0 0 0 0 \n", "42 1 0 1 0 1 1 \n", "43 1 0 1 0 1 0 \n", "44 1 0 0 0 0 0 \n", "51 1 0 1 0 0 0 \n", "55 1 1 1 0 1 0 \n", "56 1 0 1 0 1 0 \n", "58 1 0 1 0 0 0 " ], "text/html": [ "\n", "\n", "
\n" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "markdown", "source": [ "## Data Preprocessing" ], "metadata": { "id": "Dz42jPccReKS" } }, { "cell_type": "code", "source": [ "# Tokenizing the data\n", "from tensorflow.keras.layers import TextVectorization" ], "metadata": { "id": "tlMEEIzTNLPm" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df[df.columns[2:]] # taking a look at the columns that'll act as features and help us predict whether or not a comment is toxic" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "sCUbz7IOSa06", "outputId": "75a1d53c-06ae-46ef-af72-3b855aed8ddf" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " toxic severe_toxic obscene threat insult identity_hate\n", "0 0 0 0 0 0 0\n", "1 0 0 0 0 0 0\n", "2 0 0 0 0 0 0\n", "3 0 0 0 0 0 0\n", "4 0 0 0 0 0 0\n", "... ... ... ... ... ... ...\n", "159566 0 0 0 0 0 0\n", "159567 0 0 0 0 0 0\n", "159568 0 0 0 0 0 0\n", "159569 0 0 0 0 0 0\n", "159570 0 0 0 0 0 0\n", "\n", "[159571 rows x 6 columns]" ], "text/html": [ "\n", "\n", "
\n" ] }, "metadata": {}, "execution_count": 10 } ] },
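{ "cell_type": "markdown", "source": [ "Optional sanity check: the six label columns are heavily imbalanced, so it is worth a quick look at how often each label actually occurs before training. This is just a rough illustration and assumes `df` is the training DataFrame loaded above.\n" ], "metadata": {} }, { "cell_type": "code", "source": [ "# Illustrative only: per-label frequency in the training data.\n", "# Assumes df is the DataFrame loaded above; each label column holds 0/1 flags.\n", "label_counts = df[df.columns[2:]].sum()   # number of positive examples per label\n", "label_rates = df[df.columns[2:]].mean()   # fraction of comments carrying each label\n", "print(label_counts)\n", "print(label_rates.round(4))" ], "metadata": {}, "execution_count": null, "outputs": [] },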
{ "cell_type": "code", "source": [ "X = df['comment_text']\n", "y = df[df.columns[2:]].values # .values gives a numpy ndarray with one row per comment, i.e. a vector of 0/1 values indicating whether each label applies to that comment (the labels being all the columns from index 2 (=toxic) up to index 7 (=identity_hate))" ], "metadata": { "id": "1eF33UqTRq7t" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "22:24 - Next we would like to define how many words we would like to have inside of our dictionary. Inside of our text vectorization layer we can specify how many words we would like to store inside of that vocab. The more words we store, the larger our model is effectively going to be.\n", "\n", "If we've got a massive vocabulary then we will need one word embedding for every single word." ], "metadata": { "id": "tQh2As2lYQ6h" } }, { "cell_type": "code", "source": [ "MAX_FEATURES = 200000 # number of words in the vocabulary (vocab)" ], "metadata": { "id": "JIm2iAzhYHhn" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')\n", "# each comment we analyze is capped at 1800 tokens; increasing this limit would also increase the computational load\n" ], "metadata": { "id": "qUUmc2s3dFnR" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "vectorizer.adapt(X.values)" ], "metadata": { "id": "Du9O7d3WdgDv" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# print(type(vectorizer.get_vocabulary())) # vectorizer.get_vocabulary() -> returns a python 'list' containing the words learned by the vectorizer\n", "# vectorizer.get_vocabulary() # taking a look at some of the words learned by the vectorizer\n", "print(vectorizer.get_vocabulary()[:25])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pqmfuNqEfDP1", "outputId": "498d43d6-9939-4567-cdaa-2699f05cd359" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "['', '[UNK]', 'the', 'to', 'of', 'and', 'a', 'you', 'i', 'is', 'that', 'in', 'it', 'for', 'this', 'not', 'on', 'be', 'as', 'have', 'are', 'your', 'with', 'if', 'article']\n" ] } ] }, { "cell_type": "code", "source": [ "# vectorizer(\"hello everyone how's it going\")" ], "metadata": { "id": "MHzsP-3NfW1v" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "vectorized_text = vectorizer(X.values)" ], "metadata": { "id": "WQp7kXamrqxT" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "print(vectorized_text)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xswHvLKutPYT", "outputId": "545cb3cd-def0-410d-8b25-cbd362693a67" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "tf.Tensor(\n", "[[ 645 76 2 ... 0 0 0]\n", " [ 1 54 2489 ... 0 0 0]\n", " [ 425 441 70 ... 0 0 0]\n", " ...\n", " [32445 7392 383 ... 0 0 0]\n", " [ 5 12 534 ... 0 0 0]\n", " [ 5 8 130 ... 0 0 0]], shape=(159571, 1800), dtype=int64)\n" ] } ] },
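{ "cell_type": "markdown", "source": [ "Optional illustration: before building the tf.data pipeline it can help to see what the adapted vectorizer does to a single sentence. This is only a sketch and assumes `vectorizer` has already been adapted on `X.values` in the cells above.\n" ], "metadata": {} }, { "cell_type": "code", "source": [ "# Illustrative only: vectorize one example sentence and inspect the result.\n", "# The layer pads/truncates to output_sequence_length, so the output has shape (1800,).\n", "sample_tokens = vectorizer(\"you are a wonderful human being\")\n", "print(sample_tokens.shape)          # (1800,)\n", "print(sample_tokens[:10].numpy())   # integer ids of the first few tokens; trailing 0s are padding\n" ], "metadata": {}, "execution_count": null, "outputs": [] },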
{ "cell_type": "code", "source": [ "# Steps to create a pipeline -> MCSHBAP - map, cache, shuffle, batch, prefetch\n", "dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))\n", "dataset = dataset.cache()\n", "dataset = dataset.shuffle(160000)\n", "dataset = dataset.batch(16)\n", "dataset = dataset.prefetch(8) # helps prevent bottlenecks" ], "metadata": { "id": "kebPi3X0gX1k" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "batch_X, batch_y = dataset.as_numpy_iterator().next()" ], "metadata": { "id": "DugdUJQFrg7E" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# batch_X will be our vectorized text examples\n", "batch_X.shape # note that we've got 16 examples, each of at most 1800 tokens" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MSxyQ430sF_Y", "outputId": "fa0b322a-a177-4945-f722-49cf131b84f7" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(16, 1800)" ] }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "code", "source": [ "batch_y.shape # note we've got 16 samples, with a label vector of size 6 for each" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_zG1QQtDxo7V", "outputId": "cf28b30d-0a09-445f-d917-46ff00c978be" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(16, 6)" ] }, "metadata": {}, "execution_count": 22 } ] }, { "cell_type": "markdown", "source": [ "## Creating our training, validation and test partitions" ], "metadata": { "id": "gZgMwlFsyR8v" } }, { "cell_type": "code", "source": [ "print(f\"No of batches in the dataset : {len(dataset)}\")\n", "print(f\"No of samples in the whole dataset : {len(dataset) * 16}\") # since each batch contains 16 samples (the last batch is smaller, so this slightly overcounts)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nB8TcEzTykW-", "outputId": "62179c94-a210-4aa3-908b-ac7aa9afed0a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "No of batches in the dataset : 9974\n", "No of samples in the whole dataset : 159584\n" ] } ] }, { "cell_type": "code", "source": [ "# we'll use 70% of the length of our dataset for training\n", "train = dataset.take(int(len(dataset) * 0.7))\n", "\n", "# skip 70% of the dataset and take the next 20% for the validation set\n", "val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))\n", "\n", "# skip 90% of the dataset and take the next 10% for the test set\n", "test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))\n" ], "metadata": { "id": "p34ekUDWxyBO" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "print(f\"Length of the train set : {len(train)}. 'Type' of the training set : {type(train)}\")\n", "print(f\"Length of the validation set : {len(val)}. 'Type' of the training set : {type(val)}\")\n", "print(f\"Length of the test set : {len(test)}. 'Type' of the training set : {type(test)}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MorPEljt2u7d", "outputId": "9cf155af-e778-42f0-a788-e992e6690bc0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Length of the train set : 6981. 'Type' of the training set : \n", "Length of the validation set : 1994. 'Type' of the training set : \n", "Length of the test set : 997. 
'Type' of the training set : \n" ] } ] }, { "cell_type": "markdown", "source": [ "## Building our neural n/w" ], "metadata": { "id": "vG2TaCvS31HY" } }, { "cell_type": "code", "source": [ "# Create a sequential model (import the necessary dependencies for the same)\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding" ], "metadata": { "id": "lPqW8SrQ3H2p" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# print(y[0])\n", "print(y.shape) # (159571, 6) => 159571 samples and 6 parameters (a vector of 6 values) corresponding to each\n", "y[0] # taking a look at one of the samples" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JR_D26MB93b6", "outputId": "f4b90cc9-379d-4335-8945-4747b3818709" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(159571, 6)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0])" ] }, "metadata": {}, "execution_count": 27 } ] }, { "cell_type": "code", "source": [ "model = Sequential()\n", "# First - Create the embedding layer\n", "model.add(Embedding(MAX_FEATURES+1, 32))\n", "# Bidirectional LSTM Layer\n", "# 41:00 - 42:00 -> 'Bidirectional' is reqd because in a way we'd like to enable the neural n/w to be able to look at the sentences from both directions??\n", "# for eg. -> The sentence - \"I don't hate you\" - the presence of \"don't\" before the word \"hate\" significantly affects the meaning of the sentence\n", "model.add(Bidirectional(LSTM(32, activation='tanh')))\n", "# 41:00 -> the reason that we're using 'tanh' as the activation function, instead of the immensely popular 'relu', is because the gpu acceleration that\n", "# is required for an lstm layout needs to be tanh (this is something that is dictated by tensorflow)\n", "\n", "# Feature extractor Fully connected layers\n", "model.add(Dense(128, activation='relu'))\n", "model.add(Dense(256, activation='relu'))\n", "model.add(Dense(128, activation='relu'))\n", "\n", "# Final layer\n", "# By having 6 layers in the final o/p layer, we're going to be able to o/p the exact same style of o/p as our target labels (which contains a vector of 6 values)\n", "model.add(Dense(6, activation='sigmoid'))" ], "metadata": { "id": "LN89EAY64Hlf" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# 45:00 onwards - explanation of why we're using 'BinaryCrossentropy'\n", "\n", "model.compile(loss='BinaryCrossentropy', optimizer='Adam')" ], "metadata": { "id": "22yxOAlK-nns" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model.summary()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_jvnynfL-2N1", "outputId": "8b81832b-f790-4490-e1a0-72d8d1dfdc81" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Model: \"sequential\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " embedding (Embedding) (None, None, 32) 6400032 \n", " \n", " bidirectional (Bidirectiona (None, 64) 16640 \n", " l) \n", " \n", " dense (Dense) (None, 128) 8320 \n", " \n", " dense_1 (Dense) (None, 256) 33024 \n", " \n", " dense_2 (Dense) (None, 128) 32896 \n", " \n", " dense_3 (Dense) (None, 6) 774 \n", " \n", 
"=================================================================\n", "Total params: 6,491,686\n", "Trainable params: 6,491,686\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ] }, { "cell_type": "code", "source": [ "# Assuming you have defined and compiled your model\n", "# model.save('/content/drive/MyDrive/Data-for-ML-projects/comment_toxicity')\n" ], "metadata": { "id": "kTIP5RFWGryj" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training our model" ], "metadata": { "id": "nz6LVR4r_luv" } }, { "cell_type": "code", "source": [ "history = model.fit(train, epochs=1, validation_data=val)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 328 }, "id": "LXgEnd4N_lP6", "outputId": "b666768c-d6bf-405a-8776-5582f4ef5472" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4864/6981 [===================>..........] - ETA: 1:13:22 - loss: 0.0682" ] }, { "output_type": "error", "ename": "KeyboardInterrupt", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mhistory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 65\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 66\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_process_traceback_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1683\u001b[0m ):\n\u001b[1;32m 1684\u001b[0m 
\u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1685\u001b[0;31m \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1686\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1687\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/util/traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 150\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 151\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0mfiltered_tb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_process_traceback_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__traceback__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 892\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 894\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 895\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py\u001b[0m in 
\u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 924\u001b[0m \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 926\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_no_variable_creation_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 927\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_variable_creation_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/tracing_compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 141\u001b[0m (concrete_function,\n\u001b[1;32m 142\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[0;32m--> 143\u001b[0;31m return concrete_function._call_flat(\n\u001b[0m\u001b[1;32m 144\u001b[0m filtered_flat_args, captured_inputs=concrete_function.captured_inputs) # pylint: disable=protected-access\n\u001b[1;32m 145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1755\u001b[0m and executing_eagerly):\n\u001b[1;32m 1756\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1757\u001b[0;31m return self._build_call_outputs(self._inference_function.call(\n\u001b[0m\u001b[1;32m 1758\u001b[0m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[1;32m 1759\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_InterpolateFunctionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcancellation_manager\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 381\u001b[0;31m outputs = execute.execute(\n\u001b[0m\u001b[1;32m 382\u001b[0m 
\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msignature\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mnum_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_outputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[1;32m 53\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 54\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ] }, { "cell_type": "code", "source": [ "!pip install gradio jinja2" ], "metadata": { "id": "QgPIpYks_Pbf", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9a903584-1270-4e4a-ecb3-72bc2e6afe32" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting gradio\n", " Downloading gradio-3.38.0-py3-none-any.whl (19.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (3.1.2)\n", "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", " Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: aiohttp~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.8.4)\n", "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n", "Collecting fastapi (from gradio)\n", " Downloading fastapi-0.100.0-py3-none-any.whl (65 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.7/65.7 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting ffmpy (from gradio)\n", " Downloading ffmpy-0.3.1.tar.gz (5.5 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting gradio-client>=0.2.10 (from gradio)\n", " Downloading gradio_client-0.2.10-py3-none-any.whl (288 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m289.0/289.0 kB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting httpx (from gradio)\n", " Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting huggingface-hub>=0.14.0 (from gradio)\n", " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.0.0)\n", "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.3)\n", "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", "Collecting mdit-py-plugins<=0.3.3 (from gradio)\n", " Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy~=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.4)\n", "Collecting orjson~=3.0 (from gradio)\n", " Downloading orjson-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.7/138.7 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (23.1)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n", "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (8.4.0)\n", "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.10.11)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Collecting python-multipart (from gradio)\n", " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0)\n", "Requirement already satisfied: requests~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.27.1)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.7.1)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.23.1-py3-none-any.whl (59 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 
kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting websockets<12.0,>=10.0 (from gradio)\n", " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (23.1.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (2.0.12)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (4.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (1.9.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp~=3.0->gradio) (1.3.1)\n", "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n", "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.3.3)\n", "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.10->gradio) (2023.6.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.2)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.65.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio) (0.1.2)\n", "Collecting linkify-it-py<3,>=1 (from markdown-it-py[linkify]>=2.0.0->gradio)\n", " Downloading linkify_it_py-2.0.2-py3-none-any.whl (19 kB)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.1.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.41.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.4)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.0)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n", "INFO: pip is looking at multiple versions of mdit-py-plugins to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting mdit-py-plugins<=0.3.3 (from gradio)\n", " Downloading mdit_py_plugins-0.3.2-py3-none-any.whl (50 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading mdit_py_plugins-0.3.1-py3-none-any.whl (46 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.5/46.5 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading mdit_py_plugins-0.3.0-py3-none-any.whl (43 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading mdit_py_plugins-0.2.8-py3-none-any.whl (41 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading mdit_py_plugins-0.2.7-py3-none-any.whl (41 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading mdit_py_plugins-0.2.6-py3-none-any.whl (39 kB)\n", " Downloading mdit_py_plugins-0.2.5-py3-none-any.whl (39 kB)\n", "INFO: pip is looking at multiple versions of mdit-py-plugins to determine which version is compatible with other requirements. This could take a while.\n", " Downloading mdit_py_plugins-0.2.4-py3-none-any.whl (39 kB)\n", " Downloading mdit_py_plugins-0.2.3-py3-none-any.whl (39 kB)\n", " Downloading mdit_py_plugins-0.2.2-py3-none-any.whl (39 kB)\n", " Downloading mdit_py_plugins-0.2.1-py3-none-any.whl (38 kB)\n", " Downloading mdit_py_plugins-0.2.0-py3-none-any.whl (38 kB)\n", "INFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. 
If you want to abort this run, press Ctrl + C.\n", " Downloading mdit_py_plugins-0.1.0-py3-none-any.whl (37 kB)\n", "Collecting markdown-it-py[linkify]>=2.0.0 (from gradio)\n", " Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.5/87.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Downloading markdown_it_py-2.2.0-py3-none-any.whl (84 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2022.7.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2023.5.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.4)\n", "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.4)\n", "Collecting h11>=0.8 (from uvicorn>=0.14.0->gradio)\n", " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting starlette<0.28.0,>=0.27.0 (from fastapi->gradio)\n", " Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting httpcore<0.18.0,>=0.15.0 (from httpx->gradio)\n", " Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx->gradio) (3.7.1)\n", "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.19.3)\n", "Collecting uc-micro-py (from linkify-it-py<3,>=1->markdown-it-py[linkify]>=2.0.0->gradio)\n", " Downloading uc_micro_py-1.0.2-py3-none-any.whl (6.2 kB)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n", "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx->gradio) (1.1.2)\n", "Building wheels for collected packages: ffmpy\n", " Building wheel for ffmpy (setup.py) ... 
done\n",
" Created wheel for ffmpy: filename=ffmpy-0.3.1-py3-none-any.whl size=5579 sha256=3661731f9e462876b7a4a918b40a127047fc10136594cd5e631561318d6e5a94\n",
" Stored in directory: /root/.cache/pip/wheels/01/a6/d1/1c0828c304a4283b2c1639a09ad86f83d7c487ef34c6b4a1bf\n",
"Successfully built ffmpy\n",
"Installing collected packages: pydub, ffmpy, websockets, uc-micro-py, semantic-version, python-multipart, orjson, markdown-it-py, h11, aiofiles, uvicorn, starlette, mdit-py-plugins, linkify-it-py, huggingface-hub, httpcore, httpx, fastapi, gradio-client, gradio\n",
" Attempting uninstall: markdown-it-py\n",
" Found existing installation: markdown-it-py 3.0.0\n",
" Uninstalling markdown-it-py-3.0.0:\n",
" Successfully uninstalled markdown-it-py-3.0.0\n",
"Successfully installed aiofiles-23.1.0 fastapi-0.100.0 ffmpy-0.3.1 gradio-3.38.0 gradio-client-0.2.10 h11-0.14.0 httpcore-0.17.3 httpx-0.24.1 huggingface-hub-0.16.4 linkify-it-py-2.0.2 markdown-it-py-2.2.0 mdit-py-plugins-0.3.3 orjson-3.9.2 pydub-0.25.1 python-multipart-0.0.6 semantic-version-2.10.0 starlette-0.27.0 uc-micro-py-1.0.2 uvicorn-0.23.1 websockets-11.0.3\n" ] } ] },
{ "cell_type": "code", "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "import numpy as np   # used below for np.expand_dims\n", "import gradio as gr\n" ], "metadata": { "id": "xs7T5-Zv4Fm6" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Load the toxicity classifier trained and saved in the training notebook\n", "model = tf.keras.models.load_model('/content/drive/MyDrive/Data-for-ML-projects/comment_toxicity/trained_toxicity_model/toxicity_det.h5')" ], "metadata": { "id": "2ILRcXiP14WI" }, "execution_count": null, "outputs": [] },
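{ "cell_type": "markdown", "source": [ "**Note:** the cells below also use `vectorizer` (the fitted `TextVectorization` layer) and `df` (the labelled training dataframe), which are created in the training notebook rather than here. If this notebook is run standalone, the next cell is a minimal sketch of how they could be rebuilt; the CSV path, the `comment_text` column name and the `TextVectorization` settings are assumptions and must match whatever was used when `toxicity_det.h5` was trained." ], "metadata": {} },
{ "cell_type": "code", "source": [ "# Hypothetical standalone setup: rebuild df and the vectorizer used at training time.\n", "# The path, column name and vectorization settings below are assumptions, not part of the recorded run.\n", "import pandas as pd\n", "from tensorflow.keras.layers import TextVectorization\n", "\n", "df = pd.read_csv('/content/drive/MyDrive/Data-for-ML-projects/comment_toxicity/train.csv')  # assumed location of the labelled data\n", "\n", "vectorizer = TextVectorization(max_tokens=200000,            # assumed vocabulary size\n", "                               output_sequence_length=1800,  # assumed padded sequence length\n", "                               output_mode='int')\n", "vectorizer.adapt(df['comment_text'].values)  # learn the vocabulary from the training comments" ], "metadata": {}, "execution_count": null, "outputs": [] },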
{ "cell_type": "code", "source": [ "# Quick sanity check: vectorize one obviously toxic comment\n", "input_str = vectorizer(\"Hey I will fucking kill you man!\")" ], "metadata": { "id": "XGF5aRzQ3edS" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Add a batch dimension and run the single comment through the model\n", "pred_res = model.predict(np.expand_dims(input_str, 0))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gnZY3AYW5ZJ9", "outputId": "68c3433f-391f-41e2-fd25-fd62d3096646" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1/1 [==============================] - 1s 1s/step\n" ] } ] },
{ "cell_type": "code", "source": [ "pred_res  # one probability per label column" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "06aL99Gg5jQG", "outputId": "1870511d-2486-44fa-d7e2-d5760672d14a" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([[0.993094 , 0.34385905, 0.9560297 , 0.0484327 , 0.8572675 ,\n", "        0.22735107]], dtype=float32)" ] }, "metadata": {}, "execution_count": 43 } ] },
{ "cell_type": "code", "source": [ "# The six label columns of the training dataframe\n", "df.columns[2:]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6fn0SZkf5ylg", "outputId": "1ad5c0e8-067d-4373-9577-6c942bbc343e" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['toxic', 'severe_toxic', 'obscene', 'threat', 'insult',\n", "       'identity_hate'],\n", "      dtype='object')" ] }, "metadata": {}, "execution_count": 44 } ] },
{ "cell_type": "code", "source": [ "def score_comment(comment):  # the function we hook into the Gradio interface\n", "    \"\"\"\n", "    Take a raw comment string and pass it through the prediction pipeline.\n", "    \"\"\"\n", "    vectorized_comment = vectorizer([comment])  # 1. convert the text into a sequence of token ids\n", "    results = model.predict(vectorized_comment)  # 2. run the vectorized comment through the model\n", "\n", "    # 3. Unpack the results: one probability per label column, i.e.\n", "    #    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'.\n", "    text = ''\n", "    for idx, col in enumerate(df.columns[2:]):\n", "        text += '{}: {}\\n'.format(col, results[0][idx] > 0.40)  # flag the label when its probability exceeds 0.40\n", "\n", "    return text\n" ], "metadata": { "id": "1YmsVJfn53M_" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Note: gr.inputs.Textbox is deprecated in Gradio 3.x (see the warnings below); gr.Textbox accepts the same arguments.\n", "interface = gr.Interface(fn=score_comment,\n", "                         inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),\n", "                         outputs='text')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5-naT_gm7eGY", "outputId": "70879b8a-9562-4438-93ef-1297ae8d8ce6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":2: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n", "  inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),\n", ":2: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n", "  inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),\n", ":2: GradioDeprecationWarning: `numeric` parameter is deprecated, and it has no effect\n", "  inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),\n" ] } ] },
{ "cell_type": "code", "source": [ "interface.launch(share=True)  # share=True exposes the app at a temporary public gradio.live URL" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 591 }, "id": "c-QSTB4y78Ds", "outputId": "ccf7a71a-6720-477c-ddc4-a3303aaea4f9" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", "Running on public URL: https://2abb09c6118cc199ab.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "
" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "fc74PnYL9DAj" }, "execution_count": null, "outputs": [] } ] }