{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "m8fE5WS67LOk", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "8b4c6ecf-030b-4ad6-f17c-12cbdd20f943" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m25.3/25.3 MB\u001b[0m \u001b[31m50.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m73.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m653.6/653.6 kB\u001b[0m \u001b[31m59.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m99.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m66.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.8/468.8 kB\u001b[0m \u001b[31m51.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m76.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Building wheel for ktrain (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras_bert (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-transformer (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-embed-sim (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-layer-normalization (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-multi-head (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-pos-embd (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-position-wise-feed-forward (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for keras-self-attention (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for cchardet (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Building wheel for tika (setup.py) ... \u001b[?25l\u001b[?25hdone\n" ] } ], "source": [ "!pip install -q ktrain" ] }, { "cell_type": "code", "source": [ "import ktrain\n", "from ktrain import text\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "import os\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix" ], "metadata": { "id": "F8OQn0v18Zuw" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" ], "metadata": { "id": "QKUWKSZE8j70" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "root_folder = \"/content/drive/MyDrive/Colab Notebooks/\"\n", "test_data_file = root_folder + \"data/internet_provider_test.csv\"\n", "test_data = pd.read_csv(test_data_file)\n", "categories = ['Slow Connection', 'Billing', 'Setup', 'No Connectivity']" ], "metadata": { "id": "_6ofxOvA8arZ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "predictor = ktrain.load_predictor(root_folder + \"models/distilbert-model\")" ], "metadata": { "id": "jfwfAwGE_qJo" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "test_predictions = predictor.predict(test_data[\"Text\"].tolist())" ], "metadata": { "id": "wNN96flfMNky" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "accuracy = accuracy_score(test_data[\"Category\"].tolist(), test_predictions)\n", "print(f'Test Accuracy: {accuracy}')\n", "print(classification_report(test_data[\"Category\"].tolist(), test_predictions))\n", "\n", "conf_matrix = confusion_matrix(test_data[\"Category\"].tolist(), test_predictions)\n", "print('Confusion Matrix:')\n", "print(conf_matrix)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "T-m4v3U2S0cY", "outputId": "2c0587f8-e834-41d3-8407-01c0a34cc84a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Test Accuracy: 0.9923664122137404\n", " precision recall f1-score support\n", "\n", " Billing 1.00 0.96 0.98 28\n", "No Connectivity 1.00 1.00 1.00 27\n", " Setup 1.00 1.00 1.00 57\n", "Slow Connection 0.95 1.00 0.97 19\n", "\n", " accuracy 0.99 131\n", " macro avg 0.99 0.99 0.99 131\n", " weighted avg 0.99 0.99 0.99 131\n", "\n", "Confusion Matrix:\n", "[[27 0 0 1]\n", " [ 0 27 0 0]\n", " [ 0 0 57 0]\n", " [ 0 0 0 19]]\n" ] } ] }, { "cell_type": "markdown", "source": [], "metadata": { "id": "m3qtEjRXSsiu" } }, { "cell_type": "code", "source": [ "def print_prediction(predictor, text):\n", " labels = predictor.get_classes()\n", " preds = predictor.predict_proba(text)\n", " probs = [f\"{label}: {float(pred)}\" for label, pred in zip(labels, preds)]\n", " print(probs)" ], "metadata": { "id": "6ZRMQU3duT95" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "x = \"I connection is very slow\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Kl1F196gS0UP", "outputId": "9aee3f66-6f5f-414a-9045-e16986dfcd11" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Slow Connection\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"I can't connect to any website\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BQytvMlgS9cW", "outputId": "7bf660aa-3014-4a86-c0cd-c94219b33e5c" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: No Connectivity\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"I am paying too much for the service\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "id": "VK0avHJ6TEWD", "outputId": "8108186a-228a-4733-c22b-0b42e0a647d1", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Billing\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"I am waiting for engineer to configure the connection\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-qnLDDmPTWXb", "outputId": "770b0922-6f50-4f96-d309-5b1caae07831" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Setup\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"My internet is not\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "08G-waRmsIot", "outputId": "caf48b98-4146-4557-d68e-df73434bad8e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: No Connectivity\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"My internet is not working\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CijAiOSRsOYq", "outputId": "64e4ef62-0300-4b29-b5c0-8c9be45532dd" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Slow Connection\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"My internet is not working.\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2AjgbowSsPkf", "outputId": "6c78da5e-f100-4a88-e7c2-ca99245c1f7b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Slow Connection\n" ] } ] }, { "cell_type": "code", "source": [ "x = \"My internet is not working at all\"\n", "prediction = predictor.predict(x)\n", "print(f\"prediction: {prediction}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_11U1q7ysRb5", "outputId": "c1fd45ac-cdb2-41f2-bcd1-89b919f894a4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "prediction: Slow Connection\n" ] } ] }, { "cell_type": "code", "source": [ "print_prediction(predictor, \"My internet is not working at all\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SXmSCN3cu9cX", "outputId": "fe179c02-9483-4b94-e970-e761f91e18e4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "['Billing: 0.0002786574768833816', 'No Connectivity: 0.008474737405776978', 'Setup: 0.0002650754468049854', 'Slow Connection: 0.9909815192222595']\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "IUcD_l2MvFDy" }, "execution_count": null, "outputs": [] } ] }