amlanscloud committed
Commit 0bcc91b
1 Parent(s): 293c45c

Upload text_sentiment_analysis_blog_notebook.ipynb

text_sentiment_analysis_blog_notebook.ipynb CHANGED
@@ -2768,7 +2768,7 @@
2768
  },
2769
  {
2770
  "cell_type": "code",
2771
- "execution_count": 1,
2772
  "metadata": {
2773
  "colab": {
2774
  "base_uri": "https://localhost:8080/"
@@ -2893,7 +2893,8 @@
2893
  {
2894
  "cell_type": "markdown",
2895
  "source": [
2896
- "# Get and process Dataset"
 
2897
  ],
2898
  "metadata": {
2899
  "id": "1f0WYksXHPAM"
@@ -3003,7 +3004,7 @@
3003
  "id": "mqwYf9xWHUuh",
3004
  "outputId": "66965e88-8115-4d73-db90-6706371a0654"
3005
  },
3006
- "execution_count": 2,
3007
  "outputs": [
3008
  {
3009
  "output_type": "display_data",
@@ -3155,7 +3156,7 @@
3155
  "metadata": {
3156
  "id": "GuCNFWV_HXlf"
3157
  },
3158
- "execution_count": 3,
3159
  "outputs": []
3160
  },
3161
  {
@@ -3169,7 +3170,7 @@
3169
  "metadata": {
3170
  "id": "NiSQuUUgHdLn"
3171
  },
3172
- "execution_count": 4,
3173
  "outputs": []
3174
  },
3175
  {
@@ -3181,7 +3182,7 @@
3181
  "metadata": {
3182
  "id": "W_50sqXGHetc"
3183
  },
3184
- "execution_count": 5,
3185
  "outputs": []
3186
  },
3187
  {
@@ -3197,7 +3198,7 @@
3197
  "id": "TntoBCAvHg0D",
3198
  "outputId": "3820572d-2293-4b08-b9c5-7a6953c19ce8"
3199
  },
3200
- "execution_count": 6,
3201
  "outputs": [
3202
  {
3203
  "output_type": "execute_result",
@@ -3385,7 +3386,7 @@
3385
  "metadata": {
3386
  "id": "asZstARkHjz_"
3387
  },
3388
- "execution_count": 7,
3389
  "outputs": []
3390
  },
3391
  {
@@ -3403,7 +3404,7 @@
3403
  "id": "qPKqxIVwHu_9",
3404
  "outputId": "112271bf-d4db-47cd-9410-b92482488dd2"
3405
  },
3406
- "execution_count": 8,
3407
  "outputs": [
3408
  {
3409
  "output_type": "execute_result",
@@ -3575,7 +3576,7 @@
3575
  "id": "QwE34N-1Hw4O",
3576
  "outputId": "7a0ec80d-1c2e-497b-bc3a-a5eceae4fc11"
3577
  },
3578
- "execution_count": 9,
3579
  "outputs": [
3580
  {
3581
  "output_type": "execute_result",
@@ -3601,7 +3602,7 @@
3601
  "id": "R-bQ7kIfH4Lr",
3602
  "outputId": "1514f226-26dc-4ed4-adbc-edfef2c25e31"
3603
  },
3604
- "execution_count": 10,
3605
  "outputs": [
3606
  {
3607
  "output_type": "execute_result",
@@ -3629,7 +3630,7 @@
3629
  "id": "vZ91HtgCH5-B",
3630
  "outputId": "45ffe43b-9718-421d-c8d0-8fc2be5d9c5c"
3631
  },
3632
- "execution_count": 11,
3633
  "outputs": [
3634
  {
3635
  "output_type": "stream",
@@ -3651,7 +3652,8 @@
3651
  {
3652
  "cell_type": "markdown",
3653
  "source": [
3654
- "# Limit dataset for quick training"
 
3655
  ],
3656
  "metadata": {
3657
  "id": "6PQddaR0H-g-"
@@ -3666,7 +3668,7 @@
3666
  "metadata": {
3667
  "id": "DrtZRy3gH8IC"
3668
  },
3669
- "execution_count": 12,
3670
  "outputs": []
3671
  },
3672
  {
@@ -3681,7 +3683,8 @@
3681
  {
3682
  "cell_type": "markdown",
3683
  "source": [
3684
- "# Train test split"
 
3685
  ],
3686
  "metadata": {
3687
  "id": "bUEBfxgkIHQv"
@@ -3698,7 +3701,7 @@
3698
  "metadata": {
3699
  "id": "W7qXhZvdIJni"
3700
  },
3701
- "execution_count": 13,
3702
  "outputs": []
3703
  },
3704
  {
@@ -3716,7 +3719,7 @@
3716
  "id": "fxqHXivAILVc",
3717
  "outputId": "d5ce0660-92b9-445a-9e26-52445a7c1bb7"
3718
  },
3719
- "execution_count": 14,
3720
  "outputs": [
3721
  {
3722
  "output_type": "stream",
@@ -3739,7 +3742,7 @@
3739
  "metadata": {
3740
  "id": "c4a1lNf2INRx"
3741
  },
3742
- "execution_count": 15,
3743
  "outputs": []
3744
  },
3745
  {
@@ -3755,6 +3758,7 @@
3755
  "cell_type": "markdown",
3756
  "source": [
3757
  "# Pre process steps \n",
 
3758
  "\n",
3759
  "1. Stemming and Lemmatization\n",
3760
  "2. Tokenizer\n",
@@ -3810,7 +3814,7 @@
3810
  "metadata": {
3811
  "id": "wrs4BuXGIaQP"
3812
  },
3813
- "execution_count": 16,
3814
  "outputs": []
3815
  },
3816
  {
@@ -3838,7 +3842,7 @@
3838
  "metadata": {
3839
  "id": "z4bO5f5KIinV"
3840
  },
3841
- "execution_count": 17,
3842
  "outputs": []
3843
  },
3844
  {
@@ -3865,7 +3869,7 @@
3865
  "id": "gWpOtBFtIqnm",
3866
  "outputId": "d4a8c94c-65c8-4f7a-9248-071d0d427772"
3867
  },
3868
- "execution_count": 18,
3869
  "outputs": [
3870
  {
3871
  "output_type": "execute_result",
@@ -3896,7 +3900,7 @@
3896
  "metadata": {
3897
  "id": "kropkedQI0zE"
3898
  },
3899
- "execution_count": 19,
3900
  "outputs": []
3901
  },
3902
  {
@@ -3927,7 +3931,7 @@
3927
  "id": "x0Kp2C5LI9nP",
3928
  "outputId": "b13990c0-8aaf-42a2-ee6d-7122bf0f72a7"
3929
  },
3930
- "execution_count": 20,
3931
  "outputs": [
3932
  {
3933
  "output_type": "execute_result",
@@ -3950,7 +3954,8 @@
3950
  {
3951
  "cell_type": "markdown",
3952
  "source": [
3953
- "# Create Model"
 
3954
  ],
3955
  "metadata": {
3956
  "id": "OnZDEGl2JJ2c"
@@ -3970,7 +3975,7 @@
3970
  "metadata": {
3971
  "id": "8ENmS8h0JHcm"
3972
  },
3973
- "execution_count": 21,
3974
  "outputs": []
3975
  },
3976
  {
@@ -3992,7 +3997,7 @@
3992
  "id": "7sByLvCUJQZP",
3993
  "outputId": "16daa2e5-3955-4885-d8bc-84f05399bd52"
3994
  },
3995
- "execution_count": 22,
3996
  "outputs": [
3997
  {
3998
  "output_type": "stream",
@@ -4023,7 +4028,8 @@
4023
  {
4024
  "cell_type": "markdown",
4025
  "source": [
4026
- "# Train Model"
 
4027
  ],
4028
  "metadata": {
4029
  "id": "CxpgHz4AJZm1"
@@ -4041,7 +4047,7 @@
4041
  "id": "m5nEI4mUJV0C",
4042
  "outputId": "7fb00a14-e2c2-41ee-b0b1-0d9e7325937a"
4043
  },
4044
- "execution_count": 23,
4045
  "outputs": [
4046
  {
4047
  "output_type": "stream",
@@ -4105,7 +4111,7 @@
4105
  "source": [
4106
  "# Evaluate model \n",
4107
  "\n",
4108
- "A very bad case of overfitting happening in this trained model because of the limited data I used. Can be improved by increasing training data and tuning other parameters."
4109
  ],
4110
  "metadata": {
4111
  "id": "J69DUun-JtTH"
@@ -4136,7 +4142,7 @@
4136
  "id": "I0bm7lZLJwZH",
4137
  "outputId": "f92ef16a-dac1-454b-ceb6-d9077dea4d59"
4138
  },
4139
- "execution_count": 24,
4140
  "outputs": [
4141
  {
4142
  "output_type": "execute_result",
@@ -4163,7 +4169,8 @@
4163
  {
4164
  "cell_type": "markdown",
4165
  "source": [
4166
- "# Peform Inference"
 
4167
  ],
4168
  "metadata": {
4169
  "id": "AWOv0GINKJGj"
@@ -4189,7 +4196,7 @@
4189
  "id": "lF-CiYmbJ1Eb",
4190
  "outputId": "c70f4f33-a9b7-4338-83e8-5f3357cbd787"
4191
  },
4192
- "execution_count": 25,
4193
  "outputs": [
4194
  {
4195
  "output_type": "stream",
@@ -4204,7 +4211,8 @@
4204
  {
4205
  "cell_type": "markdown",
4206
  "source": [
4207
- "# Save the Model files"
 
4208
  ],
4209
  "metadata": {
4210
  "id": "_GfgPLt6KQRn"
@@ -4250,7 +4258,7 @@
4250
  "metadata": {
4251
  "id": "tVlsCgDMKkom"
4252
  },
4253
- "execution_count": 27,
4254
  "outputs": []
4255
  },
4256
  {
 
2768
  },
2769
  {
2770
  "cell_type": "code",
2771
+ "execution_count": null,
2772
  "metadata": {
2773
  "colab": {
2774
  "base_uri": "https://localhost:8080/"
 
2893
  {
2894
  "cell_type": "markdown",
2895
  "source": [
2896
+ "# Get and process Dataset\n",
2897
+ "Downloading and loading the dataset from Huggingface. The dataset package is used to get the dataset."
2898
  ],
2899
  "metadata": {
2900
  "id": "1f0WYksXHPAM"
 
3004
  "id": "mqwYf9xWHUuh",
3005
  "outputId": "66965e88-8115-4d73-db90-6706371a0654"
3006
  },
3007
+ "execution_count": null,
3008
  "outputs": [
3009
  {
3010
  "output_type": "display_data",
 
3156
  "metadata": {
3157
  "id": "GuCNFWV_HXlf"
3158
  },
3159
+ "execution_count": null,
3160
  "outputs": []
3161
  },
3162
  {
 
3170
  "metadata": {
3171
  "id": "NiSQuUUgHdLn"
3172
  },
3173
+ "execution_count": null,
3174
  "outputs": []
3175
  },
3176
  {
 
3182
  "metadata": {
3183
  "id": "W_50sqXGHetc"
3184
  },
3185
+ "execution_count": null,
3186
  "outputs": []
3187
  },
3188
  {
 
3198
  "id": "TntoBCAvHg0D",
3199
  "outputId": "3820572d-2293-4b08-b9c5-7a6953c19ce8"
3200
  },
3201
+ "execution_count": null,
3202
  "outputs": [
3203
  {
3204
  "output_type": "execute_result",
 
3386
  "metadata": {
3387
  "id": "asZstARkHjz_"
3388
  },
3389
+ "execution_count": null,
3390
  "outputs": []
3391
  },
3392
  {
 
3404
  "id": "qPKqxIVwHu_9",
3405
  "outputId": "112271bf-d4db-47cd-9410-b92482488dd2"
3406
  },
3407
+ "execution_count": null,
3408
  "outputs": [
3409
  {
3410
  "output_type": "execute_result",
 
3576
  "id": "QwE34N-1Hw4O",
3577
  "outputId": "7a0ec80d-1c2e-497b-bc3a-a5eceae4fc11"
3578
  },
3579
+ "execution_count": null,
3580
  "outputs": [
3581
  {
3582
  "output_type": "execute_result",
 
3602
  "id": "R-bQ7kIfH4Lr",
3603
  "outputId": "1514f226-26dc-4ed4-adbc-edfef2c25e31"
3604
  },
3605
+ "execution_count": null,
3606
  "outputs": [
3607
  {
3608
  "output_type": "execute_result",
 
3630
  "id": "vZ91HtgCH5-B",
3631
  "outputId": "45ffe43b-9718-421d-c8d0-8fc2be5d9c5c"
3632
  },
3633
+ "execution_count": null,
3634
  "outputs": [
3635
  {
3636
  "output_type": "stream",
 
3652
  {
3653
  "cell_type": "markdown",
3654
  "source": [
3655
+ "# Limit dataset for quick training\n",
3656
+ "This step is only done for this post example. In real scenario, good amount of data will be needed for the training."
3657
  ],
3658
  "metadata": {
3659
  "id": "6PQddaR0H-g-"
 
3668
  "metadata": {
3669
  "id": "DrtZRy3gH8IC"
3670
  },
3671
+ "execution_count": null,
3672
  "outputs": []
3673
  },
3674
  {
 
3683
  {
3684
  "cell_type": "markdown",
3685
  "source": [
3686
+ "# Train test split \n",
3687
+ "Splitting the dataset into Training and Testing sets. The Train set will be used for training and the Test one will be used for evaluating the model."
3688
  ],
3689
  "metadata": {
3690
  "id": "bUEBfxgkIHQv"
 
3701
  "metadata": {
3702
  "id": "W7qXhZvdIJni"
3703
  },
3704
+ "execution_count": null,
3705
  "outputs": []
3706
  },
3707
  {
 
3719
  "id": "fxqHXivAILVc",
3720
  "outputId": "d5ce0660-92b9-445a-9e26-52445a7c1bb7"
3721
  },
3722
+ "execution_count": null,
3723
  "outputs": [
3724
  {
3725
  "output_type": "stream",
 
3742
  "metadata": {
3743
  "id": "c4a1lNf2INRx"
3744
  },
3745
+ "execution_count": null,
3746
  "outputs": []
3747
  },
3748
  {
 
3758
  "cell_type": "markdown",
3759
  "source": [
3760
  "# Pre process steps \n",
3761
+ "For an efficient training, dataset need to be pre-processed to get better results. Below are the steps I am handling here.\n",
3762
  "\n",
3763
  "1. Stemming and Lemmatization\n",
3764
  "2. Tokenizer\n",
 
3814
  "metadata": {
3815
  "id": "wrs4BuXGIaQP"
3816
  },
3817
+ "execution_count": null,
3818
  "outputs": []
3819
  },
3820
  {
 
3842
  "metadata": {
3843
  "id": "z4bO5f5KIinV"
3844
  },
3845
+ "execution_count": null,
3846
  "outputs": []
3847
  },
3848
  {
 
3869
  "id": "gWpOtBFtIqnm",
3870
  "outputId": "d4a8c94c-65c8-4f7a-9248-071d0d427772"
3871
  },
3872
+ "execution_count": null,
3873
  "outputs": [
3874
  {
3875
  "output_type": "execute_result",
 
3900
  "metadata": {
3901
  "id": "kropkedQI0zE"
3902
  },
3903
+ "execution_count": null,
3904
  "outputs": []
3905
  },
3906
  {
 
3931
  "id": "x0Kp2C5LI9nP",
3932
  "outputId": "b13990c0-8aaf-42a2-ee6d-7122bf0f72a7"
3933
  },
3934
+ "execution_count": null,
3935
  "outputs": [
3936
  {
3937
  "output_type": "execute_result",
 
3954
  {
3955
  "cell_type": "markdown",
3956
  "source": [
3957
+ "# Create Model \n",
3958
+ "I am creating an LSTM model with dropout layer for this example"
3959
  ],
3960
  "metadata": {
3961
  "id": "OnZDEGl2JJ2c"
 
3975
  "metadata": {
3976
  "id": "8ENmS8h0JHcm"
3977
  },
3978
+ "execution_count": null,
3979
  "outputs": []
3980
  },
3981
  {
 
3997
  "id": "7sByLvCUJQZP",
3998
  "outputId": "16daa2e5-3955-4885-d8bc-84f05399bd52"
3999
  },
4000
+ "execution_count": null,
4001
  "outputs": [
4002
  {
4003
  "output_type": "stream",
 
4028
  {
4029
  "cell_type": "markdown",
4030
  "source": [
4031
+ "# Train Model \n",
4032
+ "The actual training step for the model"
4033
  ],
4034
  "metadata": {
4035
  "id": "CxpgHz4AJZm1"
 
4047
  "id": "m5nEI4mUJV0C",
4048
  "outputId": "7fb00a14-e2c2-41ee-b0b1-0d9e7325937a"
4049
  },
4050
+ "execution_count": null,
4051
  "outputs": [
4052
  {
4053
  "output_type": "stream",
 
4111
  "source": [
4112
  "# Evaluate model \n",
4113
  "\n",
4114
+ "Evaluting the performance of the model. A very bad case of overfitting happening in this trained model because of the limited data I used. Can be improved by increasing training data and tuning other parameters."
4115
  ],
4116
  "metadata": {
4117
  "id": "J69DUun-JtTH"
 
4142
  "id": "I0bm7lZLJwZH",
4143
  "outputId": "f92ef16a-dac1-454b-ceb6-d9077dea4d59"
4144
  },
4145
+ "execution_count": null,
4146
  "outputs": [
4147
  {
4148
  "output_type": "execute_result",
 
4169
  {
4170
  "cell_type": "markdown",
4171
  "source": [
4172
+ "# Peform Inference \n",
4173
+ "Here the model is being tested with some text input"
4174
  ],
4175
  "metadata": {
4176
  "id": "AWOv0GINKJGj"
 
4196
  "id": "lF-CiYmbJ1Eb",
4197
  "outputId": "c70f4f33-a9b7-4338-83e8-5f3357cbd787"
4198
  },
4199
+ "execution_count": null,
4200
  "outputs": [
4201
  {
4202
  "output_type": "stream",
 
4211
  {
4212
  "cell_type": "markdown",
4213
  "source": [
4214
+ "# Save the Model files \n",
4215
+ "Using MLEM package to save the model files for deployment"
4216
  ],
4217
  "metadata": {
4218
  "id": "_GfgPLt6KQRn"
 
4258
  "metadata": {
4259
  "id": "tVlsCgDMKkom"
4260
  },
4261
+ "execution_count": null,
4262
  "outputs": []
4263
  },
4264
  {