arfat-xyz
/

ex4

Model card Files Files and versions Community

arfat-xyz committed on Nov 24, 2022

Commit

546448e

•

1 Parent(s): cdd336d

Upload Section_8_Text2MCQ_practice.ipynb

Browse files

Files changed (1) hide show

Section_8_Text2MCQ_practice.ipynb +1251 -0

Section_8_Text2MCQ_practice.ipynb ADDED Viewed

	@@ -0,0 +1,1251 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "8JqpxyBueqTH",
+        "outputId": "6c2c3908-9067-496c-ad64-74f21895232a"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "  Building wheel for flashtext (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+            "Collecting git+https://github.com/boudinfl/pke.git\n",
+            "  Cloning https://github.com/boudinfl/pke.git to /tmp/pip-req-build-s0vst_dk\n",
+            "  Running command git clone -q https://github.com/boudinfl/pke.git /tmp/pip-req-build-s0vst_dk\n",
+            "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (3.7)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (2.6.3)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (1.21.6)\n",
+            "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (1.7.3)\n",
+            "Collecting sklearn\n",
+            "  Downloading sklearn-0.0.post1.tar.gz (3.6 kB)\n",
+            "Collecting unidecode\n",
+            "  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)\n",
+            "\u001b[K     |████████████████████████████████| 235 kB 6.2 MB/s \n",
+            "\u001b[?25hRequirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (0.16.0)\n",
+            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (1.2.0)\n",
+            "Requirement already satisfied: spacy>=3.2.3 in /usr/local/lib/python3.7/dist-packages (from pke==2.0.0) (3.4.3)\n",
+            "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (2.0.7)\n",
+            "Requirement already satisfied: typing-extensions<4.2.0,>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (4.1.1)\n",
+            "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (1.0.3)\n",
+            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (57.4.0)\n",
+            "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.10 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (3.0.10)\n",
+            "Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (0.10.1)\n",
+            "Requirement already satisfied: typer<0.8.0,>=0.3.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (0.7.0)\n",
+            "Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (8.1.5)\n",
+            "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (2.4.5)\n",
+            "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (3.0.8)\n",
+            "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (4.64.1)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (21.3)\n",
+            "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (1.0.9)\n",
+            "Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (0.8.1)\n",
+            "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (1.10.2)\n",
+            "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (2.23.0)\n",
+            "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (3.3.0)\n",
+            "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (2.0.8)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.7/dist-packages (from spacy>=3.2.3->pke==2.0.0) (2.11.3)\n",
+            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from catalogue<2.1.0,>=2.0.6->spacy>=3.2.3->pke==2.0.0) (3.10.0)\n",
+            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->spacy>=3.2.3->pke==2.0.0) (3.0.9)\n",
+            "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /usr/local/lib/python3.7/dist-packages (from pathy>=0.3.5->spacy>=3.2.3->pke==2.0.0) (5.2.1)\n",
+            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=3.2.3->pke==2.0.0) (2.10)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=3.2.3->pke==2.0.0) (2022.9.24)\n",
+            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=3.2.3->pke==2.0.0) (3.0.4)\n",
+            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=3.2.3->pke==2.0.0) (1.24.3)\n",
+            "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy>=3.2.3->pke==2.0.0) (0.0.3)\n",
+            "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.7/dist-packages (from thinc<8.2.0,>=8.1.0->spacy>=3.2.3->pke==2.0.0) (0.7.9)\n",
+            "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.7/dist-packages (from typer<0.8.0,>=0.3.0->spacy>=3.2.3->pke==2.0.0) (7.1.2)\n",
+            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2->spacy>=3.2.3->pke==2.0.0) (2.0.1)\n",
+            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.7/dist-packages (from nltk->pke==2.0.0) (2022.6.2)\n",
+            "Building wheels for collected packages: pke, sklearn\n",
+            "  Building wheel for pke (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for pke: filename=pke-2.0.0-py3-none-any.whl size=6160276 sha256=6967c9216d570e0bbc7bab2c16f5f1810ecd62dcc9fad636e26ff35edbab3a68\n",
+            "  Stored in directory: /tmp/pip-ephem-wheel-cache-_mu5g7sn/wheels/fa/b3/09/612ee93bf3ee4164bcd5783e742942cdfc892a86039d3e0a33\n",
+            "  Building wheel for sklearn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for sklearn: filename=sklearn-0.0.post1-py3-none-any.whl size=2344 sha256=47f5287c3e5d1518e0617e1db17d093069e553338d6c0e359aa70352e6c78d66\n",
+            "  Stored in directory: /root/.cache/pip/wheels/42/56/cc/4a8bf86613aafd5b7f1b310477667c1fca5c51c3ae4124a003\n",
+            "Successfully built pke sklearn\n",
+            "Installing collected packages: unidecode, sklearn, pke\n",
+            "Successfully installed pke-2.0.0 sklearn-0.0.post1 unidecode-1.3.6\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install --quiet flashtext==2.7\n",
+        "!pip install git+https://github.com/boudinfl/pke.git\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "am3XUlr5evYK"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install --quiet transformers==4.8.1\n",
+        "!pip install --quiet sentencepiece==0.1.95\n",
+        "!pip install --quiet textwrap3==0.9.2\n",
+        "!pip install  gradio"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "mhwpLyuBfFUK",
+        "outputId": "dc6f4900-429d-4815-c98c-b8625efcbe7b"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\u001b[?25l\r\u001b[K     |███████▊                        | 10 kB 27.7 MB/s eta 0:00:01\r\u001b[K     |███████████████▌                | 20 kB 34.6 MB/s eta 0:00:01\r\u001b[K     |███████████████████████▏        | 30 kB 15.4 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████ | 40 kB 6.6 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 42 kB 955 kB/s \n",
+            "\u001b[?25h"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install --quiet strsim==0.0.3\n",
+        "!pip install --quiet sense2vec==2.0.0"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "NcNXz17EfQLJ",
+        "outputId": "c90851f7-e320-48e3-d994-fcc5c174c636"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\u001b[?25l\r\u001b[K     |▏                               | 10 kB 10.5 MB/s eta 0:00:01\r\u001b[K     |▍                               | 20 kB 7.8 MB/s eta 0:00:01\r\u001b[K     |▋                               | 30 kB 11.1 MB/s eta 0:00:01\r\u001b[K     |▉                               | 40 kB 6.3 MB/s eta 0:00:01\r\u001b[K     |█                               | 51 kB 6.3 MB/s eta 0:00:01\r\u001b[K     |█▎                              | 61 kB 7.4 MB/s eta 0:00:01\r\u001b[K     |█▌                              | 71 kB 7.9 MB/s eta 0:00:01\r\u001b[K     |█▊                              | 81 kB 8.7 MB/s eta 0:00:01\r\u001b[K     |█▉                              | 92 kB 8.7 MB/s eta 0:00:01\r\u001b[K     |██                              | 102 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██▎                             | 112 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██▌                             | 122 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██▊                             | 133 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███                             | 143 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███▏                            | 153 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███▍                            | 163 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███▌                            | 174 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███▊                            | 184 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████                            | 194 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████▏                           | 204 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████▍                           | 215 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████▋                           | 225 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████▉                           | 235 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████                           | 245 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████▎                          | 256 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████▍                          | 266 kB 
7.5 MB/s eta 0:00:01\r\u001b[K     |█████▋                          | 276 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████▉                          | 286 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████                          | 296 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████▎                         | 307 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████▌                         | 317 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████▊                         | 327 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████                         | 337 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████                         | 348 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████▎                        | 358 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████▌                        | 368 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████▊                        | 378 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████                        | 389 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████▏                       | 399 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████▍                       | 409 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████▋                       | 419 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████▊                       | 430 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████                       | 440 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████▏                      | 450 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████▍                      | 460 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████▋                      | 471 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████▉                      | 481 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████                      | 491 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████▎                     | 501 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████▌                     | 512 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████▋                     | 522 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████▉                     | 532 
kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████                     | 542 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████▎                    | 552 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████▌                    | 563 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████▊                    | 573 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████���█████                    | 583 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████▏                   | 593 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████▎                   | 604 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████▌                   | 614 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████▊                   | 624 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████                   | 634 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████▏                  | 645 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████▍                  | 655 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████▋                  | 665 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████▉                  | 675 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████                  | 686 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████▏                 | 696 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████▍                 | 706 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████▋                 | 716 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████▉                 | 727 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████                 | 737 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████▎                | 747 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████▌                | 757 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████▊                | 768 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████▉                | 778 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████                | 788 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████▎               | 
798 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████▌               | 808 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████▊               | 819 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████               | 829 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████▏              | 839 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████▍              | 849 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████▌              | 860 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████▊              | 870 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████              | 880 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████▏             | 890 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████▍             | 901 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████▋             | 911 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████▉             | 921 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████             | 931 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████▎            | 942 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████▍            | 952 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████▋            | 962 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████▉            | 972 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████            | 983 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████▎           | 993 kB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████▌           | 1.0 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████▊           | 1.0 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████           | 1.0 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████           | 1.0 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████▎          | 1.0 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████▌          | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████▊          
| 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████          | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████▏         | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████▍         | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████▋         | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████▊         | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████         | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████▏        | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████▍        | 1.1 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████▋        | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████▉        | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████        | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████▎       | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████▌       | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████▋       | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████▉       | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████       | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▎      | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▌      | 1.2 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▊      | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████      | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▏     | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▎     | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▌     | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████▊     | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████     | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▏  
  | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▍    | 1.3 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▋    | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▉    | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████    | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▏   | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▍   | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▋   | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▉   | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████   | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▎  | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▌  | 1.4 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▊  | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▉  | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████  | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████▎ | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████▌ | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████▊ | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████ | 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▏| 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▍| 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▌| 1.5 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▊| 1.6 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 1.6 MB 7.5 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 1.6 MB 7.5 MB/s \n",
+            "\u001b[?25htime: 506 µs (started: 2022-11-24 06:06:09 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install --quiet ipython-autotime\n",
+        "%load_ext autotime"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "Bijc_hfbfUwp",
+        "outputId": "54a7f895-8f08-452d-8f3a-8e5310a1aa6c"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\u001b[K     |████████████████████████████████| 85 kB 3.9 MB/s \n",
+            "\u001b[K     |████████████████████████████████| 182 kB 49.1 MB/s \n",
+            "\u001b[K     |████████████████████████████████| 5.5 MB 54.9 MB/s \n",
+            "\u001b[K     |████████████████████████████████| 7.6 MB 55.0 MB/s \n",
+            "\u001b[?25h  Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "time: 10.4 s (started: 2022-11-24 06:06:09 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install --quiet sentence-transformers==2.2.2"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bmVx9L0yfgvR"
+      },
+      "source": [
+        "The code below restarts the Colab runtime; it is commented out, so uncomment it if a restart is needed after installation. Once the runtime has restarted, continue from the next section; there is no need to run this installation section again."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "uPO9U__1fZWh",
+        "outputId": "31e8d745-2a88-4bd6-f136-55cd2147ee3f"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time: 556 µs (started: 2022-11-24 06:06:20 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "# import os\n",
+        "# os.kill(os.getpid(), 9)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "POh2_zvgrk0h"
+      },
+      "source": [
+        "## Example 1"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VJP4CDBBrnNY"
+      },
+      "source": [
+        "Text taken from: \n",
+        "https://gadgets.ndtv.com/internet/news/dogecoin-price-rally-surge-elon-musk-tweet-twitter-working-developers-improve-transaction-efficiency-2442120"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "P_jlw7MUfjOp",
+        "outputId": "fd3e08da-3595-445d-941f-2c8047e34f08"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company\n",
+            "Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve\n",
+            "system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin\n",
+            "rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin.  In a recent tweet,\n",
+            "Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and\n",
+            "transaction, and hence was suspending vehicle purchases using the cryptocurrency.  A day later he again tweeted saying, “To be clear, I strongly\n",
+            "believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”.  It triggered a downward spiral for Bitcoin value but\n",
+            "the cryptocurrency has stabilised since.   A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising\n",
+            "that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency.\n",
+            "\n",
+            "\n",
+            "time: 18.8 ms (started: 2022-11-24 06:06:20 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from textwrap3 import wrap\n",
+        "\n",
+        "text = \"\"\"Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company\n",
+        "Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve\n",
+        "system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin\n",
+        "rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin.  In a recent tweet,\n",
+        "Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and\n",
+        "transaction, and hence was suspending vehicle purchases using the cryptocurrency.  A day later he again tweeted saying, “To be clear, I strongly\n",
+        "believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”.  It triggered a downward spiral for Bitcoin value but\n",
+        "the cryptocurrency has stabilised since.   A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising\n",
+        "that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency.\"\"\"\n",
+        "\n",
+        "for wrp in wrap(text, 150):\n",
+        "  print (wrp)\n",
+        "print (\"\\n\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ShPNEZz8u7s6"
+      },
+      "source": [
+        "# **Summarization with T5**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true,
+          "referenced_widgets": [
+            "c9c2e5d5824345f780befcf11d6ff946",
+            "c39b4e7e424d4f64a8fb25495f8c7026",
+            "543714c7a41a4429a57a069bc2eca1dc"
+          ]
+        },
+        "id": "H1eIU521rrn5",
+        "outputId": "d3bb1402-1cba-4881-b05f-b8e24bb19278"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "c9c2e5d5824345f780befcf11d6ff946",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.20k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "c39b4e7e424d4f64a8fb25495f8c7026",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "543714c7a41a4429a57a069bc2eca1dc",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/transformers/models/t5/tokenization_t5.py:174: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
+            "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
+            "- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
+            "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
+            "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
+            "  FutureWarning,\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time: 30.6 s (started: 2022-11-24 06:06:20 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "from transformers import T5ForConditionalGeneration,T5Tokenizer\n",
+        "summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')\n",
+        "summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')\n",
+        "\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "summary_model = summary_model.to(device)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "8mVsjMPTu-bj",
+        "outputId": "e0ac198d-4625-4f8f-a2fd-9968c0a5a72d"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time: 1.03 ms (started: 2022-11-24 06:06:50 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "import random\n",
+        "import numpy as np\n",
+        "\n",
+        "def set_seed(seed: int):\n",
+        "    \"\"\"Seed Python's `random`, NumPy and PyTorch (CPU and every CUDA device) with `seed`.\"\"\"\n",
+        "    seeders = (\n",
+        "        random.seed,\n",
+        "        np.random.seed,\n",
+        "        torch.manual_seed,\n",
+        "        torch.cuda.manual_seed_all,\n",
+        "    )\n",
+        "    for seeder in seeders:\n",
+        "        seeder(seed)\n",
+        "\n",
+        "set_seed(42)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "Gh2Xc5JRvQDp",
+        "outputId": "c1198166-2a2b-4571-b831-3ed1a8705c9e"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n",
+            "[nltk_data] Downloading package brown to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/brown.zip.\n",
+            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "original Text >>\n",
+            "Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company\n",
+            "Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve\n",
+            "system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin\n",
+            "rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin.  In a recent tweet,\n",
+            "Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and\n",
+            "transaction, and hence was suspending vehicle purchases using the cryptocurrency.  A day later he again tweeted saying, “To be clear, I strongly\n",
+            "believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”.  It triggered a downward spiral for Bitcoin value but\n",
+            "the cryptocurrency has stabilised since.   A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising\n",
+            "that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency.\n",
+            "\n",
+            "\n",
+            "Summarized Text >>\n",
+            "Musk tweeted that his electric vehicle-making company tesla will not accept payments in bitcoin because of environmental concerns. He also said that\n",
+            "the company was working with developers of dogecoin to improve system transaction efficiency. The world's largest cryptocurrency hit a two-month low,\n",
+            "while doge coin rallied by about 20 percent. Musk has in recent months often tweeted in support of crypto, but rarely for bitcoin.\n",
+            "\n",
+            "\n",
+            "time: 6.14 s (started: 2022-11-24 06:06:50 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "import nltk\n",
+        "nltk.download('punkt')\n",
+        "nltk.download('brown')\n",
+        "nltk.download('wordnet')\n",
+        "from nltk.corpus import wordnet as wn\n",
+        "from nltk.tokenize import sent_tokenize\n",
+        "\n",
+        "def postprocesstext (content):\n",
+        "  \"\"\"Upper-case the first character of every sentence in `content`.\n",
+        "\n",
+        "  Sentences are split with NLTK's `sent_tokenize`. Only the first character is\n",
+        "  touched: `str.capitalize()` would also lower-case the rest of the sentence and\n",
+        "  so destroy proper nouns and acronyms.\n",
+        "\n",
+        "  Returns the sentences concatenated, each one preceded by a single space\n",
+        "  (callers strip the result), or an empty string when no sentence is found.\n",
+        "  \"\"\"\n",
+        "  final=\"\"\n",
+        "  for sent in sent_tokenize(content):\n",
+        "    final = final + \" \" + sent[:1].upper() + sent[1:]\n",
+        "  return final\n",
+        "\n",
+        "\n",
+        "def summarizer(text,model,tokenizer):\n",
+        "  \"\"\"Summarize `text` with a seq2seq model and return the cleaned-up summary.\n",
+        "\n",
+        "  Args:\n",
+        "    text: Passage to summarize; it is stripped and its newlines become spaces.\n",
+        "    model: Model exposing `generate` and `device` (the notebook passes T5ForConditionalGeneration).\n",
+        "    tokenizer: Tokenizer matching `model`; the input is truncated to 512 tokens.\n",
+        "\n",
+        "  Returns:\n",
+        "    The single beam-search result, run through `postprocesstext` and stripped.\n",
+        "  \"\"\"\n",
+        "  text = text.strip().replace(\"\\n\",\" \")\n",
+        "  text = \"summarize: \"+text\n",
+        "  max_len = 512\n",
+        "  # Call the tokenizer directly instead of encode_plus(..., pad_to_max_length=False):\n",
+        "  # that flag is deprecated and a single sequence needs no padding. The tensors are\n",
+        "  # moved to the model's own device rather than to a notebook-level `device` global.\n",
+        "  encoding = tokenizer(text,max_length=max_len,truncation=True,return_tensors=\"pt\").to(model.device)\n",
+        "\n",
+        "  outs = model.generate(input_ids=encoding[\"input_ids\"],\n",
+        "                        attention_mask=encoding[\"attention_mask\"],\n",
+        "                        early_stopping=True,\n",
+        "                        num_beams=3,\n",
+        "                        num_return_sequences=1,\n",
+        "                        no_repeat_ngram_size=2,\n",
+        "                        min_length = 75,\n",
+        "                        max_length=300)\n",
+        "\n",
+        "  dec = [tokenizer.decode(ids,skip_special_tokens=True) for ids in outs]\n",
+        "  # num_return_sequences=1, so the first decoded sequence is the only one.\n",
+        "  summary = postprocesstext(dec[0])\n",
+        "  return summary.strip()\n",
+        "\n",
+        "\n",
+        "summarized_text = summarizer(text,summary_model,summary_tokenizer)\n",
+        "\n",
+        "\n",
+        "print (\"\\noriginal Text >>\")\n",
+        "for wrp in wrap(text, 150):\n",
+        "  print (wrp)\n",
+        "print (\"\\n\")\n",
+        "print (\"Summarized Text >>\")\n",
+        "for wrp in wrap(summarized_text, 150):\n",
+        "  print (wrp)\n",
+        "print (\"\\n\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JvBHu5eXv_wp"
+      },
+      "source": [
+        "# **Answer Span Extraction (Keywords and Noun Phrases)**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "84DxJGFn4MfD",
+        "outputId": "27c39b58-dcaa-4b92-ff9e-0da292be34d9"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time: 8.23 s (started: 2022-11-24 06:06:56 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "import nltk\n",
+        "nltk.download('stopwords')\n",
+        "from nltk.corpus import stopwords\n",
+        "import string\n",
+        "import pke\n",
+        "import traceback\n",
+        "\n",
+        "def get_nouns_multipartite(content):\n",
+        "    \"\"\"Return up to 15 noun / proper-noun keyphrases of `content`, best ranked first.\n",
+        "\n",
+        "    Candidates are ranked with pke's unsupervised MultipartiteRank. Extraction is\n",
+        "    best-effort: if anything fails the traceback is printed and [] is returned.\n",
+        "    \"\"\"\n",
+        "    try:\n",
+        "        extractor = pke.unsupervised.MultipartiteRank()\n",
+        "        extractor.load_document(input=content,language='en')\n",
+        "        # Only nouns and proper nouns are kept as candidates. The custom stop list\n",
+        "        # that used to be built here was never passed to pke, so it has been removed.\n",
+        "        extractor.candidate_selection(pos={'PROPN','NOUN'})\n",
+        "        # Build the Multipartite graph and rank candidates using random walk;\n",
+        "        # alpha controls the weight adjustment mechanism, see TopicRank for\n",
+        "        # threshold/method parameters.\n",
+        "        extractor.candidate_weighting(alpha=1.1,\n",
+        "                                      threshold=0.75,\n",
+        "                                      method='average')\n",
+        "        # Keep only the first element (the phrase) of each ranked item.\n",
+        "        return [val[0] for val in extractor.get_n_best(n=15)]\n",
+        "    except Exception:\n",
+        "        # Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.\n",
+        "        traceback.print_exc()\n",
+        "        return []"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "E8LNRzDVwDbp",
+        "outputId": "c2ae2bda-8250-4e82-ed71-d10568251e68"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "keywords unsummarized:  ['elon musk', 'dogecoin', 'bitcoin', 'statements', 'use', 'cryptocurrency', 'tesla', 'tweets', 'musk', 'system transaction efficiency', 'currency market', 'world', 'price', 'payments', 'company']\n",
+            "keywords_found in summarized:  ['world', 'dogecoin', 'musk', 'cryptocurrency', 'system transaction efficiency', 'payments', 'company', 'bitcoin', 'tesla']\n",
+            "['dogecoin', 'bitcoin', 'cryptocurrency', 'tesla', 'musk', 'system transaction efficiency', 'world', 'payments', 'company']\n",
+            "time: 785 ms (started: 2022-11-24 06:07:05 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from flashtext import KeywordProcessor\n",
+        "\n",
+        "\n",
+        "def get_keywords(originaltext,summarytext):\n",
+        "  \"\"\"Return up to 10 keyphrases of `originaltext` that also occur in `summarytext`.\n",
+        "\n",
+        "  The keyphrases come from `get_nouns_multipartite` and keep its ranking order;\n",
+        "  a flashtext KeywordProcessor is used to spot them in the summary.\n",
+        "  \"\"\"\n",
+        "  candidates = get_nouns_multipartite(originaltext)\n",
+        "  print (\"keywords unsummarized: \",candidates)\n",
+        "\n",
+        "  matcher = KeywordProcessor()\n",
+        "  for phrase in candidates:\n",
+        "    matcher.add_keyword(phrase)\n",
+        "\n",
+        "  # De-duplicate the hits found in the summary.\n",
+        "  present = list(set(matcher.extract_keywords(summarytext)))\n",
+        "  print (\"keywords_found in summarized: \",present)\n",
+        "\n",
+        "  shared = [phrase for phrase in candidates if phrase in present]\n",
+        "  return shared[:10]\n",
+        "\n",
+        "\n",
+        "imp_keywords = get_keywords(text,summarized_text)\n",
+        "print (imp_keywords)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true,
+          "referenced_widgets": [
+            "24334ddee9f74d3c82a575f0edbc8720",
+            "c884156893794fa6bad4171a9aacbd2f",
+            "2f0d8bf7b60a423383ae6ab2469106eb",
+            "70c932999b0f4dcda0525b9a81ceabf3",
+            "7897cc69283d475694042ed9cbc6e92c"
+          ]
+        },
+        "id": "m44RM44OwGzR",
+        "outputId": "ca45cae8-a813-4425-9adc-3d8e0f886324"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "24334ddee9f74d3c82a575f0edbc8720",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.21k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "c884156893794fa6bad4171a9aacbd2f",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "2f0d8bf7b60a423383ae6ab2469106eb",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "70c932999b0f4dcda0525b9a81ceabf3",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.79k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "7897cc69283d475694042ed9cbc6e92c",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.86k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time: 35.2 s (started: 2022-11-24 06:07:05 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')\n",
+        "question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')\n",
+        "question_model = question_model.to(device)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "1usLabLu5DUB",
+        "outputId": "69d364b6-ee46-46d2-ee22-19b1fe5b2411"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Musk tweeted that his electric vehicle-making company tesla will not accept payments in bitcoin because of environmental concerns. He also said that\n",
+            "the company was working with developers of dogecoin to improve system transaction efficiency. The world's largest cryptocurrency hit a two-month low,\n",
+            "while doge coin rallied by about 20 percent. Musk has in recent months often tweeted in support of crypto, but rarely for bitcoin.\n",
+            "\n",
+            "\n",
+            "What did Musk say he was working with to improve system transaction efficiency?\n",
+            "Dogecoin\n",
+            "\n",
+            "\n",
+            "What cryptocurrency did Musk rarely tweet about?\n",
+            "Bitcoin\n",
+            "\n",
+            "\n",
+            "What has Musk often tweeted in support of?\n",
+            "Cryptocurrency\n",
+            "\n",
+            "\n",
+            "What company did Musk say would not accept bitcoin payments?\n",
+            "Tesla\n",
+            "\n",
+            "\n",
+            "Who said tesla would not accept bitcoin payments?\n",
+            "Musk\n",
+            "\n",
+            "\n",
+            "What did Musk want to improve with dogecoin?\n",
+            "System transaction efficiency\n",
+            "\n",
+            "\n",
+            "What is the largest cryptocurrency?\n",
+            "World\n",
+            "\n",
+            "\n",
+            "What did Musk say his company would not accept in bitcoin?\n",
+            "Payments\n",
+            "\n",
+            "\n",
+            "What did Musk say was working with dogecoin developers?\n",
+            "Company\n",
+            "\n",
+            "\n",
+            "time: 2.78 s (started: 2022-11-24 06:07:41 +00:00)\n"
+          ]
+        }
+      ],
+      "source": [
+        "def get_question(context,answer,model,tokenizer):\n",
+        "  \"\"\"Generate one question about `context` whose answer is `answer`.\n",
+        "\n",
+        "  Args:\n",
+        "    context: Passage the question is asked about.\n",
+        "    answer: Answer span the question should lead to.\n",
+        "    model: Model exposing `generate` and `device` (the notebook passes T5ForConditionalGeneration).\n",
+        "    tokenizer: Tokenizer matching `model`; the prompt is truncated to 384 tokens.\n",
+        "\n",
+        "  Returns:\n",
+        "    The generated question with any \"question:\" marker removed and whitespace stripped.\n",
+        "  \"\"\"\n",
+        "  text = \"context: {} answer: {}\".format(context,answer)\n",
+        "  # Call the tokenizer directly instead of encode_plus(..., pad_to_max_length=False):\n",
+        "  # that flag is deprecated and a single sequence needs no padding. The tensors are\n",
+        "  # moved to the model's own device rather than to a notebook-level `device` global.\n",
+        "  encoding = tokenizer(text,max_length=384,truncation=True,return_tensors=\"pt\").to(model.device)\n",
+        "\n",
+        "  outs = model.generate(input_ids=encoding[\"input_ids\"],\n",
+        "                        attention_mask=encoding[\"attention_mask\"],\n",
+        "                        early_stopping=True,\n",
+        "                        num_beams=5,\n",
+        "                        num_return_sequences=1,\n",
+        "                        no_repeat_ngram_size=2,\n",
+        "                        max_length=72)\n",
+        "\n",
+        "  dec = [tokenizer.decode(ids,skip_special_tokens=True) for ids in outs]\n",
+        "\n",
+        "  Question = dec[0].replace(\"question:\",\"\")\n",
+        "  return Question.strip()\n",
+        "\n",
+        "\n",
+        "\n",
+        "for wrp in wrap(summarized_text, 150):\n",
+        "  print (wrp)\n",
+        "print (\"\\n\")\n",
+        "\n",
+        "for answer in imp_keywords:\n",
+        "  ques = get_question(summarized_text,answer,question_model,question_tokenizer)\n",
+        "  print (ques)\n",
+        "  print (answer.capitalize())\n",
+        "  print (\"\\n\")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "4kEuH__G6oDK",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 740
+        },
+        "outputId": "8a8b7911-1e79-403e-9601-6f7221fc8bd7"
+      },
+      "outputs": [
+        {
+          "metadata": {
+            "tags": null
+          },
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/gradio/inputs.py:27: UserWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
+            "  \"Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\",\n",
+            "/usr/local/lib/python3.7/dist-packages/gradio/deprecation.py:40: UserWarning: `optional` parameter is deprecated, and it has no effect\n",
+            "  warnings.warn(value)\n",
+            "/usr/local/lib/python3.7/dist-packages/gradio/deprecation.py:40: UserWarning: `numeric` parameter is deprecated, and it has no effect\n",
+            "  warnings.warn(value)\n"
+          ]
+        },
+        {
+          "metadata": {
+            "tags": null
+          },
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n",
+            "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
+            "\n",
+            "To create a public link, set `share=True` in `launch()`.\n"
+          ]
+        },
+        {
+          "data": {
+            "application/javascript": [
+              "(async (port, path, width, height, cache, element) => {\n",
+              "                        if (!google.colab.kernel.accessAllowed && !cache) {\n",
+              "                            return;\n",
+              "                        }\n",
+              "                        element.appendChild(document.createTextNode(''));\n",
+              "                        const url = await google.colab.kernel.proxyPort(port, {cache});\n",
+              "\n",
+              "                        const external_link = document.createElement('div');\n",
+              "                        external_link.innerHTML = `\n",
+              "                            <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
+              "                                Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
+              "                                    https://localhost:${port}${path}\n",
+              "                                </a>\n",
+              "                            </div>\n",
+              "                        `;\n",
+              "                        element.appendChild(external_link);\n",
+              "\n",
+              "                        const iframe = document.createElement('iframe');\n",
+              "                        iframe.src = new URL(path, url).toString();\n",
+              "                        iframe.height = height;\n",
+              "                        iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
+              "                        iframe.width = width;\n",
+              "                        iframe.style.border = 0;\n",
+              "                        element.appendChild(iframe);\n",
+              "                    })(7860, \"/\", \"100%\", 500, false, window.element)"
+            ],
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        }
+      ],
+      "source": [
+        "import gradio as gr\n",
+        "\n",
+        "context = gr.inputs.Textbox(lines=10, placeholder=\"Enter paragraph/content here...\")\n",
+        "output = gr.outputs.HTML(  label=\"Question and Answers\")\n",
+        "\n",
+        "\n",
+        "def generate_question(context):\n",
+        "  \"\"\"Build the HTML shown by the Gradio demo for the passage `context`.\n",
+        "\n",
+        "  The passage is summarized, the keyphrases shared by passage and summary become\n",
+        "  the answers, and one question is generated per answer. The result lists every\n",
+        "  question/answer pair and ends with the summary, with the answers in bold.\n",
+        "\n",
+        "  All generated text is HTML-escaped because `context` is free-form user input.\n",
+        "  \"\"\"\n",
+        "  import html\n",
+        "  import re\n",
+        "\n",
+        "  summary_text = summarizer(context,summary_model,summary_tokenizer)\n",
+        "  for wrp in wrap(summary_text, 150):\n",
+        "    print (wrp)\n",
+        "  # Not named `np`: that would shadow the numpy alias inside this function.\n",
+        "  answers = get_keywords(context,summary_text)\n",
+        "  print (\"\\n\\nNoun phrases\",answers)\n",
+        "\n",
+        "  parts = []\n",
+        "  for answer in answers:\n",
+        "    ques = get_question(summary_text,answer,question_model,question_tokenizer)\n",
+        "    parts.append(\"<b style='color:blue;'>\" + html.escape(ques) + \"</b>\")\n",
+        "    parts.append(\"<br>\")\n",
+        "    parts.append(\"<b style='color:green;'>\" + \"Ans: \" + html.escape(answer.capitalize()) + \"</b>\")\n",
+        "    parts.append(\"<br>\")\n",
+        "\n",
+        "  # Highlight in a single pass: splitting on a capturing group leaves the matched\n",
+        "  # answers at the odd indices, so an inserted <b> tag is never re-scanned or nested.\n",
+        "  # Longer answers come first so that 'elon musk' wins over 'musk'.\n",
+        "  alternatives = sorted((re.escape(a) for a in answers if a), key=len, reverse=True)\n",
+        "  if alternatives:\n",
+        "    pieces = re.split(\"(\" + \"|\".join(alternatives) + \")\", summary_text, flags=re.IGNORECASE)\n",
+        "  else:\n",
+        "    pieces = [summary_text]\n",
+        "  summary = \"\".join(\"<b>\" + html.escape(p) + \"</b>\" if i % 2 else html.escape(p)\n",
+        "                    for i, p in enumerate(pieces))\n",
+        "  parts.append(\"<p>Summary: \" + summary + \"</p>\")\n",
+        "\n",
+        "  return \"\".join(parts)\n",
+        "\n",
+        "iface = gr.Interface(\n",
+        "  fn=generate_question, \n",
+        "  inputs=context, \n",
+        "  outputs=output)\n",
+        "iface.launch(debug=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dNmJx7QNfLcy"
+      },
+      "source": [
+        "# **Filter keywords with Maximum Marginal Relevance**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "zPBj-IUL7L8x"
+      },
+      "outputs": [],
+      "source": [
+        "!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz\n",
+        "!tar -xvf  s2v_reddit_2015_md.tar.gz"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "s5RI3fk9fOOz"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "from sense2vec import Sense2Vec\n",
+        "s2v = Sense2Vec().from_disk('s2v_old')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "J2y3unpvfo1y"
+      },
+      "outputs": [],
+      "source": [
+        "from sentence_transformers import SentenceTransformer\n",
+        "# paraphrase-distilroberta-base-v1\n",
+        "sentence_transformer_model = SentenceTransformer('msmarco-distilbert-base-v3')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "pvfmhuWVfsJb"
+      },
+      "outputs": [],
+      "source": [
+        "from similarity.normalized_levenshtein import NormalizedLevenshtein\n",
+        "normalized_levenshtein = NormalizedLevenshtein()\n",
+        "\n",
+        "def filter_same_sense_words(original,wordlist):\n",
+        "  \"\"\"Keep the entries of `wordlist` whose sense tag equals that of `original`.\n",
+        "\n",
+        "  `original` is a 'text|SENSE' key and each entry of `wordlist` is a sequence whose\n",
+        "  first item is such a key. The sense of `original` is printed. Kept entries are\n",
+        "  returned as title-cased text with underscores turned into spaces.\n",
+        "  \"\"\"\n",
+        "  base_sense = original.split('|')[1]\n",
+        "  print (base_sense)\n",
+        "  kept = []\n",
+        "  for entry in wordlist:\n",
+        "    parts = entry[0].split('|')\n",
+        "    if parts[1] != base_sense:\n",
+        "      continue\n",
+        "    kept.append(parts[0].replace(\"_\", \" \").title().strip())\n",
+        "  return kept\n",
+        "\n",
+        "def get_highest_similarity_score(wordlist,wrd):\n",
+        "  \"\"\"Return the largest normalized-Levenshtein similarity between `wrd` and any entry of `wordlist`.\n",
+        "\n",
+        "  The comparison is case-insensitive. Raises ValueError when `wordlist` is empty.\n",
+        "  \"\"\"\n",
+        "  return max(\n",
+        "    normalized_levenshtein.similarity(known.lower(),wrd.lower())\n",
+        "    for known in wordlist\n",
+        "  )\n",
+        "\n",
+        "def sense2vec_get_words(word,s2v,topn,question):\n",
+        "    \"\"\"Return sense2vec neighbours of `word` that are usable as distractors.\n",
+        "\n",
+        "    The `topn` nearest neighbours of the best sense of `word` are fetched and reduced\n",
+        "    to those sharing that sense. A neighbour is then dropped when its normalized\n",
+        "    Levenshtein similarity to `word` or to an already kept neighbour reaches 0.6, or\n",
+        "    when it is one of the whitespace-separated tokens of `question`.\n",
+        "\n",
+        "    The lookup is best-effort: if sense2vec has no sense for `word` or the lookup\n",
+        "    fails, an empty list is returned.\n",
+        "    \"\"\"\n",
+        "    output = []\n",
+        "    print (\"word \",word)\n",
+        "    try:\n",
+        "      sense = s2v.get_best_sense(word, senses= [\"NOUN\", \"PERSON\",\"PRODUCT\",\"LOC\",\"ORG\",\"EVENT\",\"NORP\",\"WORK OF ART\",\"FAC\",\"GPE\",\"NUM\",\"FACILITY\"])\n",
+        "      # get_best_sense is documented to return None when no sense matches;\n",
+        "      # handle that explicitly instead of letting most_similar raise.\n",
+        "      if sense is not None:\n",
+        "        most_similar = s2v.most_similar(sense, n=topn)\n",
+        "        output = filter_same_sense_words(sense,most_similar)\n",
+        "        print (\"Similar \",output)\n",
+        "    except Exception:\n",
+        "      # Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.\n",
+        "      output =[]\n",
+        "\n",
+        "    threshold = 0.6\n",
+        "    # `final` starts with the word itself so near-duplicates of it are rejected too.\n",
+        "    final=[word]\n",
+        "    checklist =question.split()\n",
+        "    for x in output:\n",
+        "      if get_highest_similarity_score(final,x)<threshold and x not in final and x not in checklist:\n",
+        "        final.append(x)\n",
+        "    \n",
+        "    return final[1:]\n",
+        "\n",
+        "def mmr(doc_embedding, word_embeddings, words, top_n, lambda_param):\n",
+        "    \"\"\"Select entries of `words` by Maximal Marginal Relevance.\n",
+        "\n",
+        "    The word closest to the document is picked first; every further pick maximises\n",
+        "    lambda_param * similarity(word, document)\n",
+        "    - (1 - lambda_param) * max similarity(word, already picked words).\n",
+        "\n",
+        "    Args:\n",
+        "      doc_embedding: 2-D embedding array of the document (assumed to hold one row - TODO confirm).\n",
+        "      word_embeddings: 2-D embedding array, one row per entry of `words`.\n",
+        "      words: Candidate strings, aligned with `word_embeddings`.\n",
+        "      top_n: Number of words wanted; at most len(words) are returned.\n",
+        "      lambda_param: Weight of relevance against diversity.\n",
+        "\n",
+        "    Returns:\n",
+        "      The selected words in selection order, or [] when `words` is empty.\n",
+        "    \"\"\"\n",
+        "    # Imported here because the notebook only imports it in a later cell.\n",
+        "    from sklearn.metrics.pairwise import cosine_similarity\n",
+        "\n",
+        "    if len(words) == 0:\n",
+        "        return []\n",
+        "\n",
+        "    # Extract similarity within words, and between words and the document\n",
+        "    word_doc_similarity = cosine_similarity(word_embeddings, doc_embedding)\n",
+        "    word_similarity = cosine_similarity(word_embeddings)\n",
+        "\n",
+        "    # Initialize candidates and already choose best keyword/keyphrase\n",
+        "    keywords_idx = [np.argmax(word_doc_similarity)]\n",
+        "    candidates_idx = [i for i in range(len(words)) if i != keywords_idx[0]]\n",
+        "\n",
+        "    # Never ask for more picks than there are words: argmax over an empty\n",
+        "    # candidate set would raise.\n",
+        "    for _ in range(min(top_n, len(words)) - 1):\n",
+        "        # Extract similarities within candidates and\n",
+        "        # between candidates and selected keywords/phrases\n",
+        "        candidate_similarities = word_doc_similarity[candidates_idx, :]\n",
+        "        target_similarities = np.max(word_similarity[candidates_idx][:, keywords_idx], axis=1)\n",
+        "\n",
+        "        # Calculate MMR (named mmr_scores so the function name is not shadowed)\n",
+        "        mmr_scores = (lambda_param) * candidate_similarities - (1-lambda_param) * target_similarities.reshape(-1, 1)\n",
+        "        mmr_idx = candidates_idx[np.argmax(mmr_scores)]\n",
+        "\n",
+        "        # Update keywords & candidates\n",
+        "        keywords_idx.append(mmr_idx)\n",
+        "        candidates_idx.remove(mmr_idx)\n",
+        "\n",
+        "    return [words[idx] for idx in keywords_idx]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "UCN0-kXEfxwy"
+      },
+      "outputs": [],
+      "source": [
+        "from collections import OrderedDict\n",
+        "from sklearn.metrics.pairwise import cosine_similarity\n",
+        "import nltk\n",
+        "nltk.download('omw-1.4')\n",
+        "\n",
+        "def get_distractors_wordnet(word):\n",
+        "    \"\"\"Return WordNet co-hyponyms of `word` to use as distractors.\n",
+        "\n",
+        "    The first noun synset of `word` is taken, then the first lemma of every hyponym\n",
+        "    of that synset's first hypernym. `word` itself is left out. Names are returned\n",
+        "    with spaces instead of underscores and every word capitalized, without duplicates.\n",
+        "\n",
+        "    Returns [] when `word` has no noun synset or the synset has no hypernym. The\n",
+        "    lookup is best-effort: on an error a message is printed and whatever was\n",
+        "    collected so far is returned.\n",
+        "    \"\"\"\n",
+        "    distractors=[]\n",
+        "    try:\n",
+        "      # WordNet lemma names are underscore-joined, so normalise BEFORE the lookup\n",
+        "      # and use the same form for the self-exclusion test below.\n",
+        "      lookup = word.lower().strip().replace(\" \",\"_\")\n",
+        "      synsets = wn.synsets(lookup,'n')\n",
+        "      if len(synsets) == 0:\n",
+        "          print (\"Wordnet distractors not found\")\n",
+        "          return distractors\n",
+        "      hypernym = synsets[0].hypernyms()\n",
+        "      if len(hypernym) == 0: \n",
+        "          return distractors\n",
+        "      for item in hypernym[0].hyponyms():\n",
+        "          name = item.lemmas()[0].name()\n",
+        "          # Case-insensitive, underscore form against underscore form.\n",
+        "          if name.lower() == lookup:\n",
+        "              continue\n",
+        "          name = \" \".join(w.capitalize() for w in name.replace(\"_\",\" \").split())\n",
+        "          if name not in distractors:\n",
+        "              distractors.append(name)\n",
+        "    except Exception:\n",
+        "      # Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.\n",
+        "      print (\"Wordnet distractors not found\")\n",
+        "    return distractors\n",
+        "\n",
+        "def get_distractors (word,origsentence,sense2vecmodel,sentencemodel,top_n,lambdaval):\n",
+        "  \"\"\"Return at most five distractors for the answer `word` of the question `origsentence`.\n",
+        "\n",
+        "  The sense2vec neighbours of `word` are pooled with the answer itself, ranked\n",
+        "  with `mmr` against the embedding of the question followed by the answer, and\n",
+        "  the answer is dropped from the result. Returns [] when sense2vec yields nothing.\n",
+        "  \"\"\"\n",
+        "  candidates = sense2vec_get_words(word,sense2vecmodel,top_n,origsentence)\n",
+        "  print (\"distractors \",candidates)\n",
+        "  if len(candidates) == 0:\n",
+        "    return candidates\n",
+        "\n",
+        "  answer = word.capitalize()\n",
+        "  pool = [answer] + candidates\n",
+        "\n",
+        "  query_embedding = sentencemodel.encode([origsentence + \" \" + answer])\n",
+        "  pool_embeddings = sentencemodel.encode(pool)\n",
+        "\n",
+        "  picked = mmr(query_embedding, pool_embeddings, pool, min(len(pool),5), lambdaval)\n",
+        "  # Drop the answer itself; only the distractors are returned.\n",
+        "  return [wrd.capitalize() for wrd in picked if wrd.lower() != word.lower()]\n",
+        "\n",
+        "# Question/answer pair used to try out get_distractors.\n",
+        "sent = \"What cryptocurrency did Musk rarely tweet about?\"\n",
+        "keyword = \"Bitcoin\"\n",
+        "\n",
+        "# Alternative pairs; uncomment one pair to use it instead.\n",
+        "# sent = \"What did Musk say he was working with to improve system transaction efficiency?\"\n",
+        "# keyword= \"Dogecoin\"\n",
+        "\n",
+        "# sent = \"What company did Musk say would not accept bitcoin payments?\"\n",
+        "# keyword= \"Tesla\"\n",
+        "\n",
+        "# sent = \"What has Musk often tweeted in support of?\"\n",
+        "# keyword = \"Cryptocurrency\"\n",
+        "\n",
+        "print (get_distractors(\n",
+        "  word=keyword,\n",
+        "  origsentence=sent,\n",
+        "  sense2vecmodel=s2v,\n",
+        "  sentencemodel=sentence_transformer_model,\n",
+        "  top_n=40,\n",
+        "  lambdaval=0.2,\n",
+        "))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "s2FX-mGdf08p"
+      },
+      "outputs": [],
+      "source": [
+        "get_distractors_wordnet('lion')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "vgvffLecf4Cq"
+      },
+      "outputs": [],
+      "source": [
+        "import gradio as gr\n",
+        "\n",
+        "# Widgets for the demo: a multi-line textbox for the passage, an HTML pane for\n",
+        "# the result, and a radio button that selects the distractor source.\n",
+        "# NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy namespaces -\n",
+        "# confirm they still exist in the installed Gradio version.\n",
+        "context = gr.inputs.Textbox(\n",
+        "  lines=10,\n",
+        "  placeholder=\"Enter paragraph/content here...\",\n",
+        ")\n",
+        "output = gr.outputs.HTML(label=\"Question and Answers\")\n",
+        "radiobutton = gr.inputs.Radio(choices=[\"Wordnet\", \"Sense2Vec\"])\n",
+        "\n",
+        "def generate_question(context,radiobutton):\n",
+        "  \"\"\"Build the HTML shown by the demo: questions, answers, distractors, summary.\n",
+        "\n",
+        "  The passage is passed to `summarizer`, keywords are taken from\n",
+        "  `get_keywords`, and for every keyword a question is requested from\n",
+        "  `get_question` together with up to four distractors. All of these helpers\n",
+        "  and the models they receive are expected to exist at notebook level.\n",
+        "\n",
+        "  Args:\n",
+        "    context: the passage entered in the textbox.\n",
+        "    radiobutton: \"Wordnet\" uses `get_distractors_wordnet`; any other value\n",
+        "      (including no selection) uses the Sense2Vec based `get_distractors`.\n",
+        "\n",
+        "  Returns:\n",
+        "    An HTML string with one question/answer/distractor group per keyword,\n",
+        "    followed by the summary in which the keywords are shown in bold.\n",
+        "  \"\"\"\n",
+        "  # Imported here so the function does not depend on another cell's imports.\n",
+        "  import html\n",
+        "  import re\n",
+        "\n",
+        "  summary_text = summarizer(context,summary_model,summary_tokenizer)\n",
+        "  for wrp in wrap(summary_text, 100):\n",
+        "    print (wrp)\n",
+        "  # Named `keywords` rather than `np` so the usual numpy alias is not shadowed.\n",
+        "  keywords = get_keywords(context,summary_text)\n",
+        "  print (\"\\n\\nNoun phrases\",keywords)\n",
+        "\n",
+        "  parts = []\n",
+        "  for answer in keywords:\n",
+        "    ques = get_question(summary_text,answer,question_model,question_tokenizer)\n",
+        "    if radiobutton==\"Wordnet\":\n",
+        "      distractors = get_distractors_wordnet(answer)\n",
+        "    else:\n",
+        "      distractors = get_distractors(answer.capitalize(),ques,s2v,sentence_transformer_model,40,0.2)\n",
+        "    # Generated and user-derived text is escaped before it is placed in markup,\n",
+        "    # so characters such as < or & cannot break or inject HTML.\n",
+        "    parts.append(\"<b style='color:blue;'>\" + html.escape(ques) + \"</b>\")\n",
+        "    parts.append(\"<br>\")\n",
+        "    parts.append(\"<b style='color:green;'>\" + \"Ans: \" + html.escape(answer.capitalize()) + \"</b>\" + \"<br>\")\n",
+        "    # Slicing an empty list yields nothing, so no separate emptiness check.\n",
+        "    for distractor in distractors[:4]:\n",
+        "      parts.append(\"<b style='color:brown;'>\" + html.escape(distractor) + \"</b>\" + \"<br>\")\n",
+        "    parts.append(\"<br>\")\n",
+        "\n",
+        "  # Collect each keyword as written and capitalized, without duplicates and\n",
+        "  # without empty strings (an empty pattern would match at every position).\n",
+        "  variants = []\n",
+        "  for answer in keywords:\n",
+        "    for form in (answer, answer.capitalize()):\n",
+        "      escaped_form = html.escape(form)\n",
+        "      if escaped_form and escaped_form not in variants:\n",
+        "        variants.append(escaped_form)\n",
+        "  # Longest first, so a longer keyword wins over one that is its prefix.\n",
+        "  variants.sort(key=len, reverse=True)\n",
+        "\n",
+        "  highlighted = html.escape(summary_text)\n",
+        "  if variants:\n",
+        "    # One regex pass: markup inserted for one keyword is never re-scanned, so\n",
+        "    # nothing gets wrapped twice and no stray line breaks are added.\n",
+        "    pattern = re.compile(\"|\".join(re.escape(v) for v in variants))\n",
+        "    highlighted = pattern.sub(lambda match: \"<b>\" + match.group(0) + \"</b>\", highlighted)\n",
+        "\n",
+        "  parts.append(\"<p>\" + \"Summary: \" + highlighted + \"</p>\")\n",
+        "  parts.append(\"<br>\")\n",
+        "  return \"\".join(parts)\n",
+        "\n",
+        "\n",
+        "# Connect generate_question to the two input widgets and the HTML output,\n",
+        "# then start the app with debug=True.\n",
+        "iface = gr.Interface(fn=generate_question, inputs=[context,radiobutton], outputs=output)\n",
+        "iface.launch(debug=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "EhKGhA1ff7Hi"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "import requests\n",
+        "\n",
+        "# Send one sample question to the RapidAPI question-answer endpoint and print\n",
+        "# the raw response body.\n",
+        "url = \"https://question-answer.p.rapidapi.com/question-answer\"\n",
+        "\n",
+        "querystring = {\"question\":\"What are some tips to starting up your own small business?\"}\n",
+        "\n",
+        "# The key is read from the RAPIDAPI_KEY environment variable so a real key never\n",
+        "# has to be saved in the notebook; the placeholder is used when it is unset.\n",
+        "headers = {\n",
+        "  \"X-RapidAPI-Key\": os.environ.get(\"RAPIDAPI_KEY\", \"SIGN-UP-FOR-KEY\"),\n",
+        "  \"X-RapidAPI-Host\": \"question-answer.p.rapidapi.com\"\n",
+        "}\n",
+        "\n",
+        "# Without a timeout the cell would wait indefinitely on an unresponsive server.\n",
+        "response = requests.get(url, headers=headers, params=querystring, timeout=30)\n",
+        "\n",
+        "print(response.text)"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "provenance": []
+    },
+    "gpuClass": "standard",
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}