{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Summarisation.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyOitTe/P44ZGiLTVtPjOQvz",
      "include_colab_link": true
    },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": { "id": "view-in-github", "colab_type": "text" },
      "source": [ "\"Open" ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Summarisation\n",
        "\n",
        "Sets up the `summarization` project in Colab: clones the repository from DagsHub, ",
        "creates the data and model directories, installs the requirements and runs the ",
        "project through its Makefile targets."
      ]
    },
    {
      "cell_type": "code",
      "metadata": { "id": "DHzgmow9DZK7" },
      "source": [
        "# Clone only when no checkout exists yet. The absolute target keeps a re-run\n",
        "# (after the chdir below) from nesting a second clone inside the repository.\n",
        "!test -d /content/summarization/.git || git clone https://dagshub.com/gagan3012/summarization.git /content/summarization"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "a_gw9JaVEslh" },
      "source": [
        "# The git and make commands in the following cells run relative to the repository root.\n",
        "import os\n",
        "os.chdir('/content/summarization')"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": { "base_uri": "https://localhost:8080/" },
        "id": "dcozx2lVFOd-",
        "outputId": "7b4324b3-f5ae-497c-eea2-64b89b75e6e8"
      },
      "source": [ "!git pull" ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [ "Already up to date.\n" ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": { "base_uri": "https://localhost:8080/" },
        "id": "fcJkF3ZtEyVm",
        "outputId": "32956dfe-d46e-4d6d-91d0-afdbf660d4f1"
      },
      "source": [ "!make dirs" ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [ "mkdir -p data/raw data/processed models\n" ],
          "name": "stdout"
        }
      ]
    },
    {
"cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6jClm3fBE9XL", "outputId": "6a68e85a-ea79-474c-9d11-3e6c907a4ae1" }, "source": [ "!make requirements" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "python -m pip install -U pip setuptools wheel\n", "Requirement already satisfied: pip in /usr/local/lib/python3.7/dist-packages (21.1.3)\n", "Collecting pip\n", " Downloading pip-21.2.1-py3-none-any.whl (1.6 MB)\n", "\u001b[K |████████████████████████████████| 1.6 MB 8.2 MB/s \n", "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (57.2.0)\n", "Collecting setuptools\n", " Downloading setuptools-57.4.0-py3-none-any.whl (819 kB)\n", "\u001b[K |████████████████████████████████| 819 kB 58.0 MB/s \n", "\u001b[?25hRequirement already satisfied: wheel in /usr/local/lib/python3.7/dist-packages (0.36.2)\n", "Installing collected packages: setuptools, pip\n", " Attempting uninstall: setuptools\n", " Found existing installation: setuptools 57.2.0\n", " Uninstalling setuptools-57.2.0:\n", " Successfully uninstalled setuptools-57.2.0\n", " Attempting uninstall: pip\n", " Found existing installation: pip 21.1.3\n", " Uninstalling pip-21.1.3:\n", " Successfully uninstalled pip-21.1.3\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n", "Successfully installed pip-21.2.1 setuptools-57.4.0\n", "python -m pip install -r requirements.txt\n", "\u001b[33mWARNING: Value for scheme.platlib does not match. Please report this to \n", "distutils: /usr/local/lib/python3.7/dist-packages\n", "sysconfig: /usr/lib/python3.7/site-packages\u001b[0m\n", "\u001b[33mWARNING: Value for scheme.purelib does not match. 
Please report this to \n", "distutils: /usr/local/lib/python3.7/dist-packages\n", "sysconfig: /usr/lib/python3.7/site-packages\u001b[0m\n", "\u001b[33mWARNING: Value for scheme.headers does not match. Please report this to \n", "distutils: /usr/local/include/python3.7/UNKNOWN\n", "sysconfig: /usr/include/python3.7m/UNKNOWN\u001b[0m\n", "\u001b[33mWARNING: Value for scheme.scripts does not match. Please report this to \n", "distutils: /usr/local/bin\n", "sysconfig: /usr/bin\u001b[0m\n", "\u001b[33mWARNING: Value for scheme.data does not match. Please report this to \n", "distutils: /usr/local\n", "sysconfig: /usr\u001b[0m\n", "\u001b[33mWARNING: Additional context:\n", "user = False\n", "home = None\n", "root = None\n", "prefix = None\u001b[0m\n", "Collecting numpy==1.21.1\n", " Downloading numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)\n", "\u001b[K |████████████████████████████████| 15.7 MB 75 kB/s \n", "\u001b[?25hCollecting datasets==1.10.2\n", " Downloading datasets-1.10.2-py3-none-any.whl (542 kB)\n", "\u001b[K |████████████████████████████████| 542 kB 66.2 MB/s \n", "\u001b[?25hCollecting pytorch_lightning==1.3.5\n", " Downloading pytorch_lightning-1.3.5-py3-none-any.whl (808 kB)\n", "\u001b[K |████████████████████████████████| 808 kB 59.0 MB/s \n", "\u001b[?25hCollecting transformers==4.9.0\n", " Downloading transformers-4.9.0-py3-none-any.whl (2.6 MB)\n", "\u001b[K |████████████████████████████████| 2.6 MB 36.6 MB/s \n", "\u001b[?25hRequirement already satisfied: torch==1.9.0 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 5)) (1.9.0+cu102)\n", "Collecting dagshub==0.1.6\n", " Downloading dagshub-0.1.6-py3-none-any.whl (9.3 kB)\n", "Requirement already satisfied: pandas==1.1.5 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 7)) (1.1.5)\n", "Collecting rouge_score\n", " Downloading rouge_score-0.0.4-py2.py3-none-any.whl (22 kB)\n", "Requirement already satisfied: pyyaml in 
/usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 9)) (3.13)\n", "Collecting dvc\n", " Downloading dvc-2.5.4-py3-none-any.whl (638 kB)\n", "\u001b[K |████████████████████████████████| 638 kB 62.4 MB/s \n", "\u001b[?25hCollecting mlflow\n", " Downloading mlflow-1.19.0-py3-none-any.whl (14.4 MB)\n", "\u001b[K |████████████████████████████████| 14.4 MB 66 kB/s \n", "\u001b[?25hCollecting wandb\n", " Downloading wandb-0.11.0-py2.py3-none-any.whl (1.8 MB)\n", "\u001b[K |████████████████████████████████| 1.8 MB 52.8 MB/s \n", "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 15)) (7.1.2)\n", "Requirement already satisfied: coverage in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 16)) (3.7.1)\n", "Collecting awscli\n", " Downloading awscli-1.20.6-py3-none-any.whl (3.7 MB)\n", "\u001b[K |████████████████████████████████| 3.7 MB 65.9 MB/s \n", "\u001b[?25hCollecting flake8\n", " Downloading flake8-3.9.2-py2.py3-none-any.whl (73 kB)\n", "\u001b[K |████████████████████████████████| 73 kB 2.1 MB/s \n", "\u001b[?25hCollecting python-dotenv>=0.5.1\n", " Downloading python_dotenv-0.19.0-py2.py3-none-any.whl (17 kB)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (21.0)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (2.23.0)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (0.3.4)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (4.6.1)\n", "Collecting xxhash\n", " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", "\u001b[K |████████████████████████████████| 
243 kB 69.5 MB/s \n", "\u001b[?25hCollecting fsspec>=2021.05.0\n", " Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)\n", "\u001b[K |████████████████████████████████| 118 kB 76.5 MB/s \n", "\u001b[?25hCollecting tqdm>=4.42\n", " Downloading tqdm-4.61.2-py2.py3-none-any.whl (76 kB)\n", "\u001b[K |████████████████████████████████| 76 kB 6.1 MB/s \n", "\u001b[?25hCollecting huggingface-hub<0.1.0\n", " Downloading huggingface_hub-0.0.14-py3-none-any.whl (43 kB)\n", "\u001b[K |████████████████████████████████| 43 kB 2.0 MB/s \n", "\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (3.0.0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (0.70.12.2)\n", "Collecting future>=0.17.1\n", " Downloading future-0.18.2.tar.gz (829 kB)\n", "\u001b[K |████████████████████████████████| 829 kB 57.6 MB/s \n", "\u001b[?25hCollecting torchmetrics>=0.2.0\n", " Downloading torchmetrics-0.4.1-py3-none-any.whl (234 kB)\n", "\u001b[K |████████████████████████████████| 234 kB 62.8 MB/s \n", "\u001b[?25hCollecting pyDeprecate==0.3.0\n", " Downloading pyDeprecate-0.3.0-py3-none-any.whl (10 kB)\n", "Collecting pyyaml\n", " Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n", "\u001b[K |████████████████████████████████| 636 kB 62.7 MB/s \n", "\u001b[?25hCollecting tensorboard!=2.5.0,>=2.2.0\n", " Downloading tensorboard-2.4.1-py3-none-any.whl (10.6 MB)\n", "\u001b[K |████████████████████████████████| 10.6 MB 60.2 MB/s \n", "\u001b[?25hCollecting sacremoses\n", " Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)\n", "\u001b[K |████████████████████████████████| 895 kB 52.2 MB/s \n", "\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n", " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 
MB)\n", "\u001b[K |████████████████████████████████| 3.3 MB 46.3 MB/s \n", "\u001b[?25hCollecting huggingface-hub<0.1.0\n", " Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.9.0->-r requirements.txt (line 4)) (2019.12.20)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.9.0->-r requirements.txt (line 4)) (3.0.12)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.9.0->-r requirements.txt (line 5)) (3.7.4.3)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5->-r requirements.txt (line 7)) (2018.9)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5->-r requirements.txt (line 7)) (2.8.1)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (3.2.5)\n", "Requirement already satisfied: absl-py in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (0.12.0)\n", "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (1.15.0)\n", "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (1.4.4)\n", "Collecting diskcache>=5.2.1\n", " Downloading diskcache-5.2.1-py3-none-any.whl (44 kB)\n", "\u001b[K |████████████████████████████████| 44 kB 3.4 MB/s \n", "\u001b[?25hCollecting jsonpath-ng>=1.5.1\n", " Downloading jsonpath_ng-1.5.3-py3-none-any.whl (29 kB)\n", "Requirement already satisfied: pyasn1>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.4.8)\n", "Collecting distro>=1.3.0\n", " 
Downloading distro-1.5.0-py2.py3-none-any.whl (18 kB)\n", "Collecting rich>=10.0.0\n", " Downloading rich-10.6.0-py3-none-any.whl (208 kB)\n", "\u001b[K |████████████████████████████████| 208 kB 50.0 MB/s \n", "\u001b[?25hCollecting zc.lockfile>=1.2.1\n", " Downloading zc.lockfile-2.0-py2.py3-none-any.whl (9.7 kB)\n", "Collecting configobj>=5.0.6\n", " Downloading configobj-5.0.6.tar.gz (33 kB)\n", "Requirement already satisfied: pyparsing==2.4.7 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (2.4.7)\n", "Requirement already satisfied: pydot>=1.2.4 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (1.3.0)\n", "Collecting ply>=3.9\n", " Downloading ply-3.11-py2.py3-none-any.whl (49 kB)\n", "\u001b[K |████████████████████████████████| 49 kB 7.1 MB/s \n", "\u001b[?25hCollecting shtab<2,>=1.3.4\n", " Downloading shtab-1.3.9-py2.py3-none-any.whl (12 kB)\n", "Collecting nanotime>=0.5.2\n", " Downloading nanotime-0.5.2.tar.gz (3.2 kB)\n", "Collecting shortuuid>=0.5.0\n", " Downloading shortuuid-1.0.1-py3-none-any.whl (7.5 kB)\n", "Collecting funcy>=1.14\n", " Downloading funcy-1.16-py2.py3-none-any.whl (32 kB)\n", "Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.8.9)\n", "Collecting gitpython>3\n", " Downloading GitPython-3.1.18-py3-none-any.whl (170 kB)\n", "\u001b[K |████████████████████████████████| 170 kB 58.7 MB/s \n", "\u001b[?25hCollecting ruamel.yaml>=0.16.1\n", " Downloading ruamel.yaml-0.17.10-py3-none-any.whl (108 kB)\n", "\u001b[K |████████████████████████████████| 108 kB 48.6 MB/s \n", "\u001b[?25hCollecting flatten-dict<1,>=0.3.0\n", " Downloading flatten_dict-0.4.1-py2.py3-none-any.whl (9.5 kB)\n", "Collecting voluptuous>=0.11.7\n", " Downloading voluptuous-0.12.1-py3-none-any.whl (29 kB)\n", "Collecting python-benedict>=0.21.1\n", " Downloading python_benedict-0.24.0-py3-none-any.whl (40 kB)\n", 
"\u001b[K |████████████████████████████████| 40 kB 7.5 MB/s \n", "\u001b[?25hCollecting dpath<3,>=2.0.1\n", " Downloading dpath-2.0.1.tar.gz (21 kB)\n", "Collecting grandalf==0.6\n", " Downloading grandalf-0.6-py3-none-any.whl (31 kB)\n", "Requirement already satisfied: networkx~=2.5 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (2.5.1)\n", "Collecting psutil>=5.8.0\n", " Downloading psutil-5.8.0-cp37-cp37m-manylinux2010_x86_64.whl (296 kB)\n", "\u001b[K |████████████████████████████████| 296 kB 65.5 MB/s \n", "\u001b[?25hRequirement already satisfied: toml>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.10.2)\n", "Collecting pygtrie>=2.3.2\n", " Downloading pygtrie-2.4.2.tar.gz (35 kB)\n", "Collecting colorama>=0.3.9\n", " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", "Collecting dulwich>=0.20.23\n", " Downloading dulwich-0.20.24-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (529 kB)\n", "\u001b[K |████████████████████████████████| 529 kB 59.7 MB/s \n", "\u001b[?25hCollecting pathspec>=0.6.0\n", " Downloading pathspec-0.9.0-py2.py3-none-any.whl (31 kB)\n", "Collecting dictdiffer>=0.8.1\n", " Downloading dictdiffer-0.9.0-py2.py3-none-any.whl (16 kB)\n", "Requirement already satisfied: setuptools>=34.0.0 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (57.4.0)\n", "Collecting pygit2>=1.5.0\n", " Downloading pygit2-1.6.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", "\u001b[K |████████████████████████████████| 4.6 MB 54.2 MB/s \n", "\u001b[?25hCollecting flufl.lock<4,>=3.2\n", " Downloading flufl.lock-3.2.tar.gz (19 kB)\n", "Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (0.3)\n", "Requirement already satisfied: sqlparse>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from 
mlflow->-r requirements.txt (line 11)) (0.4.1)\n", "Collecting databricks-cli>=0.8.7\n", " Downloading databricks-cli-0.14.3.tar.gz (54 kB)\n", "\u001b[K |████████████████████████████████| 54 kB 4.0 MB/s \n", "\u001b[?25hCollecting prometheus-flask-exporter\n", " Downloading prometheus_flask_exporter-0.18.2.tar.gz (22 kB)\n", "Collecting gunicorn\n", " Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)\n", "\u001b[K |████████████████████████████████| 79 kB 10.0 MB/s \n", "\u001b[?25hRequirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.1.4)\n", "Collecting docker>=4.0.0\n", " Downloading docker-5.0.0-py2.py3-none-any.whl (146 kB)\n", "\u001b[K |████████████████████████████████| 146 kB 75.1 MB/s \n", "\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.3.0)\n", "Collecting querystring-parser\n", " Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n", "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.4.20)\n", "Collecting alembic<=1.4.1\n", " Downloading alembic-1.4.1.tar.gz (1.1 MB)\n", "\u001b[K |████████████████████████████████| 1.1 MB 54.1 MB/s \n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "bXKYITHyFBnz" }, "source": [ "!make run" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "J0n8bj738wxb" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }