{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Summarisation.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyOitTe/P44ZGiLTVtPjOQvz",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
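{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"# Summarisation\n",
"\n",
"Clones the `summarization` repository from DagsHub, creates its working directories, installs its requirements and invokes its `make run` target. Paths assume a Google Colab runtime (`/content`)."
]
},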
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DHzgmow9DZK7",
"outputId": "8ca1398e-4eef-41c9-d253-195f0fd356a5"
},
"source": [
"# Clone the project into /content/summarization (the directory the next cell switches into).\n",
"# The directory check makes this cell safe to re-run: git aborts when the destination already exists,\n",
"# and `-C /content` keeps the clone location fixed even after the working directory has changed.\n",
"![ -d /content/summarization ] || git -C /content clone https://dagshub.com/gagan3012/summarization.git"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'summarization'...\n",
"remote: Enumerating objects: 494, done.\u001b[K\n",
"remote: Counting objects: 100% (494/494), done.\u001b[K\n",
"remote: Compressing objects: 100% (488/488), done.\u001b[K\n",
"remote: Total 494 (delta 281), reused 0 (delta 0)\u001b[K\n",
"Receiving objects: 100% (494/494), 71.22 KiB | 588.00 KiB/s, done.\n",
"Resolving deltas: 100% (281/281), done.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "a_gw9JaVEslh"
},
"source": [
"import os\n",
"\n",
"# Switch the kernel's working directory to the cloned checkout so that the\n",
"# `!git` and `!make` cells below run inside the repository.\n",
"os.chdir('/content/summarization')"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dcozx2lVFOd-",
"outputId": "7b4324b3-f5ae-497c-eea2-64b89b75e6e8"
},
"source": [
"# Bring the checkout up to date with the remote; right after a fresh clone there is nothing to fetch.\n",
"!git pull"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Already up to date.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fcJkF3ZtEyVm",
"outputId": "32956dfe-d46e-4d6d-91d0-afdbf660d4f1"
},
"source": [
"# Create the project's working directories: data/raw, data/processed and models.\n",
"!make dirs"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"mkdir -p data/raw data/processed models\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6jClm3fBE9XL",
"outputId": "6a68e85a-ea79-474c-9d11-3e6c907a4ae1"
},
"source": [
"# Upgrade pip, setuptools and wheel, then install everything listed in requirements.txt.\n",
"!make requirements"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"python -m pip install -U pip setuptools wheel\n",
"Requirement already satisfied: pip in /usr/local/lib/python3.7/dist-packages (21.1.3)\n",
"Collecting pip\n",
" Downloading pip-21.2.1-py3-none-any.whl (1.6 MB)\n",
"\u001b[K |████████████████████████████████| 1.6 MB 8.2 MB/s \n",
"\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (57.2.0)\n",
"Collecting setuptools\n",
" Downloading setuptools-57.4.0-py3-none-any.whl (819 kB)\n",
"\u001b[K |████████████████████████████████| 819 kB 58.0 MB/s \n",
"\u001b[?25hRequirement already satisfied: wheel in /usr/local/lib/python3.7/dist-packages (0.36.2)\n",
"Installing collected packages: setuptools, pip\n",
" Attempting uninstall: setuptools\n",
" Found existing installation: setuptools 57.2.0\n",
" Uninstalling setuptools-57.2.0:\n",
" Successfully uninstalled setuptools-57.2.0\n",
" Attempting uninstall: pip\n",
" Found existing installation: pip 21.1.3\n",
" Uninstalling pip-21.1.3:\n",
" Successfully uninstalled pip-21.1.3\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n",
"Successfully installed pip-21.2.1 setuptools-57.4.0\n",
"python -m pip install -r requirements.txt\n",
"\u001b[33mWARNING: Value for scheme.platlib does not match. Please report this to \n",
"distutils: /usr/local/lib/python3.7/dist-packages\n",
"sysconfig: /usr/lib/python3.7/site-packages\u001b[0m\n",
"\u001b[33mWARNING: Value for scheme.purelib does not match. Please report this to \n",
"distutils: /usr/local/lib/python3.7/dist-packages\n",
"sysconfig: /usr/lib/python3.7/site-packages\u001b[0m\n",
"\u001b[33mWARNING: Value for scheme.headers does not match. Please report this to \n",
"distutils: /usr/local/include/python3.7/UNKNOWN\n",
"sysconfig: /usr/include/python3.7m/UNKNOWN\u001b[0m\n",
"\u001b[33mWARNING: Value for scheme.scripts does not match. Please report this to \n",
"distutils: /usr/local/bin\n",
"sysconfig: /usr/bin\u001b[0m\n",
"\u001b[33mWARNING: Value for scheme.data does not match. Please report this to \n",
"distutils: /usr/local\n",
"sysconfig: /usr\u001b[0m\n",
"\u001b[33mWARNING: Additional context:\n",
"user = False\n",
"home = None\n",
"root = None\n",
"prefix = None\u001b[0m\n",
"Collecting numpy==1.21.1\n",
" Downloading numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)\n",
"\u001b[K |████████████████████████████████| 15.7 MB 75 kB/s \n",
"\u001b[?25hCollecting datasets==1.10.2\n",
" Downloading datasets-1.10.2-py3-none-any.whl (542 kB)\n",
"\u001b[K |████████████████████████████████| 542 kB 66.2 MB/s \n",
"\u001b[?25hCollecting pytorch_lightning==1.3.5\n",
" Downloading pytorch_lightning-1.3.5-py3-none-any.whl (808 kB)\n",
"\u001b[K |████████████████████████████████| 808 kB 59.0 MB/s \n",
"\u001b[?25hCollecting transformers==4.9.0\n",
" Downloading transformers-4.9.0-py3-none-any.whl (2.6 MB)\n",
"\u001b[K |████████████████████████████████| 2.6 MB 36.6 MB/s \n",
"\u001b[?25hRequirement already satisfied: torch==1.9.0 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 5)) (1.9.0+cu102)\n",
"Collecting dagshub==0.1.6\n",
" Downloading dagshub-0.1.6-py3-none-any.whl (9.3 kB)\n",
"Requirement already satisfied: pandas==1.1.5 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 7)) (1.1.5)\n",
"Collecting rouge_score\n",
" Downloading rouge_score-0.0.4-py2.py3-none-any.whl (22 kB)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 9)) (3.13)\n",
"Collecting dvc\n",
" Downloading dvc-2.5.4-py3-none-any.whl (638 kB)\n",
"\u001b[K |████████████████████████████████| 638 kB 62.4 MB/s \n",
"\u001b[?25hCollecting mlflow\n",
" Downloading mlflow-1.19.0-py3-none-any.whl (14.4 MB)\n",
"\u001b[K |████████████████████████████████| 14.4 MB 66 kB/s \n",
"\u001b[?25hCollecting wandb\n",
" Downloading wandb-0.11.0-py2.py3-none-any.whl (1.8 MB)\n",
"\u001b[K |████████████████████████████████| 1.8 MB 52.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 15)) (7.1.2)\n",
"Requirement already satisfied: coverage in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 16)) (3.7.1)\n",
"Collecting awscli\n",
" Downloading awscli-1.20.6-py3-none-any.whl (3.7 MB)\n",
"\u001b[K |████████████████████████████████| 3.7 MB 65.9 MB/s \n",
"\u001b[?25hCollecting flake8\n",
" Downloading flake8-3.9.2-py2.py3-none-any.whl (73 kB)\n",
"\u001b[K |████████████████████████████████| 73 kB 2.1 MB/s \n",
"\u001b[?25hCollecting python-dotenv>=0.5.1\n",
" Downloading python_dotenv-0.19.0-py2.py3-none-any.whl (17 kB)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (21.0)\n",
"Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (2.23.0)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (0.3.4)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (4.6.1)\n",
"Collecting xxhash\n",
" Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n",
"\u001b[K |████████████████████████████████| 243 kB 69.5 MB/s \n",
"\u001b[?25hCollecting fsspec>=2021.05.0\n",
" Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)\n",
"\u001b[K |████████████████████████████████| 118 kB 76.5 MB/s \n",
"\u001b[?25hCollecting tqdm>=4.42\n",
" Downloading tqdm-4.61.2-py2.py3-none-any.whl (76 kB)\n",
"\u001b[K |████████████████████████████████| 76 kB 6.1 MB/s \n",
"\u001b[?25hCollecting huggingface-hub<0.1.0\n",
" Downloading huggingface_hub-0.0.14-py3-none-any.whl (43 kB)\n",
"\u001b[K |████████████████████████████████| 43 kB 2.0 MB/s \n",
"\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (3.0.0)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==1.10.2->-r requirements.txt (line 2)) (0.70.12.2)\n",
"Collecting future>=0.17.1\n",
" Downloading future-0.18.2.tar.gz (829 kB)\n",
"\u001b[K |████████████████████████████████| 829 kB 57.6 MB/s \n",
"\u001b[?25hCollecting torchmetrics>=0.2.0\n",
" Downloading torchmetrics-0.4.1-py3-none-any.whl (234 kB)\n",
"\u001b[K |████████████████████████████████| 234 kB 62.8 MB/s \n",
"\u001b[?25hCollecting pyDeprecate==0.3.0\n",
" Downloading pyDeprecate-0.3.0-py3-none-any.whl (10 kB)\n",
"Collecting pyyaml\n",
" Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n",
"\u001b[K |████████████████████████████████| 636 kB 62.7 MB/s \n",
"\u001b[?25hCollecting tensorboard!=2.5.0,>=2.2.0\n",
" Downloading tensorboard-2.4.1-py3-none-any.whl (10.6 MB)\n",
"\u001b[K |████████████████████████████████| 10.6 MB 60.2 MB/s \n",
"\u001b[?25hCollecting sacremoses\n",
" Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)\n",
"\u001b[K |████████████████████████████████| 895 kB 52.2 MB/s \n",
"\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n",
" Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
"\u001b[K |████████████████████████████████| 3.3 MB 46.3 MB/s \n",
"\u001b[?25hCollecting huggingface-hub<0.1.0\n",
" Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.9.0->-r requirements.txt (line 4)) (2019.12.20)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.9.0->-r requirements.txt (line 4)) (3.0.12)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.9.0->-r requirements.txt (line 5)) (3.7.4.3)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5->-r requirements.txt (line 7)) (2018.9)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5->-r requirements.txt (line 7)) (2.8.1)\n",
"Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (3.2.5)\n",
"Requirement already satisfied: absl-py in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (0.12.0)\n",
"Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from rouge_score->-r requirements.txt (line 8)) (1.15.0)\n",
"Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (1.4.4)\n",
"Collecting diskcache>=5.2.1\n",
" Downloading diskcache-5.2.1-py3-none-any.whl (44 kB)\n",
"\u001b[K |████████████████████████████████| 44 kB 3.4 MB/s \n",
"\u001b[?25hCollecting jsonpath-ng>=1.5.1\n",
" Downloading jsonpath_ng-1.5.3-py3-none-any.whl (29 kB)\n",
"Requirement already satisfied: pyasn1>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.4.8)\n",
"Collecting distro>=1.3.0\n",
" Downloading distro-1.5.0-py2.py3-none-any.whl (18 kB)\n",
"Collecting rich>=10.0.0\n",
" Downloading rich-10.6.0-py3-none-any.whl (208 kB)\n",
"\u001b[K |████████████████████████████████| 208 kB 50.0 MB/s \n",
"\u001b[?25hCollecting zc.lockfile>=1.2.1\n",
" Downloading zc.lockfile-2.0-py2.py3-none-any.whl (9.7 kB)\n",
"Collecting configobj>=5.0.6\n",
" Downloading configobj-5.0.6.tar.gz (33 kB)\n",
"Requirement already satisfied: pyparsing==2.4.7 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (2.4.7)\n",
"Requirement already satisfied: pydot>=1.2.4 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (1.3.0)\n",
"Collecting ply>=3.9\n",
" Downloading ply-3.11-py2.py3-none-any.whl (49 kB)\n",
"\u001b[K |████████████████████████████████| 49 kB 7.1 MB/s \n",
"\u001b[?25hCollecting shtab<2,>=1.3.4\n",
" Downloading shtab-1.3.9-py2.py3-none-any.whl (12 kB)\n",
"Collecting nanotime>=0.5.2\n",
" Downloading nanotime-0.5.2.tar.gz (3.2 kB)\n",
"Collecting shortuuid>=0.5.0\n",
" Downloading shortuuid-1.0.1-py3-none-any.whl (7.5 kB)\n",
"Collecting funcy>=1.14\n",
" Downloading funcy-1.16-py2.py3-none-any.whl (32 kB)\n",
"Requirement already satisfied: tabulate>=0.8.7 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.8.9)\n",
"Collecting gitpython>3\n",
" Downloading GitPython-3.1.18-py3-none-any.whl (170 kB)\n",
"\u001b[K |████████████████████████████████| 170 kB 58.7 MB/s \n",
"\u001b[?25hCollecting ruamel.yaml>=0.16.1\n",
" Downloading ruamel.yaml-0.17.10-py3-none-any.whl (108 kB)\n",
"\u001b[K |████████████████████████████████| 108 kB 48.6 MB/s \n",
"\u001b[?25hCollecting flatten-dict<1,>=0.3.0\n",
" Downloading flatten_dict-0.4.1-py2.py3-none-any.whl (9.5 kB)\n",
"Collecting voluptuous>=0.11.7\n",
" Downloading voluptuous-0.12.1-py3-none-any.whl (29 kB)\n",
"Collecting python-benedict>=0.21.1\n",
" Downloading python_benedict-0.24.0-py3-none-any.whl (40 kB)\n",
"\u001b[K |████████████████████████████████| 40 kB 7.5 MB/s \n",
"\u001b[?25hCollecting dpath<3,>=2.0.1\n",
" Downloading dpath-2.0.1.tar.gz (21 kB)\n",
"Collecting grandalf==0.6\n",
" Downloading grandalf-0.6-py3-none-any.whl (31 kB)\n",
"Requirement already satisfied: networkx~=2.5 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (2.5.1)\n",
"Collecting psutil>=5.8.0\n",
" Downloading psutil-5.8.0-cp37-cp37m-manylinux2010_x86_64.whl (296 kB)\n",
"\u001b[K |████████████████████████████████| 296 kB 65.5 MB/s \n",
"\u001b[?25hRequirement already satisfied: toml>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (0.10.2)\n",
"Collecting pygtrie>=2.3.2\n",
" Downloading pygtrie-2.4.2.tar.gz (35 kB)\n",
"Collecting colorama>=0.3.9\n",
" Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n",
"Collecting dulwich>=0.20.23\n",
" Downloading dulwich-0.20.24-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (529 kB)\n",
"\u001b[K |████████████████████████████████| 529 kB 59.7 MB/s \n",
"\u001b[?25hCollecting pathspec>=0.6.0\n",
" Downloading pathspec-0.9.0-py2.py3-none-any.whl (31 kB)\n",
"Collecting dictdiffer>=0.8.1\n",
" Downloading dictdiffer-0.9.0-py2.py3-none-any.whl (16 kB)\n",
"Requirement already satisfied: setuptools>=34.0.0 in /usr/local/lib/python3.7/dist-packages (from dvc->-r requirements.txt (line 10)) (57.4.0)\n",
"Collecting pygit2>=1.5.0\n",
" Downloading pygit2-1.6.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n",
"\u001b[K |████████████████████████████████| 4.6 MB 54.2 MB/s \n",
"\u001b[?25hCollecting flufl.lock<4,>=3.2\n",
" Downloading flufl.lock-3.2.tar.gz (19 kB)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (0.3)\n",
"Requirement already satisfied: sqlparse>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (0.4.1)\n",
"Collecting databricks-cli>=0.8.7\n",
" Downloading databricks-cli-0.14.3.tar.gz (54 kB)\n",
"\u001b[K |████████████████████████████████| 54 kB 4.0 MB/s \n",
"\u001b[?25hCollecting prometheus-flask-exporter\n",
" Downloading prometheus_flask_exporter-0.18.2.tar.gz (22 kB)\n",
"Collecting gunicorn\n",
" Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)\n",
"\u001b[K |████████████████████████████████| 79 kB 10.0 MB/s \n",
"\u001b[?25hRequirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.1.4)\n",
"Collecting docker>=4.0.0\n",
" Downloading docker-5.0.0-py2.py3-none-any.whl (146 kB)\n",
"\u001b[K |████████████████████████████████| 146 kB 75.1 MB/s \n",
"\u001b[?25hRequirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.3.0)\n",
"Collecting querystring-parser\n",
" Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n",
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from mlflow->-r requirements.txt (line 11)) (1.4.20)\n",
"Collecting alembic<=1.4.1\n",
" Downloading alembic-1.4.1.tar.gz (1.1 MB)\n",
"\u001b[K |████████████████████████████████| 1.1 MB 54.1 MB/s \n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bXKYITHyFBnz"
},
"source": [
"# Invoke the Makefile's `run` target.\n",
"# NOTE(review): presumably this launches the project's pipeline; confirm against the Makefile.\n",
"!make run"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "J0n8bj738wxb"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}