diff --git "a/NBDT_Data_Recs.ipynb" "b/NBDT_Data_Recs.ipynb" new file mode 100644--- /dev/null +++ "b/NBDT_Data_Recs.ipynb" @@ -0,0 +1,1294 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "H-pmLfmm7lus", + "outputId": "c76797e6-3e80-408f-99d2-c853f3b29ce9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting gradio\n", + " Downloading gradio-3.35.2-py3-none-any.whl (19.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m80.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting aiofiles (from gradio)\n", + " Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from gradio) (3.8.4)\n", + "Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n", + "Collecting fastapi (from gradio)\n", + " Downloading fastapi-0.98.0-py3-none-any.whl (56 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.0/57.0 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ffmpy (from gradio)\n", + " Downloading ffmpy-0.3.0.tar.gz (4.8 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting gradio-client>=0.2.7 (from gradio)\n", + " Downloading gradio_client-0.2.7-py3-none-any.whl (288 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m288.4/288.4 kB\u001b[0m \u001b[31m32.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting httpx (from gradio)\n", + " Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting huggingface-hub>=0.14.0 (from gradio)\n", + " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m31.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n", + "Requirement already satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.0.0)\n", + "Requirement already satisfied: markupsafe in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.3)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", + "Collecting mdit-py-plugins<=0.3.3 (from gradio)\n", + " Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.4)\n", + "Collecting orjson (from gradio)\n", + " Downloading orjson-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (136 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.0/137.0 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from gradio) (8.4.0)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from gradio) (1.10.9)\n", + "Collecting pydub (from gradio)\n", + " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", + "Requirement already satisfied: pygments>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.14.0)\n", + "Collecting python-multipart (from gradio)\n", + " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from gradio) (2.27.1)\n", + "Collecting semantic-version (from gradio)\n", + " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", + "Collecting uvicorn>=0.14.0 (from gradio)\n", + " Downloading uvicorn-0.22.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.0 (from gradio)\n", + " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (0.4)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (4.3.3)\n", + "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->gradio) (0.12.0)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.7->gradio) (2023.6.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.7->gradio) (23.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from gradio-client>=0.2.7->gradio) (4.6.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.2)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.65.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio) (0.1.2)\n", + "Collecting linkify-it-py<3,>=1 (from markdown-it-py[linkify]>=2.0.0->gradio)\n", + " Downloading linkify_it_py-2.0.2-py3-none-any.whl (19 kB)\n", + "INFO: pip is looking at multiple versions of mdit-py-plugins to determine which version is compatible with other requirements. 
This could take a while.\n", + "Collecting mdit-py-plugins<=0.3.3 (from gradio)\n", + " Downloading mdit_py_plugins-0.3.2-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading mdit_py_plugins-0.3.1-py3-none-any.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.5/46.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading mdit_py_plugins-0.3.0-py3-none-any.whl (43 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading mdit_py_plugins-0.2.8-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading mdit_py_plugins-0.2.7-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading mdit_py_plugins-0.2.6-py3-none-any.whl (39 kB)\n", + " Downloading mdit_py_plugins-0.2.5-py3-none-any.whl (39 kB)\n", + "INFO: pip is looking at multiple versions of mdit-py-plugins to determine which version is compatible with other requirements. This could take a while.\n", + " Downloading mdit_py_plugins-0.2.4-py3-none-any.whl (39 kB)\n", + " Downloading mdit_py_plugins-0.2.3-py3-none-any.whl (39 kB)\n", + " Downloading mdit_py_plugins-0.2.2-py3-none-any.whl (39 kB)\n", + " Downloading mdit_py_plugins-0.2.1-py3-none-any.whl (38 kB)\n", + " Downloading mdit_py_plugins-0.2.0-py3-none-any.whl (38 kB)\n", + "INFO: This is taking longer than usual. 
You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. If you want to abort this run, press Ctrl + C.\n", + " Downloading mdit_py_plugins-0.1.0-py3-none-any.whl (37 kB)\n", + "Collecting markdown-it-py[linkify]>=2.0.0 (from gradio)\n", + " Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.5/87.5 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Downloading markdown_it_py-2.2.0-py3-none-any.whl (84 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->gradio) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->gradio) (2022.7.1)\n", + "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.3)\n", + "Collecting h11>=0.8 (from uvicorn>=0.14.0->gradio)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (2.0.12)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.3.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->gradio) (1.3.1)\n", + "Collecting starlette<0.28.0,>=0.27.0 (from fastapi->gradio)\n", + " Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (2023.5.7)\n", + "Collecting httpcore<0.18.0,>=0.15.0 (from httpx->gradio)\n", + " Downloading httpcore-0.17.2-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.5/72.5 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (3.4)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (4.40.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (1.4.4)\n", + "Requirement already satisfied: 
pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->gradio) (3.1.0)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->gradio) (1.26.16)\n", + "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx->gradio) (3.7.0)\n", + "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (0.19.3)\n", + "Collecting uc-micro-py (from linkify-it-py<3,>=1->markdown-it-py[linkify]>=2.0.0->gradio)\n", + " Downloading uc_micro_py-1.0.2-py3-none-any.whl (6.2 kB)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->gradio) (1.16.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx->gradio) (1.1.1)\n", + "Building wheels for collected packages: ffmpy\n", + " Building wheel for ffmpy (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for ffmpy: filename=ffmpy-0.3.0-py3-none-any.whl size=4694 sha256=37c8296251b0f6a8cf0a1c9c0470f09722c9b3b0153d4ece85681f5c343fe7ca\n", + " Stored in directory: /root/.cache/pip/wheels/0c/c2/0e/3b9c6845c6a4e35beb90910cc70d9ac9ab5d47402bd62af0df\n", + "Successfully built ffmpy\n", + "Installing collected packages: pydub, ffmpy, websockets, uc-micro-py, semantic-version, python-multipart, orjson, markdown-it-py, h11, aiofiles, uvicorn, starlette, mdit-py-plugins, linkify-it-py, huggingface-hub, httpcore, httpx, fastapi, gradio-client, gradio\n", + " Attempting uninstall: markdown-it-py\n", + " Found existing installation: markdown-it-py 3.0.0\n", + " Uninstalling markdown-it-py-3.0.0:\n", + " Successfully uninstalled markdown-it-py-3.0.0\n", + "Successfully installed aiofiles-23.1.0 fastapi-0.98.0 ffmpy-0.3.0 gradio-3.35.2 gradio-client-0.2.7 h11-0.14.0 httpcore-0.17.2 httpx-0.24.1 huggingface-hub-0.15.1 linkify-it-py-2.0.2 markdown-it-py-2.2.0 mdit-py-plugins-0.3.3 orjson-3.9.1 pydub-0.25.1 python-multipart-0.0.6 semantic-version-2.10.0 starlette-0.27.0 uc-micro-py-1.0.2 uvicorn-0.22.0 websockets-11.0.3\n", + "Collecting langchain\n", + " Downloading langchain-0.0.218-py3-none-any.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting bitsandbytes\n", + " Downloading bitsandbytes-0.39.1-py3-none-any.whl (97.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.1/97.1 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.16)\n", + "Requirement already satisfied: 
aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.8.4)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.2)\n", + "Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)\n", + " Downloading dataclasses_json-0.5.8-py3-none-any.whl (26 kB)\n", + "Collecting langchainplus-sdk>=0.0.17 (from langchain)\n", + " Downloading langchainplus_sdk-0.0.17-py3-none-any.whl (25 kB)\n", + "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.8.4)\n", + "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.22.4)\n", + "Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain)\n", + " Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pydantic<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.10.9)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.27.1)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.0.12)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Collecting marshmallow<4.0.0,>=3.3.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain)\n", + " Downloading marshmallow-3.19.0-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.1/49.1 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting marshmallow-enum<2.0.0,>=1.5.1 (from dataclasses-json<0.6.0,>=0.5.7->langchain)\n", + " Downloading marshmallow_enum-1.5.1-py2.py3-none-any.whl (4.2 kB)\n", + "Collecting typing-inspect>=0.4.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<2,>=1->langchain) (4.6.3)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2023.5.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.4)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (2.0.2)\n", + "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (23.1)\n", + "Collecting mypy-extensions>=0.3.0 (from 
typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: bitsandbytes, mypy-extensions, marshmallow, typing-inspect, openapi-schema-pydantic, marshmallow-enum, langchainplus-sdk, dataclasses-json, langchain\n", + "Successfully installed bitsandbytes-0.39.1 dataclasses-json-0.5.8 langchain-0.0.218 langchainplus-sdk-0.0.17 marshmallow-3.19.0 marshmallow-enum-1.5.1 mypy-extensions-1.0.0 openapi-schema-pydantic-1.2.4 typing-inspect-0.9.0\n", + "Collecting faiss-cpu\n", + " Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.6/17.6 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: faiss-cpu\n", + "Successfully installed faiss-cpu-1.7.4\n", + "\u001b[31mERROR: Could not find a version that satisfies the requirement textwrap (from versions: none)\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[31mERROR: No matching distribution found for textwrap\u001b[0m\u001b[31m\n", + "\u001b[0mCollecting sentence-transformers\n", + " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting transformers<5.0.0,>=4.6.0 (from sentence-transformers)\n", + " Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m57.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.65.0)\n", + "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.0.1+cu118)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.2+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.22.4)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.10.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (3.8.1)\n", + "Collecting sentencepiece (from sentence-transformers)\n", + " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m77.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.15.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (3.12.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub>=0.4.0->sentence-transformers) (2023.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2.27.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (4.6.3)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers) (23.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence-transformers) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence-transformers) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence-transformers) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence-transformers) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers) (16.0.6)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers) (2022.10.31)\n", + "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers)\n", + " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m78.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers)\n", + " Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━��━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m61.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence-transformers) (8.1.3)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence-transformers) (1.2.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.1.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence-transformers) (8.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence-transformers) (2.1.3)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (2023.5.7)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (3.4)\n", + "Requirement already satisfied: 
mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence-transformers) (1.3.0)\n", + "Building wheels for collected packages: sentence-transformers\n", + " Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for sentence-transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125926 sha256=8ac9b584f42597843ce222154270f2fe13d6ba19bdb343887f1c2a956f073a78\n", + " Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n", + "Successfully built sentence-transformers\n", + "Installing collected packages: tokenizers, sentencepiece, safetensors, transformers, sentence-transformers\n", + "Successfully installed safetensors-0.3.1 sentence-transformers-2.2.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.30.2\n", + "Collecting accelerate\n", + " Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.6/227.6 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.22.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0)\n", + "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) (3.12.2)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) 
(4.6.3)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->accelerate) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->accelerate) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->accelerate) (16.0.6)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->accelerate) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->accelerate) (1.3.0)\n", + "Installing collected packages: accelerate\n", + "Successfully installed accelerate-0.20.3\n", + "Collecting llama-cpp-python\n", + " Downloading llama_cpp_python-0.1.66.tar.gz (1.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (4.6.3)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (1.22.4)\n", + "Collecting diskcache>=5.6.1 (from llama-cpp-python)\n", + " Downloading diskcache-5.6.1-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.6/45.6 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: llama-cpp-python\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.1.66-cp310-cp310-linux_x86_64.whl size=265566 sha256=0ad80b78bd391d6e1f5bb2b9feeabc814bdfd95917bbf73c2b7bb7219a199dea\n", + " Stored in directory: /root/.cache/pip/wheels/9c/0e/e3/5c3b6fdb7a015cc1c18596fddf79d795b64347f18809c78fee\n", + "Successfully built llama-cpp-python\n", + "Installing collected packages: diskcache, llama-cpp-python\n", + "Successfully installed diskcache-5.6.1 llama-cpp-python-0.1.66\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for transformers (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ],
+      "source": [
+        "# gradio is pinned to the release this notebook was run with: the app cell\n",
+        "# relies on the 3.x `gr.Dataframe.update` API.\n",
+        "%pip install gradio==3.35.2\n",
+        "%pip install langchain bitsandbytes\n",
+        "%pip install faiss-cpu\n",
+        "# `textwrap` ships with the standard library and must not be pip-installed.\n",
+        "%pip install torch datasets loralib sentencepiece\n",
+        "%pip install sentence-transformers\n",
+        "%pip install accelerate\n",
+        "%pip install llama-cpp-python\n",
+        "%pip install -q git+https://github.com/huggingface/transformers"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "kzA0z-IWIDCW"
+      },
+      "outputs": [],
+      "source": [
+        "# !cd llama.cpp && pwd"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GQU3WlKeHeEv"
+      },
+      "outputs": [],
+      "source": [
+        "# !git clone https://github.com/ggerganov/llama.cpp\n",
+        "# !cd llama.cpp && make"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "OPowJEDgJjPc"
+      },
+      "outputs": [],
+      "source": [
+        "# !ls ./models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "JuRpTwzFIHvY",
+        "outputId": "d483850b-8c45-48a8-a89d-3ed54377d576"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Downloading...\n",
+            "From: https://drive.google.com/uc?id=1oSCsACt5FrBLCD_T5FTzremT5y2b2KxK\n",
+            "To: /content/index.faiss\n",
+            "100% 121M/121M [00:04<00:00, 27.5MB/s]\n",
+            "Downloading...\n",
+            "From: https://drive.google.com/uc?id=1989gHL2kGm1sSLuVKk6ao8loGLvt8uR-\n",
+            "To: /content/index.pkl\n",
+            "100% 37.5M/37.5M [00:01<00:00, 29.2MB/s]\n"
+          ]
+        }
+      ],
+      "source": [
+        "!gdown 1oSCsACt5FrBLCD_T5FTzremT5y2b2KxK\n",
+        "!gdown 1989gHL2kGm1sSLuVKk6ao8loGLvt8uR-\n",
+        "!mkdir -p nbdt_data\n",
+        "!cp index.faiss nbdt_data\n",
+        "!cp index.pkl nbdt_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 29,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000,
+          "referenced_widgets": [
+
"b427d9e2d2ee4f828ee896a03e9f38eb", + "68db08a240724f9fb38f464db4c73b84", + "7da5f39db40f465caea5dd36ce881237", + "56796deef330429d9a9fec4432ca54d4", + "435b61fe3135482587f1dcc65306e114", + "d83e0c6ff5db4a29a05fed5b72773be7", + "6885776cbe9449f8864676e1e83b4c19", + "f32bc75b18d140aaba55de24947dbb3f", + "7530c375556442249c0f19ea435a629f", + "05a91c8da3054845bff97967fb17372d", + "64906f3cba5c433baae65ecaf06612af" + ] + }, + "id": "4xhtkUdY7K6F", + "outputId": "6223fbc0-3105-4e83-b71f-c15a7cad9f5b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:accelerate.utils.modeling:The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b427d9e2d2ee4f828ee896a03e9f38eb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/14 [00:00" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Keyboard interruption in main thread... closing server.\n" + ] + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+              " /usr/local/lib/python3.10/dist-packages/gradio/blocks.py:2058 in block_thread                    \n",
+              "                                                                                                  \n",
+              "   2055 │   │   \"\"\"Block main thread until interrupted by user.\"\"\"                                \n",
+              "   2056 │   │   try:                                                                              \n",
+              "   2057 │   │   │   while True:                                                                   \n",
+              " 2058 │   │   │   │   time.sleep(0.1)                                                           \n",
+              "   2059 │   │   except (KeyboardInterrupt, OSError):                                              \n",
+              "   2060 │   │   │   print(\"Keyboard interruption in main thread... closing server.\")              \n",
+              "   2061 │   │   │   self.server.close()                                                           \n",
+              "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+              "KeyboardInterrupt\n",
+              "\n",
+              "During handling of the above exception, another exception occurred:\n",
+              "\n",
+              "╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+              " in <cell line: 236>:236                                                                          \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/gradio/blocks.py:1970 in launch                          \n",
+              "                                                                                                  \n",
+              "   1967 │   │                                                                                     \n",
+              "   1968 │   │   # Block main thread if debug==True                                                \n",
+              "   1969 │   │   if debug or int(os.getenv(\"GRADIO_DEBUG\", 0)) == 1:                               \n",
+              " 1970 │   │   │   self.block_thread()                                                           \n",
+              "   1971 │   │   # Block main thread if running in a script to stop script from exiting            \n",
+              "   1972 │   │   is_in_interactive_mode = bool(getattr(sys, \"ps1\", sys.flags.interactive))         \n",
+              "   1973                                                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/gradio/blocks.py:2061 in block_thread                    \n",
+              "                                                                                                  \n",
+              "   2058 │   │   │   │   time.sleep(0.1)                                                           \n",
+              "   2059 │   │   except (KeyboardInterrupt, OSError):                                              \n",
+              "   2060 │   │   │   print(\"Keyboard interruption in main thread... closing server.\")              \n",
+              " 2061 │   │   │   self.server.close()                                                           \n",
+              "   2062 │   │   │   for tunnel in CURRENT_TUNNELS:                                                \n",
+              "   2063 │   │   │   │   tunnel.kill()                                                             \n",
+              "   2064                                                                                           \n",
+              "                                                                                                  \n",
+              " /usr/local/lib/python3.10/dist-packages/gradio/networking.py:43 in close                         \n",
+              "                                                                                                  \n",
+              "    40                                                                                        \n",
+              "    41 def close(self):                                                                       \n",
+              "    42 │   │   self.should_exit = True                                                            \n",
+              "  43 │   │   self.thread.join()                                                                 \n",
+              "    44                                                                                            \n",
+              "    45                                                                                            \n",
+              "    46 def get_first_available_port(initial: int, final: int) -> int:                             \n",
+              "                                                                                                  \n",
+              " /usr/lib/python3.10/threading.py:1096 in join                                                    \n",
+              "                                                                                                  \n",
+              "   1093 │   │   │   raise RuntimeError(\"cannot join current thread\")                              \n",
+              "   1094 │   │                                                                                     \n",
+              "   1095 │   │   if timeout is None:                                                               \n",
+              " 1096 │   │   │   self._wait_for_tstate_lock()                                                  \n",
+              "   1097 │   │   else:                                                                             \n",
+              "   1098 │   │   │   # the behavior of a negative timeout isn't documented, but                    \n",
+              "   1099 │   │   │   # historically .join(timeout=x) for x<0 has acted as if timeout=0             \n",
+              "                                                                                                  \n",
+              " /usr/lib/python3.10/threading.py:1116 in _wait_for_tstate_lock                                   \n",
+              "                                                                                                  \n",
+              "   1113 │   │   │   return                                                                        \n",
+              "   1114 │   │                                                                                     \n",
+              "   1115 │   │   try:                                                                              \n",
+              " 1116 │   │   │   if lock.acquire(block, timeout):                                              \n",
+              "   1117 │   │   │   │   lock.release()                                                            \n",
+              "   1118 │   │   │   │   self._stop()                                                              \n",
+              "   1119 │   │   except:                                                                           \n",
+              "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+              "KeyboardInterrupt\n",
+              "
\n" + ], + "text/plain": [ + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/gradio/\u001b[0m\u001b[1;33mblocks.py\u001b[0m:\u001b[94m2058\u001b[0m in \u001b[92mblock_thread\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2055 \u001b[0m\u001b[2;90m│ │ \u001b[0m\u001b[33m\"\"\"Block main thread until interrupted by user.\"\"\"\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2056 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2057 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwhile\u001b[0m \u001b[94mTrue\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2058 \u001b[2m│ │ │ │ \u001b[0mtime.sleep(\u001b[94m0.1\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2059 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m (\u001b[96mKeyboardInterrupt\u001b[0m, \u001b[96mOSError\u001b[0m): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2060 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mKeyboard interruption in main thread... 
closing server.\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2061 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.server.close() \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mKeyboardInterrupt\u001b[0m\n", + "\n", + "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", + "\n", + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m236\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/gradio/\u001b[0m\u001b[1;33mblocks.py\u001b[0m:\u001b[94m1970\u001b[0m in \u001b[92mlaunch\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1967 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1968 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Block main thread if debug==True\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1969 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m debug \u001b[95mor\u001b[0m \u001b[96mint\u001b[0m(os.getenv(\u001b[33m\"\u001b[0m\u001b[33mGRADIO_DEBUG\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94m0\u001b[0m)) == \u001b[94m1\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1970 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.block_thread() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1971 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Block main thread if running in a script to stop script from exiting\u001b[0m 
\u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2m│ │ \u001b[0mis_in_interactive_mode = \u001b[96mbool\u001b[0m(\u001b[96mgetattr\u001b[0m(sys, \u001b[33m\"\u001b[0m\u001b[33mps1\u001b[0m\u001b[33m\"\u001b[0m, sys.flags.interactive)) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1973 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/gradio/\u001b[0m\u001b[1;33mblocks.py\u001b[0m:\u001b[94m2061\u001b[0m in \u001b[92mblock_thread\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2058 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mtime.sleep(\u001b[94m0.1\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2059 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m (\u001b[96mKeyboardInterrupt\u001b[0m, \u001b[96mOSError\u001b[0m): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2060 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mKeyboard interruption in main thread... 
closing server.\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2061 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.server.close() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2062 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mfor\u001b[0m tunnel \u001b[95min\u001b[0m CURRENT_TUNNELS: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2063 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mtunnel.kill() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2064 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/gradio/\u001b[0m\u001b[1;33mnetworking.py\u001b[0m:\u001b[94m43\u001b[0m in \u001b[92mclose\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 40 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 41 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mclose\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 42 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.should_exit = \u001b[94mTrue\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 43 \u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.thread.join() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 44 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 45 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 46 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mget_first_available_port\u001b[0m(initial: \u001b[96mint\u001b[0m, final: \u001b[96mint\u001b[0m) -> \u001b[96mint\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/lib/python3.10/\u001b[0m\u001b[1;33mthreading.py\u001b[0m:\u001b[94m1096\u001b[0m in \u001b[92mjoin\u001b[0m 
\u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1093 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mcannot join current thread\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1094 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1095 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m timeout \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1096 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._wait_for_tstate_lock() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1097 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1098 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# the behavior of a negative timeout isn't documented, but\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1099 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/lib/python3.10/\u001b[0m\u001b[1;33mthreading.py\u001b[0m:\u001b[94m1116\u001b[0m in \u001b[92m_wait_for_tstate_lock\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1113 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1114 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1115 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1116 \u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m lock.acquire(block, timeout): \u001b[31m│\u001b[0m\n", + 
"\u001b[31m│\u001b[0m \u001b[2m1117 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mlock.release() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1118 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[96mself\u001b[0m._stop() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1119 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mKeyboardInterrupt\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import gradio as gr\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline\n", + "from transformers import BitsAndBytesConfig\n", + "import textwrap\n", + "import torch\n", + "\n", + "quant_config = BitsAndBytesConfig(load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_use_double_quant=True,\n", + " )\n", + "\n", + "prompt = 'BEGINNING OF CONVERSATION: USER: \\\n", + "I will provide you with two abstracts, I intend to use the author of the second to review the first. 
Tell me in 15 words why or why not the second author is a good fit to review the first paper.\\n\\\n",
+        "Abstract To Be Reviewed: '\n",
+        "\n",
+        "tokenizer = LlamaTokenizer.from_pretrained(\"samwit/koala-7b\")\n",
+        "\n",
+        "# 4-bit loading is described entirely by `quant_config`. Passing `load_in_4bit` /\n",
+        "# `load_in_8bit` next to `quantization_config` is redundant, and recent\n",
+        "# transformers releases raise an error when both are given.\n",
+        "base_model = LlamaForCausalLM.from_pretrained(\n",
+        "    \"samwit/koala-7b\",\n",
+        "    device_map='auto',\n",
+        "    offload_folder='offloaded',\n",
+        "    quantization_config=quant_config\n",
+        ")\n",
+        "\n",
+        "pipe = pipeline(\n",
+        "    \"text-generation\",\n",
+        "    model=base_model,\n",
+        "    tokenizer=tokenizer,\n",
+        "    max_length=1024,\n",
+        "    temperature=0.7,\n",
+        "    top_p=0.95,\n",
+        "    repetition_penalty=1.15,\n",
+        "    # device=-1\n",
+        ")\n",
+        "\n",
+        "# How many of the best matches are sent to the LLM and listed as reviewers.\n",
+        "N_REVIEWERS = 5\n",
+        "# Every prompt ends with this marker; the model's reply is whatever follows it.\n",
+        "REPLY_MARKER = 'GPT:'\n",
+        "\n",
+        "\n",
+        "def wrap_text_preserve_newlines(text, width=110):\n",
+        "    \"\"\"Wrap each line of `text` to `width` columns, keeping the existing newlines.\"\"\"\n",
+        "    return '\\n'.join(textwrap.fill(line, width=width) for line in text.split('\\n'))\n",
+        "\n",
+        "\n",
+        "def create_miread_embed(sents, bundle):\n",
+        "    \"\"\"Embed `sents` on the CPU with a `(tokenizer, model)` bundle.\n",
+        "\n",
+        "    Returns the last hidden state of the first token of every input, taken\n",
+        "    from `model.bert`, as a CPU tensor.\n",
+        "    \"\"\"\n",
+        "    tokenizer, model = bundle[0], bundle[1]\n",
+        "    model.cpu()\n",
+        "    tokens = tokenizer(sents,\n",
+        "                       max_length=512,\n",
+        "                       padding=True,\n",
+        "                       truncation=True,\n",
+        "                       return_tensors=\"pt\"\n",
+        "                       )\n",
+        "    tokens = tokens.to(torch.device('cpu'))\n",
+        "    with torch.no_grad():\n",
+        "        out = model.bert(**tokens)\n",
+        "    feature = out.last_hidden_state[:, 0, :]\n",
+        "    return feature.cpu()\n",
+        "\n",
+        "\n",
+        "def get_matches(query, k):\n",
+        "    \"\"\"Return the `k` documents of `vecdb` closest to `query`, each with its score.\"\"\"\n",
+        "    return vecdb.similarity_search_with_score(query, k=k)\n",
+        "\n",
+        "\n",
+        "def _extract_reply(generated, prompt_text):\n",
+        "    \"\"\"Return the model's first reply from `generated` (prompt + continuation).\n",
+        "\n",
+        "    The prompt itself may contain `REPLY_MARKER` (e.g. an abstract mentioning\n",
+        "    'ChatGPT: ...'), so the reply is the segment that follows as many markers\n",
+        "    as the prompt holds. Returns '' when the marker is missing from the output.\n",
+        "    \"\"\"\n",
+        "    n_markers = prompt_text.count(REPLY_MARKER)\n",
+        "    parts = generated.split(REPLY_MARKER)\n",
+        "    return parts[n_markers] if len(parts) > n_markers else ''\n",
+        "\n",
+        "\n",
+        "def inference(query,k=30,mode=''):\n",
+        "    \"\"\"Look up the `k` nearest abstracts to `query` and build the result tables.\n",
+        "\n",
+        "    Args:\n",
+        "        query: Abstract (optionally `title[SEP]abstract`) to search for.\n",
+        "        k: Number of matches requested from the vector store.\n",
+        "        mode: Falsy to build the abstract/journal/author tables; any truthy\n",
+        "            value to ask the LLM about the top `N_REVIEWERS` matches instead.\n",
+        "\n",
+        "    Returns:\n",
+        "        With `mode` set, a single `gr.Dataframe` update whose rows are\n",
+        "        `[no, score, author, reasoning, title, link, date]`; otherwise the\n",
+        "        list `[a_output, j_output, n_output]` of updates.\n",
+        "    \"\"\"\n",
+        "    matches = get_matches(query, k)\n",
+        "    raw_scores = [round(match[1].item(), 3) for match in matches]\n",
+        "    # `default` keeps an empty result set from raising in min()/max().\n",
+        "    min_score = min(raw_scores, default=0)\n",
+        "    max_score = max(raw_scores, default=0)\n",
+        "\n",
+        "    def normaliser(x):\n",
+        "        # The smallest raw score maps to 1.0. A maximum of 0 would divide by\n",
+        "        # zero; in that case every match is treated as a best match.\n",
+        "        if max_score == 0:\n",
+        "            return 1.0\n",
+        "        return round(1 - (x - min_score) / max_score, 3)\n",
+        "\n",
+        "    j_bucket = {}\n",
+        "    n_table = []\n",
+        "    a_table = []\n",
+        "    r_table = []\n",
+        "    for rank, (doc, raw_score) in enumerate(matches, start=1):\n",
+        "        score = normaliser(round(raw_score.item(), 3))\n",
+        "        meta = doc.metadata\n",
+        "        title = meta['title']\n",
+        "        # Tolerate records without authors instead of raising on `[0]`.\n",
+        "        authors = meta.get('authors') or ['None']\n",
+        "        author = authors[0]\n",
+        "        date = meta.get('date', 'None')\n",
+        "        link = meta.get('link', 'None')\n",
+        "        submitter = meta.get('submitter', 'None')\n",
+        "        journal = meta.get('journal', 'None')\n",
+        "        abstract = meta.get('abstract', '')\n",
+        "\n",
+        "        # A journal's score is the sum over all of its matched papers.\n",
+        "        j_bucket[journal] = j_bucket.get(journal, 0) + score\n",
+        "\n",
+        "        # Rows for the Authors and Abstracts tabs.\n",
+        "        n_table.append([rank, score, author, title, link, date])\n",
+        "        a_table.append([rank, title, author, submitter, journal, date, link, score])\n",
+        "        # Column 3 carries the abstract until the LLM reasoning replaces it.\n",
+        "        r_table.append([rank, score, author, abstract, title, link, date])\n",
+        "\n",
+        "    if mode:\n",
+        "        top = r_table[:N_REVIEWERS]\n",
+        "        prompts = [prompt + query + '\\n Candidate Abstract: ' + row[3] + '\\n ' + REPLY_MARKER\n",
+        "                   for row in top]\n",
+        "        outputs = pipe(prompts) if prompts else []\n",
+        "        reasons = [_extract_reply(out[0]['generated_text'], text)\n",
+        "                   for out, text in zip(outputs, prompts)]\n",
+        "        reviewers = [[row[0], row[1], row[2], reason, row[4], row[5], row[6]]\n",
+        "                     for row, reason in zip(top, reasons)]\n",
+        "        return gr.Dataframe.update(value=reviewers, visible=True)\n",
+        "\n",
+        "    ranked = sorted(j_bucket.items(), key=lambda item: item[1], reverse=True)\n",
+        "    j_table = [[pos, name, total] for pos, (name, total) in enumerate(ranked, start=1)]\n",
+        "    j_output = gr.Dataframe.update(value=j_table, visible=True)\n",
+        "    n_output = gr.Dataframe.update(value=n_table, visible=True)\n",
+        "    a_output = gr.Dataframe.update(value=a_table, visible=True)\n",
+        "    return [a_output, j_output, n_output]\n",
+        "\n",
+        "\n",
+        "def k_inference(query,k):\n",
+        "    \"\"\"Run `inference` in LLM mode and return the reviewer table update.\"\"\"\n",
+        "    return inference(query, k, 'koala')\n",
+        "\n",
+        "\n",
+        "model_name = \"biodatlab/MIReAD-Neuro\"\n",
+        "model_kwargs = {'device': 'cpu'}\n",
+        "encode_kwargs = {'normalize_embeddings': False}\n",
+        "faiss_embedder = HuggingFaceEmbeddings(\n",
+        "    model_name=model_name,\n",
+        "    model_kwargs=model_kwargs,\n",
+        "    encode_kwargs=encode_kwargs\n",
+        ")\n",
+        "\n",
+        "# NOTE(review): the folder holds a pickle (index.pkl) next to the FAISS index;\n",
+        "# presumably load_local unpickles it, so only load folders from a trusted source.\n",
+        "vecdb = FAISS.load_local(\"nbdt_data\", faiss_embedder)\n",
+        "\n",
+        "\n",
+        "with gr.Blocks(theme=gr.themes.Soft()) as demo:\n",
+        "    gr.Markdown(\"# NBDT Recommendation Engine for Editors\")\n",
+        "    gr.Markdown(\n",
+        "        \"NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journals recommendation built for NBDT journal editors. \"\n",
+        "        \"It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract. \"\n",
+        "        \"To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \\\"Find Matches\\\". \"\n",
+        "        \"Then, you can hover to authors/abstracts/journals tab to find a suggested list. \"\n",
+        "        \"The data in our current demo is selected from 2018 to 2022. We will update the data monthly for up-to-date publications.\"\n",
+        "    )\n",
+        "\n",
+        "    abst = gr.Textbox(label=\"Abstract\",lines=10)\n",
+        "\n",
+        "    k = gr.Slider(1,100,step=1,value=50,label=\"Number of matches to consider\")\n",
+        "\n",
+        "    action_btn = gr.Button(value=\"Find Matches\")\n",
+        "\n",
+        "    with gr.Tab(\"Authors\"):\n",
+        "        n_output = gr.Dataframe(\n",
+        "            headers=['No.','Score','Name','Title','Link','Date'],\n",
+        "            datatype=['number','number','str','str','str','str'],\n",
+        "            col_count=(6, \"fixed\"),\n",
+        "            wrap=True,\n",
+        "            visible=False\n",
+        "        )\n",
+        "    with gr.Tab(\"Abstracts\"):\n",
+        "        a_output = gr.Dataframe(\n",
+        "            headers=['No.','Title','Author','Corresponding Author','Journal','Date','Link','Score'],\n",
+        "            datatype=['number','str','str','str','str','str','str','number'],\n",
+        "            col_count=(8,\"fixed\"),\n",
+        "            wrap=True,\n",
+        "            visible=False\n",
+        "        )\n",
+        "    with gr.Tab(\"Journals\"):\n",
+        "        j_output = gr.Dataframe(\n",
+        "            headers=['No.','Name','Score'],\n",
+        "            datatype=['number','str','number'],\n",
+        "            col_count=(3, \"fixed\"),\n",
+        "            wrap=True,\n",
+        "            visible=False\n",
+        "        )\n",
+        "\n",
+        "    llm_btn = gr.Button(value=\"Listen to Koala's advice\")\n",
+        "    with gr.Tab(\"Reviewers (New)\"):\n",
+        "        # Header order follows the rows built by `inference`:\n",
+        "        # [no, score, author, reasoning, title, link, date].\n",
+        "        r_output = gr.Dataframe(\n",
+        "            headers=['No.','Score','Name','Reasoning','Title','Link','Date'],\n",
+        "            datatype=['number','number','str','str','str','str','str'],\n",
+        "            col_count=(7,\"fixed\"),\n",
+        "            wrap=True,\n",
+        "            visible=True\n",
+        "        )\n",
+        "\n",
+        "    action_btn.click(fn=inference,\n",
+        "                     inputs=[\n",
+        "                         abst,\n",
+        "                         k,\n",
+        "                         # modes,\n",
+        "                     ],\n",
+        "                     outputs=[a_output,j_output,n_output],\n",
+        "                     api_name=\"neurojane\")\n",
+        "    llm_btn.click(fn=k_inference,\n",
+        "                  inputs=[\n",
+        "                      abst,\n",
+        "                      k,\n",
+        "                  ],\n",
+        "                  outputs = [r_output],\n",
+        "                  api_name=\"koala\")\n",
+        "\n",
+        "demo.launch(debug=True,share=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 30,
+      "metadata": {
+        "colab": {
+
"base_uri": "https://localhost:8080/" + }, + "id": "UQhTTPJGBdwo", + "outputId": "93a00886-db64-4b6a-88b5-51f2d88c58e5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_rows': None,\n", + " 'max_cols': None,\n", + " 'label': None,\n", + " 'show_label': None,\n", + " 'scale': None,\n", + " 'min_width': None,\n", + " 'interactive': None,\n", + " 'visible': True,\n", + " 'value': [[1,\n", + " 1.0,\n", + " 'philipp berens',\n", + " \" Both authors have expertise in neuroscience and machine learning, making them suitable candidates to review each other's work. The first author's research focuses on understanding the mechanisms underlying learning and plasticity, while the second author's work involves developing new algorithms for spike inference from calcium signals. These areas of expertise complement each other, as the first author's insights into the nature of plasticity can inform the development of more effective spike inference algorithms.\",\n", + " 'Supervised learning sets benchmark for robust spike detection from calcium imaging signals',\n", + " 'https://www.semanticscholar.org/paper/786f9831900ca34d8200291e5a72d2f14ad9b336',\n", + " 2015],\n", + " [2,\n", + " 0.974,\n", + " 'daniel butts',\n", + " \" Both authors have expertise in neuroscience and machine learning, making them suitable candidates to review each other's work. The first author's research focuses on understanding the mechanisms underlying learning and plasticity, while the second author's work uses information theory to analyze neural ensembles. 
While both authors may have different perspectives and approaches, they share a common goal of advancing our understanding of the brain and its functions.\",\n", + " 'Information Theoretic Measure of Stimulus Significance is Not Confounded by Stimulus Correlations or Non-Linearities',\n", + " 'https://www.semanticscholar.org/paper/f6767047e84872f4cb09f94c26b1d21e3ef9d55d',\n", + " 2002],\n", + " [3,\n", + " 0.952,\n", + " 'alex williams',\n", + " ' The candidate abstract provides a novel approach to understanding neural sequence data using a point process model that captures both the timing and occurrence of neural activity. This approach addresses some of the limitations of previous methods, including the need for spike times to be discretized and the use of a sub-optimal least-squares criterion. Additionally, the proposed method allows for the incorporation of learned time warping parameters, which have been shown to be effective in modeling variable sequence lengths. Overall, this approach has potential applications in understanding various aspects of neural processing, including working memory, motor production, and learning.',\n", + " 'Point process models for sequence detection in high-dimensional neural spike trains',\n", + " 'https://www.semanticscholar.org/paper/2942ed449bc35f06170342b61ebb6fc2dd6183c9',\n", + " 2020],\n", + " [4,\n", + " 0.931,\n", + " 'philipp berens',\n", + " \" Both authors have expertise in neuroscience and machine learning, making them suitable candidates to review each other's work. 
The first author's research focuses on understanding the mechanisms underlying learning and plasticity, while the second author's work involves developing new algorithms for spike rate inference in calcium imaging studies.\",\n", + " 'Supervised learning sets benchmark for robust spike rate inference from calcium imaging signals',\n", + " 'https://www.semanticscholar.org/paper/ff3c6755a17026d234978095dcc36a43b551ac89',\n", + " 2015],\n", + " [5,\n", + " 0.924,\n", + " 'konrad kording',\n", + " \" Both authors have expertise in the field of artificial intelligence and machine learning, specifically in the area of deep learning. They also have experience in theoretical neuroscience and cognitive psychology. However, the candidate's research focuses more specifically on the topic of perceptual processing and the use of adversarial algorithms as a potential mechanism for learning in the brain. On the other hand, the author of the original abstract reviews the existing literature on plasticity-related mechanisms and argues that gradients are a key concept for understanding the process of neuronal plasticity. While both authors have valuable insights into the topic, it may be beneficial to consult with experts in both fields to gain a comprehensive understanding of the subject matter.\",\n", + " 'Learning to infer in recurrent biological networks',\n", + " 'https://www.semanticscholar.org/paper/482085ff18eaefb262b8217797e99a2cca339938',\n", + " 2020]],\n", + " '__type__': 'update'}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k_inference('The experimental study of learning and plasticity has always been driven by an implicit question: how can physiological changes be adaptive and improve performance? For example, in Hebbian plasticity only synapses from presynaptic neurons that were active are changed, avoiding useless changes. 
Similarly, in dopamine-gated learning synapse changes depend on reward or lack thereof and do not change when everything is predictable. Within machine learning we can make the question of which changes are adaptive concrete: performance improves when changes correlate with the gradient of an objective function quantifying performance. This result is general for any system that improves through small changes. As such, physiology has always implicitly been seeking mechanisms that allow the brain to approximate gradients. Coming from this perspective we review the existing literature on plasticity-related mechanisms, and we show how these mechanisms relate to gradient estimation. We argue that gradients are a unifying idea to explain the many facets of neuronal plasticity.',k=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Nxi4msI_aIl2" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "05a91c8da3054845bff97967fb17372d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "435b61fe3135482587f1dcc65306e114": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56796deef330429d9a9fec4432ca54d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_05a91c8da3054845bff97967fb17372d", + "placeholder": "​", + "style": "IPY_MODEL_64906f3cba5c433baae65ecaf06612af", + "value": " 14/14 [02:48<00:00, 11.14s/it]" + } + }, + "64906f3cba5c433baae65ecaf06612af": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6885776cbe9449f8864676e1e83b4c19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "68db08a240724f9fb38f464db4c73b84": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d83e0c6ff5db4a29a05fed5b72773be7", + "placeholder": "​", + "style": "IPY_MODEL_6885776cbe9449f8864676e1e83b4c19", + "value": "Loading checkpoint shards: 100%" + } + }, + 
"7530c375556442249c0f19ea435a629f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7da5f39db40f465caea5dd36ce881237": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f32bc75b18d140aaba55de24947dbb3f", + "max": 14, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7530c375556442249c0f19ea435a629f", + "value": 14 + } + }, + "b427d9e2d2ee4f828ee896a03e9f38eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_68db08a240724f9fb38f464db4c73b84", + "IPY_MODEL_7da5f39db40f465caea5dd36ce881237", + "IPY_MODEL_56796deef330429d9a9fec4432ca54d4" + ], + "layout": "IPY_MODEL_435b61fe3135482587f1dcc65306e114" + } + }, + "d83e0c6ff5db4a29a05fed5b72773be7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f32bc75b18d140aaba55de24947dbb3f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": 
null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}