diff --git "a/pdf_chat.ipynb" "b/pdf_chat.ipynb" new file mode 100644--- /dev/null +++ "b/pdf_chat.ipynb" @@ -0,0 +1,1007 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "zCspd2zaQWeg", + "outputId": "855b92eb-3795-4b9f-d2ae-35389eddd487" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain\n", + " Downloading langchain-0.1.4-py3-none-any.whl (803 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m803.6/803.6 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.24)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.1)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n", + " Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)\n", + "Collecting jsonpatch<2.0,>=1.33 (from langchain)\n", + " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Collecting langchain-community<0.1,>=0.0.14 (from langchain)\n", + " Downloading langchain_community-0.0.16-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain-core<0.2,>=0.1.16 (from langchain)\n", + " Downloading langchain_core-0.1.16-py3-none-any.whl (230 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m230.3/230.3 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langsmith<0.1,>=0.0.83 (from langchain)\n", + " Downloading langsmith-0.0.83-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.23.5)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.10.14)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading marshmallow-3.20.2-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)\n", + " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", + "Requirement already satisfied: anyio<5,>=3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2,>=0.1.16->langchain) (3.7.1)\n", + "Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2,>=0.1.16->langchain) (23.2)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.5.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2023.11.17)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.16->langchain) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.16->langchain) (1.2.0)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: mypy-extensions, marshmallow, jsonpointer, typing-inspect, langsmith, jsonpatch, langchain-core, dataclasses-json, langchain-community, langchain\n", + "Successfully installed dataclasses-json-0.6.3 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.4 langchain-community-0.0.16 langchain-core-0.1.16 langsmith-0.0.83 marshmallow-3.20.2 mypy-extensions-1.0.0 typing-inspect-0.9.0\n", + "Collecting unstructured\n", + " Downloading unstructured-0.12.2-py3-none-any.whl (1.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured) (5.2.0)\n", + "Collecting filetype (from unstructured)\n", + " Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", + "Collecting python-magic (from unstructured)\n", + " Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from unstructured) (4.9.4)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured) (3.8.1)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured) (0.9.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from unstructured) (2.31.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured) (4.11.2)\n", + "Collecting emoji (from unstructured)\n", + " Downloading emoji-2.10.0-py2.py3-none-any.whl (457 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m457.9/457.9 kB\u001b[0m \u001b[31m45.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from unstructured) (0.6.3)\n", + "Collecting python-iso639 (from unstructured)\n", + " Downloading python_iso639-2024.1.2-py3-none-any.whl (274 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.7/274.7 kB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langdetect (from unstructured)\n", + " Downloading langdetect-1.0.9.tar.gz (981 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unstructured) (1.23.5)\n", + "Collecting rapidfuzz (from unstructured)\n", + " Downloading rapidfuzz-3.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m42.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting backoff (from unstructured)\n", + " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured) (4.5.0)\n", + "Collecting unstructured-client>=0.15.1 (from unstructured)\n", + " Downloading unstructured_client-0.16.0-py3-none-any.whl (20 kB)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured) (1.14.1)\n", + "Requirement already satisfied: certifi>=2023.7.22 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (2023.11.17)\n", + "Requirement already satisfied: charset-normalizer>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (3.3.2)\n", + "Requirement already satisfied: idna>=3.4 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (3.6)\n", + "Collecting jsonpath-python>=1.0.6 (from unstructured-client>=0.15.1->unstructured)\n", + " Downloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)\n", + "Requirement already satisfied: marshmallow>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (3.20.2)\n", + "Requirement already satisfied: mypy-extensions>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (1.0.0)\n", + "Requirement already satisfied: packaging>=23.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (23.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (2.8.2)\n", + "Requirement already satisfied: six>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (1.16.0)\n", + "Requirement already satisfied: typing-inspect>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (0.9.0)\n", + "Collecting typing-extensions (from unstructured)\n", + " Downloading typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n", + "Requirement already satisfied: urllib3>=1.26.18 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured) (2.0.7)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured) (2.5)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured) (2023.6.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured) (4.66.1)\n", + "Building wheels for collected packages: langdetect\n", + " Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993225 sha256=ec290325f5923d43eb6659f704fd7b54331ebbdb4651c9729224e1ce0b708843\n", + " Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n", + "Successfully built langdetect\n", + "Installing collected packages: filetype, typing-extensions, rapidfuzz, python-magic, python-iso639, langdetect, jsonpath-python, emoji, backoff, unstructured-client, unstructured\n", + " Attempting uninstall: typing-extensions\n", + " Found existing installation: typing_extensions 4.5.0\n", + " Uninstalling typing_extensions-4.5.0:\n", + " Successfully uninstalled typing_extensions-4.5.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed backoff-2.2.1 emoji-2.10.0 filetype-1.2.0 jsonpath-python-1.0.6 langdetect-1.0.9 python-iso639-2024.1.2 python-magic-0.4.27 rapidfuzz-3.6.1 typing-extensions-4.9.0 unstructured-0.12.2 unstructured-client-0.16.0\n", + "Collecting openai\n", + " Downloading openai-1.10.0-py3-none-any.whl (225 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.1/225.1 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", + "Collecting httpx<1,>=0.23.0 (from openai)\n", + " Downloading httpx-0.26.0-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.10.14)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.0)\n", + "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai) (4.9.0)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (3.6)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2023.11.17)\n", + "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n", + " Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: h11, httpcore, httpx, openai\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "llmx 0.0.15a0 requires cohere, which is not installed.\n", + "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed h11-0.14.0 httpcore-1.0.2 httpx-0.26.0 openai-1.10.0\n", + "Collecting pybind11\n", + " Downloading pybind11-2.11.1-py3-none-any.whl (227 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.7/227.7 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pybind11\n", + "Successfully installed pybind11-2.11.1\n", + "Collecting chromadb\n", + " Downloading chromadb-0.4.22-py3-none-any.whl (509 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.0/509.0 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.0.3)\n", + "Requirement already satisfied: requests>=2.28 in /usr/local/lib/python3.10/dist-packages (from chromadb) (2.31.0)\n", + "Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.10.14)\n", + "Collecting chroma-hnswlib==0.7.3 (from chromadb)\n", + " Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m51.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fastapi>=0.95.2 (from chromadb)\n", + " Downloading fastapi-0.109.0-py3-none-any.whl (92 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)\n", + " Downloading uvicorn-0.27.0-py3-none-any.whl (60 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.6/60.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.22.5 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.23.5)\n", + "Collecting posthog>=2.4.0 (from chromadb)\n", + " Downloading posthog-3.3.3-py2.py3-none-any.whl (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.7/40.7 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.9.0)\n", + "Collecting pulsar-client>=3.1.0 (from chromadb)\n", + " Downloading pulsar_client-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m64.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting onnxruntime>=1.14.1 (from chromadb)\n", + " Downloading onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m89.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-api>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_api-1.22.0-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.9/57.9 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_exporter_otlp_proto_grpc-1.22.0-py3-none-any.whl (18 kB)\n", + "Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)\n", + " Downloading opentelemetry_instrumentation_fastapi-0.43b0-py3-none-any.whl (11 kB)\n", + "Collecting opentelemetry-sdk>=1.2.0 (from chromadb)\n", + " Downloading opentelemetry_sdk-1.22.0-py3-none-any.whl (105 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.6/105.6 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.15.1)\n", + "Collecting pypika>=0.48.9 (from chromadb)\n", + " Downloading PyPika-0.48.9.tar.gz (67 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.66.1)\n", + "Collecting overrides>=7.3.1 (from chromadb)\n", + " Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.1.1)\n", + "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.60.0)\n", + "Collecting bcrypt>=4.0.1 (from chromadb)\n", + " Downloading bcrypt-4.1.2-cp39-abi3-manylinux_2_28_x86_64.whl (698 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m698.9/698.9 kB\u001b[0m \u001b[31m57.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.9.0)\n", + "Collecting kubernetes>=28.1.0 (from chromadb)\n", + " Downloading kubernetes-29.0.0-py2.py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m75.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (8.2.3)\n", + "Requirement already satisfied: PyYAML>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.0.1)\n", + "Collecting mmh3>=4.0.1 (from chromadb)\n", + " Downloading mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (67 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging>=19.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (23.2)\n", + "Requirement already satisfied: pyproject_hooks in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n", + "Collecting starlette<0.36.0,>=0.35.0 (from fastapi>=0.95.2->chromadb)\n", + " Downloading starlette-0.35.1-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2023.11.17)\n", + "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.17.3)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.7.0)\n", + "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.3.1)\n", + "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n", + "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.0.7)\n", + "Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n", + " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Collecting deprecated>=1.2.6 (from opentelemetry-api>=1.2.0->chromadb)\n", + " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", + "Collecting importlib-metadata<7.0,>=6.0 (from opentelemetry-api>=1.2.0->chromadb)\n", + " Downloading importlib_metadata-6.11.0-py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: backoff<3.0.0,>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (2.2.1)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.62.0)\n", + "Collecting opentelemetry-exporter-otlp-proto-common==1.22.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n", + " Downloading opentelemetry_exporter_otlp_proto_common-1.22.0-py3-none-any.whl (17 kB)\n", + "Collecting opentelemetry-proto==1.22.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n", + " Downloading opentelemetry_proto-1.22.0-py3-none-any.whl (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting opentelemetry-instrumentation-asgi==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_instrumentation_asgi-0.43b0-py3-none-any.whl (14 kB)\n", + "Collecting opentelemetry-instrumentation==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_instrumentation-0.43b0-py3-none-any.whl (28 kB)\n", + "Collecting opentelemetry-semantic-conventions==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_semantic_conventions-0.43b0-py3-none-any.whl (36 kB)\n", + "Collecting opentelemetry-util-http==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading opentelemetry_util_http-0.43b0-py3-none-any.whl (6.9 kB)\n", + "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n", + "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n", + "Collecting asgiref~=3.0 (from opentelemetry-instrumentation-asgi==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb)\n", + " Downloading asgiref-3.7.2-py3-none-any.whl (24 kB)\n", + "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n", + " Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.6)\n", + "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.2->chromadb) (0.20.3)\n", + "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (8.1.7)\n", + "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", + "Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m89.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading watchfiles-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m70.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (2023.6.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.17.0)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.36.0,>=0.35.0->fastapi>=0.95.2->chromadb) (3.7.1)\n", + "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n", + " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi>=0.95.2->chromadb) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi>=0.95.2->chromadb) (1.2.0)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.5.1)\n", + "Building wheels for collected packages: pypika\n", + " Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pypika: filename=PyPika-0.48.9-py2.py3-none-any.whl size=53723 sha256=4b6d461cff9cca4beff72e78a82aabcb0df72fefdda00824e3cee50951eb1051\n", + " Stored in directory: /root/.cache/pip/wheels/e1/26/51/d0bffb3d2fd82256676d7ad3003faea3bd6dddc9577af665f4\n", + "Successfully built pypika\n", + "Installing collected packages: pypika, monotonic, mmh3, websockets, uvloop, uvicorn, python-dotenv, pulsar-client, overrides, opentelemetry-util-http, opentelemetry-semantic-conventions, opentelemetry-proto, importlib-metadata, humanfriendly, httptools, deprecated, chroma-hnswlib, bcrypt, asgiref, watchfiles, starlette, posthog, opentelemetry-exporter-otlp-proto-common, opentelemetry-api, coloredlogs, opentelemetry-sdk, opentelemetry-instrumentation, onnxruntime, kubernetes, fastapi, opentelemetry-instrumentation-asgi, opentelemetry-exporter-otlp-proto-grpc, opentelemetry-instrumentation-fastapi, chromadb\n", + " Attempting uninstall: importlib-metadata\n", + " Found existing installation: importlib-metadata 7.0.1\n", + " Uninstalling importlib-metadata-7.0.1:\n", + " Successfully uninstalled importlib-metadata-7.0.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "lida 0.0.10 requires kaleido, which is not installed.\n", + "lida 0.0.10 requires python-multipart, which is not installed.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed asgiref-3.7.2 bcrypt-4.1.2 chroma-hnswlib-0.7.3 chromadb-0.4.22 coloredlogs-15.0.1 deprecated-1.2.14 fastapi-0.109.0 httptools-0.6.1 humanfriendly-10.0 importlib-metadata-6.11.0 kubernetes-29.0.0 mmh3-4.1.0 monotonic-1.6 onnxruntime-1.16.3 opentelemetry-api-1.22.0 opentelemetry-exporter-otlp-proto-common-1.22.0 opentelemetry-exporter-otlp-proto-grpc-1.22.0 opentelemetry-instrumentation-0.43b0 opentelemetry-instrumentation-asgi-0.43b0 opentelemetry-instrumentation-fastapi-0.43b0 opentelemetry-proto-1.22.0 opentelemetry-sdk-1.22.0 opentelemetry-semantic-conventions-0.43b0 opentelemetry-util-http-0.43b0 overrides-7.7.0 posthog-3.3.3 pulsar-client-3.4.0 pypika-0.48.9 python-dotenv-1.0.1 starlette-0.35.1 uvicorn-0.27.0 uvloop-0.19.0 watchfiles-0.21.0 websockets-12.0\n", + "Requirement already satisfied: Cython in /usr/local/lib/python3.10/dist-packages (3.0.8)\n", + "Collecting pycocotools\n", + " Cloning https://github.com/philferriere/cocoapi.git to /tmp/pip-install-lf__mh57/pycocotools_383e0546ba2240b1a4965ce109c7d45e\n", + " Running command git clone --filter=blob:none --quiet https://github.com/philferriere/cocoapi.git /tmp/pip-install-lf__mh57/pycocotools_383e0546ba2240b1a4965ce109c7d45e\n", + " Resolved https://github.com/philferriere/cocoapi.git to commit 2929bd2ef6b451054755dfd7ceb09278f935f7ad\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Building wheels for collected packages: pycocotools\n", + " Building wheel for pycocotools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pycocotools: filename=pycocotools-2.0-cp310-cp310-linux_x86_64.whl size=375462 sha256=40199694fc1dfba426e7a2c4c7e28545b29c4bbdd21a0f2874784552f38134cd\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-y6k2ky1r/wheels/8e/1a/5f/0c984ae3d65a4d7da4ba3407d2a8d8c8e85dd55f84d4936f04\n", + "Successfully built pycocotools\n", + "Installing collected packages: pycocotools\n", + " Attempting uninstall: pycocotools\n", + " Found existing installation: pycocotools 2.0.7\n", + " Uninstalling pycocotools-2.0.7:\n", + " Successfully uninstalled pycocotools-2.0.7\n", + "Successfully installed pycocotools-2.0\n", + "Requirement already satisfied: unstructured[local-inference] in /usr/local/lib/python3.10/dist-packages (0.12.2)\n", + "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (5.2.0)\n", + "Requirement already satisfied: filetype in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.2.0)\n", + "Requirement already satisfied: python-magic in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (0.4.27)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.9.4)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.8.1)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (0.9.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2.31.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.11.2)\n", + "Requirement already satisfied: emoji in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2.10.0)\n", + "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (0.6.3)\n", + "Requirement already satisfied: python-iso639 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2024.1.2)\n", + "Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.0.9)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.23.5)\n", + "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.6.1)\n", + "Requirement already satisfied: backoff in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2.2.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.9.0)\n", + "Requirement already satisfied: unstructured-client>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (0.16.0)\n", + "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.14.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.5.3)\n", + "Collecting msg-parser (from unstructured[local-inference])\n", + " Downloading msg_parser-1.2.0-py2.py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pikepdf (from unstructured[local-inference])\n", + " Downloading pikepdf-8.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m48.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pypdf (from unstructured[local-inference])\n", + " Downloading pypdf-4.0.0-py3-none-any.whl (283 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.9/283.9 kB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting onnx (from unstructured[local-inference])\n", + " Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.5.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.2.1)\n", + "Requirement already satisfied: openpyxl in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.1.2)\n", + "Collecting unstructured-inference==0.7.23 (from unstructured[local-inference])\n", + " Downloading unstructured_inference-0.7.23-py3-none-any.whl (60 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pypandoc (from unstructured[local-inference])\n", + " Downloading pypandoc-1.12-py3-none-any.whl (20 kB)\n", + "Collecting unstructured.pytesseract>=0.3.12 (from unstructured[local-inference])\n", + " Downloading unstructured.pytesseract-0.3.12-py3-none-any.whl (14 kB)\n", + "Collecting python-pptx<=0.6.23 (from unstructured[local-inference])\n", + " Downloading python_pptx-0.6.23-py3-none-any.whl (471 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m471.6/471.6 kB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pdf2image (from unstructured[local-inference])\n", + " Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: xlrd in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2.0.1)\n", + "Collecting python-docx (from unstructured[local-inference])\n", + " Downloading python_docx-1.1.0-py3-none-any.whl (239 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.6/239.6 kB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pdfminer.six (from unstructured[local-inference])\n", + " Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m75.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting layoutparser[layoutmodels,tesseract] (from unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading layoutparser-0.3.4-py3-none-any.whl (19.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m59.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting python-multipart (from unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.23->unstructured[local-inference]) (0.20.3)\n", + "Requirement already satisfied: opencv-python!=4.7.0.68 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.23->unstructured[local-inference]) (4.8.0.76)\n", + "Collecting onnxruntime<1.16 (from unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m91.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.23->unstructured[local-inference]) (4.35.2)\n", + "Requirement already satisfied: Pillow>=3.3.2 in /usr/local/lib/python3.10/dist-packages (from python-pptx<=0.6.23->unstructured[local-inference]) (9.4.0)\n", + "Collecting XlsxWriter>=0.5.7 (from python-pptx<=0.6.23->unstructured[local-inference])\n", + " Downloading XlsxWriter-3.1.9-py3-none-any.whl (154 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.8/154.8 kB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: certifi>=2023.7.22 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (2023.11.17)\n", + "Requirement already satisfied: charset-normalizer>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (3.3.2)\n", + "Requirement already satisfied: idna>=3.4 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (3.6)\n", + "Requirement already satisfied: jsonpath-python>=1.0.6 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (1.0.6)\n", + "Requirement already satisfied: marshmallow>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (3.20.2)\n", + "Requirement already satisfied: mypy-extensions>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (1.0.0)\n", + "Requirement already satisfied: packaging>=23.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (23.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (2.8.2)\n", + "Requirement already satisfied: six>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (1.16.0)\n", + "Requirement already satisfied: typing-inspect>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (0.9.0)\n", + "Requirement already satisfied: urllib3>=1.26.18 in /usr/local/lib/python3.10/dist-packages (from unstructured-client>=0.15.1->unstructured[local-inference]) (2.0.7)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured[local-inference]) (2.5)\n", + "Collecting olefile>=0.46 (from msg-parser->unstructured[local-inference])\n", + " Downloading olefile-0.47-py2.py3-none-any.whl (114 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.6/114.6 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (2023.6.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (4.66.1)\n", + "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx->unstructured[local-inference]) (3.20.3)\n", + "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl->unstructured[local-inference]) (1.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->unstructured[local-inference]) (2023.3.post1)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->unstructured[local-inference]) (42.0.0)\n", + "Collecting Pillow>=3.3.2 (from python-pptx<=0.6.23->unstructured[local-inference])\n", + " Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m71.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: Deprecated in /usr/local/lib/python3.10/dist-packages (from pikepdf->unstructured[local-inference]) (1.2.14)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[local-inference]) (1.16.0)\n", + "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime<1.16->unstructured-inference==0.7.23->unstructured[local-inference]) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<1.16->unstructured-inference==0.7.23->unstructured[local-inference]) (23.5.26)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<1.16->unstructured-inference==0.7.23->unstructured[local-inference]) (1.12)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.23->unstructured[local-inference]) (3.13.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.23->unstructured[local-inference]) (6.0.1)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.23->unstructured[local-inference]) (0.15.1)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.23->unstructured[local-inference]) (0.4.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.23->unstructured[local-inference]) (2023.6.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (1.11.4)\n", + "Collecting iopath (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading iopath-0.1.10.tar.gz (42 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting pdfplumber (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading pdfplumber-0.10.3-py3-none-any.whl (48 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.0/49.0 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (2.1.0+cu121)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (0.16.0+cu121)\n", + "Collecting effdet (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading effdet-0.4.1-py3-none-any.whl (112 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.5/112.5 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pytesseract (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[local-inference]) (2.21)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->onnxruntime<1.16->unstructured-inference==0.7.23->unstructured[local-inference]) (10.0)\n", + "Collecting timm>=0.9.2 (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m78.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pycocotools>=2.0.2 (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (426 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m426.2/426.2 kB\u001b[0m \u001b[31m39.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting omegaconf>=2.0 (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (3.1.3)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (2.1.0)\n", + "Collecting portalocker (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n", + "Collecting pdfminer.six (from unstructured[local-inference])\n", + " Downloading pdfminer.six-20221105-py3-none-any.whl (5.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pypdfium2>=4.18.0 (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading pypdfium2-4.26.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m80.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<1.16->unstructured-inference==0.7.23->unstructured[local-inference]) (1.3.0)\n", + "Collecting antlr4-python3-runtime==4.9.* (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference])\n", + " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: matplotlib>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (3.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (2.1.4)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (4.47.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (1.4.5)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.23->unstructured[local-inference]) (3.1.1)\n", + "Building wheels for collected packages: iopath, antlr4-python3-runtime\n", + " Building wheel for iopath (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31532 sha256=7f345e0a5a40b2aa6832b0321a7bf04960e8191952a8ead883ace7dcfd26d224\n", + " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n", + " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=62ae158a406d57b14167667d4a0afc4c0b07f6ce6f647bb5054e1bab6c78ae96\n", + " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n", + "Successfully built iopath antlr4-python3-runtime\n", + "Installing collected packages: antlr4-python3-runtime, XlsxWriter, python-multipart, python-docx, pypdfium2, pypdf, pypandoc, portalocker, Pillow, onnx, omegaconf, olefile, unstructured.pytesseract, python-pptx, pytesseract, pikepdf, pdf2image, onnxruntime, msg-parser, iopath, pycocotools, pdfminer.six, timm, pdfplumber, layoutparser, effdet, unstructured-inference\n", + " Attempting uninstall: Pillow\n", + " Found existing installation: Pillow 9.4.0\n", + " Uninstalling Pillow-9.4.0:\n", + " Successfully uninstalled Pillow-9.4.0\n", + " Attempting uninstall: onnxruntime\n", + " Found existing installation: onnxruntime 1.16.3\n", + " Uninstalling onnxruntime-1.16.3:\n", + " Successfully uninstalled onnxruntime-1.16.3\n", + " Attempting uninstall: pycocotools\n", + " Found existing installation: pycocotools 2.0\n", + " Uninstalling pycocotools-2.0:\n", + " Successfully uninstalled pycocotools-2.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "lida 0.0.10 requires kaleido, which is not installed.\n", + "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.2.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Pillow-10.2.0 XlsxWriter-3.1.9 antlr4-python3-runtime-4.9.3 effdet-0.4.1 iopath-0.1.10 layoutparser-0.3.4 msg-parser-1.2.0 olefile-0.47 omegaconf-2.3.0 onnx-1.15.0 onnxruntime-1.15.1 pdf2image-1.17.0 pdfminer.six-20221105 pdfplumber-0.10.3 pikepdf-8.11.2 portalocker-2.8.2 pycocotools-2.0.7 pypandoc-1.12 pypdf-4.0.0 pypdfium2-4.26.0 pytesseract-0.3.10 python-docx-1.1.0 python-multipart-0.0.6 python-pptx-0.6.23 timm-0.9.12 unstructured-inference-0.7.23 unstructured.pytesseract-0.3.12\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "PIL", + "pydevd_plugins" + ] + } + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/facebookresearch/detectron2.git\n", + " Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-j0znfy12\n", + " Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-j0znfy12\n", + " Resolved https://github.com/facebookresearch/detectron2.git to commit b7ff9466d174fbb7061ff6d3773cd9c372a8e56f\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: Pillow>=7.1 in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (10.2.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (3.7.1)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (2.0.7)\n", + "Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (2.4.0)\n", + "Collecting yacs>=0.1.8 (from detectron2==0.6)\n", + " Downloading yacs-0.1.8-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (0.9.0)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (2.2.1)\n", + "Requirement already satisfied: tqdm>4.29.0 in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (4.66.1)\n", + "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (2.15.1)\n", + "Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)\n", + " Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6)\n", + " Downloading iopath-0.1.9-py3-none-any.whl (27 kB)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.1 in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (2.3.0)\n", + "Collecting hydra-core>=1.1 (from detectron2==0.6)\n", + " Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting black (from detectron2==0.6)\n", + " Downloading black-24.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m43.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from detectron2==0.6) (23.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.1)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.10/dist-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3)\n", + "Requirement already satisfied: portalocker in /usr/local/lib/python3.10/dist-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.8.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (4.47.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (1.4.5)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->detectron2==0.6) (2.8.2)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from black->detectron2==0.6) (8.1.7)\n", + "Requirement already satisfied: mypy-extensions>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from black->detectron2==0.6) (1.0.0)\n", + "Collecting pathspec>=0.9.0 (from black->detectron2==0.6)\n", + " Downloading pathspec-0.12.1-py3-none-any.whl (31 kB)\n", + "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black->detectron2==0.6) (4.1.0)\n", + "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black->detectron2==0.6) (2.0.1)\n", + "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from black->detectron2==0.6) (4.9.0)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (1.60.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (2.17.3)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (1.2.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (3.5.2)\n", + "Requirement already satisfied: protobuf<4.24,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (3.20.3)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (2.31.0)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (67.7.2)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (1.16.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->detectron2==0.6) (3.0.1)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.3.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->detectron2==0.6) (1.3.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2023.11.17)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.4)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->detectron2==0.6) (3.2.2)\n", + "Building wheels for collected packages: detectron2, fvcore\n", + " Building wheel for detectron2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for detectron2: filename=detectron2-0.6-cp310-cp310-linux_x86_64.whl size=2149521 sha256=9d1e1b9d848b3a3dbf7b01fd6e8566c6ab6a6fa4f0c354aa67946c07956cc385\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-taid36sk/wheels/47/e5/15/94c80df2ba85500c5d76599cc307c0a7079d0e221bb6fc4375\n", + " Building wheel for fvcore (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for fvcore: filename=fvcore-0.1.5.post20221221-py3-none-any.whl size=61400 sha256=df175e823bf4fb60f8e4e0ad660ea4a8b3e73cf6eedef70a25c47b902d110ae2\n", + " Stored in directory: /root/.cache/pip/wheels/01/c0/af/77c1cf53a1be9e42a52b48e5af2169d40ec2e89f7362489dd0\n", + "Successfully built detectron2 fvcore\n", + "Installing collected packages: yacs, pathspec, iopath, hydra-core, fvcore, black, detectron2\n", + " Attempting uninstall: iopath\n", + " Found existing installation: iopath 0.1.10\n", + " Uninstalling iopath-0.1.10:\n", + " Successfully uninstalled iopath-0.1.10\n", + "Successfully installed black-24.1.1 detectron2-0.6 fvcore-0.1.5.post20221221 hydra-core-1.3.2 iopath-0.1.9 pathspec-0.12.1 yacs-0.1.8\n", + "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /usr/local/lib/python3.10/dist-packages (0.3.4)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (1.23.5)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (4.8.0.76)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (1.11.4)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (1.5.3)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (10.2.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (6.0.1)\n", + "Requirement already satisfied: iopath in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (0.1.9)\n", + "Requirement already satisfied: pdfplumber in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (0.10.3)\n", + "Requirement already satisfied: pdf2image in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (1.17.0)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (2.1.0+cu121)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (0.16.0+cu121)\n", + "Requirement already satisfied: effdet in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (0.4.1)\n", + "Requirement already satisfied: pytesseract in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]) (0.3.10)\n", + "Requirement already satisfied: timm>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from effdet->layoutparser[layoutmodels,tesseract]) (0.9.12)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from effdet->layoutparser[layoutmodels,tesseract]) (2.0.7)\n", + "Requirement already satisfied: omegaconf>=2.0 in /usr/local/lib/python3.10/dist-packages (from effdet->layoutparser[layoutmodels,tesseract]) (2.3.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (4.9.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]) (2.1.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from iopath->layoutparser[layoutmodels,tesseract]) (4.66.1)\n", + "Requirement already satisfied: portalocker in /usr/local/lib/python3.10/dist-packages (from iopath->layoutparser[layoutmodels,tesseract]) (2.8.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->layoutparser[layoutmodels,tesseract]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->layoutparser[layoutmodels,tesseract]) (2023.3.post1)\n", + "Requirement already satisfied: pdfminer.six==20221105 in /usr/local/lib/python3.10/dist-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]) (20221105)\n", + "Requirement already satisfied: pypdfium2>=4.18.0 in /usr/local/lib/python3.10/dist-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]) (4.26.0)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six==20221105->pdfplumber->layoutparser[layoutmodels,tesseract]) (3.3.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six==20221105->pdfplumber->layoutparser[layoutmodels,tesseract]) (42.0.0)\n", + "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from pytesseract->layoutparser[layoutmodels,tesseract]) (23.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision->layoutparser[layoutmodels,tesseract]) (2.31.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.10/dist-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]) (4.9.3)\n", + "Requirement already satisfied: matplotlib>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (3.7.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->layoutparser[layoutmodels,tesseract]) (1.16.0)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from timm>=0.9.2->effdet->layoutparser[layoutmodels,tesseract]) (0.20.3)\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from timm>=0.9.2->effdet->layoutparser[layoutmodels,tesseract]) (0.4.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]) (2.1.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->layoutparser[layoutmodels,tesseract]) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->layoutparser[layoutmodels,tesseract]) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->layoutparser[layoutmodels,tesseract]) (2023.11.17)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->layoutparser[layoutmodels,tesseract]) (1.3.0)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six==20221105->pdfplumber->layoutparser[layoutmodels,tesseract]) (1.16.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (4.47.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (1.4.5)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]) (3.1.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six==20221105->pdfplumber->layoutparser[layoutmodels,tesseract]) (2.21)\n", + "Requirement already satisfied: pytesseract in /usr/local/lib/python3.10/dist-packages (0.3.10)\n", + "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from pytesseract) (23.2)\n", + "Requirement already satisfied: Pillow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from pytesseract) (10.2.0)\n", + "Collecting Pillow==9.0.0\n", + " Downloading Pillow-9.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.3/4.3 MB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: Pillow\n", + " Attempting uninstall: Pillow\n", + " Found existing installation: pillow 10.2.0\n", + " Uninstalling pillow-10.2.0:\n", + " Successfully uninstalled pillow-10.2.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "pdfplumber 0.10.3 requires Pillow>=9.1, but you have pillow 9.0.0 which is incompatible.\n", + "pikepdf 8.11.2 requires Pillow>=10.0.1, but you have pillow 9.0.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Pillow-9.0.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "PIL" + ] + } + } + }, + "metadata": {} + } + ], + "source": [ + "!pip install langchain\n", + "!pip install unstructured # The unstructured library provides open-source components for pre-processing text documents such as PDFs, HTML and Word Documents.\n", + "!pip install openai\n", + "!pip install pybind11 # pybind11 is a lightweight header-only library that exposes C++ types in Python\n", + "!pip install chromadb # the AI-native open-source embedding database\n", + "!pip install Cython # Cython is an optimising static compiler for both the Python programming language\n", + "!pip3 install \"git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI\" # COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation\n", + "!pip install unstructured[local-inference]\n", + "!CC=clang CXX=clang++ ARCHFLAGS=\"-arch x86_64\" pip install 'git+https://github.com/facebookresearch/detectron2.git' # Detectron2 is Facebook AI Research's next generation library that provides state-of-the-art detection and segmentation algorithms.\n", + "!pip install layoutparser[layoutmodels,tesseract] # A Unified Toolkit for Deep Learning Based Document Image Analysis\n", + "!pip install pytesseract # Python-tesseract is an optical character recognition (OCR) tool for python.\n", + "!pip install Pillow==9.0.0 # The Python Imaging Library adds image processing capabilities to your Python interpreter. Need this version, otherwise errors occur.\n", + "!pip install tiktoken\n", + "!pip install --upgrade Pillow" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "os.environ['OPENAI_API_KEY'] = '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'" + ], + "metadata": { + "id": "qzou8IvvQpwr" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain.document_loaders import UnstructuredPDFLoader\n", + "from langchain.indexes import VectorstoreIndexCreator" + ], + "metadata": { + "id": "kzFhWwEeQvV3" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from detectron2.config import get_cfg\n", + "cfg = get_cfg()\n", + "cfg.MODEL.DEVICE = 'gpu' #GPU is recommended" + ], + "metadata": { + "id": "SJsxayI6QwE9" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!wget https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf #meta earnings; replace with any pdf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VyX-UbKLQxoS", + "outputId": "e1a05875-29f1-49f5-cc09-0a8fe51a9781" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2024-01-28 14:40:20-- https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf\n", + "Resolving pgcag.files.wordpress.com (pgcag.files.wordpress.com)... 192.0.72.24, 192.0.72.25\n", + "Connecting to pgcag.files.wordpress.com (pgcag.files.wordpress.com)|192.0.72.24|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 104926 (102K) [application/pdf]\n", + "Saving to: ‘48lawsofpower.pdf’\n", + "\n", + "\r48lawsofpower.pdf 0%[ ] 0 --.-KB/s \r48lawsofpower.pdf 100%[===================>] 102.47K --.-KB/s in 0.004s \n", + "\n", + "2024-01-28 14:40:20 (24.0 MB/s) - ‘48lawsofpower.pdf’ saved [104926/104926]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!mkdir docs\n", + "!mv 48lawsofpower.pdf docs" + ], + "metadata": { + "id": "Se8-yf-7R0Pl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b3a1e8d7-c91c-4050-8373-44529aea226f" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mkdir: cannot create directory ‘docs’: File exists\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "text_folder = 'docs'\n", + "loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]" + ], + "metadata": { + "id": "YzOpYMfFSQ7x" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!apt-get install poppler-utils # error occurs without this, pdf rendering library" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-jlnhpUHTc7T", + "outputId": "59f7ad0c-64a8-4115-e108-9e20f79e09ea" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "poppler-utils is already the newest version (22.02.0-2ubuntu0.3).\n", + "0 upgraded, 0 newly installed, 0 to remove and 31 not upgraded.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "index = VectorstoreIndexCreator().from_loaders(loaders)" + ], + "metadata": { + "id": "rRKCE8ucSW4H" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "query = \"Can you give me an example from history where the enemy was crushed totally from the book?\"\n", + "index.query(query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 157 + }, + "id": "_vpx0nTqUvBv", + "outputId": "f7674f5b-072e-4f0f-93c7-66eeea9ac263" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\" One example from history where the enemy was crushed totally is the defeat of the Aztec Empire by the Spanish conquistador Hernán Cortés in the early 16th century. Cortés used a combination of military tactics, alliances with rival tribes, and psychological warfare to defeat the Aztecs and conquer their capital city of Tenochtitlan. He also took advantage of the Aztecs' belief in prophecy and their fear of the god Quetzalcoatl, who they believed would return in the form of a light-skinned, bearded man. By playing on these fears and manipulating the Aztecs' religious beliefs, Cortés was able to gain the upper hand and ultimately defeat them. This example demonstrates the power of using psychological tactics and exploiting an enemy's weaknesses to achieve a total victory.\"" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "query = \"What's the point of making myself less accessible?\"\n", + "index.query(query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 88 + }, + "id": "2H1cV4VowRoX", + "outputId": "c8a3fbf0-2714-4a89-be1a-7c4b53145a55" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "' The point of making yourself less accessible is to create a sense of scarcity and value around yourself. By being constantly available, people may take you for granted and your presence may lose its impact. By making yourself less accessible, you can maintain a sense of mystery and importance, making people more eager to interact with you and increasing your power and influence.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "query = \"Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?\"\n", + "index.query(query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 157 + }, + "id": "IXHFgLw4hxOK", + "outputId": "77e2098f-88be-414e-83ee-8a1e59004835" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "' According to the book, Queen Elizabeth I was a master of the power game. She was able to avoid the trap of marriage and war by dangling the possibility of marriage to all who courted her. This allowed her to forge alliances with different countries and maintain her independence and power. She also used the tactic of refusing to commit, but allowing herself to be courted, which made her desirable and powerful. She also maintained a regal bearing and dignity, which helped her to appear unshakable and in control. Additionally, she was skilled in the art of timing, knowing when to make her moves and when to hold back. Finally, she embraced the idea of formlessness, being adaptable and fluid in her ruling style, which allowed her to break her enemies from the inside and maintain her power.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "query = \"State the names of 5 laws?\"\n", + "index.query(query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "rvvLMIQhipAa", + "outputId": "b4da4e37-2fa7-4561-975f-51953f6a93c5" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\"\\n1. Never Outshine the Master\\n2. Court Attention at All Cost\\n3. Know Who You're Dealing With - Do Not Offend the Wrong Person\\n4. Use Absence to Increase Respect and Honor\\n5. Banish Your Enemies: Completely\"" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "0VqKqve9jvwc" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file