{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "!pip install -r requirements.txt" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lVZX4hy1Ruq_", "outputId": "0c963932-2266-4c44-d671-07dc23625bae" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: langchain_community in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (0.0.32)\n", "Requirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 2)) (0.0.1)\n", "Requirement already satisfied: langchain-together in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 3)) (0.1.0)\n", "Requirement already satisfied: pdf2image in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (1.17.0)\n", "Requirement already satisfied: pdfminer.six in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 6)) (20231228)\n", "Requirement already satisfied: pillow_heif in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 7)) (0.16.0)\n", "Requirement already satisfied: doctran in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 8)) (0.0.14)\n", "Collecting python-dotenv (from -r requirements.txt (line 9))\n", " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", "Requirement already satisfied: unstructured[local-inference] in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 4)) (0.13.2)\n", "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (6.0.1)\n", "Requirement already satisfied: SQLAlchemy<3,>=1.4 in 
/usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (2.0.29)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (3.9.3)\n", "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (0.6.4)\n", "Requirement already satisfied: langchain-core<0.2.0,>=0.1.41 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (0.1.42)\n", "Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (0.1.47)\n", "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (1.25.2)\n", "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (2.31.0)\n", "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain_community->-r requirements.txt (line 1)) (8.2.3)\n", "Requirement already satisfied: together<0.3.0,>=0.2.10 in /usr/local/lib/python3.10/dist-packages (from langchain-together->-r requirements.txt (line 3)) (0.2.11)\n", "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (5.2.0)\n", "Requirement already satisfied: filetype in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.2.0)\n", "Requirement already satisfied: python-magic in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.4.27)\n", "Requirement already satisfied: lxml in 
/usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (4.9.4)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (3.8.1)\n", "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.9.0)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (4.12.3)\n", "Requirement already satisfied: emoji in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (2.11.0)\n", "Requirement already satisfied: python-iso639 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (2024.2.7)\n", "Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.0.9)\n", "Requirement already satisfied: rapidfuzz in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (3.8.1)\n", "Requirement already satisfied: backoff in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (2.2.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (4.11.0)\n", "Requirement already satisfied: unstructured-client<=0.18.0 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.18.0)\n", "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.14.1)\n", "Requirement already satisfied: 
unstructured.pytesseract>=0.3.12 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.3.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (3.3)\n", "Requirement already satisfied: pypdf in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (4.2.0)\n", "Requirement already satisfied: python-pptx<=0.6.23 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.6.23)\n", "Requirement already satisfied: pypandoc in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.13)\n", "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (3.6)\n", "Requirement already satisfied: openpyxl in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (3.1.2)\n", "Requirement already satisfied: xlrd in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (2.0.1)\n", "Requirement already satisfied: python-docx in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.1.0)\n", "Requirement already satisfied: pikepdf in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (8.15.0)\n", "Requirement already satisfied: unstructured-inference==0.7.25 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (0.7.25)\n", "Requirement already satisfied: onnx in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.16.0)\n", "Requirement already 
satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (2.0.3)\n", "Requirement already satisfied: msg-parser in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]->-r requirements.txt (line 4)) (1.2.0)\n", "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (0.3.4)\n", "Requirement already satisfied: python-multipart in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (0.0.9)\n", "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (0.20.3)\n", "Requirement already satisfied: opencv-python!=4.7.0.68 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (4.8.0.76)\n", "Requirement already satisfied: onnxruntime<1.16 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (1.15.1)\n", "Requirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]->-r requirements.txt (line 4)) (4.38.2)\n", "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from pdf2image->-r requirements.txt (line 5)) (10.3.0)\n", "Requirement already satisfied: charset-normalizer>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->-r requirements.txt (line 6)) (3.3.2)\n", "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six->-r requirements.txt 
(line 6)) (42.0.5)\n", "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (0.27.10)\n", "Requirement already satisfied: presidio-analyzer<3.0.0,>=2.2.33 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (2.2.354)\n", "Requirement already satisfied: presidio-anonymizer<3.0.0,>=2.2.33 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (2.2.354)\n", "Requirement already satisfied: pydantic<2.0.0,>=1.10.9 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (1.10.15)\n", "Requirement already satisfied: spacy<4.0.0,>=3.5.4 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (3.7.4)\n", "Requirement already satisfied: tiktoken<0.6.0,>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (0.5.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (23.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (1.4.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (6.0.5)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (1.9.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages 
(from aiohttp<4.0.0,>=3.8.3->langchain_community->-r requirements.txt (line 1)) (4.0.3)\n", "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six->-r requirements.txt (line 6)) (1.16.0)\n", "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community->-r requirements.txt (line 1)) (3.21.1)\n", "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community->-r requirements.txt (line 1)) (0.9.0)\n", "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.41->langchain_community->-r requirements.txt (line 1)) (1.33)\n", "Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.41->langchain_community->-r requirements.txt (line 1)) (23.2)\n", "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.0->langchain_community->-r requirements.txt (line 1)) (3.10.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->doctran->-r requirements.txt (line 8)) (4.66.2)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from presidio-analyzer<3.0.0,>=2.2.33->doctran->-r requirements.txt (line 8)) (2023.12.25)\n", "Requirement already satisfied: tldextract in /usr/local/lib/python3.10/dist-packages (from presidio-analyzer<3.0.0,>=2.2.33->doctran->-r requirements.txt (line 8)) (5.1.2)\n", "Requirement already satisfied: phonenumbers<9.0.0,>=8.12 in /usr/local/lib/python3.10/dist-packages (from presidio-analyzer<3.0.0,>=2.2.33->doctran->-r requirements.txt (line 8)) (8.13.34)\n", "Requirement already satisfied: 
pycryptodome>=3.10.1 in /usr/local/lib/python3.10/dist-packages (from presidio-anonymizer<3.0.0,>=2.2.33->doctran->-r requirements.txt (line 8)) (3.20.0)\n", "Requirement already satisfied: XlsxWriter>=0.5.7 in /usr/local/lib/python3.10/dist-packages (from python-pptx<=0.6.23->unstructured[local-inference]->-r requirements.txt (line 4)) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain_community->-r requirements.txt (line 1)) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain_community->-r requirements.txt (line 1)) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain_community->-r requirements.txt (line 1)) (2024.2.2)\n", "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (3.0.12)\n", "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (1.0.5)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (1.0.10)\n", "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (2.0.8)\n", "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (3.0.9)\n", "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (8.2.3)\n", "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 
in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (1.1.2)\n", "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (2.4.8)\n", "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (2.0.10)\n", "Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (0.3.4)\n", "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (0.9.4)\n", "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (6.4.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (3.1.3)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (69.5.0)\n", "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8)) (3.3.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain_community->-r requirements.txt (line 1)) (3.0.3)\n", "INFO: pip is looking at multiple versions of together to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting together<0.3.0,>=0.2.10 (from langchain-together->-r requirements.txt (line 3))\n", " Downloading together-0.2.10-py3-none-any.whl.metadata (26 kB)\n", "Collecting spacy<4.0.0,>=3.5.4 (from doctran->-r requirements.txt (line 8))\n", " Downloading spacy-3.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)\n", " Downloading spacy-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", " Downloading spacy-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting pathy>=0.10.0 (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8))\n", " Downloading pathy-0.11.0-py3-none-any.whl.metadata (16 kB)\n", "INFO: pip is still looking at multiple versions of together to determine which version is compatible with other requirements. This could take a while.\n", "Collecting spacy<4.0.0,>=3.5.4 (from doctran->-r requirements.txt (line 8))\n", " Downloading spacy-3.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", " Downloading spacy-3.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting thinc<8.2.0,>=8.1.8 (from spacy<4.0.0,>=3.5.4->doctran->-r requirements.txt (line 8))\n", " Downloading thinc-8.1.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)\n", "Collecting spacy<4.0.0,>=3.5.4 (from doctran->-r requirements.txt (line 8))\n", " Downloading spacy-3.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n", "INFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. 
If you want to abort this run, press Ctrl + C.\n", " Downloading spacy-3.5.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting presidio-analyzer<3.0.0,>=2.2.33 (from doctran->-r requirements.txt (line 8))\n", " Downloading presidio_analyzer-2.2.354-py3-none-any.whl.metadata (2.6 kB)\n", "Collecting langsmith<0.2.0,>=0.1.0 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langsmith-0.1.47-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.46-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.45-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.44-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.43-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.42-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.41-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.40-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.39-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.38-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.37-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.36-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.35-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.34-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.33-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.31-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.30-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.29-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.28-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.27-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.26-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.25-py3-none-any.whl.metadata (13 kB)\n", " Downloading 
langsmith-0.1.24-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.23-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.22-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.21-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.20-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.19-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.18-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.17-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.16-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.15-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.14-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.13-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.12-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.11-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.10-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.9-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.8-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.7-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.6-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.5-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.4-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.3-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.2-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.1-py3-none-any.whl.metadata (13 kB)\n", " Downloading langsmith-0.1.0-py3-none-any.whl.metadata (13 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.41 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langchain_core-0.1.42-py3-none-any.whl.metadata (5.9 kB)\n", " Downloading langchain_core-0.1.41-py3-none-any.whl.metadata (5.9 kB)\n", "Collecting doctran (from -r requirements.txt 
(line 8))\n", " Downloading doctran-0.0.14-py3-none-any.whl.metadata (8.6 kB)\n", " Downloading doctran-0.0.13-py3-none-any.whl.metadata (8.6 kB)\n", " Downloading doctran-0.0.12-py3-none-any.whl.metadata (8.6 kB)\n", " Downloading doctran-0.0.11-py3-none-any.whl.metadata (8.5 kB)\n", " Downloading doctran-0.0.10-py3-none-any.whl.metadata (8.9 kB)\n", "Collecting tiktoken<0.5.0,>=0.4.0 (from doctran->-r requirements.txt (line 8))\n", " Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)\n", "Collecting doctran (from -r requirements.txt (line 8))\n", " Downloading doctran-0.0.9-py3-none-any.whl.metadata (8.9 kB)\n", "Collecting tiktoken<0.4.0,>=0.3.3 (from doctran->-r requirements.txt (line 8))\n", " Downloading tiktoken-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)\n", "Collecting doctran (from -r requirements.txt (line 8))\n", " Downloading doctran-0.0.8-py3-none-any.whl.metadata (8.9 kB)\n", " Downloading doctran-0.0.7-py3-none-any.whl.metadata (8.9 kB)\n", " Downloading doctran-0.0.6-py3-none-any.whl.metadata (8.9 kB)\n", " Downloading doctran-0.0.5-py3-none-any.whl.metadata (8.9 kB)\n", " Downloading doctran-0.0.4-py3-none-any.whl.metadata (8.9 kB)\n", " Downloading doctran-0.0.3-py3-none-any.whl.metadata (8.8 kB)\n", " Downloading doctran-0.0.2-py3-none-any.whl.metadata (8.9 kB)\n", "Collecting bs4<0.0.2,>=0.0.1 (from doctran->-r requirements.txt (line 8))\n", " Downloading bs4-0.0.1.tar.gz (1.1 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: jsonschema<5.0.0,>=4.17.3 in /usr/local/lib/python3.10/dist-packages (from doctran->-r requirements.txt (line 8)) (4.19.2)\n", "Collecting mailbox<0.5,>=0.4 (from doctran->-r requirements.txt (line 8))\n", " Downloading mailbox-0.4.tar.gz (4.1 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting pdfplumber<0.10.0,>=0.9.0 (from doctran->-r requirements.txt (line 8))\n", " Downloading pdfplumber-0.9.0-py3-none-any.whl.metadata (35 kB)\n", "Collecting doctran (from -r requirements.txt (line 8))\n", " Downloading doctran-0.0.1-py3-none-any.whl.metadata (5.5 kB)\n", " Downloading doctran-0.0.0-py3-none-any.whl.metadata (599 bytes)\n", "Collecting langchain-together (from -r requirements.txt (line 3))\n", " Downloading langchain_together-0.1.0-py3-none-any.whl.metadata (1.9 kB)\n", " Downloading langchain_together-0.0.2.post2-py3-none-any.whl.metadata (1.9 kB)\n", " Downloading langchain_together-0.0.2.post1-py3-none-any.whl.metadata (806 bytes)\n", " Downloading langchain_together-0.0.2-py3-none-any.whl.metadata (577 bytes)\n", " Downloading langchain_together-0.0.1-py3-none-any.whl.metadata (504 bytes)\n", "Collecting langchain-text-splitters (from -r requirements.txt (line 2))\n", " Downloading langchain_text_splitters-0.0.1-py3-none-any.whl.metadata (2.0 kB)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.32-py3-none-any.whl.metadata (8.5 kB)\n", " Downloading langchain_community-0.0.31-py3-none-any.whl.metadata (8.4 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.37 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langchain_core-0.1.40-py3-none-any.whl.metadata (5.9 kB)\n", " Downloading langchain_core-0.1.39-py3-none-any.whl.metadata (5.9 kB)\n", " Downloading langchain_core-0.1.38-py3-none-any.whl.metadata (6.0 kB)\n", " Downloading langchain_core-0.1.37-py3-none-any.whl.metadata (6.0 kB)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.30-py3-none-any.whl.metadata (8.4 kB)\n", " Downloading langchain_community-0.0.29-py3-none-any.whl.metadata (8.3 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.33 (from langchain_community->-r requirements.txt (line 1))\n", " 
Downloading langchain_core-0.1.36-py3-none-any.whl.metadata (6.0 kB)\n", " Downloading langchain_core-0.1.35-py3-none-any.whl.metadata (6.0 kB)\n", " Downloading langchain_core-0.1.34-py3-none-any.whl.metadata (6.0 kB)\n", " Downloading langchain_core-0.1.33-py3-none-any.whl.metadata (6.0 kB)\n", "Requirement already satisfied: anyio<5,>=3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.33->langchain_community->-r requirements.txt (line 1)) (3.7.1)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.28-py3-none-any.whl.metadata (8.3 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.31 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langchain_core-0.1.32-py3-none-any.whl.metadata (6.0 kB)\n", " Downloading langchain_core-0.1.31-py3-none-any.whl.metadata (6.0 kB)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.27-py3-none-any.whl.metadata (8.2 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.30 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langchain_core-0.1.30-py3-none-any.whl.metadata (6.0 kB)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.26-py3-none-any.whl.metadata (8.2 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.29 (from langchain_community->-r requirements.txt (line 1))\n", " Downloading langchain_core-0.1.29-py3-none-any.whl.metadata (6.0 kB)\n", "Collecting langchain_community (from -r requirements.txt (line 1))\n", " Downloading langchain_community-0.0.25-py3-none-any.whl.metadata (8.1 kB)\n", "\u001b[31mERROR: Exception:\n", "Traceback (most recent call last):\n", " File \"/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py\", line 180, in exc_logging_wrapper\n", " status = run_func(*args)\n", " File 
\"/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py\", line 245, in wrapper\n", " return func(self, options, args)\n", " File \"/usr/local/lib/python3.10/dist-packages/pip/_internal/commands/install.py\", line 377, in run\n", " requirement_set = resolver.resolve(\n", " File \"/usr/local/lib/python3.10/dist-packages/pip/_internal/resolution/resolvelib/resolver.py\", line 95, in resolve\n", " result = self._result = resolver.resolve(\n", " File \"/usr/local/lib/python3.10/dist-packages/pip/_vendor/resolvelib/resolvers.py\", line 546, in resolve\n", " state = resolution.resolve(requirements, max_rounds=max_rounds)\n", " File \"/usr/local/lib/python3.10/dist-packages/pip/_vendor/resolvelib/resolvers.py\", line 457, in resolve\n", " raise ResolutionTooDeep(max_rounds)\n", "pip._vendor.resolvelib.resolvers.ResolutionTooDeep: 200000\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ] }, { "cell_type": "code", "source": [ "!pip install langchain_community langchain-text-splitters unstructured[local-inference] pdf2image pdfminer.six langchain-together pillow_heif" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "j06J9xE60u0C", "outputId": "06248856-b7d5-402f-f38d-03e475f2786b" }, "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting langchain_community\n", " Downloading langchain_community-0.0.32-py3-none-any.whl (1.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting langchain-text-splitters\n", " Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n", "Collecting unstructured[local-inference]\n", " Downloading unstructured-0.13.2-py3-none-any.whl (1.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pdf2image\n", " Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)\n", "Collecting pdfminer.six\n", " Downloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting langchain-together\n", " Downloading langchain_together-0.1.0-py3-none-any.whl (6.7 kB)\n", "Collecting pillow_heif\n", " Downloading pillow_heif-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (6.0.1)\n", "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (2.0.29)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (3.9.3)\n", "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)\n", " Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)\n", "Collecting langchain-core<0.2.0,>=0.1.41 (from langchain_community)\n", " Downloading langchain_core-0.1.42-py3-none-any.whl (287 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m287.5/287.5 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting langsmith<0.2.0,>=0.1.0 (from langchain_community)\n", " Downloading langsmith-0.1.47-py3-none-any.whl (113 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m113.0/113.0 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already 
satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (1.25.2)\n", "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (2.31.0)\n", "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain_community) (8.2.3)\n", "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (5.2.0)\n", "Collecting filetype (from unstructured[local-inference])\n", " Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", "Collecting python-magic (from unstructured[local-inference])\n", " Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n", "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.9.4)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.8.1)\n", "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (0.9.0)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.12.3)\n", "Collecting emoji (from unstructured[local-inference])\n", " Downloading emoji-2.11.0-py2.py3-none-any.whl (433 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m433.8/433.8 kB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting python-iso639 (from unstructured[local-inference])\n", " Downloading python_iso639-2024.2.7-py3-none-any.whl (274 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.7/274.7 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting langdetect (from unstructured[local-inference])\n", " Downloading 
langdetect-1.0.9.tar.gz (981 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting rapidfuzz (from unstructured[local-inference])\n", " Downloading rapidfuzz-3.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting backoff (from unstructured[local-inference])\n", " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (4.11.0)\n", "Collecting unstructured-client<=0.18.0 (from unstructured[local-inference])\n", " Downloading unstructured_client-0.18.0-py3-none-any.whl (21 kB)\n", "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (1.14.1)\n", "Collecting unstructured.pytesseract>=0.3.12 (from unstructured[local-inference])\n", " Downloading unstructured.pytesseract-0.3.12-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.3)\n", "Collecting pypdf (from unstructured[local-inference])\n", " Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting python-pptx<=0.6.23 (from unstructured[local-inference])\n", " Downloading python_pptx-0.6.23-py3-none-any.whl (471 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m471.6/471.6 kB\u001b[0m 
\u001b[31m28.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pypandoc (from unstructured[local-inference])\n", " Downloading pypandoc-1.13-py3-none-any.whl (21 kB)\n", "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.6)\n", "Requirement already satisfied: openpyxl in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (3.1.2)\n", "Requirement already satisfied: xlrd in /usr/local/lib/python3.10/dist-packages (from unstructured[local-inference]) (2.0.1)\n", "Collecting python-docx (from unstructured[local-inference])\n", " Downloading python_docx-1.1.0-py3-none-any.whl (239 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.6/239.6 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pikepdf (from unstructured[local-inference])\n", " Downloading pikepdf-8.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting unstructured-inference==0.7.25 (from unstructured[local-inference])\n", " Downloading unstructured_inference-0.7.25-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.9/58.9 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting onnx (from unstructured[local-inference])\n", " Downloading onnx-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m43.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from 
unstructured[local-inference]) (2.0.3)\n", "Collecting msg-parser (from unstructured[local-inference])\n", " Downloading msg_parser-1.2.0-py2.py3-none-any.whl (101 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting layoutparser[layoutmodels,tesseract] (from unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading layoutparser-0.3.4-py3-none-any.whl (19.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting python-multipart (from unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]) (0.20.3)\n", "Requirement already satisfied: opencv-python!=4.7.0.68 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]) (4.8.0.76)\n", "Collecting onnxruntime<1.16 (from unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m58.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: transformers>=4.25.1 in /usr/local/lib/python3.10/dist-packages (from unstructured-inference==0.7.25->unstructured[local-inference]) (4.38.2)\n", "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from pdf2image) (9.4.0)\n", "Requirement already satisfied: charset-normalizer>=2.0.0 in 
/usr/local/lib/python3.10/dist-packages (from pdfminer.six) (3.3.2)\n", "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six) (42.0.5)\n", "Collecting together<0.3.0,>=0.2.10 (from langchain-together)\n", " Downloading together-0.2.11-py3-none-any.whl (43 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.8/43.8 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pillow (from pdf2image)\n", " Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (23.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.4.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (6.0.5)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.9.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (4.0.3)\n", "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six) (1.16.0)\n", "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)\n", " Downloading 
marshmallow-3.21.1-py3-none-any.whl (49 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)\n", " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.2.0,>=0.1.41->langchain_community)\n", " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", "Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.41->langchain_community)\n", " Downloading packaging-23.2-py3-none-any.whl (53 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.41->langchain_community) (2.6.4)\n", "Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.0->langchain_community)\n", " Downloading orjson-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.8/144.8 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting XlsxWriter>=0.5.7 (from python-pptx<=0.6.23->unstructured[local-inference])\n", " Downloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m159.9/159.9 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain_community) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests<3,>=2->langchain_community) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain_community) (2024.2.2)\n", "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain_community) (3.0.3)\n", "Collecting sseclient-py<2.0.0,>=1.7.2 (from together<0.3.0,>=0.2.10->langchain-together)\n", " Downloading sseclient_py-1.8.0-py2.py3-none-any.whl (8.8 kB)\n", "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from together<0.3.0,>=0.2.10->langchain-together) (4.66.2)\n", "Requirement already satisfied: typer<0.10.0,>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from together<0.3.0,>=0.2.10->langchain-together) (0.9.4)\n", "Collecting dataclasses-json-speakeasy>=0.5.11 (from unstructured-client<=0.18.0->unstructured[local-inference])\n", " Downloading dataclasses_json_speakeasy-0.5.11-py3-none-any.whl (28 kB)\n", "Collecting jsonpath-python>=1.0.6 (from unstructured-client<=0.18.0->unstructured[local-inference])\n", " Downloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)\n", "Collecting mypy-extensions>=1.0.0 (from unstructured-client<=0.18.0->unstructured[local-inference])\n", " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from unstructured-client<=0.18.0->unstructured[local-inference]) (2.8.2)\n", "Requirement already satisfied: six>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from unstructured-client<=0.18.0->unstructured[local-inference]) (1.16.0)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->unstructured[local-inference]) (2.5)\n", "Collecting olefile>=0.46 (from msg-parser->unstructured[local-inference])\n", " Downloading olefile-0.47-py2.py3-none-any.whl (114 
kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.6/114.6 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (8.1.7)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (1.4.0)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->unstructured[local-inference]) (2023.12.25)\n", "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx->unstructured[local-inference]) (3.20.3)\n", "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl->unstructured[local-inference]) (1.1.0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->unstructured[local-inference]) (2023.4)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->unstructured[local-inference]) (2024.1)\n", "Collecting Deprecated (from pikepdf->unstructured[local-inference])\n", " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six) (2.22)\n", "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.41->langchain_community)\n", " Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", "Collecting coloredlogs (from onnxruntime<1.16->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime<1.16->unstructured-inference==0.7.25->unstructured[local-inference]) (24.3.25)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime<1.16->unstructured-inference==0.7.25->unstructured[local-inference]) (1.12)\n", "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.2.0,>=0.1.41->langchain_community) (0.6.0)\n", "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.2.0,>=0.1.41->langchain_community) (2.16.3)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.25->unstructured[local-inference]) (3.13.4)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.25->unstructured[local-inference]) (0.15.2)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.25.1->unstructured-inference==0.7.25->unstructured[local-inference]) (0.4.2)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->unstructured-inference==0.7.25->unstructured[local-inference]) (2023.6.0)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (1.11.4)\n", "Collecting iopath (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading iopath-0.1.10.tar.gz (42 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting pdfplumber (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading pdfplumber-0.11.0-py3-none-any.whl (56 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.4/56.4 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pytesseract (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (2.2.1+cu121)\n", "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (0.17.1+cu121)\n", "Collecting effdet (from layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading effdet-0.4.1-py3-none-any.whl (112 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.5/112.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<1.16->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting timm>=0.9.2 (from 
effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading timm-0.9.16-py3-none-any.whl (2.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (2.0.7)\n", "Collecting omegaconf>=2.0 (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (3.1.3)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "Collecting nvidia-cudnn-cu12==8.9.2.26 (from 
torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", "Collecting nvidia-curand-cu12==10.3.2.106 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", "Collecting nvidia-nccl-cu12==2.19.3 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", "Collecting nvidia-nvtx-cu12==12.1.105 (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "Requirement already satisfied: triton==2.2.0 in 
/usr/local/lib/python3.10/dist-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (2.2.0)\n", "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "Collecting portalocker (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n", "Collecting pypdfium2>=4.18.0 (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading pypdfium2-4.29.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m76.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime<1.16->unstructured-inference==0.7.25->unstructured[local-inference]) (1.3.0)\n", "Collecting antlr4-python3-runtime==4.9.* (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference])\n", " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: matplotlib>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (3.7.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (2.1.5)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (1.2.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (4.51.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (1.4.5)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference==0.7.25->unstructured[local-inference]) (3.1.2)\n", "Building wheels for collected packages: langdetect, iopath, antlr4-python3-runtime\n", " Building wheel for langdetect (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993227 sha256=e39cbb9b4aa1aad74d62b4cff3f3c84256c3b6b555b3762b863406e1ea056f1e\n", " Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n", " Building wheel for iopath (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for iopath: filename=iopath-0.1.10-py3-none-any.whl size=31532 sha256=b2aa25855332a5c43eef0c3000cb3b16d8a4c9f2578bc12e110a880bc2523f92\n", " Stored in directory: /root/.cache/pip/wheels/9a/a3/b6/ac0fcd1b4ed5cfeb3db92e6a0e476cfd48ed0df92b91080c1d\n", " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=376b87330fe481da2030f2484d7d0ac28fbeb173911aef693e27e4bfb094c6a0\n", " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n", "Successfully built langdetect iopath antlr4-python3-runtime\n", "Installing collected packages: sseclient-py, filetype, antlr4-python3-runtime, XlsxWriter, rapidfuzz, python-multipart, python-magic, python-iso639, python-docx, pypdfium2, pypdf, pypandoc, portalocker, pillow, packaging, orjson, onnx, omegaconf, olefile, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, mypy-extensions, langdetect, jsonpointer, jsonpath-python, humanfriendly, emoji, Deprecated, backoff, unstructured.pytesseract, typing-inspect, python-pptx, pytesseract, pillow_heif, pikepdf, pdf2image, nvidia-cusparse-cu12, nvidia-cudnn-cu12, msg-parser, marshmallow, jsonpatch, iopath, coloredlogs, together, pdfminer.six, onnxruntime, nvidia-cusolver-cu12, langsmith, dataclasses-json-speakeasy, dataclasses-json, unstructured-client, pdfplumber, 
langchain-core, unstructured, layoutparser, langchain-together, langchain-text-splitters, langchain_community, timm, effdet, unstructured-inference\n", " Attempting uninstall: pillow\n", " Found existing installation: Pillow 9.4.0\n", " Uninstalling Pillow-9.4.0:\n", " Successfully uninstalled Pillow-9.4.0\n", " Attempting uninstall: packaging\n", " Found existing installation: packaging 24.0\n", " Uninstalling packaging-24.0:\n", " Successfully uninstalled packaging-24.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.3.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed Deprecated-1.2.14 XlsxWriter-3.2.0 antlr4-python3-runtime-4.9.3 backoff-2.2.1 coloredlogs-15.0.1 dataclasses-json-0.6.4 dataclasses-json-speakeasy-0.5.11 effdet-0.4.1 emoji-2.11.0 filetype-1.2.0 humanfriendly-10.0 iopath-0.1.10 jsonpatch-1.33 jsonpath-python-1.0.6 jsonpointer-2.4 langchain-core-0.1.42 langchain-text-splitters-0.0.1 langchain-together-0.1.0 langchain_community-0.0.32 langdetect-1.0.9 langsmith-0.1.47 layoutparser-0.3.4 marshmallow-3.21.1 msg-parser-1.2.0 mypy-extensions-1.0.0 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 olefile-0.47 omegaconf-2.3.0 onnx-1.16.0 onnxruntime-1.15.1 orjson-3.10.0 packaging-23.2 pdf2image-1.17.0 pdfminer.six-20231228 pdfplumber-0.11.0 pikepdf-8.15.0 pillow-10.3.0 pillow_heif-0.16.0 portalocker-2.8.2 pypandoc-1.13 pypdf-4.2.0 pypdfium2-4.29.0 pytesseract-0.3.10 python-docx-1.1.0 python-iso639-2024.2.7 
python-magic-0.4.27 python-multipart-0.0.9 python-pptx-0.6.23 rapidfuzz-3.8.1 sseclient-py-1.8.0 timm-0.9.16 together-0.2.11 typing-inspect-0.9.0 unstructured-0.13.2 unstructured-client-0.18.0 unstructured-inference-0.7.25 unstructured.pytesseract-0.3.12\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "PIL", "pydevd_plugins" ] }, "id": "a05b30d70de54e25a3d0c32fffc55ab0" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "from langchain_community.document_loaders import TextLoader\n", "from langchain_community.embeddings.fake import FakeEmbeddings\n", "from langchain_community.vectorstores import Vectara\n", "from langchain_text_splitters import CharacterTextSplitter" ], "metadata": { "id": "bSRybIQ60tRl" }, "execution_count": 5, "outputs": [] }, { "cell_type": "code", "source": [ "from google.colab import userdata\n", "\n", "TOGETHER_API_KEY = userdata.get('TOGETHER_API_KEY')\n", "vectara_customer_id = userdata.get('VECTARA_CUSTOMER_ID')\n", "vectara_corpus_id = userdata.get('VECTARA_CORPUS_ID')\n", "vectara_api_key = userdata.get('VECTARA_API_KEY')" ], "metadata": { "id": "d98hRDFC3WyH" }, "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ "vectorstore = Vectara(\n", " vectara_customer_id=vectara_customer_id,\n", " vectara_corpus_id=vectara_corpus_id,\n", " vectara_api_key=vectara_api_key\n", " )" ], "metadata": { "id": "n7aGHYcyzgXK" }, "execution_count": 7, "outputs": [] }, { "cell_type": "code", "source": [ "from langchain_community.document_loaders import UnstructuredPDFLoader" ], "metadata": { "id": "aX5VJiU07RZs" }, "execution_count": 8, "outputs": [] }, { "cell_type": "code", "source": [ "!mkdir docs\n", "# upload sample file" ], "metadata": { "id": "UQors5XgGPV7" }, "execution_count": 37, "outputs": [] }, { "cell_type": "code", "source": [ "loader = UnstructuredPDFLoader('ISB-020-U3-W-S-01-B18003-001-020.pdf', strategy='fast')\n", "data = 
loader.load()" ], "metadata": { "id": "ULSBXZRcI_4R" }, "execution_count": 9, "outputs": [] },
  {
    "cell_type": "code",
    "source": [
      "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
      "\n",
      "# CharacterTextSplitter cuts on a single separator only, which left chunks of\n",
      "# up to 3260 characters for this PDF despite chunk_size=1000. The recursive\n",
      "# splitter falls back to finer separators so the size limit is respected.\n",
      "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
      "docs = text_splitter.split_documents(data)"
    ],
    "metadata": { "id": "rd_8GLJrPT5T" },
    "execution_count": null,
    "outputs": []
  },
  {
    "cell_type": "markdown",
    "source": [
      "## Extract document properties\n",
      "\n",
      "`DoctranPropertyExtractor` calls the OpenAI API, so an `OPENAI_API_KEY` must be available before the extractor is constructed."
    ],
    "metadata": { "id": "hVjJAK-KTTEE" }
  },
  {
    "cell_type": "code",
    "source": [
      "import json\n",
      "\n",
      "from langchain_community.document_transformers import DoctranPropertyExtractor\n",
      "from langchain_core.documents import Document"
    ],
    "metadata": { "id": "6CM6bL6JRCCA" },
    "execution_count": null,
    "outputs": []
  },
  {
    "cell_type": "code",
    "source": [
      "# Schema of the metadata fields the extractor should pull out of each document.\n",
      "properties = [\n",
      "    {\n",
      "        \"name\": \"document_number\",\n",
      "        \"description\": \"Unique identifier for the document within its project.\",\n",
      "        \"type\": \"string\",\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"discipline\",\n",
      "        \"description\": \"The discipline associated with the document.\",\n",
      "        \"type\": \"string\",\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"title\",\n",
      "        \"description\": \"Title of the document.\",\n",
      "        \"type\": \"string\",\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"version\",\n",
      "        \"description\": \"Version number of the document.\",\n",
      "        # NOTE(review): doctran's ExtractProperty appears to accept only\n",
      "        # string/number/boolean/array/object, so \"integer\" is spelled \"number\";\n",
      "        # confirm against the installed doctran release.\n",
      "        \"type\": \"number\",\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"date\",\n",
      "        \"description\": \"Creation date of the document.\",\n",
      "        \"type\": \"string\",\n",
      "        \"format\": \"date\",\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"author\",\n",
      "        \"description\": \"Author of the document.\",\n",
      "        \"type\": \"object\",\n",
      "        # NOTE(review): confirm that doctran forwards nested `properties` and\n",
      "        # `format`; if it drops them, fold the details into the description.\n",
      "        \"properties\": {\n",
      "            \"name\": {\n",
      "                \"type\": \"string\",\n",
      "                \"required\": True\n",
      "            },\n",
      "            \"email\": {\n",
      "                \"type\": \"string\",\n",
      "                \"format\": \"email\",\n",
      "                \"required\": False\n",
      "            }\n",
      "        },\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"related_documents\",\n",
      "        \"description\": \"List of related documents.\",\n",
      "        \"type\": \"array\",\n",
      "        \"items\": {\n",
      "            \"type\": \"string\"\n",
      "        },\n",
      "        \"required\": False\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"status\",\n",
      "        \"description\": \"Current status of the document.\",\n",
      "        \"type\": \"string\",\n",
      "        \"enum\": [\"draft\", \"under_review\", \"approved\", \"rejected\"],\n",
      "        \"required\": True\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"keywords\",\n",
      "        \"description\": \"Keywords associated with the document.\",\n",
      "        \"type\": \"array\",\n",
      "        \"items\": {\n",
      "            \"type\": \"string\"\n",
      "        },\n",
      "        \"required\": False\n",
      "    },\n",
      "    {\n",
      "        \"name\": \"summary\",\n",
      "        \"description\": \"Short summary of the document content.\",\n",
      "        \"type\": \"string\",\n",
      "        \"required\": False\n",
      "    }\n",
      "]"
    ],
    "metadata": { "id": "9rBUSiR-bDAD" },
    "execution_count": null,
    "outputs": []
  },
  {
    "cell_type": "code",
    "source": [
      "import os\n",
      "\n",
      "from dotenv import load_dotenv\n",
      "from google.colab import userdata\n",
      "\n",
      "# Must run before the extractor is built: load a local .env when one exists,\n",
      "# otherwise fall back to the Colab secret store.\n",
      "load_dotenv()\n",
      "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or userdata.get('OPENAI_API_KEY')"
    ],
    "metadata": { "id": "HVDCIqIDRJ3Z" },
    "execution_count": null,
    "outputs": []
  },
  {
    "cell_type": "code",
    "source": [
      "# Pass the key explicitly; relying on the environment alone raised\n",
      "# `ValueError: Did not find openai_api_key` when the variable was unset.\n",
      "property_extractor = DoctranPropertyExtractor(\n",
      "    properties=properties,\n",
      "    openai_api_key=OPENAI_API_KEY,\n",
      ")"
    ],
    "metadata": { "id": "H5jIV-OYfJRg" },
    "execution_count": null,
    "outputs": []
  }
 ] }